using Distributions, Random 

"""
    dgp_keane_neal(; b=0, n=1000, F, rho, rng=Random.default_rng())

Generate one sample of size `n` following the DGP of Keane & Neal (2024), page 193.

The sample is constructed as

    x = sqrt(F / n) * z + v,    v = rho * u + sqrt(1 - rho^2) * eta
    y = b * x + u

where `u`, `eta` and `z` are standard normal draws taken from `rng`.

# Keywords
- `b`   -- structural coefficient beta (scalar)
- `n`   -- sample size
- `F`   -- reduced form F-stat
- `rho` -- degree of endogeneity
- `rng` -- random number generator used for all draws; pass a seeded generator
           to obtain a reproducible sample

# Returns
Named tuple `(; x, y, z)`:
- `x`   -- length-`n` vector representing endogenous regressor
- `y`   -- length-`n` vector representing outcome variable
- `z`   -- length-`n` vector representing instrumental variable

# Throws
- `DomainError` if `rho` lies outside `[-1, 1]` or `F` is negative.
"""
function dgp_keane_neal(; b=0, n=1000, F, rho, rng=Random.default_rng())
    # Fail with a readable message instead of an opaque error from `sqrt`.
    abs(rho) <= 1 || throw(DomainError(rho, "rho must lie in [-1, 1]"))
    F >= 0 || throw(DomainError(F, "F must be non-negative"))

    # first-stage coefficient implied by the requested F-stat
    p = sqrt(F / n)

    # draw order (u, eta, z) is kept fixed so seeded runs are comparable
    u = randn(rng, n)
    eta = randn(rng, n)
    z = randn(rng, n)

    v = rho .* u .+ sqrt(1 - rho^2) .* eta
    x = p .* z .+ v
    y = b .* x .+ u

    return (; x, y, z)
end

dgp_keane_neal (generic function with 1 method)


"""
    ols_estimator(x, y)

Estimate by OLS the slope of a linear model with one regressor and no intercept.

# Arguments
- `x`   -- length-n vector representing regressor
- `y`   -- length-n vector representing outcome variable

# Returns
Named tuple `(; bhat, se, t)`:
- `bhat` -- OLS estimate of beta
- `se`   -- standard error of OLS estimate; the residual variance is the sum of
            squared residuals divided by `length(y)`
- `t`    -- signed t-statistic `bhat / se`; take `abs` at the call site for a
            two-sided test
"""
function ols_estimator(x, y)
    # OLS estimator (least-squares solve of x * b = y)
    bhat = x \ y

    # standard error
    uhat = y - x * bhat
    s = uhat'uhat / length(y)
    se = sqrt(s / (x'x))

    # t-statistic (signed)
    t = bhat / se

    return (; bhat, se, t)
end

ols_estimator (generic function with 1 method)


"""
    iv_estimator(x, y, z)

Estimate by IV a linear model with one endogenous variable and one instrument.

# Arguments
- `x`   -- length-n vector representing endogenous regressor
- `y`   -- length-n vector representing outcome variable
- `z`   -- length-n vector representing instrumental variable

# Returns
Named tuple `(; bhat, se, t)`:
- `bhat` -- IV estimate of beta, `(z'y) / (x'z)`
- `se`   -- standard error of IV estimate
- `t`    -- signed t-statistic `bhat / se`; take `abs` at the call site for a
            two-sided test

# Notes
For calculation of standard error, we're using the formula on page 190 of
Keane & Neal (2024): `se = sqrt(s / TSS)` with `s` the mean squared IV residual and
`TSS = n * pihat^2 * var(z)`, where `pihat` is the reduced form coefficient.
"""
function iv_estimator(x, y, z)
    # IV estimator
    bhat = (z'y) / (x'z)

    # standard error (using formula in Keane & Neal (2024))
    n = length(y)
    pihat = z \ x                 # reduced form coefficient estimate
    TSS = n * pihat^2 * var(z)
    uhat = y - x * bhat
    s = uhat'uhat / n
    se = sqrt(s / TSS)

    # t-statistic (signed)
    t = bhat / se

    return (; bhat, se, t)
end

iv_estimator (generic function with 1 method)


"""
    simulate_distribution(; b=0, F, rho, rep=10000, n=1000)

Build finite sample distributions of the OLS estimator, the IV estimator and the
AR statistic by simulation.

Each of the `rep` iterations draws a fresh sample from `dgp_keane_neal` and stores
the OLS results (`ols_estimator(x, y)`), the IV results (`iv_estimator(x, y, z)`)
and the t-statistic from regressing `y` on `z`.

# Keywords
- `b`       -- structural coefficient beta used in the DGP
- `F`       -- reduced form F-stat
- `rho`     -- degree of endogeneity
- `rep`     -- number of repetitions/simulations run
- `n`       -- sample size of every simulated sample

# Returns
Named tuple of length-`rep` vectors:
- `bols_dst`    -- simulations of OLS estimator
- `sols_dst`    -- simulations of standard error of OLS estimator
- `tols_dst`    -- simulations of tstat of OLS estimator
- `biv_dst`     -- simulations of IV estimator
- `siv_dst`     -- simulations of standard error of IV estimator
- `tiv_dst`     -- simulations of tstat of IV estimator
- `ar_dst`      -- simulations of AR-statistic
"""
function simulate_distribution(; b=0, F, rho, rep=10000, n=1000)
    bols_dst = Vector{Float64}(undef, rep)
    sols_dst = Vector{Float64}(undef, rep)
    tols_dst = Vector{Float64}(undef, rep)

    biv_dst = Vector{Float64}(undef, rep)
    siv_dst = Vector{Float64}(undef, rep)
    tiv_dst = Vector{Float64}(undef, rep)

    ar_dst = Vector{Float64}(undef, rep)

    for i in 1:rep
        sample = dgp_keane_neal(b=b, n=n, F=F, rho=rho)

        # results are read by field name so they do not depend on tuple order
        ols = ols_estimator(sample.x, sample.y)
        bols_dst[i] = ols.bhat
        sols_dst[i] = ols.se
        tols_dst[i] = ols.t

        iv = iv_estimator(sample.x, sample.y, sample.z)
        biv_dst[i] = iv.bhat
        siv_dst[i] = iv.se
        tiv_dst[i] = iv.t

        # AR statistic: t-statistic from regressing Y on Z
        ar_dst[i] = ols_estimator(sample.z, sample.y).t
    end

    return (; bols_dst, sols_dst, tols_dst, biv_dst, siv_dst, tiv_dst, ar_dst)
end

simulate_distribution (generic function with 1 method)


# Baseline design: no endogeneity (rho = 0) combined with a strong IV (popF = 73.75)
dgp_zero = simulate_distribution(; rho=0, F=73.75)

# All other designs, stored as a matrix: rows follow parms_rho, columns follow parms_F
parms_rho = (0.10, 0.30, 0.50)
parms_F = (1.82, 2.30, 10, 29.44, 73.75)
dgps = map(Iterators.product(parms_rho, parms_F)) do (rho, F)
    simulate_distribution(; rho, F)
end;


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)

# One histogram of the simulated IV standard errors per design; rows follow
# parms_rho and columns follow parms_F.
plt = plot(
    layout=(length(parms_rho), length(parms_F)),
    size=(1800, 800),
    plot_title="Empirical Distribution of standard errors (truncated at 90th percentile) for Different DGPs")

# A plain loop is used because the calls are made only for their effect on `plt`.
for (i, rho) in enumerate(parms_rho), (j, F) in enumerate(parms_F)
    se_draws = dgps[i, j].siv_dst
    histogram!(plt,
        se_draws,
        normalize=true,
        subplot=length(parms_F) * (i - 1) + j,
        # 50 equal-width bins between 0 and the 90th percentile of the draws
        bins=range(0, quantile(se_draws, 0.90), length=51),
        legend=false,
        title="\\rho = $rho and popF = $F")
end
display(plt)


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)

# Scatter of simulated OLS estimates against their standard errors for the
# rho = 0, popF = 73.75 design; draws with |t| > 1.96 are overlaid in red.
let est = dgp_zero.bols_dst, stderr_ols = dgp_zero.sols_dst
    rejected = abs.(dgp_zero.tols_dst) .> 1.96

    scatter(est, stderr_ols;
        size=(800, 600),
        legend=false,
        title="OLS Estimates vs Their Standard errors \\n (with \\rho = 0 and popF = 73.75)",
        xlabel="OLS estimate",
        ylabel="Standard error")
    scatter!(est[rejected], stderr_ols[rejected]; mc=:red)

    # reference lines through the origin and (4, 2) / (4, -2)
    plot!([0, 4], [0, 2]; seriestype=:straightline, lc=:blue, linestyle=:dash)
    plot!([0, 4], [0, -2]; seriestype=:straightline, lc=:blue, linestyle=:dash)
end


using Statistics
# Sample correlation between the simulated OLS estimates and their standard errors
# in the rho = 0, popF = 73.75 design held in `dgp_zero`.
cor(dgp_zero.bols_dst, dgp_zero.sols_dst)

0.005275048579007424


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)

# Scatter of simulated IV estimates against their standard errors for the design
# stored at dgps[1, 5]; draws with |t| > 1.96 are overlaid in red.
let sim = dgps[1, 5]
    rejected = abs.(sim.tiv_dst) .> 1.96

    scatter(sim.biv_dst, sim.siv_dst;
        size=(800, 600),
        legend=false,
        title="IV Estimates vs Their Standard errors \\n (with \\rho = 0.1 and popF = 73.75)",
        xlabel="IV estimate",
        ylabel="Standard error")
    scatter!(sim.biv_dst[rejected], sim.siv_dst[rejected]; mc=:red)

    # reference lines through the origin and (4, 2) / (4, -2)
    plot!([0, 4], [0, 2]; seriestype=:straightline, lc=:blue, linestyle=:dash)
    plot!([0, 4], [0, -2]; seriestype=:straightline, lc=:blue, linestyle=:dash)
end


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)

# Scatter of simulated IV estimates against their standard errors for the design
# stored at dgps[3, 1]; draws with |t| > 1.96 are overlaid in red.
let sim = dgps[3, 1]
    rejected = abs.(sim.tiv_dst) .> 1.96

    scatter(sim.biv_dst, sim.siv_dst;
        size=(800, 600),
        legend=false,
        title="IV Estimates vs Their Standard errors \\n (with \\rho = 0.5 and popF = 1.82)",
        xlabel="IV estimate",
        ylabel="Standard error")
    scatter!(sim.biv_dst[rejected], sim.siv_dst[rejected]; mc=:red)

    # reference lines through the origin and (4, 2) / (4, -2)
    plot!([0, 4], [0, 2]; seriestype=:straightline, lc=:blue, linestyle=:dot)
    plot!([0, 4], [0, -2]; seriestype=:straightline, lc=:blue, linestyle=:dot)
end


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)

# Same scatter as for dgps[3, 1], but with the axes restricted: x to (-4, 4) and
# y to at most 4 or the 99th percentile of the standard errors, whichever is smaller.
let sim = dgps[3, 1]
    rejected = abs.(sim.tiv_dst) .> 1.96
    se_cap = min(4, quantile(sim.siv_dst, 0.99))

    scatter(sim.biv_dst, sim.siv_dst;
        size=(800, 600),
        xlims=(-4, 4),
        ylims=(0, se_cap),
        legend=false,
        title="IV Estimates vs Their Standard errors \\n (with \\rho = 0.5 and popF = 1.82, outliers removed)",
        xlabel="IV estimate",
        ylabel="Standard error")
    scatter!(sim.biv_dst[rejected], sim.siv_dst[rejected]; mc=:red)

    # reference lines through the origin and (4, 2) / (4, -2)
    plot!([0, 4], [0, 2]; seriestype=:straightline, lc=:blue, linestyle=:dash)
    plot!([0, 4], [0, -2]; seriestype=:straightline, lc=:blue, linestyle=:dash)

    # vertical marker at 0.5
    vline!([0.5, 0.5]; lw=3, lc=:red, linestyle=:dot)
end


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)

# Grid of IV-estimate vs standard-error scatters, one panel per design
# (rows follow parms_rho, columns follow parms_F).
plt = plot(
    layout=(length(parms_rho), length(parms_F)),
    size=(1800, 800),
    plot_title="Plot of IV Estimator vs its Standard Error for different DGPs")

for (i, rho) in enumerate(parms_rho), (j, F) in enumerate(parms_F)
    k = length(parms_F) * (i - 1) + j # subplot counter
    sim = dgps[i, j]
    rejected = abs.(sim.tiv_dst) .> 1.96 # draws with |t| above 1.96

    scatter!(plt, sim.biv_dst, sim.siv_dst;
        subplot=k,
        ylims=(0, min(4, quantile(sim.siv_dst, 0.99))),
        legend=false,
        title="\\rho = $rho and popF = $F")
    scatter!(sim.biv_dst[rejected], sim.siv_dst[rejected]; subplot=k, mc=:red)

    # reference lines through the origin and (4, 2) / (4, -2)
    plot!([0, 4], [0, 2]; seriestype=:straightline, subplot=k, lc=:blue, linestyle=:dash)
    plot!([0, 4], [0, -2]; seriestype=:straightline, subplot=k, lc=:blue, linestyle=:dash)

    # vertical marker at the panel's rho
    vline!([rho, rho]; subplot=k, lw=3, lc=:red, linestyle=:dot)
    xlims!(-4, 4)
end

display(plt)


"""
    power_function(; brange=-1.00:0.10:1.00, F, rho, rep=10000, crit=1.96)

Calculate statistical power (probability to reject null hypothesis H0: truebeta = 0)
when the underlying true beta ranges in values determined by `brange`.

For every value in `brange` a distribution is simulated with `simulate_distribution`
and the rejection rate is the share of simulated statistics whose absolute value
exceeds `crit`.

# Keywords
- `brange`  -- range of values for true beta used in DGP creation
- `F`       -- first stage population F-stat
- `rho`     -- degree of endogeneity
- `rep`     -- number of simulations per value of beta
- `crit`    -- critical value compared against the absolute statistics

# Returns
Tuple `(brange, power_tols, power_tiv, power_ar)`:
- `brange`      -- range of values for true beta used in DGP
- `power_tols`  -- power function for the OLS t-test
- `power_tiv`   -- power function for the IV t-test
- `power_ar`    -- power function for AR-test
"""
function power_function(; brange=-1.00:0.10:1.00, F, rho, rep=10000, crit=1.96)
    # Rejection rates are fractions, so the storage is Float64 regardless of the
    # element type of `brange` (an integer range would otherwise fail on assignment).
    npoints = length(brange)
    power_tols = Vector{Float64}(undef, npoints)
    power_tiv = Vector{Float64}(undef, npoints)
    power_ar = Vector{Float64}(undef, npoints)

    rejects(stat) = abs(stat) > crit

    for (i, b) in enumerate(brange)
        simdst = simulate_distribution(b=b, F=F, rho=rho, rep=rep)
        power_tols[i] = mean(rejects, simdst.tols_dst)
        power_tiv[i] = mean(rejects, simdst.tiv_dst)
        power_ar[i] = mean(rejects, simdst.ar_dst)
    end

    return brange, power_tols, power_tiv, power_ar
end

power_function (generic function with 1 method)


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)

# Power of the OLS and IV t-tests over a fine grid of true betas
# for the rho = 0, popF = 73.75 design.
brange, pow_ols, pow_iv, pow_ar = power_function(brange=-1:0.010:1, F=73.75, rho=0)

plot(brange, [pow_ols, pow_iv];
    size=(800, 600),
    xticks=-1:0.1:1,
    ylims=(0, 1),
    xlabel="True \\beta",
    label=["OLS" "IV"],
    linewidth=3,
    linestyle=[:solid :dash],
    linecolor=:black,
    legend=:bottomright,
    title="Empirical Power Curves Compared: \\rho=0 and popF=73.75")

# horizontal reference at 0.05
hline!([0.05, 0.05]; linestyle=:dash, label=false)


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)


# Grid of power curves (OLS solid, IV dashed), one panel per design; each panel
# title reports the IV rejection rate at true beta = 0 in percent.
plt = plot(layout=(length(parms_rho), length(parms_F)), size=(1800, 800),
        plot_title="Empirical Power Curves: OLS (solid) vs IV (dashed) (empirical size based on IV)")

for (i, rho) in enumerate(parms_rho), (j, F) in enumerate(parms_F)
    k = length(parms_F) * (i - 1) + j # subplot counter

    # Loop-specific names: the original reused `brange`/`pow_*` (overwriting the
    # globals of the single-panel cell) and assigned to `size`, shadowing Base.size.
    betas, ols_power, iv_power, _ = power_function(F=F, rho=rho)
    emp_size = round(100 * iv_power[betas .== 0][], digits=3)

    plot!(plt,
        betas, [ols_power, iv_power],
        label=["OLS" "IV"],
        linestyle=[:solid :dash],
        linecolor=:black,
        legend=false,
        subplot=k,
        margin=5mm,
        title="\\rho = $rho and popF = $F \\n Empirical size = $emp_size %")
    hline!([0.05, 0.05], linestyle=:dot, subplot=k, legend=false)
    ylims!(0, 1)
    xlabel!("True \\beta")
end

display(plt)


using Plots
using Plots.PlotMeasures: mm
Plots.theme(:wong2)


# Grid of power curves (AR solid, IV dashed), one panel per design; each panel
# title reports the AR rejection rate at true beta = 0 in percent.
plt = plot(layout=(length(parms_rho), length(parms_F)), size=(1800, 800),
        plot_title="Empirical Power Curves for: AR (solid) vs IV (dashed) (empirical size based on AR)")

for (i, rho) in enumerate(parms_rho), (j, F) in enumerate(parms_F)
    k = length(parms_F) * (i - 1) + j # subplot counter

    # Loop-specific names: the original reused `brange`/`pow_*` (overwriting the
    # globals of the single-panel cell) and assigned to `size`, shadowing Base.size.
    betas, _, iv_power, ar_power = power_function(F=F, rho=rho)
    emp_size = round(100 * ar_power[betas .== 0][], digits=3)

    plot!(plt,
        betas, [ar_power, iv_power],
        label=["AR" "IV"],
        linestyle=[:solid :dash],
        linecolor=:black,
        legend=false,
        subplot=k,
        margin=5mm,
        title="\\rho = $rho and popF = $F \\n Empirical size = $emp_size %")
    hline!([0.05, 0.05], linestyle=:dot, subplot=k, legend=false)
    ylims!(0, 1)
    xlabel!("True \\beta")
end

display(plt)

population F	sample F
1.82	8.96
2.30	10.00
5.78	16.38
10.00	23.10
29.44	50.00
73.75	104.70

Lecture 8: Deeper Look at Weak Instruments: Keane and Neal (2024)¶

Summary of Stock & Yogo (2005), (Lecture 7)¶

Keane and Neal (2024)¶

Practical Lessons¶

Data Generating Process (DGP)¶

Julia Functions¶

Creating DGPs¶

Standard Errors¶

Plotting IV Estimates vs Their Standard Errors¶

\hat{\beta}_{\text{IV}} - \beta¶

s_{ZX}¶

Power Functions¶

Digression: Effect Size¶

The AR Test¶

Discussion¶

values for $\rho$ that are of practical relevance
0.00 (no endogeneity)
0.10
0.30
0.50