Instrumental variable simulations
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
We simulate $$ y_i = x_i \beta + \beta^o u_i + \epsilon_i $$ $$ x_i = z_i \kappa + u_i$$
N=500
kappa = 0.1
beta = 1.0
beta_ommited = 0.3
def sim_iv(N,kappa,beta,beta_omitted):
Z = np.random.normal(size=N)
U = np.random.normal(size=N)
X = kappa * Z + U
Y = beta * X + beta_omitted*U + np.random.normal(size=N)
beta_iv = np.cov(Y,Z)[0,1] / np.cov(X,Z)[0,1]
beta_ols = np.cov(Y,X)[0,1] / np.cov(X,X)[0,1]
beta_iv_sd = np.std( Y - beta_iv * X ) * np.std( Z ) / ( np.abs(np.cov(X,Z)[0,1]) * np.sqrt(N) )
beta_ols_sd = np.std( Y - beta_ols * X ) / ( np.std( X ) * np.sqrt(N) )
return( {
'beta_iv':beta_iv,
'beta_ols':beta_ols,
'beta_iv_sd':beta_iv_sd,
'beta_ols_sd':beta_ols_sd})
betas = [ sim_iv(N=75, kappa=0.5, beta=1.0, beta_omitted= -0.3)
for _ in range(10000)]
betas_ols = [ b['beta_ols'] for b in betas]
betas_iv = [ b['beta_iv'] for b in betas]
np.mean(betas_ols),np.mean(betas_iv)
(0.7597604420000623, 1.0233194138345478)
plt.hist(
np.clip(betas_iv,-0.5,2.5),
50, color="blue",
alpha=0.3)
#plt.hist(np.clip(betas_ols,-4,6),50,color="orange",alpha=0.3)
plt.axvline(np.mean(betas_iv),color="blue",label="IV")
plt.axvline(np.mean(betas_ols),color="orange",label="OLS")
plt.axvline(1.0,color="red",label="Truth")
plt.legend()
plt.show()
simdata = pd.DataFrame(betas)
np.mean( np.abs(simdata['beta_ols'] - beta) /simdata['beta_ols_sd'] < 1.96)
0.0
np.mean( np.abs(simdata['beta_iv'] - beta) /simdata['beta_iv_sd'] < 1.96)
0.9508
stats.skew(simdata['beta_ols'])
-0.002974289644278755
stats.skew(simdata['beta_iv'])
0.19327725217920103
# Normal has kurotosis close to 0
stats.kurtosis( np.random.normal(size=5000))
-0.12704102275045948
stats.skew( np.random.normal(size=5000))
0.01297569886871565
X = np.random.normal(size=5000)
np.mean(X**3)
0.0706431429377597
We look for a simple case where E[XE]=0 and E[E|X \neq 0 ]
We use a simple case where X and U is N(0,1) and E = abs(X) + U
we get that E[E|U] = abs(X) and
E[E U] = E[X abs(X) ] = E[X^2 | X>0] - E[X^2 | X<0] = 0
U = np.random.normal(size=5000)
X = np.random.normal(size=5000)
E = np.abs(X) + U
np.mean(E * X)
-0.030434790298165496
I = np.argsort(X)
plt.scatter(X,E,alpha=0.3,color="gray")
plt.plot(X[I], np.abs(X[I]), linewidth=5)
plt.plot(X[I], X[I] * np.abs(X[I]), linewidth=5)
plt.show()
-0.005484281723216472