Instrumental variable simulations¶
In [87]:
Copied!
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
We simulate $$ y_i = x_i \beta + \beta^o u_i + \epsilon_i $$ $$ x_i = z_i \kappa + u_i$$
In [145]:
Copied!
N=500
kappa = 0.1
beta = 1.0
beta_ommited = 0.3
N=500
kappa = 0.1
beta = 1.0
beta_ommited = 0.3
In [146]:
Copied!
def sim_iv(N,kappa,beta,beta_omitted):
Z = np.random.normal(size=N)
U = np.random.normal(size=N)
X = kappa * Z + U
Y = beta * X + beta_omitted*U + np.random.normal(size=N)
beta_iv = np.cov(Y,Z)[0,1] / np.cov(X,Z)[0,1]
beta_ols = np.cov(Y,X)[0,1] / np.cov(X,X)[0,1]
beta_iv_sd = np.std( Y - beta_iv * X ) * np.std( Z ) / ( np.abs(np.cov(X,Z)[0,1]) * np.sqrt(N) )
beta_ols_sd = np.std( Y - beta_ols * X ) / ( np.std( X ) * np.sqrt(N) )
return( {
'beta_iv':beta_iv,
'beta_ols':beta_ols,
'beta_iv_sd':beta_iv_sd,
'beta_ols_sd':beta_ols_sd})
def sim_iv(N,kappa,beta,beta_omitted):
Z = np.random.normal(size=N)
U = np.random.normal(size=N)
X = kappa * Z + U
Y = beta * X + beta_omitted*U + np.random.normal(size=N)
beta_iv = np.cov(Y,Z)[0,1] / np.cov(X,Z)[0,1]
beta_ols = np.cov(Y,X)[0,1] / np.cov(X,X)[0,1]
beta_iv_sd = np.std( Y - beta_iv * X ) * np.std( Z ) / ( np.abs(np.cov(X,Z)[0,1]) * np.sqrt(N) )
beta_ols_sd = np.std( Y - beta_ols * X ) / ( np.std( X ) * np.sqrt(N) )
return( {
'beta_iv':beta_iv,
'beta_ols':beta_ols,
'beta_iv_sd':beta_iv_sd,
'beta_ols_sd':beta_ols_sd})
In [200]:
Copied!
betas = [ sim_iv(N=75, kappa=0.5, beta=1.0, beta_omitted= -0.3)
for _ in range(10000)]
betas_ols = [ b['beta_ols'] for b in betas]
betas_iv = [ b['beta_iv'] for b in betas]
np.mean(betas_ols),np.mean(betas_iv)
betas = [ sim_iv(N=75, kappa=0.5, beta=1.0, beta_omitted= -0.3)
for _ in range(10000)]
betas_ols = [ b['beta_ols'] for b in betas]
betas_iv = [ b['beta_iv'] for b in betas]
np.mean(betas_ols),np.mean(betas_iv)
Out[200]:
(0.7597604420000623, 1.0233194138345478)
In [201]:
Copied!
plt.hist(
np.clip(betas_iv,-0.5,2.5),
50, color="blue",
alpha=0.3)
#plt.hist(np.clip(betas_ols,-4,6),50,color="orange",alpha=0.3)
plt.axvline(np.mean(betas_iv),color="blue",label="IV")
plt.axvline(np.mean(betas_ols),color="orange",label="OLS")
plt.axvline(1.0,color="red",label="Truth")
plt.legend()
plt.show()
plt.hist(
np.clip(betas_iv,-0.5,2.5),
50, color="blue",
alpha=0.3)
#plt.hist(np.clip(betas_ols,-4,6),50,color="orange",alpha=0.3)
plt.axvline(np.mean(betas_iv),color="blue",label="IV")
plt.axvline(np.mean(betas_ols),color="orange",label="OLS")
plt.axvline(1.0,color="red",label="Truth")
plt.legend()
plt.show()
In [177]:
Copied!
simdata = pd.DataFrame(betas)
np.mean( np.abs(simdata['beta_ols'] - beta) /simdata['beta_ols_sd'] < 1.96)
simdata = pd.DataFrame(betas)
np.mean( np.abs(simdata['beta_ols'] - beta) /simdata['beta_ols_sd'] < 1.96)
Out[177]:
0.0
In [178]:
Copied!
np.mean( np.abs(simdata['beta_iv'] - beta) /simdata['beta_iv_sd'] < 1.96)
np.mean( np.abs(simdata['beta_iv'] - beta) /simdata['beta_iv_sd'] < 1.96)
Out[178]:
0.9508
In [179]:
Copied!
stats.skew(simdata['beta_ols'])
stats.skew(simdata['beta_ols'])
Out[179]:
-0.002974289644278755
In [203]:
Copied!
stats.skew(simdata['beta_iv'])
stats.skew(simdata['beta_iv'])
Out[203]:
0.19327725217920103
In [144]:
Copied!
# Normal has kurotosis close to 0
stats.kurtosis( np.random.normal(size=5000))
# Normal has kurotosis close to 0
stats.kurtosis( np.random.normal(size=5000))
Out[144]:
-0.12704102275045948
In [204]:
Copied!
stats.skew( np.random.normal(size=5000))
stats.skew( np.random.normal(size=5000))
Out[204]:
0.01297569886871565
In [205]:
Copied!
X = np.random.normal(size=5000)
X = np.random.normal(size=5000)
In [207]:
Copied!
np.mean(X**3)
np.mean(X**3)
Out[207]:
0.0706431429377597
We look for a simple case where $E[XE]=0$ and $E[E|X \neq 0 ]$
We use a simple case where X and U is N(0,1) and $E = abs(X) + U$
we get that $E[E|U] = abs(X)$ and
$$E[E U] = E[X abs(X) ] = E[X^2 | X>0] - E[X^2 | X<0] = 0$$
In [241]:
Copied!
U = np.random.normal(size=5000)
X = np.random.normal(size=5000)
E = np.abs(X) + U
np.mean(E * X)
U = np.random.normal(size=5000)
X = np.random.normal(size=5000)
E = np.abs(X) + U
np.mean(E * X)
Out[241]:
-0.030434790298165496
In [256]:
Copied!
I = np.argsort(X)
plt.scatter(X,E,alpha=0.3,color="gray")
plt.plot(X[I], np.abs(X[I]), linewidth=5)
plt.plot(X[I], X[I] * np.abs(X[I]), linewidth=5)
plt.show()
I = np.argsort(X)
plt.scatter(X,E,alpha=0.3,color="gray")
plt.plot(X[I], np.abs(X[I]), linewidth=5)
plt.plot(X[I], X[I] * np.abs(X[I]), linewidth=5)
plt.show()
In [240]:
Copied!
Out[240]:
-0.005484281723216472
In [ ]:
Copied!