Interactions on a circle
We simply draw individual with quality \alpha_i and we let them work in pairs.
And we generate output between individuals next to each other
y_{i,i-1} = \alpha_i + \alpha_{i-1} + \epsilon_i
Extreme case
We consider N-1 observations and hence, with a normalization, the system is exactly inverstible.
import numpy as np
from scipy.linalg import toeplitz
import matplotlib.pylab as plt
N=100
eps_sd = 0.2
# i worth with i+1 and i-1
A = np.random.normal(size=N)
A = np.sort(A)
A = A - A[0] # normalize first A to 0
A0 = A[1:N]
# we arrange individual by quality
# we assigne them to firms
M = toeplitz( np.concatenate( (np.ones(1),np.zeros(N-2))) ,np.concatenate( (np.ones(2),np.zeros(N-2))) )
M = M[:,1:N]
Y = np.matmul(M, A0) + eps_sd*np.random.normal(size = N-1)
A_hat = np.linalg.solve( np.matmul(M.T,M), np.matmul(M.T,Y) )
plt.plot(A0,A_hat)
plt.plot(A0,A_hat,'o')
plt.plot(A0[[0,N-2]],A0[[0,N-2]],':')
plt.title(r"$\hat{\alpha}$ versus $\alpha$")
plt.show()
plt.plot(A0[0:(N-2)],A0[1:(N-1)])
plt.plot(A_hat[0:(N-2)],A_hat[1:(N-1)],'o')
np.cov(A0[0:(N-2)],A0[1:(N-1)]),np.cov(A_hat[0:(N-2)],A_hat[1:(N-1)])
(array([[0.98273706, 0.97395766],
[0.97395766, 0.97112546]]),
array([[ 3.06835778, -1.05242758],
[-1.05242758, 3.0070062 ]]))
Slightly less extreme
We complement with some additional observations with some additional random pairing
I = np.sort(np.random.randint(N-1,size = 2*N))
M2 = np.concatenate([M,M[I,:]],axis=0)
L = M2.shape[0]
plt.spy(M2)
<matplotlib.image.AxesImage at 0x11ee5de90>
Y2 = np.matmul(M2, A0) + eps_sd*np.random.normal(size = L)
A2_hat = np.linalg.solve( np.matmul(M2.T,M2), np.matmul(M2.T,Y2) )
plt.plot(A0,A2_hat)
plt.plot(A0,A2_hat,'o')
plt.plot(A0[[0,N-2]],A0[[0,N-2]],':')
plt.title(r"$\hat{\alpha}$ versus $\alpha$")
plt.show()
np.cov(A0[0:(N-2)],A0[1:(N-1)]),np.cov(A2_hat[0:(N-2)],A2_hat[1:(N-1)])
(array([[0.98273706, 0.97395766],
[0.97395766, 0.97112546]]),
array([[1.40412753, 0.59746337],
[0.59746337, 1.40881853]]))
Looking at square residuals
R2 = Y2 - M2 @ A2_hat
np.var(R2),eps_sd**2
(0.024416446458847333, 0.04000000000000001)
# degree of freedom correction
L/(L - N + 1) * np.var(R2)
0.036502587455976766
Ridge
from sklearn.linear_model import Ridge
clf = Ridge(alpha=1.0)
res = clf.fit(M2, Y2)
A_hat_ridge = res.predict(np.diag( np.ones(N-1)))
plt.plot(A0,A_hat_ridge)
plt.plot(A0,A_hat_ridge,'o')
plt.plot(A0[[0,N-2]],A[[0,N-2]],':')
[<matplotlib.lines.Line2D at 0x1a1d437050>]
np.cov(A_hat_ridge[0:(N-2)],A_hat_ridge[1:(N-1)])
array([[0.86190545, 0.60146522],
[0.60146522, 0.66102999]])
Distribution of ranking
from scipy.stats import rankdata
Rmax = np.zeros(100)
Rmin = np.zeros(100)
for r in range(100):
Y = np.matmul(M, A0) + eps_sd*np.random.normal(size = N-1)
A_hat = np.linalg.solve( np.matmul(M.T,M), np.matmul(M.T,Y) )
Rmax[r] = rankdata(A_hat)[N-2]
Rmin[r] = rankdata(A_hat)[0]
plt.hist(Rmin)
plt.show()
Bias correction
# we look at the variance
Q = np.diag(np.ones(N-1)) - 1/(N-1)*np.ones((N-1,N-1))
1/(N-1)*np.matmul( A2_hat, np.matmul(Q,A2_hat )), np.var(A2_hat)
(1.446334288916639, 1.4463342889166393)
# compare R' Q R to var(R)
R = np.random.normal(size=N-1)
1/(N-1) * R.T @ Q @ R, np.var(R)
(0.7495637735158497, 0.7495637735158495)
B = \frac{\sigma^2}{N-1} \text{Tr}\Big[ (M'M)^{-1} Q \Big]
# WE compute the bias formula
MMinv = np.linalg.inv( np.matmul(M2.T,M2) )
B = (eps_sd)**2/(N-1) * np.trace( np.matmul(MMinv,Q ))
B
0.8473573444886087
np.var(A0), np.var(A2_hat), np.var(A2_hat) - B,
(1.0208037318843184, 1.3165232609361357, 0.46916591644752703)
res = []
for r in range(500):
Y2 = np.matmul(M2, A0) + eps_sd*np.random.normal(size = L)
A2_hat = np.linalg.solve( np.matmul(M2.T,M2), np.matmul(M2.T,Y2) )
res.append(np.var(A2_hat))
plt.hist(np.log(res-B),alpha=0.5)
plt.hist(np.log(res),alpha=0.5)
plt.axvline(np.log(np.var(A0)),color="red",linestyle=":",label="truth")
plt.axvline(np.log(np.mean(res-B)),color="blue",linestyle=":",label="FE-BC")
plt.axvline(np.log(np.mean(res)),color="orange",linestyle=":",label="FE")
plt.legend()
plt.show()