Interactions on a circle¶
We draw individuals with quality $\alpha_i$ and let them work in pairs. Adjacent individuals produce output
$$ y_{i,i-1} = \alpha_i + \alpha_{i-1} + \epsilon_i $$
Extreme case¶
We consider $N-1$ observations; hence, after normalizing one quality to zero, the system is exactly invertible.
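To see why, take for instance $N=4$ and normalize $\alpha_0 = 0$: the three observations are
$$ y_{1,0} = \alpha_1 + \epsilon_1, \qquad y_{2,1} = \alpha_1 + \alpha_2 + \epsilon_2, \qquad y_{3,2} = \alpha_2 + \alpha_3 + \epsilon_3, $$
so (ignoring the noise) the qualities can be recovered recursively: $\alpha_1$ from the first equation, then $\alpha_2$ from the second, then $\alpha_3$ from the third.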
import numpy as np
from scipy.linalg import toeplitz
import matplotlib.pylab as plt
N=100
eps_sd = 0.2
np.random.seed(6534565)
# i works with i+1 and i-1
A = 0.7*np.random.normal(size=N)
A = np.sort(A) # we arrange individual by quality
A = A - A[0] # normalize first A to 0 (we have N-1 observations)
A0 = A[1:N]
# we assign pairs
M = toeplitz(np.concatenate((np.ones(1), np.zeros(N-2))),
             np.concatenate((np.ones(2), np.zeros(N-2))))
M = M[:, 1:N]  # drop the column of the normalized individual
The matrix is
$$ \begin{bmatrix} 1 & 0 & 0 & \cdots & 0 \\ 1 & 1 & 0 & \cdots & 0 \\ 0 & 1 & 1 & \cdots & 0 \\ \vdots & & \ddots & \ddots & \vdots \\ 0 & 0 & \cdots & 1 & 1 \\ \end{bmatrix} $$
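As a quick sanity check (a small sketch reusing the M just built), we can verify that the design matrix is square and full rank, so the system is indeed exactly invertible:
# M should be (N-1) x (N-1) with full rank
print(M.shape)
print(np.linalg.matrix_rank(M))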
# we compute the outcome
Y = np.matmul(M, A0) + eps_sd*np.random.normal(size = N-1)
# we compute the OLS estimator
A_hat = np.linalg.solve( np.matmul(M.T,M), np.matmul(M.T,Y) )
plt.plot(A0,A_hat)
plt.plot(A0,A_hat,'o')
plt.plot(A0[[0,N-2]],A0[[0,N-2]],':')
plt.title(r"$\hat{\alpha}$ versus $\alpha$")
plt.xlabel("truth")
plt.ylabel("estimates")
plt.show()
plt.plot(A0[0:(N-2)],A0[1:(N-1)], 'o')
plt.plot(A_hat[0:(N-2)],A_hat[1:(N-1)],'o')
plt.title("We scatter the 2 member of each teams")
plt.xlabel("Team partner 1")
plt.ylabel("Team partner 2")
plt.show()
P1 = np.arange(0,(N-2))
P2 = np.arange(1,(N-1))
print(f" Cov(P1,P2) = {np.corrcoef(A0[P1],A0[P2])[0,1]: 3.3f}")
print(f" Cov(P1_hat,P2_hat) = {np.corrcoef(A_hat[P1],A_hat[P2])[0,1]: 3.3f}")
Corr(P1,P2) =  0.999
Corr(P1_hat,P2_hat) =  0.000
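By construction the true qualities of adjacent partners are almost perfectly correlated (we sorted them), yet the estimates are not. A small diagnostic sketch (reusing A_hat and A0 from above) looks at the estimation errors: in this exactly identified design each estimate inherits, with a sign flip, the error of the previous one, so the errors accumulate along the chain and adjacent errors are strongly negatively correlated, which drowns out the correlation in the true qualities.
# diagnostic: estimation errors in the exactly identified case
err = A_hat - A0
print(f" Var(err) = {np.var(err):3.3f}")
print(f" Corr(err_i, err_i+1) = {np.corrcoef(err[P1], err[P2])[0,1]: 3.3f}")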
Slightly less extreme¶
We complement these with additional observations: we redraw $5N$ of the existing pairs at random and observe their output again.
I = np.sort(np.random.randint(N-1,size = 5*N))
M2 = np.concatenate([M,M[I,:]],axis=0)
L = M2.shape[0]
plt.spy(M2)
plt.show()
Y2 = np.matmul(M2, A0) + eps_sd*np.random.normal(size = L)
A2_hat = np.linalg.solve( np.matmul(M2.T,M2), np.matmul(M2.T,Y2) )
plt.plot(A0,A2_hat)
plt.plot(A0,A2_hat,'o')
plt.plot(A0[[0,N-2]],A0[[0,N-2]],':')
plt.title(r"$\hat{\alpha}$ versus $\alpha$")
plt.show()
print(f" Cov(P1,P2) = {np.cov(A0[P1],A0[P2])[0,1]: 3.3f}")
print(f" Cov(P1_hat,P2_hat) = {np.cov(A2_hat[P1],A2_hat[P2])[0,1]: 3.3f}")
Cov(P1,P2) =  0.451
Cov(P1_hat,P2_hat) =  0.029
Looking at squared residuals¶
R2 = Y2 - M2 @ A2_hat
print(f" var(E_hat) = {np.var(R2):3.3f}")
print(f" var(E) = {eps_sd**2:3.3f}")
var(E_hat) = 0.035
var(E) = 0.040
# degrees-of-freedom correction
L/(L - N + 1) * np.var(R2)
np.float64(0.04197843810992346)
Ridge¶
from sklearn.linear_model import Ridge
clf = Ridge(alpha=1.0)
res = clf.fit(M2, Y2)
A_hat_ridge = res.predict(np.eye(N-1))
plt.plot(A0,A_hat_ridge)
plt.plot(A0,A_hat_ridge,'o')
plt.plot(A0[[0,N-2]],A0[[0,N-2]],':')
plt.show()
np.cov(A_hat_ridge[0:(N-2)],A_hat_ridge[1:(N-1)])
array([[0.45751909, 0.40177832],
       [0.40177832, 0.41693006]])
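The amount of shrinkage, and hence how much of the pair covariance survives, depends on the penalty. A small sketch (the penalty values below are arbitrary, not tuned) loops over a few values of the Ridge penalty:
# covariance of adjacent estimates for a few (arbitrary) penalties
for pen in [0.1, 1.0, 10.0]:
    A_hat_r = Ridge(alpha=pen).fit(M2, Y2).predict(np.eye(N-1))
    c = np.cov(A_hat_r[0:(N-2)], A_hat_r[1:(N-1)])[0,1]
    print(f" alpha = {pen:5.1f}: Cov(P1_hat,P2_hat) = {c: 3.3f}")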
Distribution of ranking¶
from scipy.stats import rankdata
Rmax = np.zeros(100)
Rmin = np.zeros(100)
for r in range(100):
    # redraw the outcomes with new noise
    Y = np.matmul(M, A0) + eps_sd*np.random.normal(size = N-1)
    A_hat = np.linalg.solve( np.matmul(M.T,M), np.matmul(M.T,Y) )
    # rank of the estimate of the truly best and truly worst individual
    Rmax[r] = rankdata(A_hat)[N-2]
    Rmin[r] = rankdata(A_hat)[0]
plt.hist(Rmin)
plt.show()
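The same can be done for the rank of the estimate of the truly best individual, using the Rmax vector computed in the loop above:
plt.hist(Rmax)
plt.show()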
Bias correction¶
We start by creating the Q matrix that defines the quadratic form we are interested in, and we check that $\frac{1}{N-1}\,\hat a' Q \hat a$ indeed gives the variance of the estimates.
# we look at the variance
Q = np.diag(np.ones(N-1)) - 1/(N-1)*np.ones((N-1,N-1))
print(f" a_hat 'Q a_hat = {1/(N-1)*np.matmul( A2_hat, np.matmul(Q,A2_hat )):3.3f}")
print(f" Var(a_hat) = {np.var(A2_hat):3.3f}")
print(f" Var(a0) = {np.var(A0):3.3f}")
a_hat' Q a_hat = 0.882
Var(a_hat) = 0.882
Var(a0) = 0.466
We can do the same for any random vector:
# compare R' Q R to var(R)
R = np.random.normal(size=N-1)
s = 1/(N-1) * R.T @ Q @ R
print(f" R 'Q R = {s:3.3f}")
print(f" Var(R) = {np.var(R):3.3f}")
R' Q R = 0.936
Var(R) = 0.936
We also take a look at the variance of the error; recall that the naive residual variance understates the true error variance, because the fitted fixed effects absorb part of the noise:
R2 = Y2 - M2 @ A2_hat
print(f" var(E_hat) = {np.var(R2):3.3f}")
print(f" var(E) = {eps_sd**2:3.3f}")
var(E_hat) = 0.035
var(E) = 0.040
We compute the correction on this error by rescaling the residual variance with the residual degrees of freedom
$$ \text{tr}\big( I - M (M'M)^{-1} M' \big) $$
# rescale the residual variance by the residual degrees of freedom
df = np.trace( np.eye(M2.shape[0]) - M2 @ np.linalg.inv( M2.T @ M2 ) @ M2.T )
R2_c = M2.shape[0] * np.var(R2) / df
print(f" var(E_hat) = {np.var(R2):3.3f}")
print(f" var(E) = {eps_sd**2:3.3f} *")
print(f" var(E_bc) = {R2_c:3.3f} *")
var(E_hat) = 0.035
var(E) = 0.040 *
var(E_bc) = 0.042 *
$$ B = \frac{\sigma^2}{N-1} \text{Tr}\Big[ (M'M)^{-1} Q \Big]$$
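This follows from substituting $\hat a = a + (M'M)^{-1} M' \epsilon$ into the quadratic form and taking expectations (the cross term vanishes since $\epsilon$ has mean zero):
$$ \mathbb{E}\Big[\tfrac{1}{N-1}\,\hat a' Q \hat a\Big] = \tfrac{1}{N-1}\, a' Q a + \tfrac{\sigma^2}{N-1}\,\text{Tr}\Big[ (M'M)^{-1} Q \Big]. $$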
# We compute the bias formula
MMinv = np.linalg.inv( np.matmul(M2.T,M2) )
B = (eps_sd)**2/(N-1) * np.trace( np.matmul(MMinv,Q ))
print(f" Var(a0) = {np.var(A0):3.3f} <<")
print(f" Var(a_hat) = {np.var(A2_hat):3.3f}")
print(f" Bias = {B:3.3f}")
print(f" Var(a_hat) - B = {np.var(A2_hat)-B:3.3f} <<")
Var(a0) = 0.466 <<
Var(a_hat) = 0.882
Bias = 0.397
Var(a_hat) - B = 0.485 <<
We see that here the bias correction did an excellent job at recentering the estimate of the variance. The theoretical result is that the corrected estimator is centered on average. Let's run a Monte Carlo, fixing the matrix and redrawing the errors, to check this.
res = []
for r in range(500):
    # redraw the epsilon
    Y2 = np.matmul(M2, A0) + eps_sd * np.random.normal(size = L)
    # solve for the estimates
    A2_hat = np.linalg.solve( np.matmul(M2.T,M2), np.matmul(M2.T,Y2) )
    # save the variance of the estimates
    res.append(np.var(A2_hat))
res = np.array(res)  # so that res - B below is elementwise
plt.hist(np.log(res-B),alpha=0.5)
plt.hist(np.log(res),alpha=0.5)
plt.axvline(np.log(np.var(A0)),color="red",linestyle=":",label="truth")
plt.axvline(np.log(np.mean(res-B)),color="blue",linestyle=":",label="FE-BC")
plt.axvline(np.log(np.mean(res)),color="orange",linestyle=":",label="FE")
plt.legend()
plt.show()
We notice the large dispersion in the estimates; still, the bias-corrected one is centered on the truth while the uncorrected one is not.
print(f" Var(A) = {np.var(A0):3.3f} <<")
print(f" MonteCarlo mean of FE = {np.mean(res):3.3f}")
print(f" MonteCarlo mean of FE-BC = {np.mean(res - B):3.3f} <<")
Var(A) = 0.466 <<
MonteCarlo mean of FE = 0.870
MonteCarlo mean of FE-BC = 0.472 <<
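So far the bias term uses the true $\sigma^2$; in practice it has to be estimated. A feasible version (just a sketch: we plug the degrees-of-freedom corrected residual variance R2_c computed earlier into the bias formula) would look like:
# feasible bias correction: replace the true error variance with its estimate
B_hat = R2_c/(N-1) * np.trace(np.matmul(MMinv, Q))
print(f" Bias (true sigma^2) = {B:3.3f}")
print(f" Bias (estimated sigma^2) = {B_hat:3.3f}")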