6.6.1 Ridge Regression

# Ridge coefficient path: fit one model per penalty on a log-spaced grid and
# record its coefficients and its test-set MSE.
n_lambdas = 100
lambdas = np.logspace(5, -2, n_lambdas)  # from 1e5 down to 1e-2
alphas = lambdas / 2  # the value actually handed to Ridge(alpha=...)

coefs, scores = [], []
for alpha in alphas:
    ridge = Ridge(alpha=alpha).fit(X_train_scaled, y_train)
    coefs.append(ridge.coef_)
    scores.append(mean_squared_error(y_test, ridge.predict(X_test_scaled)))

# Keep entry 0 along the middle axis so coefs has one row per alpha.
coefs = np.stack(coefs)[:, 0, :]
scores = np.asarray(scores)

      
    

# Plot the overall coefficient norm together with each individual coefficient
# against alpha, using a logarithmic x-axis.
norm_coefs = linalg.norm(coefs, axis=1)  # one norm per row of coefs
# Drawn first and kept so the legend can label only this line.
norm_coefs_line = plt.plot(alphas, norm_coefs)
plt.plot(alphas, coefs)
plt.xlabel('alpha')
plt.ylabel('Norm of Ridge coefficients')
plt.xscale('log')
plt.legend(norm_coefs_line, ['Total norm']);

      
    

# Compare the test MSE at a few reference penalties: an intercept-only model,
# lambda = 0, lambda = 4 and a very large lambda.
idx_4 = find_nearest_idx(lambdas, 4)
# 1e10 is 10^10 (the value named in the printed label); the former literal
# 10e10 is 10^11.
# NOTE(review): the lambdas grid is np.logspace(5, -2, ...), so it tops out at
# 1e5; presumably find_nearest_idx returns the index of that largest entry, so
# the "10^10" row is really the lambda = 1e5 fit -- confirm this is intended.
idx_10e10 = find_nearest_idx(lambdas, 1e10)

# NOTE(review): scikit-learn's docs advise LinearRegression over Ridge(alpha=0)
# for numerical reasons; kept as-is so the reported number does not change.
ridge = Ridge(alpha=0)
ridge.fit(X_train_scaled, y_train)
lr_MSE = mean_squared_error(y_test, ridge.predict(X_test_scaled))

# Baseline: predict the training mean for every test observation.  Working on
# plain arrays yields a scalar whether y is a Series or a one-column DataFrame,
# instead of relying on np.mean(DataFrame) returning a Series and on positional
# [0] indexing of it.  Assumes a single target column -- TODO confirm.
intercept_only_MSE = float(
    np.mean((np.asarray(y_train).mean() - np.asarray(y_test)) ** 2)
)

print(f'MSE for intercept-only model: {intercept_only_MSE:.0f}')
print(f'MSE for lambda = 0:           {lr_MSE:.0f}')
print(f'MSE for lambda = 4:           {scores[idx_4]:.0f}')
print(f'MSE for lambda = 10^10:       {scores[idx_10e10]:.0f}')

MSE for intercept-only model: 193253
MSE for lambda = 0:           114781
MSE for lambda = 4:           98606
MSE for lambda = 10^10:       190139

# Pick the penalty by cross-validation (cv=10) over the alpha grid, then refit
# a plain Ridge at that alpha and score it on the test set.
ridgeCV = RidgeCV(
    alphas=alphas,
    scoring='neg_mean_squared_error',
    cv=10,
).fit(X_train_scaled, y_train)
ridgeCV_alpha = ridgeCV.alpha_

ridge = Ridge(alpha=ridgeCV_alpha).fit(X_train_scaled, y_train)
ridge_MSE = mean_squared_error(y_test, ridge.predict(X_test_scaled))
# The selected alpha is reported on the lambda scale (lambda = 2 * alpha).
print(f'MSE for the best lambda = {ridgeCV_alpha*2:.0f}: {ridge_MSE:.0f}')

MSE for the best lambda = 242: 96922

# Refit at the cross-validated alpha on X_scaled / y (presumably the full data
# set -- confirm) and show the intercept followed by one coefficient per
# column of X.
ridge = Ridge(alpha=ridgeCV_alpha).fit(X_scaled, y)
pd.Series(
    np.hstack((ridge.intercept_, ridge.coef_)),
    index=['Intercept', *X.columns],
)

Intercept      535.925882
Assists          6.256680
AtBat            4.230942
CAtBat          25.453157
CHits           42.138824
CHmRun          37.083594
CRBI            44.617907
CRuns           42.728298
CWalks           7.394743
Errors         -12.112109
Hits            45.839927
HmRun            1.061817
PutOuts         53.824576
RBI             22.566189
Runs            28.448012
Walks           39.297081
Years            0.467366
Division_W     -46.095585
League_N        13.701139
NewLeague_N      3.533970
dtype: float64

ACT6100

6.6.1 Ridge Regression

6.6.2 The Lasso