6.6.1 Ridge Regression

# Penalty grid: 100 lambda values, log-spaced from 1e5 down to 1e-2.
n_lambdas = 100
lambdas = np.logspace(5, -2, n_lambdas)
# This lab hands half of each lambda to scikit-learn as `alpha`.
alphas = lambdas / 2

# Refit a ridge model for every alpha on the training split, keeping the
# fitted coefficients and the mean squared error on the test split.
coefs, scores = [], []
for alpha in alphas:
    ridge = Ridge(alpha=alpha).fit(X_train_scaled, y_train)
    coefs.append(ridge.coef_)
    scores.append(mean_squared_error(y_test, ridge.predict(X_test_scaled)))

# The [:, 0, :] indexing assumes every coef_ is 2-D with a single row, so the
# stacked array is (n_alphas, 1, n_features); index 0 drops that middle axis.
coefs = np.asarray(coefs)[:, 0, :]
scores = np.asarray(scores)

# Overall size of the coefficient vector at each alpha (one norm per row).
norm_coefs = linalg.norm(coefs, axis=1)

# Draw the total norm first so its line handle can be singled out in the
# legend, then one line per coefficient on the same log-scaled alpha axis.
norm_coefs_line = plt.plot(alphas, norm_coefs)
plt.plot(alphas, coefs)
plt.xlabel('alpha')
plt.ylabel('Norm of Ridge coefficients')
plt.xscale('log')
plt.legend(norm_coefs_line, ['Total norm']);
../_images/Ridge_Lasso_lab_2_0.png
# Look up the grid positions of the lambdas reported below.  `scores` was
# filled in the same order as `lambdas`, so the indices are shared.
# find_nearest_idx is defined elsewhere; presumably it returns the index of
# the grid value closest to the target -- confirm against its definition.
idx_4 = find_nearest_idx(lambdas, 4)
# 1e10 is the 10^10 the printed label promises (the literal 10e10 would be
# 10^11).  The name idx_10e10 is kept for any later cell that refers to it.
idx_10e10 = find_nearest_idx(lambdas, 1e10)

# Unpenalised fit used as the "lambda = 0" reference.
# NOTE(review): scikit-learn advises against Ridge(alpha=0) for numerical
# reasons and recommends LinearRegression instead; left as-is because that
# import is not visible from this cell.
ridge = Ridge(alpha=0)
ridge.fit(X_train_scaled, y_train)
lr_MSE = mean_squared_error(y_test, ridge.predict(X_test_scaled))

# Baseline: predict the training-set mean of the target for every test row.
# Computed on plain arrays so the result is a scalar whether the targets are
# Series or single-column DataFrames (assumes a single target column).
intercept_only_MSE = np.mean(
    (np.asarray(y_test) - np.asarray(y_train).mean()) ** 2
)

print(f'MSE for intercept-only model: {intercept_only_MSE:.0f}')
print(f'MSE for lambda = 0:           {lr_MSE:.0f}')
print(f'MSE for lambda = 4:           {scores[idx_4]:.0f}')
print(f'MSE for lambda = 10^10:       {scores[idx_10e10]:.0f}')
MSE for intercept-only model: 193253
MSE for lambda = 0:           114781
MSE for lambda = 4:           98606
MSE for lambda = 10^10:       190139
# Select alpha from the grid by 10-fold cross-validation on the training
# split, scored with negated mean squared error.
ridgeCV = RidgeCV(
    alphas=alphas,
    cv=10,
    scoring='neg_mean_squared_error',
)
ridgeCV.fit(X_train_scaled, y_train)
ridgeCV_alpha = ridgeCV.alpha_

# Refit a plain ridge model at the chosen alpha and evaluate it on the
# held-out test split.
ridge = Ridge(alpha=ridgeCV_alpha).fit(X_train_scaled, y_train)
ridge_MSE = mean_squared_error(y_test, ridge.predict(X_test_scaled))

# alpha was defined as lambda / 2 in this lab, hence the factor of two.
print(f'MSE for the best lambda = {ridgeCV_alpha*2:.0f}: {ridge_MSE:.0f}')
MSE for the best lambda = 242: 96922
# Refit at the cross-validated alpha, this time on X_scaled / y rather than
# the training split, and tabulate the intercept followed by one coefficient
# per column of X.
ridge = Ridge(alpha=ridgeCV_alpha).fit(X_scaled, y)
pd.Series(
    np.array((ridge.intercept_, *ridge.coef_)),
    index=['Intercept', *X.columns],
)
Intercept      535.925882
Assists          6.256680
AtBat            4.230942
CAtBat          25.453157
CHits           42.138824
CHmRun          37.083594
CRBI            44.617907
CRuns           42.728298
CWalks           7.394743
Errors         -12.112109
Hits            45.839927
HmRun            1.061817
PutOuts         53.824576
RBI             22.566189
Runs            28.448012
Walks           39.297081
Years            0.467366
Division_W     -46.095585
League_N        13.701139
NewLeague_N      3.533970
dtype: float64

6.6.2 The Lasso

# Penalty grid: 200 scikit-learn `alpha` values, log-spaced from 1e3 down
# to 1e-2.
n_alphas = 200
alphas = np.logspace(3, -2, n_alphas)

# Fit one lasso per alpha on the training split, recording the fitted
# coefficients and the mean squared error on the test split.
coefs = []
scores = []
for alpha in alphas:
    # max_iter is raised above the Lasso default of 1000, presumably so the
    # solver also converges at the weakly penalised end of the grid.
    lasso = Lasso(alpha=alpha, max_iter=10000)
    lasso.fit(X_train_scaled, y_train)
    coefs.append(lasso.coef_)
    scores.append(mean_squared_error(y_test, lasso.predict(X_test_scaled)))

# Overall size of the coefficient vector at each alpha (one norm per fit).
norm_coefs = linalg.norm(coefs, axis=1)

# Draw the total norm first so its line handle can be singled out in the
# legend, then one line per coefficient on the same log-scaled alpha axis.
norm_coefs_line = plt.plot(alphas, norm_coefs)
plt.plot(alphas, coefs)
plt.xscale('log')
plt.xlabel('alpha')
# These are lasso fits, so the axis label names the lasso, not ridge.
plt.ylabel('Norm of Lasso coefficients')
plt.legend(norm_coefs_line, ['Total norm']);
../_images/Ridge_Lasso_lab_10_0.png
# Select alpha from the grid by 10-fold cross-validation on the training
# split.  The target is flattened with .values.ravel() before being passed
# to LassoCV.
lassoCV = LassoCV(alphas=alphas, cv=10, max_iter=10000)
lassoCV.fit(X_train_scaled, y_train.values.ravel())
lassoCV_alpha = lassoCV.alpha_

# Refit a plain lasso at the chosen alpha with the same iteration budget as
# the cross-validation above, then evaluate it on the held-out test split.
lasso = Lasso(alpha=lassoCV_alpha, max_iter=10000)
lasso.fit(X_train_scaled, y_train)
lasso_MSE = mean_squared_error(y_test, lasso.predict(X_test_scaled))

# NOTE(review): the `*2` mirrors the ridge section's alpha = lambda / 2
# convention, but this section's grid is defined directly in alpha --
# confirm that doubling is the intended lambda for the lasso.
print(f'MSE for the best lambda = {lassoCV_alpha*2:.0f}: {lasso_MSE:.0f}')
MSE for the best lambda = 62: 101781
# Refit at the cross-validated alpha, this time on X_scaled / y rather than
# the training split.  max_iter matches the budget used when the alpha was
# selected, so the final fit is not cut short by the smaller default.
lasso = Lasso(alpha=lassoCV_alpha, max_iter=10000)
lasso.fit(X_scaled, y)

# Tabulate the intercept followed by one coefficient per column of X, then
# keep only the entries the lasso did not shrink to exactly zero.
coefs_lasso = pd.Series(
    np.array((lasso.intercept_, *lasso.coef_)),
    index=['Intercept', *X.columns],
)
coefs_lasso = coefs_lasso[coefs_lasso != 0]
coefs_lasso
Intercept     535.925882
CRBI          127.568265
CRuns          63.205299
Hits           78.669615
PutOuts        51.280542
Walks          44.412184
Division_W    -38.812489
dtype: float64