6.6.1 Ridge Regression
# Ridge path: fit one model per penalty on a log-spaced grid and record the
# fitted coefficients together with the test-set MSE.
n_lambdas = 100
lambdas = np.logspace(5, -2, num=n_lambdas)  # 1e5 down to 1e-2
alphas = lambdas / 2  # the value actually handed to Ridge(alpha=...)

coefs, scores = [], []
for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train_scaled, y_train)
    test_pred = ridge.predict(X_test_scaled)
    coefs.append(ridge.coef_)
    scores.append(mean_squared_error(y_test, test_pred))

# Keep row 0 of every stored coef_ so the result has one row per alpha
# (assumes each coef_ is 2-D with a single row, i.e. y_train is 2-D -- TODO confirm).
coefs = np.array(coefs)[:, 0, :]
scores = np.array(scores)
# Plot every ridge coefficient, plus the norm of the whole coefficient
# vector, against alpha on a logarithmic x-axis.
norm_coefs = linalg.norm(coefs, axis=1)  # one norm per alpha (per row of coefs)

ax = plt.gca()
# The norm curve is drawn first and its handle kept, so the legend can label
# it alone and leave the per-coefficient curves unlabelled.
norm_coefs_line = ax.plot(alphas, norm_coefs)
ax.plot(alphas, coefs)
ax.set_xscale('log')
ax.set_xlabel('alpha')
ax.set_ylabel('Norm of Ridge coefficients')
# Trailing semicolon kept from the original (notebook-style output suppression).
ax.legend(norm_coefs_line, ['Total norm']);
# Report test MSE for a few reference models: intercept-only, unpenalised
# (alpha=0), and the grid entries closest to lambda = 4 and lambda = 10^10.
# find_nearest_idx is defined outside this section; presumably it returns the
# index of the grid value closest to the target -- confirm against its definition.
idx_4 = find_nearest_idx(lambdas, 4)
# 1e10 is 10^10, matching the printed label (the previous literal 10e10 is
# 10^11). The variable name is kept so any later reference still resolves.
# NOTE(review): lambdas only spans 1e5 .. 1e-2, so the closest grid entry is
# far below 10^10 and the "10^10" row really reports the largest lambda on
# the grid -- widen the grid if a true 10^10 fit is wanted.
idx_10e10 = find_nearest_idx(lambdas, 1e10)

# Baseline with the penalty switched off (alpha=0).
ridge = Ridge(alpha=0)
ridge.fit(X_train_scaled, y_train)
lr_MSE = mean_squared_error(y_test, ridge.predict(X_test_scaled))

# Intercept-only baseline: predict the training mean for every test row.
print(f'MSE for intercept-only model: {np.mean(((y_train.mean()-y_test)**2))[0]:.0f}')
print(f'MSE for lambda = 0: {lr_MSE:.0f}')
print(f'MSE for lambda = 4: {scores[idx_4]:.0f}')
print(f'MSE for lambda = 10^10: {scores[idx_10e10]:.0f}')
MSE for intercept-only model: 193253
MSE for lambda = 0: 114781
MSE for lambda = 4: 98606
MSE for lambda = 10^10: 190139
# Choose the ridge penalty by 10-fold cross-validation over the alpha grid,
# then refit on the training split at that alpha and score on the test split.
ridgeCV = RidgeCV(alphas=alphas, cv=10, scoring='neg_mean_squared_error')
ridgeCV.fit(X_train_scaled, y_train)
ridgeCV_alpha = ridgeCV.alpha_

ridge = Ridge(alpha=ridgeCV_alpha)
ridge.fit(X_train_scaled, y_train)
ridge_test_pred = ridge.predict(X_test_scaled)
ridge_MSE = mean_squared_error(y_test, ridge_test_pred)

# alphas were built as lambdas / 2, so doubling reports the penalty as lambda.
print(f'MSE for the best lambda = {ridgeCV_alpha*2:.0f}: {ridge_MSE:.0f}')
MSE for the best lambda = 242: 96922
# Refit ridge on the full scaled data at the cross-validated alpha and show
# the intercept and coefficients labelled by column name.
ridge = Ridge(alpha=ridgeCV_alpha)
ridge.fit(X_scaled, y)

ridge_values = np.array((ridge.intercept_, *ridge.coef_))
ridge_labels = ['Intercept', *X.columns]
# Left as a bare expression so a notebook displays the resulting Series.
pd.Series(ridge_values, index=ridge_labels)
Intercept 535.925882
Assists 6.256680
AtBat 4.230942
CAtBat 25.453157
CHits 42.138824
CHmRun 37.083594
CRBI 44.617907
CRuns 42.728298
CWalks 7.394743
Errors -12.112109
Hits 45.839927
HmRun 1.061817
PutOuts 53.824576
RBI 22.566189
Runs 28.448012
Walks 39.297081
Years 0.467366
Division_W -46.095585
League_N 13.701139
NewLeague_N 3.533970
dtype: float64
6.6.2 The Lasso
# Lasso path: fit one model per alpha on a log-spaced grid and record the
# fitted coefficients together with the test-set MSE.
n_alphas = 200
alphas = np.logspace(3, -2, num=n_alphas)  # 1e3 down to 1e-2

# Both stay plain Python lists here (no array conversion in this cell).
coefs, scores = [], []
for alpha in alphas:
    lasso = Lasso(alpha=alpha, max_iter=10000)
    lasso.fit(X_train_scaled, y_train)
    test_pred = lasso.predict(X_test_scaled)
    coefs.append(lasso.coef_)
    scores.append(mean_squared_error(y_test, test_pred))
# Plot every lasso coefficient, plus the norm of the whole coefficient
# vector, against alpha on a logarithmic x-axis.
norm_coefs = linalg.norm(coefs, axis=1)  # one norm per alpha
# Drawn first and its handle kept, so the legend can label this curve alone.
norm_coefs_line = plt.plot(alphas, norm_coefs)
plt.plot(alphas, coefs)  # one curve per coefficient
plt.xscale('log')
plt.xlabel('alpha')
# Label corrected: these are Lasso fits; the text previously said "Ridge",
# carried over from the ridge plot.
plt.ylabel('Norm of Lasso coefficients')
# Trailing semicolon kept from the original (notebook-style output suppression).
plt.legend(norm_coefs_line, ['Total norm']);
# Choose the lasso penalty by 10-fold cross-validation over the alpha grid,
# then refit on the training split at that alpha and score on the test split.
lassoCV = LassoCV(alphas=alphas, cv=10, max_iter=10000)
# The target is flattened to 1-D before being handed to LassoCV.
lassoCV.fit(X_train_scaled, y_train.values.ravel())
lassoCV_alpha = lassoCV.alpha_

# Use the same iteration cap as the path loop and the CV search, so the refit
# is not run under a tighter default limit than the fits that selected alpha.
lasso = Lasso(alpha=lassoCV_alpha, max_iter=10000)
lasso.fit(X_train_scaled, y_train)
lasso_MSE = mean_squared_error(y_test, lasso.predict(X_test_scaled))

# NOTE(review): the lasso grid is defined directly in alpha; the *2 mirrors
# the ridge section's "alphas = lambdas / 2" conversion -- confirm that the
# same lambda convention is intended for the lasso.
print(f'MSE for the best lambda = {lassoCV_alpha*2:.0f}: {lasso_MSE:.0f}')
MSE for the best lambda = 62: 101781
# Refit the lasso on the full scaled data at the cross-validated alpha and
# list only the terms whose fitted value is non-zero.
# max_iter matches the cap used by the path loop and the CV search, so this
# fit is not stopped earlier than the fits that selected alpha.
lasso = Lasso(alpha=lassoCV_alpha, max_iter=10000)
lasso.fit(X_scaled, y)

coefs_lasso = pd.Series(
    np.array((lasso.intercept_, *lasso.coef_)),
    index=['Intercept', *X.columns],
)
# Drop entries that are exactly zero.
coefs_lasso = coefs_lasso[coefs_lasso != 0]
# Left as a bare expression so a notebook displays the resulting Series.
coefs_lasso
Intercept 535.925882
CRBI 127.568265
CRuns 63.205299
Hits 78.669615
PutOuts 51.280542
Walks 44.412184
Division_W -38.812489
dtype: float64