import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
 
 
 
 
warnings.filterwarnings('ignore')

# Load the abalone dataset (file name kept as in the original project).
abalone = pd.read_csv("鲍鱼.csv")
abalone.head()

# One-hot encode the categorical Sex column and add a constant column of ones.
sex_onehot = pd.get_dummies(abalone['Sex'], prefix='Sex')
abalone[sex_onehot.columns] = sex_onehot
abalone['ones'] = 1

# The conventional age estimate for abalone is the ring count plus 1.5 years.
abalone['age'] = abalone['Rings'] + 1.5
y = abalone['age']

features_with_ones = ["Length", "Diameter", "Height", "Whole_weight", "Shucked_weight",
                      "Viscera_weight", "Shell_weight", 'Sex_F', 'Sex_M', 'Sex_I', "ones"]
features_without_ones = ["Length", "Diameter", "Height", "Whole_weight", "Shucked_weight",
                         "Viscera_weight", "Shell_weight", 'Sex_F', 'Sex_M', 'Sex_I']
x = abalone[features_with_ones]
 
 
# Split the data, then fit ridge regression (alpha = 1.0) on the raw features.
ridge = Ridge(alpha=1.0)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=111)
ridge.fit(x_train[features_without_ones], y_train)

# Collect the fitted weights: coefficients followed by the intercept.
w_ridge = []
w_ridge.extend(ridge.coef_)
w_ridge.append(ridge.intercept_)
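
# Optional sketch (not part of the original script): ridge regression penalizes
# coefficient magnitudes, so features on very different scales are shrunk unevenly.
# A common remedy is to standardize the inputs first with a scaler + ridge pipeline;
# shown here only as an illustrative alternative fit.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

scaled_ridge = make_pipeline(StandardScaler(), Ridge(alpha=1.0))
scaled_ridge.fit(x_train[features_without_ones], y_train)
print("Scaled ridge R2 on test set:", scaled_ridge.score(x_test[features_without_ones], y_test))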
 
 
# Sweep the regularization strength over a log-spaced grid and record the
# coefficients obtained at each alpha.
alphas = np.logspace(-10, 10, 20)
coef = pd.DataFrame()
for alpha in alphas:
    ridge_clf = Ridge(alpha=alpha)
    ridge_clf.fit(x_train[features_without_ones], y_train)
    df = pd.DataFrame([ridge_clf.coef_], columns=x_train[features_without_ones].columns)
    df['alpha'] = alpha
    coef = pd.concat([coef, df], ignore_index=True)
print(coef.round(decimals=2))
 
 
 
# Plot how each coefficient shrinks as alpha grows (the ridge regularization path).
plt.rcParams['font.sans-serif'] = ['Microsoft Yahei']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.dpi'] = 100
plt.figure(figsize=(9, 6))
for feature in x_train.columns[:-1]:
    plt.plot('alpha', feature, data=coef)
ax = plt.gca()
ax.set_xscale('log')
plt.legend(loc='upper right')
plt.xlabel(r'$\alpha$', fontsize=15)
plt.ylabel('Coefficient', fontsize=15)
plt.show()
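
# Optional sketch (an assumption, not in the original): rather than reading alpha
# off the coefficient paths by eye, scikit-learn's RidgeCV can select it by
# cross-validation over the same grid.
from sklearn.linear_model import RidgeCV

ridge_cv = RidgeCV(alphas=alphas)
ridge_cv.fit(x_train[features_without_ones], y_train)
print("RidgeCV selected alpha:", ridge_cv.alpha_)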
 
 
# Predict ages for the full dataset with the ridge model fitted above
# (the original repeated the identical train/test split and fit here).
predict_value = ridge.predict(x[features_without_ones])
 
 
# Fit an ordinary least-squares model on the same features for comparison.
lr = LinearRegression()
lr.fit(x_train[features_without_ones], y_train)
y_test_pred_lr = lr.predict(x_test[features_without_ones])
 
 
 
 
# Compare the two models on the held-out test set.
y_test_pred_ridge = ridge.predict(x_test[features_without_ones])

print("Linear regression MAE:", round(mean_absolute_error(y_test, y_test_pred_lr), 4))
print("Ridge regression MAE: ", round(mean_absolute_error(y_test, y_test_pred_ridge), 4))

print("Linear regression MSE:", round(mean_squared_error(y_test, y_test_pred_lr), 4))
print("Ridge regression MSE: ", round(mean_squared_error(y_test, y_test_pred_ridge), 4))
 
 
 
 
 
 
 
 
# Plot the raw ages against the ridge predictions for every sample.
x1 = range(len(y))
y1 = y
y2 = predict_value

plt.figure(figsize=(10, 6), dpi=80)
plt.title('Raw & predicted age (line chart)')
plt.xlabel('Abalone index')
plt.ylabel('Age')
plt.plot(x1, y1, color='orange', label='Raw')
plt.plot(x1, y2, color='blue', label='Predict')
plt.grid(alpha=0.5)
plt.legend(loc='upper right')
plt.show()
 