| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 
 | 
 
 
 
 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import csv
 
 from numpy import linalg
 
 
 def multiplyList(myList):
     """Return True if any element of ``myList`` equals zero.

     Despite its name, this function performs no multiplication; it only
     reports whether the sequence contains a zero.

     Args:
         myList: Iterable of numbers.

     Returns:
         True when at least one element compares equal to 0, otherwise False
         (an empty iterable yields False).
     """
     return any(x == 0 for x in myList)
 def loadDataSet(fileName):
     """Load a tab-separated numeric file into feature rows and labels.

     Every field is parsed as a float. The last field of each row is the
     label; the remaining fields are the features. The first feature is
     then remapped: 0 becomes 2 and -1 becomes 3, other values are kept.

     Args:
         fileName: Path of the tab-delimited text file to read.

     Returns:
         A ``(dataMat, labelMat)`` tuple where ``dataMat`` is a list of
         feature lists and ``labelMat`` is the list of labels.

     Raises:
         OSError: If the file cannot be opened.
         ValueError: If a field cannot be parsed as a float.
     """
     # NOTE(review): presumably the remap keeps the first column free of
     # zeros, since score() skips rows containing a zero -- confirm.
     first_column_remap = {0: 2, -1: 3}
     dataMat = []
     labelMat = []
     # newline='' lets the csv module do its own line-ending handling.
     with open(fileName, 'r', newline='') as f:
         for row in csv.reader(f, delimiter='\t'):
             if not row:
                 # Blank lines (e.g. a trailing newline) carry no sample.
                 continue
             values = [float(x) for x in row]
             features = values[:-1]
             if features:
                 features[0] = first_column_remap.get(features[0], features[0])
             dataMat.append(features)
             labelMat.append(values[-1])
     return dataMat, labelMat
 def get_w(X, Y):
     """Compute ordinary least-squares weights from the normal equations.

     Solves ``(X^T X) w = X^T Y`` for ``w``. The system is solved directly
     instead of forming the explicit inverse of ``X^T X``, which gives the
     same result with better numerical behaviour.

     Args:
         X: 2-D array of shape (samples, features).
         Y: Array of targets with one entry (or row) per sample.

     Returns:
         The weight array ``w``; 1-D of length ``features`` when ``Y`` is 1-D.

     Raises:
         numpy.linalg.LinAlgError: If ``X^T X`` is singular.
     """
     X = np.asarray(X)
     Y = np.asarray(Y)
     gram = np.dot(X.T, X)
     return np.linalg.solve(gram, np.dot(X.T, Y))
 def get_w_lwlr(X, Y, x_test, k=0.02):
     """Compute locally weighted linear regression weights for one query point.

     Each training row ``X[j]`` receives the Gaussian kernel weight
     ``exp(-||x_test - X[j]||^2 / (2 * k**2))`` and the weighted normal
     equations ``(X^T W X) w = X^T W Y`` are solved for ``w``.

     Args:
         X: 2-D array of shape (samples, features).
         Y: Array of targets with one entry (or row) per sample.
         x_test: Query point with ``features`` entries (list or array).
         k: Kernel bandwidth; smaller values localise the fit more strongly.
             Defaults to 0.02.

     Returns:
         The weight array ``w`` fitted around ``x_test``; 1-D of length
         ``features`` when ``Y`` is 1-D.

     Raises:
         numpy.linalg.LinAlgError: If ``X^T W X`` is singular.
     """
     X = np.asarray(X, dtype=float)
     Y = np.asarray(Y, dtype=float)
     offsets = X - np.asarray(x_test, dtype=float)
     sq_dist = np.sum(offsets * offsets, axis=1)
     weights = np.exp(sq_dist / (-2 * k ** 2))
     # Scaling the columns of X^T by the weights equals X^T @ diag(weights)
     # without allocating a samples x samples matrix.
     weighted_Xt = X.T * weights
     return np.linalg.solve(np.dot(weighted_Xt, X), np.dot(weighted_Xt, Y))
 
 def score(X, Y, testx, testy, n_samples=1000,
           skip_indices=(165, 276, 277, 355, 762, 999, 510)):
     """Print per-sample predictions and summary metrics for LWLR and OLS.

     For each evaluated test row, the locally weighted prediction (from
     ``get_w_lwlr``) and the ordinary least-squares prediction (from
     ``get_w``) are printed next to the true value. Afterwards R^2 for both
     models and the MAE / MSE of the locally weighted model are printed.

     A row is evaluated when its index is below ``n_samples``, is not listed
     in ``skip_indices`` and the row contains no zero (``multiplyList``).
     The baseline mean and the metric divisors use exactly those rows.

     Args:
         X: 2-D training feature array.
         Y: Training targets.
         testx: Sequence of test feature rows.
         testy: Sequence of true targets, parallel to ``testx``.
         n_samples: Upper bound on the number of leading rows considered.
         skip_indices: Row indices excluded from the evaluation.

     Raises:
         ValueError: If no row is left to evaluate.
     """
     # NOTE(review): the default skip_indices are presumably rows of the
     # original dataset for which the LWLR fit fails -- TODO confirm.
     limit = min(n_samples, len(testx), len(testy))
     skipped = set(skip_indices)
     evaluated = [
         i for i in range(limit)
         if i not in skipped and not multiplyList(testx[i])
     ]
     if not evaluated:
         raise ValueError("no samples left to evaluate")
     count = len(evaluated)
     mean_y = sum(testy[i] for i in evaluated) / count

     # The OLS weights do not depend on the test row, so fit them once.
     w = get_w(X, Y)

     sse_lwlr = 0  # sum of squared errors, locally weighted model
     sse_ols = 0  # sum of squared errors, ordinary least squares
     sst = 0  # total sum of squares around the mean of the evaluated targets
     abs_err_lwlr = 0  # sum of absolute errors, locally weighted model
     for i in evaluated:
         x_i = np.array(testx[i])
         w_lwlr = get_w_lwlr(X, Y, testx[i])
         y2 = np.dot(x_i, w_lwlr.T)
         print(testx[i], i)
         print("真实值:", testy[i])
         y1 = np.dot(x_i, w.T)
         sse_lwlr += (y2 - testy[i]) ** 2
         sse_ols += (y1 - testy[i]) ** 2
         sst += (mean_y - testy[i]) ** 2
         abs_err_lwlr += abs(y2 - testy[i])
         print("局部加权预测值:", y2)
         print("普通线性回归预测值:", y1)
     print("局部加权线性回归R^2分析:", 1 - sse_lwlr / sst)
     print("普通线性回归R^2分析:", 1 - sse_ols / sst)
     # MAE is the mean absolute error, MSE the mean squared error.
     print("mae平均绝对误差:", abs_err_lwlr / count)
     print("mse均方误差:", sse_lwlr / count)
 
 def main():
     """Load 'abalone.txt', print the feature rows and run score() on them."""
     features, targets = loadDataSet('abalone.txt')
     print(features)
     # The same samples are used for fitting and for evaluation.
     score(np.array(features), np.transpose(np.array(targets)),
           features, targets)
 
 # Run the evaluation only when executed as a script, not on import.
 if __name__ == "__main__":
     main()
 
 |