Andrew Ng's Machine Learning, Assignment 3: Multi-class Classification

After using camelCase for a long time I've gotten a bit tired of it, so from now on I'll use snake_case in Python and camelCase in C++.
This assignment trains a model on a dataset of handwritten digits: a multi-class classification task.

Reference: https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes/tree/master/code
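The approach here is one-vs-all (one-vs-rest): train one binary logistic-regression classifier per class, where classifier $k$ treats class $k$ as positive and every other class as negative, then label a new example with the class whose classifier outputs the highest probability:

$$\hat{y} = \arg\max_k \, h_{\theta^{(k)}}(x), \qquad h_{\theta^{(k)}}(x) = \frac{1}{1 + e^{-\theta^{(k)\top} x}}$$

For the digit dataset this means 10 classifiers, and the code below ends by taking the argmax over the 10 predicted probabilities.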

import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
from sklearn.metrics import classification_report

def load_data(path, transpose=True):
    '''Load the .mat file and return features X and labels y.'''
    data = sio.loadmat(path)
    y = data.get('y')

    y = y.reshape(y.shape[0])  # flatten (5000, 1) into (5000,)

    X = data.get('X')

    if transpose:
        # Each 400-dim row is a 20x20 image stored column-major (MATLAB order),
        # so transpose each image to make it upright, then flatten it again.
        X = np.array([im.reshape((20, 20)).T for im in X])
        X = np.array([im.reshape(400) for im in X])

    return X, y



raw_X, raw_y = load_data('ex3data1.mat')

#def plot_image(image):
#    '''Display a single image.'''
#    fig, ax = plt.subplots(figsize=(1, 1))
#    ax.matshow(image.reshape((20, 20)), cmap=matplotlib.cm.binary)
#    plt.xticks(np.array([]))
#    plt.yticks(np.array([]))


#pick_one = np.random.randint(0, 5000)  # pick a random index in [0, 5000)
#plot_image(raw_X[pick_one, :])
#plt.show()
#print("this should be {}".format(raw_y[pick_one]))

#def plot_100_image(X):
#    '''Display a 10x10 grid of 100 random images.'''
#    size = int(np.sqrt(X.shape[1]))
#
#    sample_idx = np.random.choice(np.arange(X.shape[0]), 100)
#    sample_images = X[sample_idx, :]
#
#    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))
#
#    for r in range(10):
#        for c in range(10):
#            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)),
#                                   cmap=matplotlib.cm.binary)
#
#    plt.xticks(np.array([]))
#    plt.yticks(np.array([]))


# Prepare the data
X = np.insert(raw_X, 0, values=np.ones(raw_X.shape[0]), axis=1)  # prepend a column of all ones (bias term)

y_matrix = []

for k in range(1, 11):
    y_matrix.append((raw_y == k).astype(int))

y_matrix = [y_matrix[-1]] + y_matrix[:-1]
y = np.array(y_matrix)
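# y now has shape (10, 5000): row k is the 0/1 label vector for classifier k.
# In ex3data1.mat the digit 0 is stored with label 10, so the last label
# vector is moved to the front, making row 0 correspond to digit 0.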


#print(y.shape)

# Train a single binary model first
def sigmoid(z):
    '''Sigmoid activation function.'''
    return 1 / (1 + np.exp(-z))

def gradient(theta, X, y):
    '''Gradient of the unregularized cost.'''
    return (1 / len(X)) * X.T @ (sigmoid(X @ theta) - y)

def cost(theta, X, y):
    '''Cross-entropy cost for logistic regression.'''
    h = sigmoid(X @ theta)
    inner = y * np.log(h) + (1 - y) * np.log(1 - h)
    return -np.sum(inner) / len(X)

def regularized_cost(theta, X, y, l=1):
    '''Cost with L2 regularization (theta_0 is not penalized).'''
    theta_j1_to_n = theta[1:]
    regularized_term = (l / (2 * len(X))) * np.power(theta_j1_to_n, 2).sum()

    return cost(theta, X, y) + regularized_term
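# For reference, the function being minimized is the regularized cost
#   J(θ) = -(1/m) Σᵢ [ yⁱ·log hθ(xⁱ) + (1-yⁱ)·log(1-hθ(xⁱ)) ] + (λ/2m) Σⱼ₌₁ θⱼ²
# where the bias term θ₀ is excluded from the penalty.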

def regularized_gradient(theta, X, y, l=1):
    '''Gradient with L2 regularization (theta_0 is not penalized).'''
    theta_j1_to_n = theta[1:]
    regularized_theta = (l / len(X)) * theta_j1_to_n

    regularized_term = np.concatenate([np.array([0]), regularized_theta])

    return gradient(theta, X, y) + regularized_term
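# The corresponding gradient, with θ₀ again left unpenalized:
#   ∂J/∂θⱼ = (1/m) Σᵢ (hθ(xⁱ) - yⁱ)·xⱼⁱ + (λ/m)·θⱼ   (for j ≥ 1)
#   ∂J/∂θ₀ = (1/m) Σᵢ (hθ(xⁱ) - yⁱ)·x₀ⁱ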

def logistic_regression(X, y, l=1):
    '''Train one binary logistic-regression classifier and return theta.'''
    theta = np.zeros(X.shape[1])
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'disp': True})

    final_theta = res.x

    return final_theta

def predict(x, theta):
    '''Predict 0/1 labels from features x and parameters theta.'''
    prob = sigmoid(x @ theta)
    return (prob >= 0.5).astype(int)
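# Note: sigmoid(z) >= 0.5 exactly when z >= 0, so thresholding the
# probability at 0.5 is equivalent to predicting 1 whenever x @ theta >= 0.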

t0 = logistic_regression(X, y[0])

#print(t0.shape)
#y_pred = predict(X, t0)
#
#print('Accuracy = {}'.format(np.mean(y[0] == y_pred)))


# Train the k-class model (one classifier per digit)

k_theta = np.array([logistic_regression(X, y[k]) for k in range(10)])
#print(k_theta.shape)  # (10, 401)

prob_matrix = sigmoid(X @ k_theta.T)  # (5000, 10): one probability per class per example
np.set_printoptions(suppress=True)

y_pred = np.argmax(prob_matrix, axis=1)
# argmax returns the index of the maximum along the given axis; axis=1 means along each row

y_answer = raw_y.copy()
y_answer[y_answer == 10] = 0  # map label 10 back to digit 0
print(classification_report(y_answer, y_pred))
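classification_report gives per-class precision, recall, and F1. For a single overall number, accuracy can also be computed directly; a minimal check using the variables defined above:

accuracy = np.mean(y_pred == y_answer)
print('Accuracy = {:.2%}'.format(accuracy))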
------ End of post ------