吴恩达《机器学习》——第四次作业:BP神经网络

本次作业和上次作业的目标一样,属于多元分类任务。不过这次使用的是BP神经网络。准确度比没有使用神经网络的第三次作业要高,但是训练的时间也长了很多。
参考:https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes/tree/master/code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib

import scipy.optimize as opt
from sklearn.metrics import classification_report

from sklearn.preprocessing import OneHotEncoder

# Load the training set from the MATLAB file: 'X' holds the features and
# 'y' the class labels (presumably one row per sample — confirm against the
# data file).
data = sio.loadmat('ex4data1.mat')
X = data['X']
y = data['y']

# One-hot encode the labels as a dense array.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old name, so try the new spelling first and fall back for
# older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y)

def sigmoid(z):
    """Logistic activation function, applied element-wise.

    Args:
        z: Scalar or NumPy array/matrix of pre-activation values.

    Returns:
        ``1 / (1 + exp(-z))`` with the same shape (and array type) as ``z``.
    """
    return 1 / (1 + np.exp(-z))

def forward_propagate(X, theta1, theta2):
    """Run one forward pass through the single-hidden-layer network.

    Matrix products use ``np.dot`` rather than ``*`` so the function gives
    the same result for ``np.matrix`` inputs and also works for plain 2-D
    ``ndarray`` inputs (where ``*`` would be element-wise).

    Args:
        X: 2-D input, one sample per row (without the bias column).
        theta1: Hidden-layer weights; its column count must equal the number
            of columns of ``X`` plus one (bias).
        theta2: Output-layer weights; its column count must equal the number
            of rows of ``theta1`` plus one (bias).

    Returns:
        Tuple ``(a1, z2, a2, z3, h)``: the bias-augmented input, hidden
        pre-activation, bias-augmented hidden activation, output
        pre-activation and the network output.
    """
    m = X.shape[0]

    # Prepend a column of ones as the bias unit of the input layer.
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    z2 = np.dot(a1, theta1.T)

    # Hidden activations, again with a leading bias column.
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)
    z3 = np.dot(a2, theta2.T)
    h = sigmoid(z3)

    return a1, z2, a2, z3, h

def cost(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    """Regularized cross-entropy cost of the network.

    Args:
        params: Flat 1-D array holding theta1 followed by theta2.
        input_size: Number of input features (without bias).
        hidden_size: Number of hidden units (without bias).
        num_labels: Number of output classes.
        X: Input samples, one per row.
        y: One-hot encoded labels, one row per sample.
        learning_rate: Despite the name, this is used as the L2
            regularization strength (lambda), not as a step size.

    Returns:
        The scalar cost J.
    """
    X = np.matrix(X)
    y = np.matrix(y)
    m = X.shape[0]

    # Unroll the flat parameter vector into the two weight matrices.
    split = hidden_size * (input_size + 1)
    theta1 = np.matrix(np.reshape(params[:split], (hidden_size, input_size + 1)))
    theta2 = np.matrix(np.reshape(params[split:], (num_labels, hidden_size + 1)))

    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)

    # Cross-entropy summed over every sample and class in one vectorized
    # expression (equivalent to accumulating it row by row).
    first_term = np.multiply(-y, np.log(h))
    second_term = np.multiply(1 - y, np.log(1 - h))
    J = np.sum(first_term - second_term) / m

    # L2 penalty; the bias columns (index 0) are not regularized.
    J += (float(learning_rate) / (2 * m)) * (
        np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))
    )

    return J
# Network dimensions and regularization setting.
input_size = 400
hidden_size = 25
num_labels = 10
learning_rate = 1  # passed to cost/back_prop, where it scales the L2 penalty

# Random initial weights for both layers (bias columns included), drawn
# uniformly from [-0.125, 0.125) and stored as one flat vector.
params = 0.25 * (
    np.random.random(
        size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)
    )
    - 0.5
)
# Sanity check of the initial cost (disabled):
#print(cost(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate))

def sigmoid_gradient(z):
    """Derivative of the sigmoid, evaluated element-wise at ``z``.

    Args:
        z: Scalar or NumPy array/matrix of pre-activation values.

    Returns:
        ``sigmoid(z) * (1 - sigmoid(z))`` with the same shape as ``z``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(z)
    return np.multiply(s, 1 - s)

def back_prop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    """Compute the regularized cost and its gradient by backpropagation.

    The per-sample accumulation is expressed as matrix products over the
    whole batch, which yields the same sums without a Python-level loop.

    Args:
        params: Flat 1-D array holding theta1 followed by theta2.
        input_size: Number of input features (without bias).
        hidden_size: Number of hidden units (without bias).
        num_labels: Number of output classes.
        X: Input samples, one per row.
        y: One-hot encoded labels, one row per sample.
        learning_rate: Despite the name, this is used as the L2
            regularization strength (lambda), not as a step size.

    Returns:
        Tuple ``(J, grad)``: the scalar cost and the flat gradient vector,
        laid out like ``params`` (theta1 gradient, then theta2 gradient).
    """
    X = np.matrix(X)
    y = np.matrix(y)
    m = X.shape[0]

    # Unroll the flat parameter vector into the two weight matrices.
    split = hidden_size * (input_size + 1)
    theta1 = np.matrix(np.reshape(params[:split], (hidden_size, input_size + 1)))
    theta2 = np.matrix(np.reshape(params[split:], (num_labels, hidden_size + 1)))

    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)

    # Cross-entropy cost over all samples and classes.
    first_term = np.multiply(-y, np.log(h))
    second_term = np.multiply(1 - y, np.log(1 - h))
    J = np.sum(first_term - second_term) / m

    # L2 penalty; the bias columns (index 0) are not regularized.
    J += (float(learning_rate) / (2 * m)) * (
        np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))
    )

    # Output-layer error for every sample at once.
    d3 = h - y
    # Hidden-layer error. The bias unit receives no error signal, so the
    # bias column of theta2 is dropped before propagating backwards.
    d2 = np.multiply(np.dot(d3, theta2[:, 1:]), sigmoid_gradient(z2))

    # Each product sums the per-sample outer products over the batch.
    delta1 = np.dot(d2.T, a1) / m
    delta2 = np.dot(d3.T, a2) / m

    # Gradient of the L2 penalty (bias columns excluded).
    delta1[:, 1:] = delta1[:, 1:] + (theta1[:, 1:] * learning_rate) / m
    delta2[:, 1:] = delta2[:, 1:] + (theta2[:, 1:] * learning_rate) / m

    # Flatten back into the layout the optimizer expects.
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))

    return J, grad

# Minimize the regularized cost with the truncated Newton (TNC) method.
# jac=True tells SciPy that back_prop returns the (cost, gradient) pair
# from a single call.
fmin = opt.minimize(
    fun=back_prop,
    x0=params,
    args=(input_size, hidden_size, num_labels, X, y_onehot, learning_rate),
    method='TNC',
    jac=True,
    options={'maxiter': 250},
)

# Rebuild the two weight matrices from the optimizer's flat solution vector.
X = np.matrix(X)
theta1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))

# Predict on the training set: pick the output unit with the highest
# activation. argmax is 0-based, so +1 maps it back to a label — this
# assumes the labels in y are 1..num_labels (TODO confirm against the data).
a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
y_pred = np.array(np.argmax(h, axis=1) + 1)

# Accuracy report (per-class metrics) on the training data.
print(classification_report(y, y_pred))
------ 本文结束 ------
0%