Andrew Ng's Machine Learning, Assignment 3: Multi-class Classification

After using camelCase for a long time I've gotten a bit tired of it, so from now on I'll use snake_case in Python and camelCase in C++.
This assignment trains a model on a dataset of handwritten digits: a multi-class classification task.

Reference: https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes/tree/master/code
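The approach here is one-vs-all (one-vs-rest): train one binary logistic-regression classifier per class, where classifier $k$ treats class $k$ as positive and every other class as negative, then label a new example with the class whose classifier outputs the highest probability:

$$\hat{y} = \arg\max_k \, h_{\theta^{(k)}}(x), \qquad h_{\theta^{(k)}}(x) = \frac{1}{1 + e^{-\theta^{(k)\top} x}}$$

For the digit dataset this means 10 classifiers, and the code below ends by taking the argmax over the 10 predicted probabilities.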

import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
from sklearn.metrics import classification_report

def load_data(path, transpose=True):
    '''Load the .mat file and return features X and labels y.'''
    data = sio.loadmat(path)
    y = data.get('y')

    y = y.reshape(y.shape[0])  # flatten (5000, 1) into (5000,)

    X = data.get('X')

    if transpose:
        # Each 400-dim row is a 20x20 image stored column-major (MATLAB order),
        # so transpose each image to make it upright, then flatten it again.
        X = np.array([im.reshape((20, 20)).T for im in X])
        X = np.array([im.reshape(400) for im in X])

    return X, y



raw_X, raw_y = load_data('ex3data1.mat')

#def plot_image(image):
#    '''Display a single image.'''
#    fig, ax = plt.subplots(figsize=(1, 1))
#    ax.matshow(image.reshape((20, 20)), cmap=matplotlib.cm.binary)
#    plt.xticks(np.array([]))
#    plt.yticks(np.array([]))


#pick_one = np.random.randint(0, 5000)  # pick a random index in [0, 5000)
#plot_image(raw_X[pick_one, :])
#plt.show()
#print("this should be {}".format(raw_y[pick_one]))

#def plot_100_image(X):
#    '''Display a 10x10 grid of 100 random images.'''
#    size = int(np.sqrt(X.shape[1]))
#
#    sample_idx = np.random.choice(np.arange(X.shape[0]), 100)
#    sample_images = X[sample_idx, :]
#
#    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))
#
#    for r in range(10):
#        for c in range(10):
#            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)),
#                                   cmap=matplotlib.cm.binary)
#
#    plt.xticks(np.array([]))
#    plt.yticks(np.array([]))


# Prepare the data
X = np.insert(raw_X, 0, values=np.ones(raw_X.shape[0]), axis=1)  # prepend a column of all ones (bias term)

y_matrix = []

for k in range(1, 11):
    y_matrix.append((raw_y == k).astype(int))

y_matrix = [y_matrix[-1]] + y_matrix[:-1]
y = np.array(y_matrix)
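# y now has shape (10, 5000): row k is the 0/1 label vector for classifier k.
# In ex3data1.mat the digit 0 is stored with label 10, so the last label
# vector is moved to the front, making row 0 correspond to digit 0.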


#print(y.shape)

# Train a single binary model first
def sigmoid(z):
    '''Sigmoid activation function.'''
    return 1 / (1 + np.exp(-z))

def gradient(theta, X, y):
    '''Gradient of the unregularized cost.'''
    return (1 / len(X)) * X.T @ (sigmoid(X @ theta) - y)

def cost(theta, X, y):
    '''Cross-entropy cost for logistic regression.'''
    h = sigmoid(X @ theta)
    inner = y * np.log(h) + (1 - y) * np.log(1 - h)
    return -np.sum(inner) / len(X)

def regularized_cost(theta, X, y, l=1):
    '''Cost with L2 regularization (theta_0 is not penalized).'''
    theta_j1_to_n = theta[1:]
    regularized_term = (l / (2 * len(X))) * np.power(theta_j1_to_n, 2).sum()

    return cost(theta, X, y) + regularized_term
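# For reference, the function being minimized is the regularized cost
#   J(θ) = -(1/m) Σᵢ [ yⁱ·log hθ(xⁱ) + (1-yⁱ)·log(1-hθ(xⁱ)) ] + (λ/2m) Σⱼ₌₁ θⱼ²
# where the bias term θ₀ is excluded from the penalty.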

def regularized_gradient(theta, X, y, l=1):
    '''Gradient with L2 regularization (theta_0 is not penalized).'''
    theta_j1_to_n = theta[1:]
    regularized_theta = (l / len(X)) * theta_j1_to_n

    regularized_term = np.concatenate([np.array([0]), regularized_theta])

    return gradient(theta, X, y) + regularized_term
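# The corresponding gradient, with θ₀ again left unpenalized:
#   ∂J/∂θⱼ = (1/m) Σᵢ (hθ(xⁱ) - yⁱ)·xⱼⁱ + (λ/m)·θⱼ   (for j ≥ 1)
#   ∂J/∂θ₀ = (1/m) Σᵢ (hθ(xⁱ) - yⁱ)·x₀ⁱ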

def logistic_regression(X, y, l=1):
    '''Train one binary logistic-regression classifier and return theta.'''
    theta = np.zeros(X.shape[1])
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'disp': True})

    final_theta = res.x

    return final_theta

def predict(x, theta):
    '''Predict 0/1 labels from features x and parameters theta.'''
    prob = sigmoid(x @ theta)
    return (prob >= 0.5).astype(int)
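# Note: sigmoid(z) >= 0.5 exactly when z >= 0, so thresholding the
# probability at 0.5 is equivalent to predicting 1 whenever x @ theta >= 0.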

t0 = logistic_regression(X, y[0])

#print(t0.shape)
#y_pred = predict(X, t0)
#
#print('Accuracy = {}'.format(np.mean(y[0] == y_pred)))


# Train the k-class model (one classifier per digit)

k_theta = np.array([logistic_regression(X, y[k]) for k in range(10)])
#print(k_theta.shape)  # (10, 401)

prob_matrix = sigmoid(X @ k_theta.T)  # (5000, 10): one probability per class per example
np.set_printoptions(suppress=True)

y_pred = np.argmax(prob_matrix, axis=1)
# argmax returns the index of the maximum along the given axis; axis=1 means along each row

y_answer = raw_y.copy()
y_answer[y_answer == 10] = 0  # map label 10 back to digit 0
print(classification_report(y_answer, y_pred))
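classification_report gives per-class precision, recall, and F1. For a single overall number, accuracy can also be computed directly; a minimal check using the variables defined above:

accuracy = np.mean(y_pred == y_answer)
print('Accuracy = {:.2%}'.format(accuracy))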
------ End of post ------