吴恩达《机器学习》——第七次作业:k-means算法

  1. 2D-kmeans算法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sb
from scipy.io import loadmat

def find_closet_centroids(X, centroids):
    """Assign every sample to its nearest centroid.

    Nearness is measured by the squared Euclidean distance between a row of
    ``X`` and a row of ``centroids``.

    Args:
        X: 2-D array of shape (m, n), one sample per row.
        centroids: 2-D array of shape (k, n), one centroid per row.

    Returns:
        Float array of length m. Entry ``i`` is the row index (in
        ``centroids``) of the centroid nearest to ``X[i]``; ties go to the
        lowest index. If there are no centroids, or no centroid yields a
        comparable (non-NaN) distance for a sample, its label is 0.
    """
    m = X.shape[0]
    k = centroids.shape[0]
    if m == 0 or k == 0:
        # Nothing to compare against: keep the all-zero default labels.
        return np.zeros(m)

    # Pairwise squared distances via broadcasting: (m, 1, n) - (1, k, n).
    diff = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    sq_dist = np.sum(diff ** 2, axis=2)

    # argmin would pick a NaN entry as the minimum; push NaN distances
    # (e.g. from a NaN centroid) to +inf so they can never be selected.
    sq_dist = np.where(np.isnan(sq_dist), np.inf, sq_dist)

    # Labels stay floating point so callers that convert with int(...) or
    # .astype(int) keep working unchanged.
    return np.argmin(sq_dist, axis=1).astype(float)



def compute_centroids(X, idx, k, previous_centroids=None):
    """Recompute each centroid as the mean of the samples assigned to it.

    Args:
        X: 2-D array of shape (m, n), one sample per row.
        idx: Length-m array of cluster labels; sample ``j`` belongs to
            cluster ``i`` when ``idx[j] == i``.
        k: Number of clusters.
        previous_centroids: Optional (k, n) array. When given, a cluster
            that received no samples keeps its row from this array.

    Returns:
        Float array of shape (k, n). The row of a cluster with no samples is
        taken from ``previous_centroids`` if provided, otherwise it is NaN.
    """
    n = X.shape[1]
    labels = np.asarray(idx)
    # Start from NaN so an empty cluster is marked explicitly instead of
    # arising from a 0/0 division.
    centroids = np.full((k, n), np.nan)

    for i in range(k):
        members = X[labels == i]
        if len(members) > 0:
            centroids[i, :] = members.sum(axis=0) / len(members)
        elif previous_centroids is not None:
            centroids[i, :] = previous_centroids[i, :]

    return centroids

def run_one_kmeans(X, initial_centroids, max_iters):
    """Run a single k-means optimisation from the given starting centroids.

    Alternates the assignment step (``find_closet_centroids``) and the update
    step (``compute_centroids``) for at most ``max_iters`` rounds.

    Args:
        X: 2-D array of shape (m, n), one sample per row.
        initial_centroids: (k, n) array of starting centroids.
        max_iters: Maximum number of assignment/update rounds.

    Returns:
        Tuple ``(idx, centroids)``: the length-m label array from the last
        assignment step (all zeros when ``max_iters`` is 0) and the (k, n)
        centroid array from the last update step.
    """
    k = initial_centroids.shape[0]
    idx = np.zeros(X.shape[0])
    centroids = initial_centroids

    for _ in range(max_iters):
        idx = find_closet_centroids(X, centroids)
        updated = compute_centroids(X, idx, k)

        # A cluster that lost all its samples comes back as a NaN row; keep
        # its previous position instead of losing the cluster for good.
        emptied = np.isnan(updated).any(axis=1)
        updated[emptied] = centroids[emptied]

        # Once the centroids stop moving, every further round would reproduce
        # the same labels and centroids, so stopping early changes nothing.
        converged = np.array_equal(updated, centroids)
        centroids = updated
        if converged:
            break

    return idx, centroids

def init_centroids(X, k):
    """Pick k rows of X at random to serve as initial centroids.

    Rows are drawn without replacement so that two centroids never start on
    the same sample row (identical starting centroids leave one cluster
    empty after the first assignment step). Only when ``k`` exceeds the
    number of rows does the draw fall back to sampling with replacement.

    Uses NumPy's global random state, so ``np.random.seed`` controls the draw.

    Args:
        X: 2-D array of shape (m, n), one sample per row.
        k: Number of centroids to pick.

    Returns:
        Float array of shape (k, n) holding copies of the chosen rows.
    """
    m = X.shape[0]
    chosen = np.random.choice(m, size=k, replace=k > m)
    # Copy as float so later centroid updates never alias or truncate X.
    return np.array(X[chosen, :], dtype=float)



def run_all_kmeans(X, k, n_init, max_iter):
    """Run k-means from several random starts and keep the best result.

    Each restart draws fresh centroids with ``init_centroids`` and optimises
    them with ``run_one_kmeans``. Restarts are ranked by distortion: the mean
    squared distance between each sample and its assigned centroid.

    Args:
        X: 2-D array of shape (m, n), one sample per row.
        k: Number of clusters.
        n_init: Number of random restarts.
        max_iter: Maximum iterations per restart.

    Returns:
        Tuple ``(idx, centroids)`` of the lowest-distortion restart: a
        length-m label array and a (k, n) centroid array. Both are all zeros
        when ``n_init`` is 0.
    """
    m, n = X.shape
    # Start from +inf so the first finite distortion always wins, however
    # large the scale of the data is.
    best_distortion = np.inf
    result_idx = np.zeros(m)
    result_centroids = np.zeros((k, n))

    for _ in range(n_init):
        initial_centroids = init_centroids(X, k)
        idx, centroids = run_one_kmeans(X, initial_centroids, max_iter)

        # Mean squared distance of every sample to its own centroid.
        residual = X - centroids[idx.astype(int), :]
        distortion = np.sum(residual ** 2) / m

        if distortion < best_distortion:
            best_distortion = distortion
            result_idx = idx
            result_centroids = centroids

    return result_idx, result_centroids




# Load the 2-D example data set and cluster it into three groups
# (30 random restarts, 10 iterations each).
data = loadmat('data/ex7data2.mat')
X = data['X']

idx, centroids = run_all_kmeans(X, 3, 30, 10)

# Split the samples by assigned label.
cluster1, cluster2, cluster3 = (X[idx == label, :] for label in range(3))

# Plot each cluster in its own colour.
fig, ax = plt.subplots(figsize=(12, 8))
for points, colour, name in (
    (cluster1, 'r', 'Cluster 1'),
    (cluster2, 'g', 'Cluster 2'),
    (cluster3, 'b', 'Cluster 3'),
):
    ax.scatter(points[:, 0], points[:, 1], s=30, color=colour, label=name)
ax.legend()
plt.show()

在这里插入图片描述

  2. k-means应用:图像压缩
    图像压缩前:
    在这里插入图片描述
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from IPython.display import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sb

from k_means_and_PCA.kmeans_2D import run_all_kmeans

from scipy.io import loadmat

# Build the IPython image object for the uncompressed picture
# (presumably rendered inline only in a notebook session).
Image(filename='data/bird_small.png')

image_data = loadmat('data/bird_small.mat')

# Scale the pixel array by 1/255 (assumes 8-bit channel values; confirm).
A = image_data['A'] / 255

# Flatten the image into one row per pixel, one column per channel.
height, width, channels = A.shape
X = A.reshape(height * width, channels)

# Cluster the pixel colours into 16 groups (10 restarts, 10 iterations).
idx, centroids = run_all_kmeans(X, 16, 10, 10)

# Replace every pixel by its cluster centroid and restore the image shape.
X_recovered = centroids[idx.astype(int), :].reshape(height, width, channels)
X_recovered *= 255
plt.imshow(X_recovered.astype(int))
plt.show()

图像压缩处理后 :
在这里插入图片描述

------ 本文结束 ------
0%