目标 使用numpy实现多层感知机的正向和反向传播
层次构建 全连接层 正向传播 正向传播的公式为:$Y = f(W \times X + b)$,其中,Y为输出,X为输入,W为权值,b为偏置,f为激活函数。本文将激活函数单独实现为一层,因此全连接层本身只计算 $W \times X + b$。
反向传播 对于反向传播,已知后一层传回的梯度为dY,对应的反向传播公式为:$dW = dY \times X^T$,$db = \sum dY$(沿batch维求和),$dX = W^T \times dY$,其中dX为继续传回前一层的梯度。
# Code implementation
class numpy_fc(object):
    """Fully connected layer computing ``weight . data + bias`` with NumPy.

    ``weight`` has shape ``(out_channel, in_channel)`` and ``bias`` has shape
    ``(out_channel, 1)``, so ``forward`` expects its input laid out as
    ``(in_channel, batch)`` and produces ``(out_channel, batch)``.
    """

    def __init__(self, in_channel, out_channel, optim):
        """Create the parameters and remember the optimizer callable.

        Args:
            in_channel: Number of input features.
            out_channel: Number of output features.
            optim: Callable that maps a gradient to the update added in
                ``step``.
        """
        # Small random initial weights, zero bias; both stored as float64.
        initial_weight = np.random.randn(out_channel, in_channel) * 0.1
        self.weight = np.float64(initial_weight)
        self.bias = np.zeros((out_channel, 1), dtype=np.float64)
        # Input/output of the latest forward pass; backward() reads in_data.
        self.in_data = np.zeros((1, in_channel))
        self.out_data = None
        # Parameter gradients, filled in by backward().
        self.weight_grad = None
        self.bias_grad = None
        self.optimizer = optim

    def forward(self, data):
        """Cache ``data`` and return ``weight . data + bias``."""
        self.in_data = data
        self.out_data = self.weight.dot(data) + self.bias
        return self.out_data

    def backward(self, grad):
        """Store the parameter gradients and return the input gradient.

        Args:
            grad: Gradient of the loss with respect to this layer's output.

        Returns:
            Gradient of the loss with respect to this layer's input.
        """
        self.weight_grad = np.dot(grad, self.in_data.T)
        # Bias is shared by every column, so its gradient sums over axis 1.
        summed_over_batch = np.sum(grad, axis=1)
        self.bias_grad = summed_over_batch.reshape((-1, 1))
        return np.dot(self.weight.T, grad)

    def step(self):
        """Add the optimizer's update to the weight and the bias."""
        weight_update = self.optimizer(self.weight_grad)
        bias_update = self.optimizer(self.bias_grad)
        self.weight += weight_update
        self.bias += bias_update
# Code test: push a random (16, 10) batch through a 16 -> 8 layer and print
# the shapes of the output, the input gradient and the parameter gradients.
test_fc = numpy_fc(16, 8, None)
sample_batch = np.random.rand(16, 10)
test_fc_forward = test_fc.forward(sample_batch)
print(test_fc_forward.shape)
# The forward output is reused as a stand-in for the upstream gradient.
test_fc_back = test_fc.backward(test_fc_forward)
print(test_fc_back.shape)
print(test_fc.weight_grad.shape, test_fc.weight.shape)
print(test_fc.bias_grad.shape, test_fc.bias.shape)
(8, 10)
(16, 10)
(8, 16) (8, 16)
(8, 1) (8, 1)
激活函数 sigmoid函数 sigmoid函数是常用的二分类问题输出层激活函数,前向传播和反向传播分别如下所示:$Y = \frac{1}{1 + e^{-X}}$,$dX = dY \cdot Y \cdot (1 - Y)$
class numpy_sigmoid(object):
    """Element-wise logistic sigmoid activation layer."""

    def __init__(self):
        # Output of the latest forward() call; backward() reuses it.
        self.result = None

    def forward(self, data):
        """Return ``1 / (1 + exp(-data))`` and cache it in ``self.result``."""
        denominator = 1 + np.exp(-data)
        self.result = 1 / denominator
        return self.result

    def backward(self, grad):
        """Multiply ``grad`` by the derivative ``result * (1 - result)``."""
        activated = self.result
        return grad * activated * (1 - activated)

    def step(self):
        """Do nothing: the layer has no trainable parameters."""
        pass
relu函数 relu是现阶段最常用的隐层激活函数,前向传播和反向传播如下所示:$Y = \max(0, X)$;反向传播时,$X > 0$ 处 $dX = dY$,其余位置 $dX = 0$。
class numpy_relu(object):
    """ReLU activation layer: element-wise ``max(data, 0)``.

    Neither ``forward`` nor ``backward`` modifies its argument or the cached
    forward output, so arrays shared with neighbouring layers stay intact.
    """

    def __init__(self):
        # Output of the latest forward() call; backward() derives its mask
        # from it.
        self.result = None

    def forward(self, data):
        """Return ``data`` with negative entries replaced by zero.

        Args:
            data: Input array; it is left unmodified.

        Returns:
            A new array holding the rectified values (also cached in
            ``self.result``).
        """
        # np.maximum allocates a fresh array instead of writing into `data`.
        self.result = np.maximum(data, 0)
        return self.result

    def backward(self, grad):
        """Pass ``grad`` through where the forward output was positive.

        Args:
            grad: Gradient of the loss with respect to this layer's output.

        Returns:
            ``grad`` where the cached output is ``> 0`` and zero elsewhere.
        """
        # Build the 0/1 mask as a separate array so that self.result (the
        # array handed to the next layer) keeps its forward values.
        mask = (self.result > 0).astype(self.result.dtype)
        return grad * mask

    def step(self):
        """Do nothing: the layer has no trainable parameters."""
        pass
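其他组件构建 代价函数 MSE(代码中记作MES) MSE代价函数的前向传播和反向传播为(代码实现中对平方误差求和,未取平均):$L = \sum (\hat{y} - y)^2$,$\frac{\partial L}{\partial \hat{y}} = 2(\hat{y} - y)$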
def MES_loss(y_pre, y):
    """Return the summed squared error and its gradient w.r.t. ``y_pre``.

    Args:
        y_pre: Predicted values.
        y: Target values, broadcastable against ``y_pre``.

    Returns:
        Tuple ``(loss, loss_back)`` where ``loss = sum((y_pre - y) ** 2)`` and
        ``loss_back = 2 * (y_pre - y)`` is the derivative of that sum.
    """
    diff = y_pre - y
    loss = np.sum(diff ** 2)
    # The derivative keeps the sign of the error; an absolute value here
    # would push predictions the wrong way whenever y_pre < y.
    loss_back = 2 * diff
    return loss, loss_back
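交叉熵 交叉熵的前向传播和反向传播分别为:$L = -\sum \left[ y \log \hat{y} + (1 - y) \log (1 - \hat{y}) \right]$,$\frac{\partial L}{\partial \hat{y}} = -\frac{y}{\hat{y}} + \frac{1 - y}{1 - \hat{y}}$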
def Cross_loss(y_pre, y):
    """Return the binary cross-entropy loss and its gradient w.r.t. ``y_pre``.

    Args:
        y_pre: Predicted probabilities. No clipping is applied, so values of
            exactly 0 or 1 produce infinities.
        y: Target labels, broadcastable against ``y_pre``.

    Returns:
        Tuple ``(loss, loss_back)`` with
        ``loss = -sum(y * log(y_pre) + (1 - y) * log(1 - y_pre))`` and
        ``loss_back = (1 - y) / (1 - y_pre) - y / y_pre``.
    """
    loss = -np.sum(y * np.log(y_pre) + (1 - y) * np.log(1 - y_pre))
    # d/dp of -[y log p + (1 - y) log(1 - p)]: the two terms carry opposite
    # signs because log(1 - p) has derivative -1 / (1 - p).
    loss_back = (1 - y) / (1 - y_pre) - y / y_pre
    return loss, loss_back
带交叉熵的softmax函数 softmax函数是多分类问题常用的输出激活函数,一般与交叉熵代价函数结合使用,组合函数(softmax+交叉熵)的前向传播如下:$p_i = \frac{e^{x_i}}{\sum_j e^{x_j}}$,$L = -\sum_i y_i \log p_i$
反向传播如下:$\frac{\partial L}{\partial x_i} = p_i - y_i$
详细推导可参见这里
def Softmax_cross_loss(y_pre, y, axis=0):
    """Return the softmax cross-entropy loss and its gradient w.r.t. ``y_pre``.

    Args:
        y_pre: Raw scores (logits).
        y: Targets with the same layout as ``y_pre``.
        axis: Axis along which the softmax is normalised, i.e. the class
            axis. Defaults to 0.

    Returns:
        Tuple ``(loss, loss_back)`` with ``loss = -sum(y * log(softmax))`` and
        ``loss_back = softmax - y``.
    """
    # Subtracting the per-column maximum leaves the softmax unchanged but
    # keeps np.exp from overflowing on large scores.
    shifted = y_pre - np.max(y_pre, axis=axis, keepdims=True)
    log_norm = np.log(np.sum(np.exp(shifted), axis=axis, keepdims=True))
    # Work with log-softmax directly so that a probability that underflows
    # to zero does not turn into log(0) = -inf (and 0 * -inf = nan).
    log_softmax = shifted - log_norm
    loss = -np.sum(y * log_softmax)
    loss_back = np.exp(log_softmax) - y
    return loss, loss_back
# Smoke test: random (2, 4) scores against random (2, 4) targets.
demo_scores = np.random.randn(2, 4)
demo_targets = np.random.randn(2, 4)
Softmax_cross_loss(demo_scores, demo_targets)
(-4.9084963417988003,
array([[-0.09065384, 0.07506358, 0.32789286, 1.26735185],
[ 1.93958915, 0.01316283, 1.20922904, 2.87550082]]))
优化器SGD 随机梯度下降优化器是一种比较简单的优化方法,优化公式如下:$W \leftarrow W - \eta \cdot dW$,其中 $\eta$ 为学习率,dW为参数的梯度。
class optim_sgd(object):
    """Plain SGD rule: turns a gradient into the update ``-learning_rate * grad``."""

    def __init__(self, learning_rate):
        """Store the step size used for every update."""
        super().__init__()
        self.learning_rate = learning_rate

    def __call__(self, grad):
        """Return the amount to add to a parameter for the gradient ``grad``."""
        step_size = self.learning_rate
        return -step_size * grad
其他组件 导入数据集——乳腺癌数据集 下载数据集
import re
import pandas as pd
# Download the Wisconsin breast-cancer table and name its columns after the
# attribute list from the data-set description.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
data_label = (
    " 1. Sample code number 1id number"
    " 2. Clump Thickness 1 - 10"
    " 3. Uniformity of Cell Size 1 - 10"
    " 4. Uniformity of Cell Shape 1 - 10"
    " 5. Marginal Adhesion 1 - 10"
    " 6. Single Epithelial Cell Size 1 - 10"
    " 7. Bare Nuclei 1 - 10"
    " 8. Bland Chromatin 1 - 10"
    " 9. Normal Nucleoli 1 - 10"
    " 10. Mitoses 1 - 10"
    " 11. Class 2 for benign, 4 for malignant)"
    " "
)

# Each match runs from the ". " after an item number up to a digit; the
# leading ". " is sliced off and every "<whitespace><digit>" is removed.
# The first name therefore comes out as "Sample code numberid number".
_label_matches = re.findall(r"\. [\w\s]+\d", data_label)
data_label = [re.sub(r"\s+\d", "", match[2:]) for match in _label_matches]

data = pd.read_csv(data_url, names=data_label)
print(data.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 699 entries, 0 to 698
Data columns (total 11 columns):
Sample code numberid number 699 non-null int64
Clump Thickness 699 non-null int64
Uniformity of Cell Size 699 non-null int64
Uniformity of Cell Shape 699 non-null int64
Marginal Adhesion 699 non-null int64
Single Epithelial Cell Size 699 non-null int64
Bare Nuclei 699 non-null object
Bland Chromatin 699 non-null int64
Normal Nucleoli 699 non-null int64
Mitoses 699 non-null int64
Class 699 non-null int64
dtypes: int64(10), object(1)
memory usage: 60.1+ KB
None
# Clean the data set: "?" marks a missing value, so turn it into NaN and
# drop every row that contains one.
data = data.replace(to_replace="?", value=np.nan).dropna(how="any")
# "Bare Nuclei" was read as strings because of the "?" entries.
data["Bare Nuclei"] = data["Bare Nuclei"].map(int)
print(data.info())
<class 'pandas.core.frame.DataFrame'>
Int64Index: 683 entries, 0 to 698
Data columns (total 11 columns):
Sample code numberid number 683 non-null int64
Clump Thickness 683 non-null int64
Uniformity of Cell Size 683 non-null int64
Uniformity of Cell Shape 683 non-null int64
Marginal Adhesion 683 non-null int64
Single Epithelial Cell Size 683 non-null int64
Bare Nuclei 683 non-null int64
Bland Chromatin 683 non-null int64
Normal Nucleoli 683 non-null int64
Mitoses 683 non-null int64
Class 683 non-null int64
dtypes: int64(11)
memory usage: 64.0 KB
None
# Split the data set: columns 1-9 are the features, column 10 is the class.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    data[data_label[1:10]],
    data[data_label[10]],
    test_size=0.25,
    random_state=1,
)
print(x_train.shape, x_test.shape)
print(y_train.shape)
# Series.value_counts replaces the deprecated top-level pd.value_counts.
print(y_train.value_counts())
(512, 9) (171, 9)
(512,)
2 333
4 179
Name: Class, dtype: int64
# Standardization: fit the scaler on the training features only, then apply
# the same transform to both splits.
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
ss.fit(x_train)
x_train_ss = ss.transform(x_train)
x_test_ss = ss.transform(x_test)

print(type(x_train_ss))
<class 'numpy.ndarray'>
def y_standard(data):
    """Rescale class codes with ``data / 2 - 1`` (2 becomes 0, 4 becomes 1)."""
    return (data / 2) - 1


y_train_ss = y_standard(y_train).values
y_test_ss = y_standard(y_test).values
# The labels are plain arrays here, so wrap them in a Series to count them;
# the top-level pd.value_counts is deprecated.
print(pd.Series(y_train_ss).value_counts())
print(pd.Series(y_test_ss).value_counts())
0.0 333
1.0 179
dtype: int64
0.0 111
1.0 60
dtype: int64
# Build an iterable data set
import random


def dataset(data, lable, batch_size=100, epoch=10):
    """Yield ``epoch`` randomly drawn mini-batches.

    Args:
        data: Array indexed by sample along axis 0.
        lable: Array of labels aligned with ``data``.
        batch_size: Number of rows drawn (with replacement) per batch.
        epoch: Number of batches to yield.

    Yields:
        Tuples ``(data[index], lable[index])`` for a fresh random index list.
    """
    last_row = data.shape[0] - 1
    for _batch in range(epoch):
        # Rows are drawn independently, so a row may repeat within a batch.
        picks = [random.randint(0, last_row) for _draw in range(batch_size)]
        yield data[picks], lable[picks]
# Print the shapes of the first mini-batch only.
for i in dataset(x_train_ss, y_train_ss, batch_size=100):
    batch_features, batch_labels = i
    print(batch_features.shape, batch_labels.shape)
    break
(100, 9) (100,)
# One-hot encoding
def onehot(data, tp_num):
    """Encode class indices as one-hot rows.

    Args:
        data: 1-D array of class indices; each entry is converted with
            ``int`` before being used as a column index.
        tp_num: Number of columns (classes) in the encoding.

    Returns:
        Array of shape ``(data.shape[0], tp_num)`` with a single 1 per row.
    """
    encoded = np.zeros((data.shape[0], tp_num))
    # Each `row` is a view into `encoded`, so the assignment writes through.
    for row, class_index in zip(encoded, data):
        row[int(class_index)] = 1
    return encoded
# Encode the labels 0 and 1 with two classes.
test_onehot = np.arange(2)
onehot(test_onehot, tp_num=2)
array([[ 1., 0.],
[ 0., 1.]])
# Network
class numpy_network_base(object):
    """Sequential container that chains layers exposing forward/backward/step."""

    def __init__(self, network_list):
        """Keep the ordered list of layers."""
        self.network = network_list

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the last output."""
        activation = x
        for layer in self.network:
            activation = layer.forward(activation)
        return activation

    def backward(self, grad):
        """Propagate a copy of ``grad`` through the layers in reverse order.

        Returns:
            The gradient returned by the first layer's ``backward``.
        """
        upstream = grad.copy()
        for layer in reversed(self.network):
            upstream = layer.backward(upstream)
        return upstream

    def step(self):
        """Call ``step`` on every layer in order."""
        for layer in self.network:
            layer.step()
# Accuracy computation
def accuracy(y_pre, lable):
    """Return the fraction of columns whose arg-max row equals ``lable``.

    Args:
        y_pre: Scores with one row per class; the prediction for a column is
            the index of its largest entry along axis 0.
        lable: Expected class indices, compared element-wise (with
            broadcasting) against the predictions.

    Returns:
        Mean of the 0/1 match indicators.
    """
    predicted = np.argmax(y_pre, axis=0)
    hits = predicted == lable
    return np.mean(np.int8(hits))
# Every column of `a` has its maximum in row 3, so all labels of 3 match.
a = np.arange(32).reshape(4, 8)
b = np.full((1, 8), 3.0)
accuracy(a, b)
1.0
# Network training and testing: build the network
# 9 input features -> 20 hidden units (ReLU) -> 2 class scores.
network = numpy_network_base([
    numpy_fc(9, 20, optim_sgd(0.001)),
    numpy_relu(),
    numpy_fc(20, 2, optim_sgd(0.001)),
])

for i, (din, lable) in enumerate(dataset(x_train_ss, y_train_ss, epoch=10, batch_size=100)):
    # Layers work on (features, batch), so samples go in as columns.
    result = network.forward(din.T)

    # Softmax_cross_loss normalises along axis 0. `result` is already
    # (classes, batch), so pass it as is and transpose the one-hot targets;
    # transposing `result` instead would normalise each class over the batch.
    loss, grad = Softmax_cross_loss(result, onehot(lable, 2).T)

    print(accuracy(result, lable))

    # `grad` is (classes, batch), the layout the last layer expects.
    network.backward(grad)
    network.step()
0.19
0.32
0.86
0.96
0.94
0.93
0.9
0.96
0.98
0.95
# Network test: score the held-out split with the trained network.
result = network.forward(x_test_ss.T)
test_accuracy = accuracy(result, y_test_ss)
print(test_accuracy)
0.982456140351