diff --git a/神经网络/main.py b/神经网络/main.py
index c62e992..13fa323 100644
--- a/神经网络/main.py
+++ b/神经网络/main.py
@@ -21,7 +21,6 @@ class NeuralNetwork:
         structure: List[int],
         hidden_activate: Literal["relu"] = "relu",
         output_activate: Literal["linear", "softmax"] = "linear",
-        momentum: float = 0.9,
         epsilon: float = 1e-9,
     ):
         """
@@ -29,7 +28,6 @@ class NeuralNetwork:
         :param structure: 神经网络结构,例如[2, 10, 1]表示2层神经网络,具体为输入层2个神经元、隐含层10个神经元、输出层1个神经元
         :param hidden_activate: 隐含层的激活函数,默认为relu
         :param output_activate: 输出层的激活函数,默认为linear
-        :param momentum: 动量因子,默认为0.9
         :param epsilon: 极小值,默认为1e-9
         """
         print("正在初始化神经网络...", end="")
@@ -55,7 +53,7 @@ class NeuralNetwork:
         # 神经网络层数(定义第0层为输入层,第L层为输出层(L为神经网络层数),第l层为隐含层(l=1,2,...,L-1),深度为L+1)
         self.layer_counts = len(structure) - 1
 
-        self.parameters = {}
+        self.parameters = {0: {}}
         # 初始化神经网络参数
         for layer_index in range(1, self.layer_counts + 1):
             # 上一层和当前层神经元数量
@@ -83,18 +81,6 @@ class NeuralNetwork:
                     )
                 ),  # 权重,维度为[当前层神经元数量,上一层神经元数量],适配加权输入=权重*输入+平移。隐含层使用He初始化权重方法、输出层激活函数若为linear则使用标准Xavier初始化权重方法否则使用改进Xavier初始化权重方法
                 "bias": numpy.zeros((current_layer_neuron_counts, 1)),  # 平移
-                "moving_average": numpy.zeros(
-                    (current_layer_neuron_counts, 1)
-                ),  # 批归一化的移动平均值
-                "moving_variance": numpy.ones(
-                    (current_layer_neuron_counts, 1)
-                ),  # 批归一化的移动方差
-                "gamma": numpy.ones(
-                    (current_layer_neuron_counts, 1)
-                ),  # 批归一化的缩放因子
-                "beta": numpy.zeros(
-                    (current_layer_neuron_counts, 1)
-                ),  # 批归一化的平移因子
                 "activate": (
                     self.hidden_activate
                     if layer_index < self.layer_counts
@@ -102,10 +88,6 @@ class NeuralNetwork:
                 ),  # 激活函数
             }
 
-        self.momentum = momentum
-        # 初始化是否训练模式
-        self.training = None
-
         self.epsilon = epsilon
         print("已完成")
 
@@ -120,8 +102,8 @@ class NeuralNetwork:
     ) -> None:
         """
         训练神经网络
-        :param X: 输入层的输入,维度为[输入特征数, 样本数]
-        :param y_true: 输出层的输出真实,维度为[输出特征数, 样本数]
+        :param X: 输入,维度为[输入神经元数, 样本数]
+        :param y_true: 真实输出,维度为[输出神经元数, 样本数]
         :param target_loss: 目标损失
         :param epochs: 学习轮数
         :param learning_rate: 学习率
@@ -138,19 +120,19 @@ class NeuralNetwork:
             else False
         ):
             raise RuntimeError(
-                f"输入层的输入和输出层的输出应为数组,其中输入层的输入维度应为[输入特征数, 样本数],输出层的输出维度应为[输出特征数, 样本数]。样本数应相同,输入特征数应为 {self.structure[0]},输出特征数应为 {self.structure[-1]}"
+                "输入和真实输出应为数组,其中输入维度应为[输入神经元数, 样本数],真实输出维度应为[输出神经元数, 样本数],样本数应相同"
             )
-        # 开启训练模式
-        self.training = True
-        # 归一化输入层的输入
-        X = self._normalize(input=X)
+        # 归一化输入
+        self.parameters[0]["activation"] = self._normalize(
+            input=X
+        )  # 将输入作为输入层的输出
 
-        epoch = 0
+        epoch = 1
         while True:
             # 前向传播
-            y_predict = self._forward_propagate(X=X)
+            self._forward_propagate()
             # 计算损失
-            loss = self._calculate_loss(y_true=y_true, y_predict=y_predict)
+            loss = self._calculate_loss(y_true=y_true)
             if loss <= target_loss:
                 print(
                     f"第 {epoch:6d} 轮损失已达到目标损失 {target_loss:9.3f},训练结束"
                 )
@@ -161,7 +143,7 @@ class NeuralNetwork:
                 break
 
             # 后向传播
-            self._backward_propagate(X=X, y_true=y_true, y_predict=y_predict)
+            self._backward_propagate(y_true=y_true)
             # 更新神经网络参数
             self._update_parameters(learning_rate=learning_rate)
 
@@ -171,7 +153,7 @@ class NeuralNetwork:
 
         for idx in numpy.random.choice(X.shape[1], size=10, replace=False):
             y_true_val = y_true[0, idx]
-            y_pred_val = y_predict[0, idx]
+            y_pred_val = self.parameters[self.layer_counts]["activation"][0, idx]
             error = abs(y_true_val - y_pred_val)
             print(f"{idx:<10} {y_true_val:<15.4f} {y_pred_val:<15.4f} {error:<15.4f}")
 
@@ -188,98 +170,30 @@ class NeuralNetwork:
             numpy.var(input, ddof=0, axis=1, keepdims=True) + self.epsilon
         )
 
-    def _forward_propagate(self, X: numpy.ndarray) -> numpy.ndarray:
+    def _forward_propagate(self) -> None:
         """
         前向传播
-        :param X: 输入层的输入,维度为[输入特征数, 样本数]
-        :return: 输出层的输出预测,维度为[输出特征数, 样本数]
+        :return: 无
         """
-        activation = X  # 将输入层的输入作为第0层的输出
         for layer_index in range(1, self.layer_counts + 1):
             self.parameters[layer_index].update(
                 {
-                    "x": (x := activation),  # 将上一层的输出作为当前层的输入
                     "weighted_input": (
                         weighted_input := numpy.dot(
-                            self.parameters[layer_index]["weight"], x
+                            self.parameters[layer_index]["weight"],
+                            self.parameters[layer_index - 1]["activation"],
                         )
                         + self.parameters[layer_index]["bias"]
-                    ),  # 加权输入,维度为[当前层神经元数量,样本数]
-                    **(
-                        output := self._batch_normalize(
-                            input=weighted_input,
-                            moving_average=self.parameters[layer_index][
-                                "moving_average"
-                            ],
-                            moving_variance=self.parameters[layer_index][
-                                "moving_variance"
-                            ],
-                            gamma=self.parameters[layer_index]["gamma"],
-                            beta=self.parameters[layer_index]["beta"],
-                        )
-                    ),  # 加权输入的批归一化
+                    ),  # 加权输入,维度为[当前层神经元数量,样本数],将上一层的输出作为当前层的输入
                     "activation": (
                         activation := self._activate(
                             activate=self.parameters[layer_index]["activate"],
-                            input=output["normalization"],
+                            input=weighted_input,
                         )
                     ),  # 输出
                 }
             )
-        y_predict = activation  # 将第L层(输出层)的输出作为输出层的输出预测
-        return y_predict
-
-    def _batch_normalize(
-        self,
-        input: numpy.ndarray,
-        moving_average: numpy.ndarray,
-        moving_variance: numpy.ndarray,
-        gamma: numpy.ndarray,
-        beta: numpy.ndarray,
-    ) -> Dict[str, numpy.ndarray]:
-        """
-        批归一化
-        :param input: 输入
-        :param moving_average: 批归一化的移动平均值,维度为[输入维度的行, 1]
-        :param moving_variance: 批归一化的移动方差,维度为[输入维度的行, 1]
-        :param gamma: 批归一化的缩放因子,维度为[输入维度的行, 1]
-        :param beta: 批归一化的平移因子,维度为[输入维度的行, 1]
-        :return: 批归一化后的输入,维度与输入相同
-        """
-        return {
-            "average": (
-                average := (
-                    numpy.mean(input, axis=1, keepdims=True)
-                    if self.training
-                    else moving_average
-                )
-            ),  # 就各行所有列求平均值,维度为[输入维度的行, 1]
-            "variance": (
-                variance := (
-                    numpy.var(input, ddof=0, axis=1, keepdims=True)
-                    if self.training
-                    else moving_variance
-                )
-            ),  # 就各行所有列求方差,维度为[输入维度的行, 1]
-            "moving_average": (
-                self.momentum * moving_average + (1 - self.momentum) * average
-                if self.training
-                else moving_average
-            ),  # 更新批归一化的移动平均值
-            "moving_variance": (
-                self.momentum * moving_variance + (1 - self.momentum) * variance
-                if self.training
-                else moving_variance
-            ),  # 更新批归一化的移动方差
-            "standard_deviation": (
-                standard_deviation := numpy.sqrt(variance + self.epsilon)
-            ),  # 就各行所有列求标准差,维度为[输入维度的行, 1]
-            "normalization": (
-                (input - average) / standard_deviation * gamma + beta
-            ),  # 归一化后的输入,维度与输入相同
-        }
 
     def _activate(
         self,
         activate: Literal["relu", "linear", "softmax"],
@@ -288,8 +202,8 @@ class NeuralNetwork:
         """
         根据激活函数计算输入
         :param activate: 激活函数
-        :param input: 输入,维度为[当前层神经元数量,样本数]
-        :return: 经过激活函数计算后的输入,维度为[当前层神经元数量,样本数]
+        :param input: 输入
+        :return: 经过激活函数计算后的输入,维度与输入相同
         """
         match activate:
             case "relu":
@@ -308,114 +222,55 @@ class NeuralNetwork:
     def _calculate_loss(
         self,
         y_true: numpy.ndarray,
-        y_predict: numpy.ndarray,
     ) -> numpy.floating:
         """
         计算损失
-        :param y_true: 输出层的输出真实,维度为[输出特征数, 样本数]
-        :param y_predict: 输出层的输出预测,维度为[输出特征数, 样本数]
-        :return: 损失值
+        :param y_true: 真实输出,维度为[输出神经元数, 样本数]
+        :return: 损失
        """
         return (
-            0.5 * numpy.mean(numpy.square(y_true - y_predict))
+            0.5
+            * numpy.mean(
+                numpy.square(y_true - self.parameters[self.layer_counts]["activation"])
+            )
             if self.parameters[self.layer_counts]["activate"] == "linear"
             else -1
             * numpy.mean(
                 numpy.sum(
                     y_true
-                    * numpy.log(numpy.clip(y_predict, self.epsilon, 1 - self.epsilon)),
+                    * numpy.log(
+                        numpy.clip(
+                            self.parameters[self.layer_counts]["activation"],
+                            self.epsilon,
+                            1 - self.epsilon,
+                        )
+                    ),
                     axis=0,
                 )
            )
-        )  # 若输出层的激活函数为linear则损失函数使用均方误差否则使用交叉熵
+        )  # 若输出层的激活函数为linear则损失函数基于均方误差否则基于交叉熵
 
     def _backward_propagate(
         self,
-        X: numpy.ndarray,
         y_true: numpy.ndarray,
-        y_predict: numpy.ndarray,
     ) -> None:
         """
         后向传播
-        :param X: 输入层的输入,维度为[输入特征数, 样本数]
-        :param y_true: 输出层的输出真实,维度为[输出特征数, 样本数]
-        :param y_predict: 输出层的输出预测,维度为[输出特征数, 样本数]
+        :param y_true: 真实输出,维度为[输出神经元数, 样本数]
         :return: 无
         """
-        sample_counts = X.shape[1]  # 样本数
+        sample_counts = y_true.shape[1]  # 样本数
 
-        # 损失函数对输出层的就加权输入批归一化的梯度
-        self.parameters[self.layer_counts]["delta_normalization"] = (
-            y_predict - y_true
-        ) / sample_counts  # 均方误差和交叉熵对输出层的输出预测的梯度是相同的
+        # 损失对输出层的加权输入的梯度
+        self.parameters[self.layer_counts]["delta_weighted_input"] = (
+            self.parameters[self.layer_counts]["activation"] - y_true
+        ) / sample_counts  # 损失函数基于均方误差和交叉熵对输出层的加权输入的梯度相同
         for layer_index in range(self.layer_counts, 0, -1):
             self.parameters[layer_index].update(
                 {
-                    "delta_gamma": numpy.sum(
-                        self.parameters[layer_index]["delta_normalization"]
-                        * (
-                            self.parameters[layer_index]["weighted_input"]
-                            - self.parameters[layer_index]["weighted_input_average"]
-                        )
-                        / self.parameters[layer_index][
-                            "weighted_input_standard_deviation"
-                        ],
-                        axis=1,
-                        keepdims=True,
-                    ),  # 批归一化的缩放因子的梯度
-                    "delta_beta": numpy.sum(
-                        self.parameters[layer_index]["delta_normalization"],
-                        axis=1,
-                        keepdims=True,
-                    ),  # 批归一化的平移因子的梯度
-                    "delta_weighted_input": (
-                        delta_weighted_input := (
-                            sample_counts
-                            * self.parameters[layer_index]["gamma"]
-                            * self.parameters[layer_index]["delta_normalization"]
-                            - numpy.sum(
-                                self.parameters[layer_index]["gamma"]
-                                * self.parameters[layer_index]["delta_normalization"],
-                                axis=1,
-                                keepdims=True,
-                            )
-                            - (
-                                (
-                                    self.parameters[layer_index]["weighted_input"]
-                                    - self.parameters[layer_index][
-                                        "weighted_input_average"
-                                    ]
-                                )
-                                / self.parameters[layer_index][
-                                    "weighted_input_standard_deviation"
-                                ]
-                            )
-                            * numpy.sum(
-                                self.parameters[layer_index]["gamma"]
-                                * self.parameters[layer_index]["delta_normalization"]
-                                * (
-                                    (
-                                        self.parameters[layer_index]["weighted_input"]
-                                        - self.parameters[layer_index][
-                                            "weighted_input_average"
-                                        ]
-                                    )
-                                    / self.parameters[layer_index][
-                                        "weighted_input_standard_deviation"
-                                    ]
-                                ),
-                                axis=1,
-                                keepdims=True,
-                            )
-                        )
-                        * (1.0 / sample_counts)
-                        / self.parameters[layer_index][
-                            "weighted_input_standard_deviation"
-                        ]
-                    ),  # 加权输入的梯度
                     "delta_weight": numpy.dot(
-                        delta_weighted_input,
+                        self.parameters[layer_index]["delta_weighted_input"],
                         (
-                            X
+                            self.parameters[0]["activation"]
                             if layer_index == 1
@@ -423,21 +278,42 @@ class NeuralNetwork:
                         ).T,
                     ),  # 权重的梯度
                     "delta_bias": numpy.sum(
-                        delta_weighted_input,
+                        self.parameters[layer_index]["delta_weighted_input"],
                         axis=1,
                         keepdims=True,
                     ),  # 偏置的梯度
                 }
             )
-            if layer_index > 1:
-                self.parameters[layer_index - 1]["delta_normalization"] = numpy.dot(
-                    self.parameters[layer_index]["weight"].T,
-                    self.parameters[layer_index]["delta_weighted_input"],
-                ) * (self.parameters[layer_index - 1]["normalization"] > 0).astype(
-                    numpy.float32
+            if layer_index != 1:
+                self.parameters[layer_index - 1].update(
+                    {
+                        "delta_weighted_input": numpy.dot(
+                            self.parameters[layer_index]["weight"].T,
+                            self.parameters[layer_index]["delta_weighted_input"],
+                        )
+                        * self._activate_derivative(
+                            activate=self.parameters[layer_index - 1]["activate"],
+                            input=self.parameters[layer_index - 1]["weighted_input"],
+                        ),
+                    }
                 )
 
+    def _activate_derivative(
+        self,
+        activate: Literal["relu"],
+        input: numpy.ndarray,
+    ) -> numpy.ndarray:
+        """
+        根据激活函数计算输入的导数
+        :param activate: 激活函数
+        :param input: 输入
+        :return: 激活函数对输入的导数,维度与输入相同
+        """
+        match activate:
+            case "relu":
+                return numpy.where(input > 0, 1, 0)
+
     def _update_parameters(self, learning_rate: float) -> None:
         """
         更新神经网络参数
@@ -453,12 +329,6 @@
                     "bias": self.parameters[layer_index]["bias"]
                     - self.parameters[layer_index]["delta_bias"]
                     * learning_rate,  # 平移
-                    "gamma": self.parameters[layer_index]["gamma"]
-                    - self.parameters[layer_index]["delta_gamma"]
-                    * learning_rate,  # 批归一化的缩放因子
-                    "beta": self.parameters[layer_index]["beta"]
-                    - self.parameters[layer_index]["delta_beta"]
-                    * learning_rate,  # 批归一化的平移因子
                 }
             )
 
@@ -467,16 +337,18 @@
 if __name__ == "__main__":
     # 生成测试数据(回归任务)
     numpy.random.seed(42)  # 设置随机种子保证可复现
-    X = numpy.random.randn(2, 100) * 5
-    # 真实函数:y = 2*x1 + 3*x2 + 1 (加噪声)
-    y_true = 2 * X[0:1, :] ** 2 + 3 * X[1:2, :] + 1 + numpy.random.randn(1, 100) * 0.1
+    X = numpy.random.randn(2, 100)
+    # 真实函数:y = 2*x1^2 + 3*x2 + 1
+    y_true = 2 * X[0:1, :] ** 2 + 3 * X[1:2, :] + 1
 
     # 创建并训练神经网络
     neural_network = NeuralNetwork(
-        structure=[2, 200, 100, 50, 1],  # 2输入,10隐藏神经元,1输出
+        structure=[2, 16, 4, 1],  # 2输入,两个隐含层分别为16和4个神经元,1输出
     )
 
     # 训练
     neural_network.train(
-        X=X, y_true=y_true, target_loss=0.001, epochs=10000, learning_rate=0.001
+        X=X, y_true=y_true, target_loss=0.001, epochs=1000, learning_rate=0.001
     )
+
+    print(neural_network.parameters[neural_network.layer_counts]["activation"])
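
The refactored `_backward_propagate` above seeds the output layer with `(activation - y_true) / sample_counts` and relies on the claim in its comment that, for both supported output configurations (linear output with the 0.5-mean-squared-error loss, and softmax output with the cross-entropy loss), the gradient of the loss with respect to the output layer's weighted input has exactly that form. Below is a minimal, standalone numerical check of that claim using central differences. It is an illustrative sketch, not part of the patch: the `softmax` helper and the single-output-neuron setup for the linear case are assumptions of the sketch (with several output neurons, the `numpy.mean` in the MSE branch would divide by more than the sample count, so the equality would only hold up to that constant factor).

import numpy

numpy.random.seed(0)
sample_counts = 5
eps = 1e-6


def softmax(z):
    # Column-wise softmax, matching the [neurons, samples] layout used in the patch.
    e = numpy.exp(z - z.max(axis=0, keepdims=True))
    return e / e.sum(axis=0, keepdims=True)


# Case 1: linear output, loss = 0.5 * mean((y_true - z)**2), single output neuron.
z = numpy.random.randn(1, sample_counts)
y_true = numpy.random.randn(1, sample_counts)
analytic = (z - y_true) / sample_counts
numeric = numpy.zeros_like(z)
for i in range(sample_counts):
    z_plus, z_minus = z.copy(), z.copy()
    z_plus[0, i] += eps
    z_minus[0, i] -= eps
    loss_plus = 0.5 * numpy.mean(numpy.square(y_true - z_plus))
    loss_minus = 0.5 * numpy.mean(numpy.square(y_true - z_minus))
    numeric[0, i] = (loss_plus - loss_minus) / (2 * eps)
print(numpy.allclose(analytic, numeric, atol=1e-8))  # expected: True

# Case 2: softmax output, loss = -mean(sum(y_true * log(softmax(z)), axis=0)).
z = numpy.random.randn(3, sample_counts)
y_true = numpy.eye(3)[:, numpy.random.randint(0, 3, size=sample_counts)]  # one-hot columns
analytic = (softmax(z) - y_true) / sample_counts
numeric = numpy.zeros_like(z)
for row in range(z.shape[0]):
    for i in range(sample_counts):
        z_plus, z_minus = z.copy(), z.copy()
        z_plus[row, i] += eps
        z_minus[row, i] -= eps
        loss_plus = -numpy.mean(numpy.sum(y_true * numpy.log(softmax(z_plus)), axis=0))
        loss_minus = -numpy.mean(numpy.sum(y_true * numpy.log(softmax(z_minus)), axis=0))
        numeric[row, i] = (loss_plus - loss_minus) / (2 * eps)
print(numpy.allclose(analytic, numeric, atol=1e-6))  # expected: True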