# -*- coding: utf-8 -*- """ 神经网络 """ # 导入模块 from typing import List, Literal, Optional, Dict import numpy class NeuralNetwork: """ 神经网络 """ HIDDEN_ACTIVATES = ["relu"] OUTPUT_ACTIVATES = ["linear", "softmax"] def __init__( self, structure: List[int], hidden_activate: Literal["relu"] = "relu", output_activate: Literal["linear", "softmax"] = "linear", epsilon: float = 1e-9, ): """ 初始化 :param structure: 神经网络结构,例如[2, 10, 1]表示2层神经网络,具体为输入层2个神经元、隐含层10个神经元、输出层1个神经元 :param hidden_activate: 隐含层的激活函数,默认为relu :param output_activate: 输出层的激活函数,默认为linear :param epsilon: 极小值,默认为1e-9 """ print("正在初始化神经网络...", end="") if not ( all(x >= 1 if isinstance(x, int) else False for x in structure) if isinstance(structure, list) and len(structure) >= 3 else False ): raise RuntimeError( "神经网络结构应为长度大于等于3的列表且列表元素应为大于等于1的整数" ) # 初始化神经网络结构 self.structure = structure if hidden_activate not in self.HIDDEN_ACTIVATES: raise RuntimeError(f"该隐含层激活函数 {hidden_activate} 暂不支持") self.hidden_activate = hidden_activate if output_activate not in self.OUTPUT_ACTIVATES: raise RuntimeError(f"该输出层激活函数 {output_activate} 暂不支持") self.output_activate = output_activate # 神经网络层数(定义第0层为输入层,第L层为输出层(L为神经网络层数),第l层为隐含层(l=1,2,...,L-1),深度为L+1) self.layer_counts = len(structure) - 1 self.parameters = {0: {}} # 初始化神经网络参数 for layer_index in range(1, self.layer_counts + 1): # 上一层和当前层神经元数量 previous_layer_neuron_counts, current_layer_neuron_counts = ( self.structure[layer_index - 1], self.structure[layer_index], ) self.parameters[layer_index] = { "weight": numpy.random.randn( current_layer_neuron_counts, previous_layer_neuron_counts ) * ( numpy.sqrt(2 / previous_layer_neuron_counts) if layer_index < self.layer_counts else ( numpy.sqrt(1 / previous_layer_neuron_counts) if self.output_activate == "linear" else numpy.sqrt( 2 / ( previous_layer_neuron_counts + current_layer_neuron_counts ) ) ) ), # 权重,维度为[当前层神经元数量,上一层神经元数量],适配加权输入=权重*输入+平移。隐含层使用He初始化权重方法、输出层激活函数若为linear则使用标准Xavier初始化权重方法否则使用改进Xavier初始化权重方法 "bias": numpy.zeros((current_layer_neuron_counts, 1)), # 平移 "activate": ( self.hidden_activate if layer_index < self.layer_counts else self.output_activate ), # 激活函数 } self.epsilon = epsilon print("已完成") def train( self, X: numpy.ndarray, y_true: numpy.ndarray, target_loss: float = 1e-3, epochs: int = 200, learning_rate: float = 0.001, ) -> None: """ 训练神经网络 :param X: 输入,维度为[输入神经元数, 样本数] :param y_true: 真实输出,维度为[输出神经元数, 样本数] :param target_loss: 目标损失 :param epochs: 学习轮数 :param learning_rate: 学习率 :return: 无 """ print( f"开始训练:目标损失为 {target_loss},学习轮数为 {epochs},学习率为 {learning_rate}..." ) if not ( X.shape[1] == y_true.shape[1] and X.shape[0] == self.structure[0] and y_true.shape[0] == self.structure[-1] if isinstance(X, numpy.ndarray) and isinstance(y_true, numpy.ndarray) else False ): raise RuntimeError( "输入和真实输出应为数组,其中输入维度应为[输入神经元数, 样本数],真实输出维度应为[输出神经元数, 样本数],样本数应需相同" ) # 归一化输入 self.parameters[0]["activation"] = self._normalize( input=X ) # 将输入作为输入层的输出 epoch = 1 while True: # 前向传播 self._forward_propagate() # 计算损失 loss = self._calculate_loss(y_true=y_true) if loss <= target_loss: print( f"第 {epoch:6d} 轮损失已达到目标损失 {target_loss:9.3f},训练结束" ) break if epoch > epochs: print(f"已达到最大学习轮数,损失为 {loss:9.3f},训练结束") break # 后向传播 self._backward_propagate(y_true=y_true) # 更新神经网络参数 self._update_parameters(learning_rate=learning_rate) if epoch % 100 == 0: print(f"第 {epoch:6d} 轮损失为 {loss:9.3f},继续训练...") epoch += 1 for idx in numpy.random.choice(X.shape[1], size=10, replace=False): y_true_val = y_true[0, idx] y_pred_val = self.parameters[self.layer_counts]["activation"][0, idx] error = abs(y_true_val - y_pred_val) print(f"{idx:<10} {y_true_val:<15.4f} {y_pred_val:<15.4f} {error:<15.4f}") def _normalize( self, input: numpy.ndarray, ) -> numpy.ndarray: """ 归一化 :param input: 输入 :return: 归一化后的输入,维度与输入相同 """ return (input - numpy.mean(input, axis=1, keepdims=True)) / numpy.sqrt( numpy.var(input, ddof=0, axis=1, keepdims=True) + self.epsilon ) def _forward_propagate(self) -> None: """ 前向传播 :return: 输出层的输出预测,维度为[输出神经元数, 样本数] """ for layer_index in range(1, self.layer_counts + 1): self.parameters[layer_index].update( { "weighted_input": ( weighted_input := numpy.dot( self.parameters[layer_index]["weight"], self.parameters[layer_index - 1]["activation"], ) + self.parameters[layer_index]["bias"] ), # 加权输入,维度为[当前层神经元数量,样本数],将上一层的输出作为当前层的输入 "activation": ( activation := self._activate( activate=self.parameters[layer_index]["activate"], input=weighted_input, ) ), # 输出 } ) def _activate( self, activate: Literal["relu", "linear", "softmax"], input: numpy.ndarray, ) -> numpy.ndarray: """ 根据激活函数计算输入 :param activate: 激活函数 :param input: 输入 :return: 经过激活函数计算后的输入,维度与输入相同 """ match activate: case "relu": return numpy.maximum(0, input) case "linear": return input case "softmax": # 加权输入的指数项 e_weighted_input = numpy.exp( input - numpy.max(input, axis=0, keepdims=True) ) # 减去各样本所有神经元最大值以避免指数溢出 return e_weighted_input / numpy.sum( e_weighted_input, axis=0, keepdims=True ) def _calculate_loss( self, y_true: numpy.ndarray, ) -> numpy.floating: """ 计算损失 :param y_true: 真实输出,维度为[输出神经元数, 样本数] :return: 损失 """ return ( 0.5 * numpy.mean( numpy.square(y_true - self.parameters[self.layer_counts]["activation"]) ) if self.parameters[self.layer_counts]["activate"] == "linear" else -1 * numpy.mean( numpy.sum( y_true * numpy.log( numpy.clip( self.parameters[self.layer_counts]["activation"], self.epsilon, 1 - self.epsilon, ) ), axis=0, ) ) ) # 若输出层的激活函数为linear则损失函数基于均方误差否则基于交叉熵 def _backward_propagate( self, y_true: numpy.ndarray, ) -> None: """ 后向传播 :param y_true: 真实输输出,维度为[输出神经元数, 样本数] :return: 无 """ sample_counts = X.shape[1] # 样本数 # 损失对输出层的加权输入的梯度 self.parameters[self.layer_counts]["delta_weighted_input"] = ( self.parameters[self.layer_counts]["activation"] - y_true ) / sample_counts # 损失函数基于均方误差和交叉熵对输出层的加权输入的梯度相同 for layer_index in range(self.layer_counts, 0, -1): self.parameters[layer_index].update( { "delta_weight": numpy.dot( self.parameters[layer_index]["delta_weighted_input"], ( X if layer_index == 1 else self.parameters[layer_index - 1]["activation"] ).T, ), # 权重的梯度 "delta_bias": numpy.sum( self.parameters[layer_index]["delta_weighted_input"], axis=1, keepdims=True, ), # 偏置的梯度 } ) if layer_index != 1: self.parameters[layer_index - 1].update( { "delta_weighted_input": numpy.dot( self.parameters[layer_index]["weight"].T, self.parameters[layer_index]["delta_weighted_input"], ) * self._activate_derivative( activate=self.parameters[layer_index - 1]["activate"], input=self.parameters[layer_index - 1]["weighted_input"], ), } ) def _activate_derivative( self, activate: Literal["relu"], input: numpy.ndarray, ) -> numpy.ndarray: """ 根据激活函数计算输入的导数 :param activate: 激活函数 :param input: 输入 :return: 经过激活函数计算后的输入,维度与输入相同 """ match activate: case "relu": return numpy.where(input > 0, 1, 0) def _update_parameters(self, learning_rate: float) -> None: """ 更新神经网络参数 :param learning_rate: 学习率 :return: 无 """ for layer_index in range(1, self.layer_counts + 1): self.parameters[layer_index].update( { "weight": self.parameters[layer_index]["weight"] - self.parameters[layer_index]["delta_weight"] * learning_rate, # 权重 "bias": self.parameters[layer_index]["bias"] - self.parameters[layer_index]["delta_bias"] * learning_rate, # 平移 } ) # 测试代码 if __name__ == "__main__": # 生成测试数据(回归任务) numpy.random.seed(42) # 设置随机种子保证可复现 X = numpy.random.randn(2, 100) # 真实函数:y = 2*x1 + 3*x2 + 1 (加噪声) y_true = 2 * X[0:1, :] ** 2 + 3 * X[1:2, :] + 1 # 创建并训练神经网络 neural_network = NeuralNetwork( structure=[2, 16, 4, 1], # 2输入,10隐藏神经元,1输出 ) # 训练 neural_network.train( X=X, y_true=y_true, target_loss=0.001, epochs=1000, learning_rate=0.001 ) print(neural_network.parameters[2]["activation"]) print(neural_network.parameters[3]["activation"])