# -*- coding: utf-8 -*-
"""
Neural network
"""
# Import modules
from typing import List, Literal

import numpy


class NeuralNetwork:
    """
    Neural network
    """

    def __init__(
        self,
        neurons: List[int],
        hidden_activate: Literal["relu"] = "relu",
        output_activate: Literal["linear", "softmax"] = "linear",
    ):
        """
        Initialization

        :param neurons: layer sizes, e.g. [2, 10, 1] means an input layer with 2
            neurons, one hidden layer with 10 neurons, and an output layer with 1 neuron
        :param hidden_activate: hidden-layer activation function, defaults to relu
        :param output_activate: output-layer activation function, defaults to linear
        """
        print("Initializing neural network...", end="")
        # Layer sizes
        self.neurons = neurons
        # Hidden-layer activation function
        self.hidden_activate = hidden_activate
        # Output-layer activation function
        self.output_activate = output_activate
        # Network structure
        self.neural_network = {}
        # Initialize the weights and biases of every layer
        self._init_neural_network()
        print("done")

    def _init_neural_network(self):
        """
        Initialize the weights and biases of every layer
        """
        for idx in range(1, len(self.neurons)):
            # Derive the standard deviation of this layer's weights from the
            # hidden-layer activation function for hidden layers, or from the
            # output-layer activation function for the output layer
            if idx != len(self.neurons) - 1:
                # Activation function
                activate = self.hidden_activate
                match self.hidden_activate:
                    case "relu":
                        # Standard deviation of this layer's weights (He initialization)
                        standard_deviation = numpy.sqrt(2 / self.neurons[idx - 1])
                    case _:
                        raise RuntimeError(
                            f"Unsupported hidden-layer activation function {self.hidden_activate}"
                        )
            else:
                # Activation function
                activate = self.output_activate
                match self.output_activate:
                    case "linear":
                        # Standard deviation of this layer's weights
                        standard_deviation = numpy.sqrt(1 / self.neurons[idx - 1])
                    case "softmax":
                        # Standard deviation of this layer's weights (Xavier initialization)
                        standard_deviation = numpy.sqrt(
                            2 / (self.neurons[idx - 1] + self.neurons[idx])
                        )
                    case _:
                        raise RuntimeError(
                            f"Unsupported output-layer activation function {self.output_activate}"
                        )
            self.neural_network[f"layer:{idx:03d}"] = {
                # This layer's weights
                "weight": numpy.random.randn(self.neurons[idx - 1], self.neurons[idx])
                * standard_deviation,
                # This layer's biases
                "bias": numpy.zeros((1, self.neurons[idx])),
                # This layer's activation function
                "activate": activate,
                # This layer's batch-normalization scale factor
                "gamma": numpy.ones((1, self.neurons[idx])),
                # This layer's batch-normalization shift factor
                "beta": numpy.zeros((1, self.neurons[idx])),
            }

    def _forward_propagate(self, x: numpy.ndarray) -> numpy.ndarray:
        """
        Forward propagation

        :param x: input-layer input
        :return: output-layer prediction
        """
        activation = x  # Treat the input as the activation of layer 0
        for layer_name, layer in self.neural_network.items():
            self.neural_network[layer_name].update(
                {
                    # This layer's weighted sum
                    "weighted_sum": (
                        weighted_sum := numpy.dot(activation, layer["weight"])
                        + layer["bias"]
                    ),
                    # This layer's batch-normalized weighted sum
                    "batch_normalized_weighted_sum": (
                        batch_normalized_weighted_sum := layer["gamma"]
                        * (
                            weighted_sum
                            - numpy.mean(weighted_sum, axis=0, keepdims=True)
                        )
                        / numpy.sqrt(
                            # Biased variance
                            numpy.var(weighted_sum, ddof=0, axis=0, keepdims=True)
                            + 1e-8
                        )
                        + layer["beta"]
                    ),
                    # This layer's activation
                    "activation": (
                        activation := self._activate(
                            activate=layer["activate"],
                            weighted_sum=batch_normalized_weighted_sum,
                        )
                    ),
                }
            )
        # The activation of layer L-1 (the last layer) is the output-layer prediction
        # (L is the number of layers)
        y_predict = activation
        return y_predict

    def _activate(
        self,
        activate: Literal["relu", "linear", "softmax"],
        weighted_sum: numpy.ndarray,
    ) -> numpy.ndarray:
        """
        Activation function

        :param activate: activation function
        :param weighted_sum: weighted sum
        :return: activation
        """
        match activate:
            case "relu":
                return numpy.maximum(0, weighted_sum)
            case "linear":
                return weighted_sum
            case "softmax":
                # Exponentials of the weighted sum, shifted by the row maximum for
                # numerical stability
                e_weighted_sum = numpy.exp(
                    weighted_sum - numpy.max(weighted_sum, axis=1, keepdims=True)
                )
                return e_weighted_sum / numpy.sum(e_weighted_sum, axis=1, keepdims=True)

    def _calculate_loss(
        self,
        y_true: numpy.ndarray,
        y_predict: numpy.ndarray,
    ) -> numpy.floating:
        """
        Calculate the loss

        :param y_true: output-layer ground truth
        :param y_predict: output-layer prediction
        :return: loss value
        """
        # Name of layer L-1 (the last layer)
        layer_name = list(self.neural_network.keys())[-1]
        # Compute the loss according to the activation function of layer L-1 (the last layer)
        match activate := self.neural_network[layer_name]["activate"]:
            case "linear":
                # Mean squared error
                loss = 0.5 * numpy.mean(numpy.square(y_true - y_predict))
            case "softmax":
                # Cross-entropy loss
                loss = -1 * numpy.mean(
                    numpy.sum(y_true * numpy.log(y_predict + 1e-8), axis=1)
                )
            case _:
                raise RuntimeError(
                    f"Unsupported output-layer activation function {activate}"
                )
        return loss
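    # NOTE: the original source does not define an activation-derivative helper; the
    # method below is a minimal sketch added so the backward pass can mirror the
    # dispatch style of _activate. The name _activate_derivative is an assumption.
    def _activate_derivative(
        self,
        activate: Literal["relu", "linear"],
        weighted_sum: numpy.ndarray,
    ) -> numpy.ndarray:
        """
        Derivative of the activation function with respect to its input

        :param activate: activation function
        :param weighted_sum: weighted sum the activation was applied to
        :return: element-wise derivative
        """
        match activate:
            case "relu":
                return (weighted_sum > 0).astype(weighted_sum.dtype)
            case "linear":
                return numpy.ones_like(weighted_sum)
            case _:
                # softmax appears only in the output layer, where its derivative is
                # folded into the error term y_predict - y_true and never needed here
                raise RuntimeError(f"Unsupported activation function {activate}")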
"softmax": loss = -1 * numpy.mean( numpy.sum(y_true * numpy.log(y_predict + 1e-8), axis=1) ) # 使用交叉熵损失公式 case _: raise RuntimeError(f"暂不支持该输出层激活函数 {activate}") return loss def _backward_propagate( self, x: numpy.ndarray, y_true: numpy.ndarray, y_predict: numpy.ndarray, ) -> None: """ 后向传播 :param x: 输入层输入 :param y_true: 输出层真实值 :param y_predict: 输出层预测值 :return: 无 """ # 所有层的层名 layer_names = list(self.neural_network.keys()) for idx, layer_name in enumerate(reversed(layer_names)): # 当前层激活函数、加权和、批标准化加权和和激活值 activate, weighted_sum, batch_normalized_weighted_sum, activation = ( self.neural_network[layer_name]["activate"], self.neural_network[layer_name]["weighted_sum"], self.neural_network[layer_name]["batch_normalized_weighted_sum"], self.neural_network[layer_name]["activation"], ) # 输出层的误差项 if idx == 0: match activate: case "linear" | "softmax": delta = y_predict - y_true # 损失函数对第L-1层(最后一层)激活值的梯度 case _: raise RuntimeError(f"暂不支持该输出层激活函数 {activate}") # 隐含层的误差项 else: delta = numpy.dot(delta, self.neural_network[layer_names[idx - 1]]["weight"].T) delta = 0