This commit is contained in:
liubiren 2026-02-01 14:52:19 +08:00
parent 91b272a658
commit 57f7a0607f
1 changed files with 44 additions and 37 deletions

View File

@ -4,7 +4,7 @@
""" """
# 导入模块 # 导入模块
from typing import List, Literal, Optional, Dict from typing import List, Literal, Optional, Dict, Tuple
import numpy import numpy
@ -35,13 +35,12 @@ class NeuralNetwork:
print("正在初始化神经网络...", end="") print("正在初始化神经网络...", end="")
if not ( if not (
all(x >= 1 if isinstance(x, int) else False for x in structure) len(structure) >= 3
if isinstance(structure, list) and len(structure) >= 3 and all(x >= 1 if isinstance(x, int) else False for x in structure)
if isinstance(structure, list)
else False else False
): ):
raise RuntimeError( raise RuntimeError("神经网络结构应为列表长度大于等于3且元素均为正整数")
"神经网络结构应为长度大于等于3的列表且列表元素应为大于等于1的整数"
)
# 初始化神经网络结构 # 初始化神经网络结构
self.structure = structure self.structure = structure
@ -55,31 +54,28 @@ class NeuralNetwork:
# 神经网络层数定义第0层为输入层第l层为隐含层l=1,2,...,L-1第L层为输出层L为神经网络层数深度为L+1 # 神经网络层数定义第0层为输入层第l层为隐含层l=1,2,...,L-1第L层为输出层L为神经网络层数深度为L+1
self.layer_counts = len(structure) - 1 self.layer_counts = len(structure) - 1
# 初始化是否训练
self.training = True
numpy.random.seed(seed) # 设置随机种子 numpy.random.seed(seed) # 设置随机种子
self.parameters = {} self.parameters = {0: {}}
# 初始化神经网络参数 # 初始化神经网络参数
for layer_index in range(1, self.layer_counts + 1): for layer_index in range(1, self.layer_counts + 1):
# 上一层和当前层神经元数
previous_layer_neuron_counts, current_layer_neuron_counts = (
self.structure[layer_index - 1],
self.structure[layer_index],
)
self.parameters[layer_index] = { self.parameters[layer_index] = {
"activate": ( "activate": (
activate := ( activate := (
self.hidden_activate self.output_activate
if layer_index != self.layer_counts if layer_index == self.layer_counts
else self.output_activate else self.hidden_activate
) )
), # 激活函数 ), # 激活函数
"weight": numpy.random.randn( "weight": self._init_weight(
current_layer_neuron_counts, previous_layer_neuron_counts
)
* self._calculate_init_weight_scale(
activate=activate, activate=activate,
previous_layer_neuron_counts=previous_layer_neuron_counts, previous_layer_neuron_counts=self.structure[layer_index - 1],
current_layer_neuron_counts=current_layer_neuron_counts, current_layer_neuron_counts=(
current_layer_neuron_counts := self.structure[layer_index]
),
), # 权重,维度为[当前层神经元数,上一层神经元数],适配加权输入=权重*输入+平移 ), # 权重,维度为[当前层神经元数,上一层神经元数],适配加权输入=权重*输入+平移
"bias": numpy.zeros((current_layer_neuron_counts, 1)), # 平移 "bias": numpy.zeros((current_layer_neuron_counts, 1)), # 平移
} }
@ -88,30 +84,33 @@ class NeuralNetwork:
print("已完成") print("已完成")
def _calculate_init_weight_scale( def _init_weight(
self, self,
activate: Literal["relu", "linear", "softmax"], activate: Literal["relu", "linear", "softmax"],
previous_layer_neuron_counts: int, previous_layer_neuron_counts: int,
current_layer_neuron_counts: int, current_layer_neuron_counts: int,
) -> numpy.floating: ) -> numpy.floating:
""" """
计算初始化权重时缩放因子 初始化权重
:param activate: 激活函数 :param activate: 激活函数
:param previous_layer_neuron_counts: 上一层神经元数 :param previous_layer_neuron_counts: 上一层神经元数
:param current_layer_neuron_counts: 当前层神经元数 :param current_layer_neuron_counts: 当前层神经元数
:return: 初始化权重时缩放因子 :return: 初始化后的权重维度为[当前层神经元数上一层神经元数]
""" """
weight = numpy.random.randn(
current_layer_neuron_counts, previous_layer_neuron_counts
)
match activate: match activate:
case "relu": case "relu":
return numpy.sqrt( return weight * numpy.sqrt(
2 / previous_layer_neuron_counts 2 / previous_layer_neuron_counts
) # 使用He初始化权重方法 ) # 使用He初始化权重方法
case "linear": case "linear":
return numpy.sqrt( return weight * numpy.sqrt(
2 / previous_layer_neuron_counts 2 / previous_layer_neuron_counts
) # 使用He初始化权重方法 ) # 使用He初始化权重方法
case "softmax": case "softmax":
return numpy.sqrt( return weight * numpy.sqrt(
2 / (previous_layer_neuron_counts + current_layer_neuron_counts) 2 / (previous_layer_neuron_counts + current_layer_neuron_counts)
) # 使用Xavier初始化权重方法 ) # 使用Xavier初始化权重方法
@ -145,8 +144,8 @@ class NeuralNetwork:
raise RuntimeError( raise RuntimeError(
"输入和真实输出应为数组,其中输入维度应为[输入神经元数, 样本数],真实输出维度应为[输出神经元数, 样本数],样本数应需相同" "输入和真实输出应为数组,其中输入维度应为[输入神经元数, 样本数],真实输出维度应为[输出神经元数, 样本数],样本数应需相同"
) )
# 归一化输入并将其作为输入层的输出 # 归一化输入
self.parameters[0] = {"activation": self._normalize(input=X)} self.parameters[0].update({"activation": self._normalize(input=X)})
epoch = 1 epoch = 1
while True: while True:
@ -170,6 +169,7 @@ class NeuralNetwork:
if epoch % 100 == 0: if epoch % 100 == 0:
print(f"{epoch:6d} 轮损失为 {loss:9.3f},继续训练...") print(f"{epoch:6d} 轮损失为 {loss:9.3f},继续训练...")
epoch += 1 epoch += 1
def _normalize( def _normalize(
@ -178,11 +178,20 @@ class NeuralNetwork:
) -> numpy.ndarray: ) -> numpy.ndarray:
""" """
归一化 归一化
:param input: 输入 :param input: 输入维度为[输入神经元数, 样本数]
:return: 归一化后的输入维度与输入相同 :return: 归一化后的输入
""" """
return (input - numpy.mean(input, axis=1, keepdims=True)) / numpy.sqrt( # 若为训练模式则更新各输入神经元所有样本的平均值和方差,维度为[输入神经元数, 1]
numpy.var(input, ddof=0, axis=1, keepdims=True) + self.epsilon if self.training:
self.parameters[0].update(
{
"mean": numpy.mean(input, axis=1, keepdims=True),
"variance": numpy.var(input, ddof=0, axis=1, keepdims=True),
}
)
return (input - self.parameters[0]["mean"]) / numpy.sqrt(
self.parameters[0]["variance"] + self.epsilon
) )
def _forward_propagate(self) -> None: def _forward_propagate(self) -> None:
@ -290,17 +299,15 @@ class NeuralNetwork:
:param y_true: 真实输输出维度为[输出神经元数, 样本数] :param y_true: 真实输输出维度为[输出神经元数, 样本数]
:return: :return:
""" """
sample_counts = y_true.shape[1] # 样本数
for layer_index in range(self.layer_counts, 0, -1): for layer_index in range(self.layer_counts, 0, -1):
self.parameters[layer_index].update( self.parameters[layer_index].update(
{ {
"delta_activation": ( "delta_activation": (
delta_activation := ( delta_activation := (
(self.parameters[layer_index]["activation"] - y_true) (self.parameters[layer_index]["activation"] - y_true)
/ sample_counts / y_true.shape[1]
# 若为输出层且激活函数为linear则直接计算输出的梯度若为softmax则简化计算输出的梯度 # 若为输出层则直接计算输出的梯度,若为隐含层则基于下一层的加权输入的梯度推出当前层的输出梯度
if layer_index == self.layer_counts if layer_index == self.layer_counts
# 若为隐含层则基于下一层的权重转置和加权输入的梯度计算当前层的输出梯度
else numpy.dot( else numpy.dot(
self.parameters[layer_index + 1]["weight"].T, self.parameters[layer_index + 1]["weight"].T,
self.parameters[layer_index + 1][ self.parameters[layer_index + 1][