This commit is contained in:
parent 68f11f911f
commit 96451c16ed

神经网络/main.py | 274
@@ -21,7 +21,6 @@ class NeuralNetwork:
        structure: List[int],
        hidden_activate: Literal["relu"] = "relu",
        output_activate: Literal["linear", "softmax"] = "linear",
        momentum: float = 0.9,
        epsilon: float = 1e-9,
    ):
        """
@@ -29,7 +28,6 @@ class NeuralNetwork:
        :param structure: network structure, e.g. [2, 10, 1] describes a 2-layer network: an input layer with 2 neurons, a hidden layer with 10 neurons, and an output layer with 1 neuron
        :param hidden_activate: activation function of the hidden layers, defaults to relu
        :param output_activate: activation function of the output layer, defaults to linear
        :param momentum: momentum factor, defaults to 0.9
        :param epsilon: tiny constant, defaults to 1e-9
        """
        print("Initializing the neural network...", end="")
@@ -55,7 +53,7 @@ class NeuralNetwork:
        # Number of network layers (layer 0 is the input layer, layer L is the output layer where L is the number of layers, layers l = 1, 2, ..., L-1 are hidden layers, and the depth is L+1)
        self.layer_counts = len(structure) - 1

        self.parameters = {}
        self.parameters = {0: {}}
        # Initialize the network parameters
        for layer_index in range(1, self.layer_counts + 1):
            # Neuron counts of the previous layer and the current layer
@@ -83,18 +81,6 @@ class NeuralNetwork:
                    )
                ),  # Weights, shape [current layer neurons, previous layer neurons], matching weighted input = weight * input + bias. Hidden layers use He initialization; the output layer uses standard Xavier initialization when its activation is linear and a modified Xavier initialization otherwise
                "bias": numpy.zeros((current_layer_neuron_counts, 1)),  # Bias
                "moving_average": numpy.zeros(
                    (current_layer_neuron_counts, 1)
                ),  # Moving mean for batch normalization
                "moving_variance": numpy.ones(
                    (current_layer_neuron_counts, 1)
                ),  # Moving variance for batch normalization
                "gamma": numpy.ones(
                    (current_layer_neuron_counts, 1)
                ),  # Scale factor for batch normalization
                "beta": numpy.zeros(
                    (current_layer_neuron_counts, 1)
                ),  # Shift factor for batch normalization
                "activate": (
                    self.hidden_activate
                    if layer_index < self.layer_counts
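For reference, and not part of this commit: the schemes named in the weight comment above are commonly written as in the minimal sketch below. The helper name init_weight and its arguments are hypothetical, and the variances shown (2 / fan_in for He, 2 / (fan_in + fan_out) for Xavier/Glorot) are one usual convention; the file's own exact formula and its "modified Xavier" variant may differ.

import numpy

def init_weight(fan_out: int, fan_in: int, kind: str) -> numpy.ndarray:
    # Hypothetical helper, not from the diff: draw a [fan_out, fan_in] weight matrix.
    if kind == "he":  # ReLU hidden layers: variance 2 / fan_in
        scale = numpy.sqrt(2.0 / fan_in)
    else:  # "xavier": linear output layer, variance 2 / (fan_in + fan_out)
        scale = numpy.sqrt(2.0 / (fan_in + fan_out))
    return numpy.random.randn(fan_out, fan_in) * scale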
@@ -102,10 +88,6 @@ class NeuralNetwork:
                ),  # Activation function
            }

        self.momentum = momentum
        # Initialize the training-mode flag
        self.training = None

        self.epsilon = epsilon

        print("done")
@@ -120,8 +102,8 @@ class NeuralNetwork:
    ) -> None:
        """
        Train the neural network
        :param X: input to the input layer, shape [input features, samples]
        :param y_true: ground-truth output of the output layer, shape [output features, samples]
        :param X: input, shape [input neurons, samples]
        :param y_true: ground-truth output, shape [output neurons, samples]
        :param target_loss: target loss
        :param epochs: number of training epochs
        :param learning_rate: learning rate
@@ -138,19 +120,19 @@ class NeuralNetwork:
            else False
        ):
            raise RuntimeError(
                f"The input of the input layer and the output of the output layer must be arrays; the input must have shape [input features, samples] and the output must have shape [output features, samples]. The sample counts must match, the number of input features must be {self.structure[0]}, and the number of output features must be {self.structure[-1]}"
                "The input and the ground-truth output must be arrays, where the input has shape [input neurons, samples] and the ground-truth output has shape [output neurons, samples], and their sample counts must match"
            )
        # Enable training mode
        self.training = True
        # Normalize the input of the input layer
        X = self._normalize(input=X)
        # Normalize the input
        self.parameters[0]["activation"] = self._normalize(
            input=X
        )  # Use the input as the output of the input layer

        epoch = 0
        epoch = 1
        while True:
            # Forward propagation
            y_predict = self._forward_propagate(X=X)
            self._forward_propagate()
            # Compute the loss
            loss = self._calculate_loss(y_true=y_true, y_predict=y_predict)
            loss = self._calculate_loss(y_true=y_true)
            if loss <= target_loss:
                print(
                    f"The loss at epoch {epoch:6d} has reached the target loss {target_loss:9.3f}; training finished"
@@ -161,7 +143,7 @@ class NeuralNetwork:
                break

            # Backward propagation
            self._backward_propagate(X=X, y_true=y_true, y_predict=y_predict)
            self._backward_propagate(y_true=y_true)
            # Update the network parameters
            self._update_parameters(learning_rate=learning_rate)
@@ -171,7 +153,7 @@ class NeuralNetwork:

        for idx in numpy.random.choice(X.shape[1], size=10, replace=False):
            y_true_val = y_true[0, idx]
            y_pred_val = y_predict[0, idx]
            y_pred_val = self.parameters[self.layer_counts]["activation"][0, idx]
            error = abs(y_true_val - y_pred_val)
            print(f"{idx:<10} {y_true_val:<15.4f} {y_pred_val:<15.4f} {error:<15.4f}")
@@ -188,98 +170,30 @@ class NeuralNetwork:
            numpy.var(input, ddof=0, axis=1, keepdims=True) + self.epsilon
        )

    def _forward_propagate(self, X: numpy.ndarray) -> numpy.ndarray:
    def _forward_propagate(self) -> None:
        """
        Forward propagation
        :param X: input to the input layer, shape [input features, samples]
        :return: predicted output of the output layer, shape [output features, samples]
        :return: predicted output of the output layer, shape [output neurons, samples]
        """
        activation = X  # Use the input of the input layer as the output of layer 0
        for layer_index in range(1, self.layer_counts + 1):
            self.parameters[layer_index].update(
                {
                    "x": (x := activation),  # Use the previous layer's output as the current layer's input
                    "weighted_input": (
                        weighted_input := numpy.dot(
                            self.parameters[layer_index]["weight"], x
                            self.parameters[layer_index]["weight"],
                            self.parameters[layer_index - 1]["activation"],
                        )
                        + self.parameters[layer_index]["bias"]
                    ),  # Weighted input, shape [current layer neurons, samples]
                    **(
                        output := self._batch_normalize(
                            input=weighted_input,
                            moving_average=self.parameters[layer_index][
                                "moving_average"
                            ],
                            moving_variance=self.parameters[layer_index][
                                "moving_variance"
                            ],
                            gamma=self.parameters[layer_index]["gamma"],
                            beta=self.parameters[layer_index]["beta"],
                        )
                    ),  # Batch normalization of the weighted input
                    ),  # Weighted input, shape [current layer neurons, samples]; the previous layer's output is used as the current layer's input
                    "activation": (
                        activation := self._activate(
                            activate=self.parameters[layer_index]["activate"],
                            input=output["normalization"],
                            input=weighted_input,
                        )
                    ),  # Output
                }
            )

        y_predict = activation  # Use the output of layer L (the output layer) as the predicted output of the output layer
        return y_predict

    def _batch_normalize(
        self,
        input: numpy.ndarray,
        moving_average: numpy.ndarray,
        moving_variance: numpy.ndarray,
        gamma: numpy.ndarray,
        beta: numpy.ndarray,
    ) -> Dict[str, numpy.ndarray]:
        """
        Batch normalization
        :param input: input
        :param moving_average: moving mean for batch normalization, shape [rows of the input, 1]
        :param moving_variance: moving variance for batch normalization, shape [rows of the input, 1]
        :param gamma: scale factor for batch normalization, shape [rows of the input, 1]
        :param beta: shift factor for batch normalization, shape [rows of the input, 1]
        :return: batch-normalized input, same shape as the input
        """
        return {
            "average": (
                average := (
                    numpy.mean(input, axis=1, keepdims=True)
                    if self.training
                    else moving_average
                )
            ),  # Mean of each row over all columns, shape [rows of the input, 1]
            "variance": (
                variance := (
                    numpy.var(input, ddof=0, axis=1, keepdims=True)
                    if self.training
                    else moving_variance
                )
            ),  # Variance of each row over all columns, shape [rows of the input, 1]
            "moving_average": (
                self.momentum * moving_average + (1 - self.momentum) * average
                if self.training
                else moving_average
            ),  # Updated moving mean for batch normalization
            "moving_variance": (
                self.momentum * moving_variance + (1 - self.momentum) * variance
                if self.training
                else moving_variance
            ),  # Updated moving variance for batch normalization
            "standard_deviation": (
                standard_deviation := numpy.sqrt(variance + self.epsilon)
            ),  # Standard deviation of each row over all columns, shape [rows of the input, 1]
            "normalization": (
                (input - average) / standard_deviation * gamma + beta
            ),  # Normalized input, same shape as the input
        }

    def _activate(
        self,
        activate: Literal["relu", "linear", "softmax"],
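As a reading aid, not part of the diff: the _batch_normalize method removed above standardizes each row of the weighted input over the batch and then rescales it. A minimal stand-alone sketch on made-up data, assuming the constructor defaults of momentum 0.9 and epsilon 1e-9:

import numpy

x = numpy.random.randn(3, 5)  # weighted input, shape [neurons, samples]
gamma = numpy.ones((3, 1))    # scale factor
beta = numpy.zeros((3, 1))    # shift factor
mean = numpy.mean(x, axis=1, keepdims=True)             # per-neuron batch mean
variance = numpy.var(x, ddof=0, axis=1, keepdims=True)  # per-neuron batch variance
normalized = (x - mean) / numpy.sqrt(variance + 1e-9) * gamma + beta
# At inference time the batch statistics are replaced by moving statistics,
# updated during training as: moving = 0.9 * moving + (1 - 0.9) * batch_statistic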
@@ -288,8 +202,8 @@ class NeuralNetwork:
        """
        Apply the activation function to the input
        :param activate: activation function
        :param input: input, shape [current layer neurons, samples]
        :return: input after applying the activation function, shape [current layer neurons, samples]
        :param input: input
        :return: input after applying the activation function, same shape as the input
        """
        match activate:
            case "relu":
@@ -308,114 +222,55 @@ class NeuralNetwork:
    def _calculate_loss(
        self,
        y_true: numpy.ndarray,
        y_predict: numpy.ndarray,
    ) -> numpy.floating:
        """
        Compute the loss
        :param y_true: ground-truth output of the output layer, shape [output features, samples]
        :param y_predict: predicted output of the output layer, shape [output features, samples]
        :return: loss value
        :param y_true: ground-truth output, shape [output neurons, samples]
        :return: loss
        """
        return (
            0.5 * numpy.mean(numpy.square(y_true - y_predict))
            0.5
            * numpy.mean(
                numpy.square(y_true - self.parameters[self.layer_counts]["activation"])
            )
            if self.parameters[self.layer_counts]["activate"] == "linear"
            else -1
            * numpy.mean(
                numpy.sum(
                    y_true
                    * numpy.log(numpy.clip(y_predict, self.epsilon, 1 - self.epsilon)),
                    * numpy.log(
                        numpy.clip(
                            self.parameters[self.layer_counts]["activation"],
                            self.epsilon,
                            1 - self.epsilon,
                        )
                    ),
                    axis=0,
                )
            )
        )  # If the output layer's activation is linear the loss uses the mean squared error, otherwise the cross-entropy
        )  # If the output layer's activation is linear the loss is based on the mean squared error, otherwise on the cross-entropy
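As a reading aid, not part of the diff: with m samples and a_L denoting the output-layer activation, the two branches above correspond to

L_{\mathrm{MSE}} = \tfrac{1}{2}\,\mathrm{mean}\bigl((y - a_L)^2\bigr),
\qquad
L_{\mathrm{CE}} = -\frac{1}{m}\sum_{i=1}^{m}\sum_{k} y_{k,i}\,\log a_{L,k,i},

where the mean in the first case runs over all output entries and samples, and a_L is clipped to [epsilon, 1 - epsilon] before the logarithm in the second case.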
    def _backward_propagate(
        self,
        X: numpy.ndarray,
        y_true: numpy.ndarray,
        y_predict: numpy.ndarray,
    ) -> None:
        """
        Backward propagation
        :param X: input to the input layer, shape [input features, samples]
        :param y_true: ground-truth output of the output layer, shape [output features, samples]
        :param y_predict: predicted output of the output layer, shape [output features, samples]
        :param y_true: ground-truth output, shape [output neurons, samples]
        :return: None
        """
        sample_counts = X.shape[1]  # Number of samples

        # Gradient of the loss with respect to the batch-normalized weighted input of the output layer
        self.parameters[self.layer_counts]["delta_normalization"] = (
            y_predict - y_true
        ) / sample_counts  # The mean squared error and the cross-entropy have the same gradient with respect to the output layer's predicted output
        # Gradient of the loss with respect to the weighted input of the output layer
        self.parameters[self.layer_counts]["delta_weighted_input"] = (
            self.parameters[self.layer_counts]["activation"] - y_true
        ) / sample_counts  # The losses based on the mean squared error and on the cross-entropy have the same gradient with respect to the output layer's weighted input

        for layer_index in range(self.layer_counts, 0, -1):
            self.parameters[layer_index].update(
                {
                    "delta_gamma": numpy.sum(
                        self.parameters[layer_index]["delta_normalization"]
                        * (
                            self.parameters[layer_index]["weighted_input"]
                            - self.parameters[layer_index]["weighted_input_average"]
                        )
                        / self.parameters[layer_index][
                            "weighted_input_standard_deviation"
                        ],
                        axis=1,
                        keepdims=True,
                    ),  # Gradient of the batch-normalization scale factor
                    "delta_beta": numpy.sum(
                        self.parameters[layer_index]["delta_normalization"],
                        axis=1,
                        keepdims=True,
                    ),  # Gradient of the batch-normalization shift factor
                    "delta_weighted_input": (
                        delta_weighted_input := (
                            sample_counts
                            * self.parameters[layer_index]["gamma"]
                            * self.parameters[layer_index]["delta_normalization"]
                            - numpy.sum(
                                self.parameters[layer_index]["gamma"]
                                * self.parameters[layer_index]["delta_normalization"],
                                axis=1,
                                keepdims=True,
                            )
                            - (
                                (
                                    self.parameters[layer_index]["weighted_input"]
                                    - self.parameters[layer_index][
                                        "weighted_input_average"
                                    ]
                                )
                                / self.parameters[layer_index][
                                    "weighted_input_standard_deviation"
                                ]
                            )
                            * numpy.sum(
                                self.parameters[layer_index]["gamma"]
                                * self.parameters[layer_index]["delta_normalization"]
                                * (
                                    (
                                        self.parameters[layer_index]["weighted_input"]
                                        - self.parameters[layer_index][
                                            "weighted_input_average"
                                        ]
                                    )
                                    / self.parameters[layer_index][
                                        "weighted_input_standard_deviation"
                                    ]
                                ),
                                axis=1,
                                keepdims=True,
                            )
                        )
                        * (1.0 / sample_counts)
                        / self.parameters[layer_index][
                            "weighted_input_standard_deviation"
                        ]
                    ),  # Gradient of the weighted input
                    "delta_weight": numpy.dot(
                        delta_weighted_input,
                        self.parameters[layer_index]["delta_weighted_input"],
                        (
                            X
                            if layer_index == 1
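As a reading aid, not part of the diff: the long removed expression above is the standard batch-normalization backward pass. With m samples, per-neuron mean \mu and standard deviation \sigma of the weighted input z, normalized value \hat{z} = (z - \mu)/\sigma, and g = \partial L/\partial \hat{z} (the removed delta_normalization), it computes

\frac{\partial L}{\partial \gamma} = \sum_{i} g_i \hat{z}_i,
\qquad
\frac{\partial L}{\partial \beta} = \sum_{i} g_i,
\qquad
\frac{\partial L}{\partial z} = \frac{\gamma}{m\,\sigma}\Bigl(m\,g - \sum_{i} g_i - \hat{z}\sum_{i} g_i \hat{z}_i\Bigr),

with the sums running over the samples of each neuron; the replacement code no longer needs these terms because batch normalization was removed from the forward pass.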
@@ -423,20 +278,41 @@ class NeuralNetwork:
                        ).T,
                    ),  # Gradient of the weights
                    "delta_bias": numpy.sum(
                        delta_weighted_input,
                        self.parameters[layer_index]["delta_weighted_input"],
                        axis=1,
                        keepdims=True,
                    ),  # Gradient of the bias
                }
            )

            if layer_index > 1:
                self.parameters[layer_index - 1]["delta_normalization"] = numpy.dot(
            if layer_index != 1:
                self.parameters[layer_index - 1].update(
                    {
                        "delta_weighted_input": numpy.dot(
                            self.parameters[layer_index]["weight"].T,
                            self.parameters[layer_index]["delta_weighted_input"],
                        ) * (self.parameters[layer_index - 1]["normalization"] > 0).astype(
                            numpy.float32
                        )
                        * self._activate_derivative(
                            activate=self.parameters[layer_index - 1]["activate"],
                            input=self.parameters[layer_index - 1]["weighted_input"],
                        ),
                    }
                )

    def _activate_derivative(
        self,
        activate: Literal["relu"],
        input: numpy.ndarray,
    ) -> numpy.ndarray:
        """
        Compute the derivative of the activation function at the input
        :param activate: activation function
        :param input: input
        :return: derivative of the activation function at the input, same shape as the input
        """
        match activate:
            case "relu":
                return numpy.where(input > 0, 1, 0)

    def _update_parameters(self, learning_rate: float) -> None:
        """
@@ -453,12 +329,6 @@ class NeuralNetwork:
                    "bias": self.parameters[layer_index]["bias"]
                    - self.parameters[layer_index]["delta_bias"]
                    * learning_rate,  # Bias
                    "gamma": self.parameters[layer_index]["gamma"]
                    - self.parameters[layer_index]["delta_gamma"]
                    * learning_rate,  # Scale factor for batch normalization
                    "beta": self.parameters[layer_index]["beta"]
                    - self.parameters[layer_index]["delta_beta"]
                    * learning_rate,  # Shift factor for batch normalization
                }
            )
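As a reading aid, not part of the diff: each parameter above is updated by plain gradient descent,

\theta \leftarrow \theta - \eta\,\frac{\partial L}{\partial \theta},

where \eta is the learning_rate passed to train; the constructor's momentum argument only feeds the batch-normalization moving statistics, not a momentum-based weight update.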
@@ -467,16 +337,18 @@ class NeuralNetwork:
if __name__ == "__main__":
    # Generate test data (a regression task)
    numpy.random.seed(42)  # Set the random seed for reproducibility
    X = numpy.random.randn(2, 100) * 5
    X = numpy.random.randn(2, 100)
    # Ground-truth function: y = 2*x1 + 3*x2 + 1 (with noise)
    y_true = 2 * X[0:1, :] ** 2 + 3 * X[1:2, :] + 1 + numpy.random.randn(1, 100) * 0.1
    y_true = 2 * X[0:1, :] ** 2 + 3 * X[1:2, :] + 1

    # Create and train the neural network
    neural_network = NeuralNetwork(
        structure=[2, 200, 100, 50, 1],  # 2 inputs, 10 hidden neurons, 1 output
        structure=[2, 16, 4, 1],  # 2 inputs, 10 hidden neurons, 1 output
    )

    # Train
    neural_network.train(
        X=X, y_true=y_true, target_loss=0.001, epochs=10000, learning_rate=0.001
        X=X, y_true=y_true, target_loss=0.001, epochs=1000, learning_rate=0.001
    )

    print(neural_network.parameters[2]["activation"])