# -*- coding: utf-8 -*-
"""
Neural network
"""

# Import modules
from typing import List, Literal

import numpy


class NeuralNetwork:
    """
    Neural network
    """

    def __init__(
        self,
        neurons: List[int],
        hidden_activate: Literal["relu"] = "relu",
        output_activate: Literal["linear", "softmax"] = "linear",
    ):
        """
        Initialize
        :param neurons: layer sizes, e.g. [2, 10, 1] means an input layer with 2 neurons,
            a first hidden layer with 10 neurons, and an output layer with 1 neuron
        :param hidden_activate: hidden-layer activation function, defaults to relu
        :param output_activate: output-layer activation function, defaults to linear
        """
        print("Initializing neural network...", end="")

        # Layer sizes
        self.neurons = neurons

        # Hidden-layer activation function
        self.hidden_activate = hidden_activate
        # Output-layer activation function
        self.output_activate = output_activate

        # Network structure
        self.neural_network = {}
        # Initialize the weights and biases of every layer
        self._init_neural_network()

        print("done")

    def _init_neural_network(self):
        """
        Initialize the weights and biases of every layer
        """
        for idx in range(1, len(self.neurons)):
            # For a hidden layer, the standard deviation of the weights depends on the
            # hidden-layer activation; for the output layer it depends on the output-layer activation
            if idx != len(self.neurons) - 1:
                # Activation function
                activate = self.hidden_activate
                match self.hidden_activate:
                    case "relu":
                        # Standard deviation of this layer's weights
                        standard_deviation = numpy.sqrt(
                            2 / self.neurons[idx - 1]
                        )  # He initialization
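                        # He initialization: a weight variance of 2 / fan_in keeps the
                        # variance of ReLU pre-activations roughly constant across layers.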
                    case _:
                        raise RuntimeError(
                            f"Unsupported hidden-layer activation function {self.hidden_activate}"
                        )
            else:
                # Activation function
                activate = self.output_activate
                match self.output_activate:
                    case "linear":
                        # Standard deviation of this layer's weights
                        standard_deviation = numpy.sqrt(1 / self.neurons[idx - 1])
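                        # A variance of 1 / fan_in is LeCun-style initialization, which keeps
                        # the output variance of a linear layer close to its input variance.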
                    case "softmax":
                        # Standard deviation of this layer's weights
                        standard_deviation = numpy.sqrt(
                            2 / (self.neurons[idx - 1] + self.neurons[idx])
                        )  # Xavier initialization
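                        # Xavier (Glorot) initialization: a variance of 2 / (fan_in + fan_out)
                        # balances the scale of forward activations and back-propagated gradients.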
                    case _:
                        raise RuntimeError(
                            f"Unsupported output-layer activation function {self.output_activate}"
                        )

            self.neural_network[f"layer:{idx:03d}"] = {
                "weight": numpy.random.randn(self.neurons[idx - 1], self.neurons[idx])
                * standard_deviation,  # Weights of this layer
                "bias": numpy.zeros((1, self.neurons[idx])),  # Biases of this layer
                "activate": activate,  # Activation function of this layer
                "gamma": numpy.ones((1, self.neurons[idx])),  # Batch-normalization scale factor of this layer
                "beta": numpy.zeros((1, self.neurons[idx])),  # Batch-normalization shift factor of this layer
            }
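            # For example, with neurons=[2, 10, 1] this loop stores "layer:001" with a
            # (2, 10) weight matrix and "layer:002" with a (10, 1) weight matrix; bias,
            # gamma, and beta are row vectors of shape (1, n) for a layer with n neurons.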

    def _forward_propagate(self, x: numpy.ndarray) -> numpy.ndarray:
        """
        Forward propagation
        :param x: input to the input layer
        :return: predictions of the output layer
        """
        activation = x  # Treat the input as the activation of layer 0
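        # Each iteration below computes, for one layer: the weighted sum
        # z = a_prev @ W + b, its batch-normalized form
        # z_hat = gamma * (z - mean(z)) / sqrt(var(z) + 1e-8) + beta
        # (statistics over the batch axis), and the activation a = f(z_hat),
        # caching all three in the layer's dictionary.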
        for layer_name, layer in self.neural_network.items():
            self.neural_network[layer_name].update(
                {
                    "weighted_sum": (
                        weighted_sum := numpy.dot(activation, layer["weight"])
                        + layer["bias"]
                    ),  # Weighted sum of this layer
                    "batch_normalized_weighted_sum": (
                        batch_normalized_weighted_sum := layer["gamma"]
                        * (
                            weighted_sum
                            - numpy.mean(weighted_sum, axis=0, keepdims=True)
                        )
                        / numpy.sqrt(
                            numpy.var(
                                weighted_sum, ddof=0, axis=0, keepdims=True
                            )  # Biased (population) variance
                            + 1e-8
                        )
                        + layer["beta"]
                    ),  # Batch-normalized weighted sum of this layer
                    "activation": (
                        activation := self._activate(
                            activate=layer["activate"],
                            weighted_sum=batch_normalized_weighted_sum,
                        )
                    ),  # Activation of this layer
                }
            )

        y_predict = activation  # The activation of layer L-1 (the last layer) is the output prediction (L is the number of layers)
        return y_predict

    def _activate(
        self,
        activate: Literal["relu", "linear", "softmax"],
        weighted_sum: numpy.ndarray,
    ) -> numpy.ndarray:
        """
        Activation function
        :param activate: activation function
        :param weighted_sum: weighted sum
        :return: activation
        """
        match activate:
            case "relu":
                return numpy.maximum(0, weighted_sum)
            case "linear":
                return weighted_sum
            case "softmax":
                # Exponentials of the shifted weighted sum
                e_weighted_sum = numpy.exp(
                    weighted_sum - numpy.max(weighted_sum, axis=1, keepdims=True)
                )
                return e_weighted_sum / numpy.sum(e_weighted_sum, axis=1, keepdims=True)

    def _calculate_loss(
        self,
        y_true: numpy.ndarray,
        y_predict: numpy.ndarray,
    ) -> numpy.floating:
        """
        Compute the loss
        :param y_true: ground-truth values of the output layer
        :param y_predict: predicted values of the output layer
        :return: loss value
        """
        # Name of layer L-1 (the last layer)
        layer_name = list(self.neural_network.keys())[-1]
        # Choose the loss according to the activation function of layer L-1 (the last layer)
        match activate := self.neural_network[layer_name]["activate"]:
            case "linear":
                loss = 0.5 * numpy.mean(
                    numpy.square(y_true - y_predict)
                )  # Mean squared error
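                # The 0.5 factor makes the derivative of each squared term simply
                # (y_predict - y_true), the quantity used as the output error term
                # in _backward_propagate.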
            case "softmax":
                loss = -1 * numpy.mean(
                    numpy.sum(y_true * numpy.log(y_predict + 1e-8), axis=1)
                )  # Cross-entropy loss
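                # The 1e-8 term guards against log(0); with one-hot targets this is the
                # standard categorical cross-entropy, whose gradient with respect to the
                # pre-softmax weighted sum also reduces to (y_predict - y_true).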
            case _:
                raise RuntimeError(f"Unsupported output-layer activation function {activate}")

        return loss

    def _backward_propagate(
        self,
        x: numpy.ndarray,
        y_true: numpy.ndarray,
        y_predict: numpy.ndarray,
    ) -> None:
        """
        Backward propagation
        :param x: input to the input layer
        :param y_true: ground-truth values of the output layer
        :param y_predict: predicted values of the output layer
        :return: None
        """
        # Names of all layers
        layer_names = list(self.neural_network.keys())

        for idx, layer_name in enumerate(reversed(layer_names)):
            # Activation function, weighted sum, batch-normalized weighted sum, and activation of the current layer
            activate, weighted_sum, batch_normalized_weighted_sum, activation = (
                self.neural_network[layer_name]["activate"],
                self.neural_network[layer_name]["weighted_sum"],
                self.neural_network[layer_name]["batch_normalized_weighted_sum"],
                self.neural_network[layer_name]["activation"],
            )

            # Error term of the output layer
            if idx == 0:
                match activate:
                    case "linear" | "softmax":
                        delta = y_predict - y_true  # Gradient of the loss with respect to the pre-activation of layer L-1 (the last layer)
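                        # For a linear output with mean squared error and a softmax output
                        # with cross-entropy, this gradient takes the same form,
                        # y_predict - y_true, which is why both cases share one branch.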
                    case _:
                        raise RuntimeError(f"Unsupported output-layer activation function {activate}")
            # Error term of a hidden layer
            else:
                # Propagate the error term backwards through the weights of the next layer
                # (the layer handled in the previous iteration of this reversed loop)
                delta = numpy.dot(
                    delta,
                    self.neural_network[layer_names[len(layer_names) - idx]]["weight"].T,
                )