Python-coded neural network does not learn properly


Problem description

My network does not learn to recognize the inputs separately: it either outputs the averaged result or becomes biased toward one particular output. What am I doing wrong?

import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0


class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape) # The number of layers
        self.shape = shape # The number of neurons in each layer
        self.weights = [
            np.array([np.random.rand(shape[l - 1]) for _ in range(shape[l])])
            for l in range(1, self.layers)
        ] # A list of matrices of weights connecting neighbouring layers
        self.weighted_sums = [np.zeros(l) for l in shape]
        self.activations = [np.zeros(l) for l in shape]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y):
        delta = [np.empty(self.shape[l]) for l in range(1, self.layers)] # Here errors get stored
        delta[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1]) # The output error
        for l in reversed(range(self.layers - 2)): # The errors get backpropagated
            delta[l] = self.weights[l + 1].T @ delta[l + 1] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1): # The weights get updated online
            for j in range(self.shape[l + 1]):
                self.weights[l][j] -= 0.1 * self.activations[l + 1][j] * delta[l][j]


nn = NeuralNetwork((2, 2, 1))

X = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [0, 0]
])

Y = np.array([
    [1],
    [1],
    [0],
    [0]
])

# I train my network by randomly picking an example from my training set
for _ in range(1000):
    i = np.random.randint(0, 4)
    nn.forward_prop(X[i])
    nn.backprop(X[i], Y[i])

for x in X:
    nn.forward_prop(x)
    print(nn.activations[-1])

Answer

The matrix math of backpropagation is quite tough. It is especially confusing that the lists of weight matrices and deltas (and, in fact, the list of bias arrays too) should be one element shorter than the number of layers in the network, which makes the indexing easy to get wrong. Apparently, the problem was due to misindexing. Finally it works!
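
To make the off-by-one bookkeeping concrete, here is a small standalone sketch (an illustration, not part of the fix itself) of the list lengths and shapes for a (2, 3, 2, 1) network:

import numpy as np

shape = (2, 3, 2, 1) # 4 layers, but only 3 gaps between neighbouring layers
# One weight matrix, bias array and delta array per gap: len(shape) - 1 of each,
# so index l in these lists refers to the connection feeding layer l + 1
weights = [np.zeros((shape[l], shape[l - 1])) for l in range(1, len(shape))]
biases = [np.zeros(n) for n in shape[1:]]
for l, W in enumerate(weights):
    print(l, W.shape, biases[l].shape) # (3, 2) (3,), then (2, 3) (2,), then (1, 2) (1,)

With that convention in place, the corrected code: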

import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0


class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape) # The number of layers
        self.shape = shape # The number of neurons in each layer
        self.weights = [
            np.array([2 * np.random.random(shape[l - 1]) - 1 for _ in range(shape[l])])
            for l in range(1, self.layers)
        ] # Weight matrices between neighbouring layers, initialized in [-1, 1)
        self.biases = [np.zeros(l) for l in shape[1:]] # One bias array per non-input layer
        self.weighted_sums = [None for l in shape]
        self.activations = [None for l in shape]
        self.deltas = [None for l in shape[1:]] # One error array per non-input layer

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1] + self.biases[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y, lr):
        self.deltas[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1]) # The output error
        for l in range(self.layers - 2, 0, -1): # The errors get backpropagated
            self.deltas[l - 1] = self.weights[l].T @ self.deltas[l] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1): # The weights and biases get updated online
            for j in range(self.shape[l + 1]):
                self.weights[l][j] += lr * self.activations[l] * self.deltas[l][j]
            self.biases[l] += lr * self.deltas[l] # Scale the bias step by the learning rate too

    def train(self, X, Y, lr, epochs):
        for e in range(epochs):
            if not e % 1000:
                self.test(X) # Print intermediate results every 1000 iterations
            i = np.random.randint(len(X)) # Pick a random training example (online learning)
            self.forward_prop(X[i])
            self.backprop(X[i], Y[i], lr)

    def test(self, X):
        print()
        for x in X:
            self.forward_prop(x)
            print(x, self.activations[-1])


if __name__ == "__main__":
    nn = NeuralNetwork((2, 3, 2, 1))

    X = np.array([
        [1, 0],
        [0, 1],
        [1, 1],
        [0, 0]
    ])

    Y = np.array([
        [1],
        [1],
        [0],
        [0]
    ])

    nn.train(X, Y, 0.4, 20000)
    nn.test(X)
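
As a quick sanity check on the backprop math, here is a minimal numerical gradient check (a sketch, not part of the original answer). It assumes the NeuralNetwork class above; note that calling backprop with lr=0 fills self.deltas without changing any weights or biases:

import numpy as np

nn = NeuralNetwork((2, 3, 2, 1))
x, y = np.array([1, 0]), np.array([1])

def loss(net, x, y):
    net.forward_prop(x)
    return 0.5 * np.sum((y - net.activations[-1]) ** 2) # The squared error implied by the deltas

nn.forward_prop(x)
nn.backprop(x, y, lr=0) # lr=0: compute the deltas but leave all parameters untouched
l, j, k = 0, 0, 0 # An arbitrary weight to check
analytic = -nn.deltas[l][j] * nn.activations[l][k] # dLoss/dweight = -delta * activation

eps = 1e-6
nn.weights[l][j][k] += eps
plus = loss(nn, x, y)
nn.weights[l][j][k] -= 2 * eps
minus = loss(nn, x, y)
nn.weights[l][j][k] += eps # Restore the original weight
numeric = (plus - minus) / (2 * eps)

print(analytic, numeric) # The two values should agree to several decimal places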
