感知器学习算法没有收敛到 0 [英] Perceptron learning algorithm not converging to 0

查看:33
本文介绍了感知器学习算法没有收敛到 0的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

这是我在 ANSI C 中的感知器实现:

#include #include #include <math.h>浮动随机浮动(){srand(时间(空));float r = (float)rand()/(float)RAND_MAX;返回 r;}int计算输出(浮点权重[],浮点x,浮点y){浮动总和 = x * 权重 [0] + y * 权重 [1];返回(总和 >= 0)?1:-1;}int main(int argc, char *argv[]){//训练集的 X、Y 坐标.浮动 x[208], y[208];//训练集输出.整数输出[208];int i = 0;//迭代器文件 *fp;if ((fp = fopen("test1.txt", "r")) == NULL){printf("无法打开文件.
");}别的{而 (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF){如果(输出[i] == 0){输出[i] = -1;}printf("%f %f %d
", x[i], y[i], 输出[i]);我++;}}系统(暂停");int patternCount = sizeof(x)/sizeof(int);浮动权重[2];权重[0] = randomFloat();权重[1] = randomFloat();浮动学习率 = 0.1;整数迭代 = 0;浮动全局错误;做 {全局错误 = 0;整数 p = 0;//迭代器for (p = 0; p 

我使用的训练集:数据集

我已经删除了所有不相关的代码.基本上它现在所做的就是读取 test1.txt 文件并将其中的值加载到三个数组中:x、y 和 outputs.

然后是 感知器学习算法,由于某种原因,它没有收敛到 0(globalError 应该收敛到 0),因此我得到一个无限的 do while 循环.

当我使用较小的训练集(如 5 个点)时,效果很好.有人知道问题可能出在哪里吗?

我写的这个算法非常类似于这个C#感知器算法:

编辑:

这是一个较小训练集的示例:

#include #include #include <math.h>浮动随机浮动(){float r = (float)rand()/(float)RAND_MAX;返回 r;}int计算输出(浮点权重[],浮点x,浮点y){浮动总和 = x * 权重 [0] + y * 权重 [1];返回(总和 >= 0)?1:-1;}int main(int argc, char *argv[]){srand(时间(空));//训练集的 X 坐标.浮动 x[] = { -3.2, 1.1, 2.7, -1 };//训练集的 Y 坐标.浮动 y[] = { 1.5, 3.3, 5.12, 2.1 };//训练集输出.int 输出[] = { 1, -1, -1, 1 };int i = 0;//迭代器文件 *fp;系统(暂停");int patternCount = sizeof(x)/sizeof(int);浮动权重[2];权重[0] = randomFloat();权重[1] = randomFloat();浮动学习率 = 0.1;整数迭代 = 0;浮动全局错误;做 {全局错误 = 0;整数 p = 0;//迭代器for (p = 0; p 

解决方案

在您当前的代码中,感知器成功学习了决策边界的方向,但无法对其进行平移.

    y                              y
    ^                              ^
    |  - + \  +                   |  - \ +   +
    | -    +\ +   +               | -   \  + +   +
    | - -    \ +                  | - -  \    +
    | -  -  + \  +                | -  -  \ +   +
    ---------------------> x       --------------------> x
        像这样卡住                     需要变成这样

(正如有人指出的,这是一个更准确的版本)

问题在于您的感知器没有偏置项(bias),即连接到值恒为 1 的输入的第三个权重分量.

       w0   -----
    x ---->|     |
           |  f  |----> 输出 (+1/-1)
    y ---->|     |
       w1   -----
               ^ w2
    1(偏置) ---|

以下是我纠正问题的方法:

#include #include #include <math.h>#include #define LEARNING_RATE 0.1#define MAX_ITERATION 100浮动随机浮动(){返回(浮动)rand()/(浮动)RAND_MAX;}int计算输出(浮点权重[],浮点x,浮点y){float sum = x * weights[0] + y * weights[1] + weights[2];返回(总和 >= 0)?1:-1;}int main(int argc, char *argv[]){srand(时间(空));浮动 x[208],y[208],权重 [3],localError,globalError;int 输出[208],patternCount,i,p,迭代,输出;文件 *fp;if ((fp = fopen("test1.txt", "r")) == NULL) {printf("无法打开文件.
");退出(1);}我 = 0;而 (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF) {如果(输出[i] == 0){输出[i] = -1;}我++;}模式计数=我;权重[0] = randomFloat();权重[1] = randomFloat();权重[2] = randomFloat();迭代 = 0;做 {迭代++;全局错误 = 0;for (p = 0; p 

...具有以下输出:

迭代 1:RMSE = 0.7206迭代 2:RMSE = 0.5189迭代 3:RMSE = 0.4804迭代 4:RMSE = 0.4804迭代 5:RMSE = 0.3101迭代 6:RMSE = 0.4160迭代 7:RMSE = 0.4599迭代 8:RMSE = 0.3922迭代 9:RMSE = 0.0000决策边界(线)方程:-2.37*x + -2.51*y + -7.55 = 0

下面是用 MATLAB 制作的上述代码的简短动画,显示了每次迭代时的决策边界:

Here is my perceptron implementation in ANSI C:

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Returns a pseudo-random float in the range [0, 1].
//
// The generator is seeded exactly once, on the first call. Re-seeding with
// time(NULL) on every call restarts the sequence, so two calls made within
// the same second would return the same value and every weight would start
// out identical.
float randomFloat()
{
    static int seeded = 0;

    if (!seeded)
    {
        srand((unsigned)time(NULL));
        seeded = 1;
    }

    return (float)rand() / (float)RAND_MAX;
}

// Classifies the point (x, y) with a two-weight linear unit.
//
// weights[0] scales x and weights[1] scales y; there is no constant term.
// Returns 1 when the weighted sum is non-negative, otherwise -1.
int calculateOutput(float weights[], float x, float y)
{
    float activation = x * weights[0] + y * weights[1];

    if (activation >= 0)
    {
        return 1;
    }
    return -1;
}

// Loads a training set from test1.txt (one "x y label" triple per line,
// label 0 is mapped to -1) and trains a two-weight perceptron on it.
//
// Returns 1 if the file cannot be opened, 0 otherwise.
int main(int argc, char *argv[])
{
    // X, Y coordinates of the training set.
    float x[208], y[208];

    // Training set outputs.
    int outputs[208];

    // Number of slots in the arrays above; the reader must never exceed it.
    const int capacity = (int)(sizeof(x) / sizeof(x[0]));

    // Upper bound on training epochs. calculateOutput has no constant term,
    // so zero error may be unreachable; the cap guarantees termination.
    const int maxIterations = 100;

    int i = 0; // number of patterns read so far

    FILE *fp;

    if ((fp = fopen("test1.txt", "r")) == NULL)
    {
        printf("Cannot open file.\n");
        // Without data the arrays are uninitialised, so training is pointless.
        return 1;
    }

    // Stop when the arrays are full or a line does not yield all three
    // fields; comparing against EOF alone would spin forever on bad input.
    while (i < capacity &&
           fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) == 3)
    {
        if (outputs[i] == 0)
        {
            outputs[i] = -1;
        }
        printf("%f   %f   %d\n", x[i], y[i], outputs[i]);
        i++;
    }
    fclose(fp);

    system("PAUSE");

    // Train only on the rows actually read, not on the array capacity.
    int patternCount = i;

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++)
        {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error (0 when correct, +2 or -2 when wrong).
            float localError = outputs[p] - output;

            if (localError != 0)
            {
                // Update weights.
                weights[0] += learningRate * localError * x[p];
                weights[1] += learningRate * localError * y[p];
            }

            // Convert error to absolute value.
            globalError += fabs(localError);

            printf("Iteration %d Error %.2f %.2f\n", iteration, globalError, localError);
        }

        // One iteration is one full pass over the training set.
        iteration++;

        system("PAUSE");

    } while (globalError != 0 && iteration < maxIterations);

    system("PAUSE");
    return 0;
}

The training set I'm using: Data Set

I have removed all irrelevant code. Basically, all it does now is read the test1.txt file and load its values into three arrays: x, y, outputs.

Then there is a perceptron learning algorithm which, for some reason, is not converging to 0 (globalError should converge to 0) and therefore I get an infinite do while loop.

When I use a smaller training set (like 5 points), it works pretty well. Any ideas where the problem could be?

I wrote this algorithm very similar to this C# Perceptron algorithm:


EDIT:

Here is an example with a smaller training set:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// Returns the next rand() value scaled into the range [0, 1].
// Seeding is left to the caller.
float randomFloat()
{
    const float draw = (float)rand();
    const float scale = (float)RAND_MAX;

    return draw / scale;
}

// Evaluates the two-weight linear unit at (x, y).
//
// The weighted sum is x * weights[0] + y * weights[1]; the result is 1 for
// a non-negative sum and -1 otherwise.
int calculateOutput(float weights[], float x, float y)
{
    int label = -1;
    float weighted = x * weights[0] + y * weights[1];

    if (weighted >= 0)
    {
        label = 1;
    }
    return label;
}

// Trains a two-weight perceptron on a built-in four-point training set,
// then prints its classification of a grid over [-1, 1] x [-1, 1] and the
// final weights.
int main(int argc, char *argv[])
{
    srand(time(NULL));

    // X coordinates of the training set.
    float x[] = { -3.2, 1.1, 2.7, -1 };

    // Y coordinates of the training set.
    float y[] = { 1.5, 3.3, 5.12, 2.1 };

    // The training set outputs.
    int outputs[] = { 1, -1, -1, 1 };

    system("PAUSE");

    // Divide by the array's own element size; dividing by sizeof(int) only
    // gives the right count when float and int happen to be the same size.
    int patternCount = sizeof(x) / sizeof(x[0]);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    // These four points can be separated by a line through the origin
    // (for example weights (-1, 0)), so the error can reach zero.
    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++)
        {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error (0 when correct, +2 or -2 when wrong).
            float localError = outputs[p] - output;

            if (localError != 0)
            {
                // Update weights.
                weights[0] += learningRate * localError * x[p];
                weights[1] += learningRate * localError * y[p];
            }

            // Convert error to absolute value.
            globalError += fabs(localError);

            printf("Iteration %d Error %.2f\n", iteration, globalError);
        }

        iteration++;

    } while (globalError != 0);

    // Display network generalisation.
    printf("X       Y     Output\n");
    float j, k;
    for (j = -1; j <= 1; j += .5)
    {
        // The inner loop must walk k; reusing j here would leave k
        // uninitialised and collapse the grid to a single row.
        for (k = -1; k <= 1; k += .5)
        {
            // Calculate output.
            int output = calculateOutput(weights, j, k);
            printf("%.2f  %.2f  %s\n", j, k, (output == 1) ? "Blue" : "Red");
        }
    }

    // Display modified weights.
    printf("Modified weights: %.2f %.2f\n", weights[0], weights[1]);

    system("PAUSE");
    return 0;
}

解决方案

In your current code, the perceptron successfully learns the direction of the decision boundary BUT is unable to translate it.

    y                              y
    ^                              ^
    |  - + \  +                   |  - \ +   +
    | -    +\ +   +               | -   \  + +   +
    | - -    \ +                  | - -  \    +
    | -  -  + \  +                | -  -  \ +   +
    ---------------------> x       --------------------> x
        stuck like this            need to get like this

(as someone pointed out, here is a more accurate version)

The problem lies in the fact that your perceptron has no bias term, i.e. a third weight component connected to an input of value 1.

       w0   -----
    x ---->|     |
           |  f  |----> output (+1/-1)
    y ---->|     |
       w1   -----
               ^ w2
    1(bias) ---|

The following is how I corrected the problem:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#define LEARNING_RATE    0.1
#define MAX_ITERATION    100

/* Returns the next rand() value divided by RAND_MAX, i.e. a float in [0, 1]. */
float randomFloat()
{
    float r = (float)rand();

    r /= (float)RAND_MAX;
    return r;
}

/*
 * Classifies the point (x, y) with a three-weight linear unit.
 *
 * weights[0] and weights[1] scale x and y; weights[2] is added as a
 * constant (bias) term. Returns 1 when the sum is non-negative, else -1.
 */
int calculateOutput(float weights[], float x, float y)
{
    float activation = x * weights[0] + y * weights[1] + weights[2];

    if (activation >= 0) {
        return 1;
    }
    return -1;
}

/*
 * Loads a training set from test1.txt (one "x y label" triple per line,
 * label 0 is mapped to -1), trains a perceptron with a bias weight for at
 * most MAX_ITERATION epochs, and prints the per-epoch RMSE followed by the
 * learned decision boundary.
 *
 * Exits with status 1 if the file cannot be opened or holds no patterns.
 */
int main(int argc, char *argv[])
{
    srand(time(NULL));

    float x[208], y[208], weights[3], localError, globalError;
    int outputs[208], patternCount, i, p, iteration, output;

    /* Number of slots in x/y/outputs; the reader must never exceed it. */
    const int capacity = (int)(sizeof(x) / sizeof(x[0]));

    FILE *fp;
    if ((fp = fopen("test1.txt", "r")) == NULL) {
        printf("Cannot open file.\n");
        exit(1);
    }

    /*
     * Stop when the arrays are full or a line does not yield all three
     * fields; comparing against EOF alone would loop forever on bad input.
     */
    i = 0;
    while (i < capacity &&
           fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) == 3) {
        if (outputs[i] == 0) {
            outputs[i] = -1;
        }
        i++;
    }
    fclose(fp);
    patternCount = i;

    /* An empty set would make the RMSE below a 0/0 division. */
    if (patternCount == 0) {
        printf("No training patterns read.\n");
        exit(1);
    }

    weights[0] = randomFloat();
    weights[1] = randomFloat();
    weights[2] = randomFloat();

    iteration = 0;
    do {
        iteration++;
        globalError = 0;
        for (p = 0; p < patternCount; p++) {
            output = calculateOutput(weights, x[p], y[p]);

            /* localError is 0 for a correct pattern, so the updates are no-ops then. */
            localError = outputs[p] - output;
            weights[0] += LEARNING_RATE * localError * x[p];
            weights[1] += LEARNING_RATE * localError * y[p];
            weights[2] += LEARNING_RATE * localError;

            globalError += (localError*localError);
        }

        /* Root Mean Squared Error */
        printf("Iteration %d : RMSE = %.4f\n",
            iteration, sqrt(globalError/patternCount));
    /* iteration is incremented at the top, so '<' yields exactly MAX_ITERATION epochs. */
    } while (globalError > 0 && iteration < MAX_ITERATION);

    printf("\nDecision boundary (line) equation: %.2f*x + %.2f*y + %.2f = 0\n",
        weights[0], weights[1], weights[2]);

    return 0;
}

... with the following output:

Iteration 1 : RMSE = 0.7206
Iteration 2 : RMSE = 0.5189
Iteration 3 : RMSE = 0.4804
Iteration 4 : RMSE = 0.4804
Iteration 5 : RMSE = 0.3101
Iteration 6 : RMSE = 0.4160
Iteration 7 : RMSE = 0.4599
Iteration 8 : RMSE = 0.3922
Iteration 9 : RMSE = 0.0000

Decision boundary (line) equation: -2.37*x + -2.51*y + -7.55 = 0

And here's a short animation of the code above using MATLAB, showing the decision boundary at each iteration:

这篇关于感知器学习算法没有收敛到 0的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆