NegaMax算法和井字游戏...怎么了? [英] NegaMax Algorithm and TicTacToe...What's wrong?

查看:150
本文介绍了NegaMax算法和井字游戏...怎么了?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我是新来的博弈树算法,我一直试图实现一个简单的井字游戏,利用NegaMax算法来评估瓦比分为电脑AI球员。 然而,AI的行为不是在一个巧妙的方法(我可以赢得所有的时间,因为AI不会阻止我的动作),我想我实现了NegaMax算法错误。

如果有人可以帮助我下面的code,这将是巨大的。我花了这么长的时间尝试不同的东西出来,但我从来没有得到它的工作。

 的#include< stdlib.h中>
#包括< stdio.h中>
#包括< malloc.h所>
#包括< limits.h中>
#包括game_board.h

/ **
 *构造一个新的游戏板。
 *
 *参数尺寸电路板的尺寸(字段的长度)
 返回:一个新的游戏键盘
 * /
游戏键盘* game_board_new(INT尺寸)
{
    游戏键盘*板=(游戏键盘*)释放calloc(1,sizeof的(游戏键盘));
    板级>大小=大小;
    板级>关闭=白色;
    板级>场=(枚举球员*)释放calloc(大小,sizeof的(枚举播放器));
    返回板;
}

/ **
 *推出一个游戏板。
 *
 *参数板上的游戏键盘发布
 * /
无效game_board_free(游戏键盘*板)
{
    免费(板级>场);
    免费(板);
}

/ **
 *复制一个游戏板。
 *
 * @参数原来的复制游戏键盘
 返回:它是从原始复制一个新的游戏键盘
 * /
游戏键盘* game_board_copy(游戏键盘*原件)
{
    游戏键盘*复印件;
    INT I;

    副本= game_board_new(original->大小);
    有版权>大小= original->大小;
    有版权>关闭= original->打开;

    对于(i = 0; I< original->大小;我++)
    {
        有版权>领域[I] = original->领域[I]
    }

    返回副本;
}

/ **
 *返回赢家当前游戏或空if
 *没有分出胜负,但。
 *
 *参数板上的游戏键盘检查
 返回:获奖玩家或空
 * /
枚举球员game_board_check_winner(游戏键盘*板)
{
    枚举球员* F;

    F =板级>领域;

    //列
    如果(F [0] = EMPTY&安培;!&安培; F [0] == F [3]&安培;&安培; F [0] == F [6])
        返回F [0];
    如果(F [1] = EMPTY&安培;!&安培; F [1] == F [4]&安培;&安培; F [1] == F [7])
        返回F [1];
    如果(F [2] = EMPTY&安培;!&安培; F [2] == F [5]&安培;&安培; F [2] == F [8])
        返回F [2];

    //行
    如果(F [0] = EMPTY&安培;!&安培; F [0] == F [1]安培;&安培; F [1] == F [2])
        返回F [0];
    如果(F [3] = EMPTY&安培;!&安培; F [3] == F [4]&安培;&安培; F [4] == F [5])
        返回F [3];
    如果(F [6] = EMPTY&安培;!&安培; F [6] == F [7]&安培;&安培; F [7] == F [8])
        返回F [6];

    // 对角线
    如果(F [0] = EMPTY&安培;!&安培; F [0] == F [4]&安培;&安培; F [4] == F [8])
        返回F [0];
    如果(F [2] = EMPTY&安培;!&安培; F [2] == F [4]&安培;&安培; F [4] == F [6])
        返回F [2];

    返回空的;
}

无效game_board_toggle_player(游戏键盘*板)
{
    如果(板级>把== WHITE)
        板级>关闭=黑色;
    否则,如果(板级>把== BLACK)
        板级>关闭=白色;
}

的int game_board_is_ended(游戏键盘*板)
{
    返回game_board_check_winner(板)|| game_board_is_full(板);
}

静态INT评估(游戏键盘*板)
{
    枚举球员得主= game_board_check_winner(板);

    如果(冠军=空&放大器;!&安培;赢家==板级>转)
    {
        返回1;
    }
    否则,如果(冠军=空&放大器;!&安培;赢家=板级>!转)
    {
        返回-1;
    }
    其他
    {
        返回0;
    }
}

INT negamax(游戏键盘*板,​​INT深度)
{
    INT bestvalue = INT_MIN;
    INT I;
    int值;

    枚举球员得主= game_board_check_winner(板);

    如果(赢家!=空||深度== 0)
        回报评估(板);

    对于(i = 0; I<板级>大小;我++)
    {
        如果(板级>领域[I] ==空)
        {
            游戏键盘*副本= game_board_copy(板);
            game_board_make_move(副本,我);
            game_board_toggle_player(板);

            值= -negamax(复印件,深-1);
            bestvalue = MAX(值,bestvalue);

            game_board_free(复印件);
        }
    }

    返回bestvalue;
}

/ **
 *根据NegaMax估计当前板
 * 算法。
 *
 *参数板上的游戏键盘,以评估
 返回:一个NegaMax得分
 * /
INT game_board_evaluate(游戏键盘*板,​​为int * best_pos)
{
    INT I;
    INT MAXVAL;
    INT VAL;

    MAXVAL = INT_MIN;

    对于(i = 0; I<板级>大小;我++)
    {
        如果(板级>领域[I] ==空)
        {
            VAL = negamax(板,9);
            如果(VAL> MAXVAL)
            {
                MAXVAL = VAL;
                的printf(最佳移动:%I \ N,我);
                (* best_pos)= I;
            }
        }
    }

    返回MAXVAL;
}

INT game_board_is_full(游戏键盘*板)
{
    INT I;

    对于(i = 0; I<板级>大小;我++)
    {
        如果(板级>领域[I] == 0)
        {
            返回0;
        }
    }

    返回1;
}

无效game_board_make_move(游戏键盘*板,​​INT POS)
{
    板级>领域[POS] =板级>打开;
}

/ **
 *打印一个游戏板。
 *
 * @参数板上打印的游戏键盘
 * /
无效game_board_print(游戏键盘*板)
{
    INT I;

    对于(i = 0; I<板级>大小;我++)
    {
        的printf(%I(%I)\ t的,板级>领域[I],I);

        如果(我== 2 ||我== 5)
            的printf(\ N);
    }

    的printf(\ N);
}
 

解决方案

在一个NegaMax框架,评价职能必须有所不同。试试下面的评测功能:

  INT evaluateNegaMax(游戏键盘*板)
{
    如果(板级>把== MAXPLAYER)
        回报评估(板);
    其他
        返回-evaluate(板);
}
 

借助 chessprogramming 网站解释了原因:

  

为了negaMax的工作,你的静态评估函数必须返回得分相对侧,以被评估

I am new to game tree algorithms, and i've tried to implement a simple tictactoe game which utilizes the NegaMax algorithm to evaluate tile scores for the computer AI player. However, the AI behaves not in a smart way (i can win all the time, because the AI does not block my moves), and i think i implemented the NegaMax algorithm wrongly.

If anyone could help me with the following code, it would be great. I spent such a long time trying different things out, but i never got it to work.

#include <stdlib.h>
#include <stdio.h>
#include <malloc.h>
#include <limits.h>
#include "game_board.h"

/**
 * Constructs a new game board.
 *
 * @param size the size of the board (the length of fields)
 * @return a new GameBoard
 */
GameBoard* game_board_new(int size)
{
    GameBoard* board = (GameBoard*) calloc(1, sizeof(GameBoard));
    board->size = size;
    board->turn = WHITE;
    board->fields = (enum Player*) calloc(size, sizeof(enum Player));
    return board;
}

/**
 * Releases a game board.
 *
 * @param board the GameBoard to release
 */
void game_board_free(GameBoard* board)
{
    free(board->fields);
    free(board);
}

/**
 * Copies a game board.
 *
 * @param original the GameBoard to copy
 * @return a new GameBoard which is copied from the original
 */
GameBoard* game_board_copy(GameBoard* original)
{
    GameBoard* copy;
    int i;

    copy = game_board_new(original->size);
    copy->size = original->size;
    copy->turn = original->turn;

    for (i = 0; i < original->size; i++)
    {
        copy->fields[i] = original->fields[i];
    }

    return copy;
}

/**
 * Returns the winner for the current game or EMPTY if
 * there is no winner, yet.
 *
 * @param board the GameBoard to check
 * @return the winning Player or EMPTY
 */
enum Player game_board_check_winner(GameBoard* board)
{
    enum Player* f;

    f = board->fields;

    // Columns
    if (f[0] != EMPTY && f[0] == f[3] && f[0] == f[6])
        return f[0];
    if (f[1] != EMPTY && f[1] == f[4] && f[1] == f[7])
        return f[1];
    if (f[2] != EMPTY && f[2] == f[5] && f[2] == f[8])
        return f[2];

    // Rows
    if (f[0] != EMPTY && f[0] == f[1] && f[1] == f[2])
        return f[0];
    if (f[3] != EMPTY && f[3] == f[4] && f[4] == f[5])
        return f[3];
    if (f[6] != EMPTY && f[6] == f[7] && f[7] == f[8])
        return f[6];

    // Diagonal
    if (f[0] != EMPTY && f[0] == f[4] && f[4] == f[8])
        return f[0];
    if (f[2] != EMPTY && f[2] == f[4] && f[4] == f[6])
        return f[2];

    return EMPTY;
}

void game_board_toggle_player(GameBoard* board)
{
    if (board->turn == WHITE)
        board->turn = BLACK;
    else if (board->turn == BLACK)
        board->turn = WHITE;
}

int game_board_is_ended(GameBoard* board)
{
    return game_board_check_winner(board) || game_board_is_full(board);
}

static int evaluate(GameBoard* board)
{
    enum Player winner = game_board_check_winner(board);

    if (winner != EMPTY && winner == board->turn)
    {
        return 1;
    }
    else if (winner != EMPTY && winner != board->turn)
    {
        return -1;
    }
    else
    {
        return 0;
    }
}

int negamax(GameBoard* board, int depth)
{
    int bestvalue = INT_MIN;
    int i;
    int value;

    enum Player winner = game_board_check_winner(board);

    if (winner != EMPTY || depth == 0)
        return evaluate(board);

    for (i = 0; i < board->size; i++)
    {
        if (board->fields[i] == EMPTY)
        {
            GameBoard* copy = game_board_copy(board);
            game_board_make_move(copy, i);
            game_board_toggle_player(board);

            value = -negamax(copy, depth-1);
            bestvalue = max(value, bestvalue);

            game_board_free(copy);
        }
    }

    return bestvalue;
}

/**
 * Evaluates the current board according to NegaMax
 * algorithm.
 *
 * @param board the GameBoard to evaluate
 * @return a NegaMax score
 */
int game_board_evaluate(GameBoard* board, int* best_pos)
{
    int i;
    int maxVal;
    int val;

    maxVal = INT_MIN;

    for (i = 0; i < board->size; i++)
    {
        if (board->fields[i] == EMPTY)
        {
            val = negamax(board, 9);
            if (val > maxVal)
            {
                maxVal = val;
                printf("Best move: %i\n", i);
                (*best_pos) = i;
            }
        }
    }

    return maxVal;
}

int game_board_is_full(GameBoard* board)
{
    int i;

    for (i = 0; i < board->size; i++)
    {
        if (board->fields[i] == 0)
        {
            return 0;
        }
    }

    return 1;
}

void game_board_make_move(GameBoard* board, int pos)
{
    board->fields[pos] = board->turn;
}

/**
 * Prints a game board.
 *
 * @param board the GameBoard to print
 */
void game_board_print(GameBoard* board)
{
    int i;

    for (i = 0; i < board->size; i++)
    {
        printf("%i (%i)\t", board->fields[i], i);

        if (i == 2 || i == 5)
            printf("\n");
    }

    printf("\n");
}

解决方案

In a NegaMax framework the evaluation function has to look different. Try following evaluation function:

int evaluateNegaMax(GameBoard* board)
{
    if (board->turn == MAXPLAYER)
        return evaluate(board);
    else
        return -evaluate(board);
}

The chessprogramming site explains why:

In order for negaMax to work, your Static Evaluation function must return a score relative to the side to being evaluated

这篇关于NegaMax算法和井字游戏...怎么了?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆