为什么我的尝试泄露数据? [英] Why is my trie leaking data?

查看:26
本文介绍了为什么我的尝试泄露数据?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试使用 trie 制作拼写检查器,但我尝试添加到我的 trie 中的单词似乎没有输入.泄漏在哪里?

I am trying to make a speller checker using a trie, but the words I try to add to my trie don't seem to be getting put in. Where is the leak?

我花了几个小时使用调试器并逐步执行我的代码...

I've spent hours using a debugger and stepping through my code...

主要功能:

/**
 * Implements a spell-checker.
 */

#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "dictionary.h"
#undef calculate
#undef getrusage

// default dictionary
#define DICTIONARY "dictionaries/large"

// prototype
double calculate(const struct rusage *b, const struct rusage *a);

int main(int argc, char *argv[])
{
    // check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: speller [dictionary] text\n");
        return 1;
    }

    // structs for timing data
    struct rusage before, after;

    // benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // determine dictionary to use
    char* dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // calculate time to load dictionary
    time_load = calculate(&before, &after);

    // try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *fp = fopen(text, "r");
    if (fp == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH+1];

    // spell-check each word in text
    for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
    {
        // allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // append character to word
            word[index] = c;
            index++;

            // ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // consume remainder of alphabetical string
                while ((c = fgetc(fp)) != EOF && isalpha(c));

                // prepare for new word
                index = 0;
            }
        }

        // ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // consume remainder of alphanumeric string
            while ((c = fgetc(fp)) != EOF && isalnum(c));

            // prepare for new word
            index = 0;
        }

        // we must have found a whole word
        else if (index > 0)
        {
            // terminate current word
            word[index] = '\0';

            // update counter
            words++;

            // check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // update benchmark
            time_check += calculate(&before, &after);

            // print word if misspelled
            if (misspelled)
            {
                printf("%s\n", word);
                misspellings++;
            }

            // prepare for next word
            index = 0;
        }
    }

    // check whether there was an error
    if (ferror(fp))
    {
        fclose(fp);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // close text
    fclose(fp);

    // determine dictionary'size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // report benchmarks
    printf("\nWORDS MISSPELLED:     %d\n", misspellings);
    printf("WORDS IN DICTIONARY:  %d\n", n);
    printf("WORDS IN TEXT:        %d\n", words);
    printf("TIME IN load:         %.2f\n", time_load);
    printf("TIME IN check:        %.2f\n", time_check);
    printf("TIME IN size:         %.2f\n", time_size);
    printf("TIME IN unload:       %.2f\n", time_unload);
    printf("TIME IN TOTAL:        %.2f\n\n", 
     time_load + time_check + time_size + time_unload);

    // that's all folks
    return 0;
}

/**
 * Returns number of seconds between b and a.
 */
double calculate(const struct rusage *b, const struct rusage *a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                 (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                 (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}

检查单词是否在树中的函数:

Functions that check to see if a word is in the trie:

/**
 * Implements a dictionary's functionality.
 */

#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>

#include "dictionary.h"

int dictionary_size;

bool find(char *word, node *next)
{
    word++;
    if (next == NULL)
    {
        return false;
    }
    else if (word[0] == '\0')
    {
        if (next->is_word == true)
        {
            return true;
        }
        else
        {
            return false;
        }
    }
    else
    {
        //go to this nodes chold that is now eual to the first letter of the word
        if (word[0] == '\'')
        {
            return find(word, next->children[26]);
        }
        else
        {
            return find(word, next->children[word[0] - 'a']);
        }
    }
}

/**
 * Returns true if word is in dictionary else false.
 */
bool check(const char *word)
{
    //get word from text and make it malluable
    char w[47];
    memcpy(w, word, strlen(word) + 1);
    //make sure it's all lower case
    for (int i = 0; i < strlen(word); i++)
    {
        w[i] = tolower(w[i]);
    }
    //go to the root child node equal to the first letter of the word
    if (find(w, root))
    {
        return true;
    }
    else
    {
        return false;
    }
}

将单词字典加载到树中的函数.(我猜这是泄漏的地方?):

functions that load a dictionary of words into the trie. (This where the leak is i'm guessing?):

struct node *newNode(char *word, node *next)
{
    next = malloc(sizeof(node));
    word++;
    if (word[0] == '\0')
    {
        next->is_word = true;
        return next;
    }
    else
    {
        node *new_node = NULL;
        // go to this nodes choild that is now equal to the first letter of the word
        if (word[0] == '\'')
        {
           return next->children[26] = newNode(word, new_node);
        }
        else
        { 
           return next->children[word[0] - 97] = newNode(word, new_node);
        }
    }
}


/**
 * Loads dictionary into memory. Returns true if successful else false.
 */
bool load(const char *dictionary)
{
    dictionary_size = 0;
    FILE *dic = fopen(dictionary, "r");
    if (dic == NULL)
    {
        return false;
    }
    // Initalize root node
    root = malloc(sizeof(node));

    //get word from dictinary
    int ch;
    while ((ch = fgetc(dic)) != EOF)
    {
        char word[47];
        fgets(word, 47, dic);
        dictionary_size++;
        // make sure it's all lower case
        for (int i = 0; i < 47; i++)
        {
            word[i] = tolower(word[i]);
        }
        // get rid of new line char
        char *pos;
        if ((pos = strchr(word, '\n')) != NULL)
        {
            *pos = '\0';
        }
        printf("%s\n", word);
        //go to root nodes child that is equal to the first letter of the word
        node *child_node = NULL;
        root->children[word[0] - 'a'] = newNode(word, child_node);
    }
return true;
}

/**
 * Returns number of words in dictionary if loaded else 0 if not yet loaded.
 */
unsigned int size(void)
{
    if (dictionary_size != 0)
    {
        return dictionary_size;
    }
    return 0;
}

void free_node(node *next)
{
    // safety including root node
    if(!next) return;   

    // takes you to end of trie
    for (int i = 0; i < 26; i++)
    {
       free_node(next->children[i]);
    }

    // base case
    free(next);
}

/**
 * Unloads dictionary from memory. Returns true if successful else false.
 */
bool unload(void)
{
    free_node(root);
    return true;
}

定义节点结构:

typedef struct node
{
    bool is_word;
    struct node *children[27]; 
}
node;

node *root;

推荐答案

这段代码使用彻底重写的 addWord() 函数代替 addNode() 来工作.>

This code works using radically rewritten addWord() function in place of addNode().

/* SO 3994-9116 */
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct node
{
    bool is_word;
    struct node *children[27];
} node;

static node *root = 0;
static int dictionary_size = 0;

static void addWord(char *word, node *trie)
{
    assert(islower((unsigned char)word[0]) || word[0] == '\0');
    if (word[0] == '\0')
        trie->is_word = true;
    else
    {
        int code = word[0] - 'a';
        if (trie->children[code] == 0)
        {
            trie->children[code] = calloc(sizeof(node), 1);
            if (trie->children[code] == 0)
            {
                fprintf(stderr, "Memory allocation failed\n");
                exit(1);
            }
        }
        addWord(&word[1], trie->children[code]);
    }
}

static
bool load(const char *dictionary)
{
    dictionary_size = 0;
    FILE *dic = fopen(dictionary, "r");
    if (dic == NULL)
        return false;
    root = calloc(1, sizeof(node));
    if (root == 0)
    {
        fprintf(stderr, "Memory allocation failed\n");
        exit(1);
    }

    char word[47];
    while (fgets(word, sizeof(word), dic) != 0)
    {
        char *pos;
        if ((pos = strchr(word, '\n')) != NULL)
            *pos = '\0';
        dictionary_size++;
        for (int i = 0, len = strlen(word); i < len; i++)
        {
            if (!isalpha((unsigned char)word[i]))
                fprintf(stderr, "Non-alphabetic character in '%s'\n", word);
            else
                word[i] = tolower((unsigned char)word[i]);
        }
        addWord(word, root);
    }
    return true;
}

static void print_subtrie(const char *prefix, char nchar, node *child)
{
    if (child != 0)
    {
        int len = strlen(prefix);
        char buffer[len + 2];
        strcpy(buffer, prefix);
        buffer[len] = nchar;
        buffer[len+1] = '\0';
        if (child->is_word)
            printf("%s\n", buffer);
        for (int i = 0; i < 26; i++)
            print_subtrie(buffer, 'a' + i, child->children[i]);
    }
}

static void print_trie(node *top)
{
    for (int i = 0; i < 26; i++)
        print_subtrie("", 'a' + i, top->children[i]);
}

static void free_trie(node *trie)
{
    if (trie != 0)
    {
        for (int i = 0; i < 27; i++)
        {
            if (trie->children[i] != 0)
                free_trie(trie->children[i]);
        }
        free(trie);
    }
}

int main(void)
{
    if (load("words.list"))
    {
        printf("Nominal dictionary size: %d\n", dictionary_size);
        print_trie(root);
        free_trie(root);
    }
    return 0;
}

测试文件 1:

aardvark
abelone
assignation
assignment
assigned
assert
assertion

输出文件 1:

Nominal dictionary size: 7
aardvark
abelone
assert
assertion
assignation
assigned
assignment

测试文件 2:

Mike
Aquarius
delta
assert
zulu
assertion
alpha
aardvark
abelone
culmination
Zulu
kilo
amniocentesis
Oscar
Yankee
cabal
echo
Sierra
duck
Charlie
Quebec
centre
assigned
Papa
foxtrot
uMBRelLA
Romeo
cab
India
Juliet
bravo
amniocentesis
hotel
amniocentesis
Lima
Whisky
cab
culminate
cold
Xray
cab
cabs
assignation
cam
Victor
golf
Tango
November
cinema
cabbie
assignment

输出文件 2:

Nominal dictionary size: 56
a
aardvark
abelone
alpha
amniocentesis
aquarius
as
assert
assertion
assign
assignation
assigned
assignment
bravo
cab
cabal
cabbie
cabs
cam
centre
charlie
cinema
cold
culminate
culmination
delta
duck
echo
foxtrot
golf
hotel
i
india
juliet
kilo
lima
mike
november
o
oscar
papa
quebec
romeo
sierra
tango
umbrella
victor
whisky
xray
yankee
zulu

注意'nominal size'的56太大了;输入中有 46 个不同的词,因此输出中有 46 个不同的词.

Note that the 'nominal size' of 56 is too large; there are 46 distinct words in the input and hence in the output.

这篇关于为什么我的尝试泄露数据?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
相关文章
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆