怎么能在这段代码中获得最快的速度? [英] How can I get the fastest speed in this code?

查看:48
本文介绍了怎么能在这段代码中获得最快的速度?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

#include "stdafx.h"
#include <conio.h>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <Windows.h>
#include <sstream>
#include <algorithm>
#include "hash_map"
#include <stdio.h>
using namespace std;
string b1 = "<BODY>", b2 = "</BODY>";
#define  FINPUT "input.txt"
#define  FOUTPUT "output.txt"
#define  STOPWORDS  "stopword.txt"
#define  START_DOC "<BODY>"
#define  END_DOC "</BODY>"
#define  START_DOC_LEN b1.length()
#define  END_DOC_LEN b2.length()
class CountOfWrd<code></code>
{
public:
    int DocNum;
    int Repeat;
    vector<int> Positions;
};
// Stop words read from the STOPWORDS file; searched by is_stop().
vector<string> vec_stop;
// Every distinct indexed word, kept so the index can be written in sorted order.
vector<string> WordsForSort;
// The index itself: word -> list of CountOfWrd records (a new record is
// appended when the word is seen under a different document number).
hash_map <string, vector<CountOfWrd>> hmap;
// Shared iterator reused for every lookup into hmap.
hash_map <string, vector<CountOfWrd>> :: iterator hmap_AcIter;
// Key/value pair type used when inserting a new word into hmap.
typedef pair <string, vector<CountOfWrd>> Word_Pair;
// Removes punctuation and digit characters from word, in place.
void syntax(string &word);
// Returns a copy of word with a trailing "ing" (any letter case) removed.
inline string stem(string word);
// Converts upper-case characters of word to lower case, in place.
inline void caps(string &word);
// Returns true when word is equal to one of the entries of vec_stop.
bool is_stop(string& word);
int _tmain(int argc, _TCHAR* argv[])
{
    ifstream file_of_stopwords(STOPWORDS);
    string str_stop;
    while( file_of_stopwords >> str_stop )
        vec_stop.push_back(str_stop);
    CountOfWrd TempCw;
    ifstream in(FINPUT);
    string word;
    int Doc = 0;
    int Pos=0;
    int startPos;
    vector<CountOfWrd> TmpPushPos;
    while(in >> word)
        if(word.find(START_DOC) != string::npos){
            if(word.length() > START_DOC_LEN){
                startPos = word.find(START_DOC);
                word = word.substr (startPos + START_DOC_LEN );
                Pos = 0;
                do{
                Pos ++;
                if(is_stop(word) == true)
                    continue;
                syntax(word);
                if(word.length() > 2)
                stem(word);
                caps(word);
                if(is_stop(word) == true)
                    continue;
                hmap_AcIter = hmap.find(word);
                if(hmap_AcIter != hmap.end())
                {if(hmap_AcIter->second[hmap_AcIter->second.size()-1].DocNum == Doc)
                    {   (hmap_AcIter)->second[hmap_AcIter->second.size()-1].Repeat++;
                        (hmap_AcIter->second[hmap_AcIter->second.size()-1].Positions).push_back(Pos);}
                    else{
                        TempCw.Repeat = 1;
                        TempCw.DocNum = Doc;
                        TempCw.Positions.clear();
                        TempCw.Positions.push_back(Pos);
                        ((hmap_AcIter)->second).push_back(TempCw);}}
                else
                {
                    TempCw.DocNum = Doc;
                    TempCw.Positions.clear();
                    TempCw.Positions.push_back(Pos);
                    TempCw.Repeat = 1;
                    TmpPushPos.clear();
                    TmpPushPos.push_back(TempCw);
                    hmap.insert(Word_Pair(word,TmpPushPos));
                    WordsForSort.push_back(word);
                }

            }while(in >> word && word.find(END_DOC) == string::npos);
        }
    make_heap(WordsForSort.begin(), WordsForSort.end());
    sort_heap(WordsForSort.begin(), WordsForSort.end());
    int start_of_index=0;
    ofstream out;
    out.open(FOUTPUT);
    for(unsigned int i = start_of_index; i < WordsForSort.size(); i++){
        hmap_AcIter = hmap.find(WordsForSort[i]);
        out << hmap_AcIter->first << "\t\t";
        for(unsigned int j = 0; j < hmap_AcIter->second.size(); j++){
            out << "[" <<hmap_AcIter->second[j].DocNum << "," << hmap_AcIter->second[j].Repeat << "(";
                for(unsigned int k = 0; k < hmap_AcIter->second[j].Positions.size(); k++){
                out << hmap_AcIter->second[j].Positions[k];
                        if(k !=hmap_AcIter->second[j].Positions.size()-1)
                            out << ",";}
                    out << ")" << "]" << "\t";}
        out<< "\n";
        }
    out.close();
    _getch();
    return 0;}
// Returns a copy of word with a trailing "ing" removed; the suffix is matched
// without regard to letter case. Words shorter than three characters are
// returned unchanged (previously word.length()-3 wrapped around for them and
// the function read outside the string).
inline std::string stem(std::string word){
    const std::string::size_type len = word.length();
    if (len < 3)
        return word;

    const char c1 = word[len - 3];
    const char c2 = word[len - 2];
    const char c3 = word[len - 1];
    if ((c1 == 'i' || c1 == 'I') && (c2 == 'n' || c2 == 'N') && (c3 == 'g' || c3 == 'G'))
        word.erase(len - 3, 3);
    return word;
}
// Converts every upper-case character of word to lower case, in place.
// Characters are passed to isupper/tolower as unsigned char: handing those
// functions a negative char value (possible for non-ASCII bytes when char is
// signed) is undefined behaviour.
inline void caps(std::string &word){
    for (std::string::size_type i = 0; i < word.size(); ++i) {
        const unsigned char ch = static_cast<unsigned char>(word[i]);
        if (isupper(ch))
            word[i] = static_cast<char>(tolower(ch));
    }
}
// Reports whether word is one of the stop words held in vec_stop
// (exact, case-sensitive comparison; linear search).
bool is_stop(string& word)
{
    return std::find(vec_stop.begin(), vec_stop.end(), word) != vec_stop.end();
}
// True for the characters syntax() strips from a word: the punctuation marks
// . # & , ; ) ( : - " / ? ' and the decimal digits 0-9.
static bool is_stripped_char(char c){
    switch (c) {
    case '.': case '#': case '&': case ',': case ';':
    case ')': case '(': case ':': case '-': case '"':
    case '/': case '?': case '\'':
        return true;
    default:
        return c >= '0' && c <= '9';
    }
}

// Removes punctuation and digit characters (see is_stripped_char) from word,
// in place, keeping the remaining characters in their original order.
// Uses the erase-remove idiom: one linear pass instead of one erase (and one
// shift of the tail) per removed character.
void syntax(std::string &word){
    word.erase(std::remove_if(word.begin(), word.end(), is_stripped_char), word.end());
}

推荐答案

// Returns a copy of word with a trailing "ing" removed; each of the last three
// characters is compared against both its lower- and upper-case form.
// NOTE: the indices are computed as word.length()-3 .. word.length()-1, so the
// function relies on word having at least three characters; for a shorter
// string the unsigned subtraction wraps around and the accesses are out of range.
inline string stem(string word){
    if((word[word.length()-3] == 'i' || word[word.length()-3] == 'I') && (word[word.length()-2] == 'n' || word[word.length()-2] == 'N') && (word[word.length()-1] == 'g' || word[word.length()-1] == 'G'))
            word.erase(word.length()-3, 3);
    return word;}



既然运行时库已经提供了经过优化的、不区分大小写的字符串比较函数,为什么还要使用这样的代码?请阅读 string 类或 strXXX 函数的文档。同样, isXXX 函数可以判断字符串是否包含控制字符、是否全部为数字等。


Why are you using code like this when the runtime libraries already provide optimised functions for comparing strings without regard to case? Read the documentation, either for the string class or for the strXXX functions. Similarly, the isXXX functions will determine whether a string contains control characters, is all digits, etc.


这篇关于怎么能在这段代码中获得最快的速度?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆