总是在输出中获得重复的输入文件 [英] always get duplicate input file in output

查看:91
本文介绍了总是在输出中获得重复的输入文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

文件在这里:

计算机科学或计算

science(缩写为CS或CompSci)是科学的b $ b b计算方法及其应用程序。

计算机科学家擅长计算理论和计算机或计算机的设计

系统



file is here:
Computer science or computing
science (abbreviated CS or CompSci) is the scientific
approach to computation and its applications.
A computer scientist specialises in the theory of
computation and the design of computers or computational
systems

#include " stdafx.h "
#include < fstream >
#include < iostream >
#include < cctype >
#include < string >
#include < algorithm >
#include <map>
#include < vector >
#include < string >
#include < set >
#include < cstddef >
#include < sstream >


using namespace std;

std::string int_to_string(int);
// tabdil int  i into s C++ string
string int_to_string(int i) {
 	stringstream out;
	out<< i ;
	
	return out.str();
}


//an index to a file.

class FileIndex {
 public:
  FileIndex(int);
  FileIndex* getNext();
  int getIndex();
  void setNext(FileIndex*);

 private:
  int index;       // The index of the file a word was found in.
  FileIndex* next; // The next file index.
};

// The class  for the inverted index generator.
class InvertedIndexGen {
 public:
    InvertedIndexGen();
    ~InvertedIndexGen();
    int build(const std::string&);
    FileIndex* lookup(const std::string&);
	void to_set(std:: set<int>&s, FileIndex* ) ;
    std::string toString();
    int numberOfWords();
 private:
    std::map<std::string,> idx;
    
    // 
    int loadIndexFile(std::vector<std::string xmlns:std="#unknown">&, const std::string&);
    int indexFiles(const std::vector<std::string>&);
    int readWords(const std::string&, std::vector<std::string>&);
    void insert(const std::string&, int);
};


InvertedIndexGen::InvertedIndexGen() {
    // Default constructor.
}


InvertedIndexGen::~InvertedIndexGen() {
    map<string,>::iterator it = idx.begin();
    // iterate over each map element pair.
    while (it != idx.end()) {
        FileIndex* fi = it->second;
        // Now, delete each file index.
        while (fi != NULL) {
            FileIndex* p = fi;
            fi = fi->getNext();
            delete p;
        }
        it++;
    }
    // Next, delete all map entries.
    idx.clear();
}


// Takes a file name as an argument and builds the inverted index.
int InvertedIndexGen::build(const string& file) {
    vector<string> files;
    if (loadIndexFile(files, file) == -1)
        return -1;
    if (indexFiles(files) == -1)
        return -1;
    return 0;
}



// Looks up a word in the inverted index.
FileIndex* InvertedIndexGen::lookup(const string& word) {
    return idx[word];
}











// Returns true if c is an alpha character.
int alpha(char c) {
	return isalpha(c) ;
}

// Returns true if c is not an alpha character.
bool not_alpha(char c) {
	return !isalpha(c);
}





// This method splits the string str into a vector of strings.  That is, we
// split a sentence into words.
//
vector<string> split(const string& str) {
	vector<string> ret;
	string::const_iterator i = str.begin();
	while (i != str.end()) {
		// Ignore unrecognized characters (non-alpha):
		i = find_if(i, str.end(), alpha);

		// Find the next alpha word:
		string::const_iterator j = find_if(i, str.end(), not_alpha);

		// Copy the characters in [i, j)
		if (i != str.end())
			ret.push_back(string(i, j));

		i = j;
	}
	return ret;
}






// This method reads the words in the provided file into the vector v.
//
int InvertedIndexGen::readWords(const string& file, vector<string>& v) {
	std::ifstream infile("D:/C++ project/ConsoleApplication7/file.txt");
	if (infile) {
		std::string line;
		while (getline(infile, line)) {
			vector<string> words = split(line);
			v.insert(v.end(), words.begin(), words.end());
		}
		return 0;
	}
	else {
		cerr << "can't open file " << file << endl;
		return -1;
	}
}



//
// This method converts the FileIndex list fi into a set of
// integers representing the file indexes.
//
void InvertedIndexGen :: to_set(set<int>&s, FileIndex* fi) {
    FileIndex* p = fi;
    while (p != NULL) {
        s.insert(p->getIndex());
        p = p->getNext();
    }
}





// Indexes each file in the files vector.
int InvertedIndexGen::indexFiles(const vector<string>& files) {
    // Uncomment the following lines:
    vector<string> words; // Words in a file.
    set<string>    seen;  // Files we have "seen" (already indexed).
    int            fcnt;  // The file we are indexing.
    string         curr_file;
    string         curr_word;
    int status = 0;

    fcnt = 0;


    for (vector<string>::const_iterator file_it = files.begin(); file_it != files.end(); ++file_it){
        curr_file = *file_it;
        if(seen.find(curr_file) != seen.end()){
            seen.insert(curr_file);
            if(readWords(curr_file, words) != -1){
                for (vector<string>::iterator words_it = words.begin(); words_it != words.end(); ++words_it){
                    curr_word = *words_it;
                    insert(curr_word, fcnt);
                    words.clear();
                }
                fcnt++;
            }
            else {
                status = -1;
                fcnt++;
            }
        }
        else {
            status = -1;
            cout << "duplicate input file: " << curr_file << ". Skipping." << endl;
            fcnt++;
        }
    }
    return status;
}



// Inserts a word into the inverted index.
void InvertedIndexGen::insert(const string& word, int fcnt) {
    if(idx.find(word) == idx.end()) {                // word not seen
        FileIndex* newEntry = new FileIndex(fcnt);  // add new pair to idx map
        idx.insert(pair<string,>(word, newEntry) );
        return;
    }
    else {                                           // word has been seen
        FileIndex* curr = lookup(word);
        while(curr->getIndex() != fcnt && curr->getNext() != NULL){  // iterate through word's FileIndex objects
            curr = curr->getNext();
        }
        if((curr->getIndex() == fcnt)) {  // if there's an index match, do nothing
            return;
        }
        else {                        // if there's no match, add new FileIndex pointer to value list
            FileIndex* addIndex = new FileIndex(fcnt);
            curr->setNext(addIndex);
        }
    }
}



// Loads the index file into the vector files.
int InvertedIndexGen::loadIndexFile(vector<string>& files, const string& idxfile) {
	std::ifstream infile("D:/C++ project/ConsoleApplication7/file.txt");
	if (infile) {
		std:: string line;
		int lineno = 1;
		while (getline(infile, line)) {
			if (line == "")
				cerr << "[" << int_to_string(lineno)
				     << "] found blank line in input file. skipping." << endl;
			else
				files.push_back(line);
			lineno++;
		}
		return 0;
	}
	else {
		cerr << "can't open file " << idxfile << endl;
		return -1;
	}
}




// Creates a new file index given the index i.
//
FileIndex::FileIndex(int i) {
	index = i;
  next  = NULL;
}

//
// Returns the next file index in this list.
//
FileIndex* FileIndex::getNext() {
  return next;
}

//
// Sets the next file index in this list.
//
void FileIndex::setNext(FileIndex* n) {
  next = n;
}

//
// Returns the index.
//
int FileIndex::getIndex() {
  return index;
}


//Returns the string representation of the inverted index.
string InvertedIndexGen::toString() {
    set<int>indexes;
    string   res = "";
    map<string,>::iterator it = idx.begin();
//    int *it2 ;
    while (it != idx.end()) {
        res += it->first + ": ";
        to_set(indexes, it->second);
        for (set<int>::iterator it2 = indexes.begin();
             it2 != indexes.end(); ++it2) {
            res += int_to_string(*it2) + " ";
        }    
        res += "\n";    
        indexes.clear();
        it++;
    }

    return res;
}

int InvertedIndexGen::numberOfWords() {
	return idx.size();
}




// Main program entry point.
int main(int argc, char* argv[]) {
	//Check the program arguments.
	/*if (argc != 2) {
		std::cerr << "usage: InvIndexer file" << endl;
		return 1;
	}*/
	cout << "Test" << endl;
	int s;
	InvertedIndexGen ivgen;    	// Create the inverted index generator.
	ivgen.build(argv[0]);		// Build the index.
	cout << ivgen.toString();	// Return the a string representation of the index.
	cout << ivgen.numberOfWords() << " words" << endl;
	cin>>s;
	return 0;
}

推荐答案

在调试器下运行应用程序,转到断言消息指示的行。给它一个断点。再次运行应用程序,直到执行在断点处停止。找出哪个对象应该是非null。打开调试窗口调用堆栈并浏览堆栈以查看错误数据的来源。修复它。



-SA
Running the application under the debugger, go to the line indicated by the assertion message. Put a break point on it. Run application again until the execution stops at the break point. Find out which object is supposed to be non-null. Open the Debug Window "Call stack" and browse through the stack to see where wrong data come from. Fix it.

—SA


你打电话给没有参数的程序:它需要一个文件名。如果你不提供一个, argv [] 将只有一个条目, argv [1] 是一个指针无效。
You have called the program without argument: it expects a filename. If you don't provide one, argv[] will have only one entry, and argv[1] is an invalid pointer.


这篇关于总是在输出中获得重复的输入文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆