总是在输出中获得重复的输入文件 [英] always get duplicate input file in output
本文介绍了总是在输出中获得重复的输入文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
文件在这里:
计算机科学或计算
science(缩写为CS或CompSci)是科学的b $ b b计算方法及其应用程序。
计算机科学家擅长计算理论和计算机或计算机的设计
系统
file is here:
Computer science or computing
science (abbreviated CS or CompSci) is the scientific
approach to computation and its applications.
A computer scientist specialises in the theory of
computation and the design of computers or computational
systems
#include " stdafx.h "
#include < fstream >
#include < iostream >
#include < cctype >
#include < string >
#include < algorithm >
#include <map>
#include < vector >
#include < string >
#include < set >
#include < cstddef >
#include < sstream >
using namespace std;
std::string int_to_string(int);
// tabdil int i into s C++ string
string int_to_string(int i) {
stringstream out;
out<< i ;
return out.str();
}
//an index to a file.
class FileIndex {
public:
FileIndex(int);
FileIndex* getNext();
int getIndex();
void setNext(FileIndex*);
private:
int index; // The index of the file a word was found in.
FileIndex* next; // The next file index.
};
// The class for the inverted index generator.
class InvertedIndexGen {
public:
InvertedIndexGen();
~InvertedIndexGen();
int build(const std::string&);
FileIndex* lookup(const std::string&);
void to_set(std:: set<int>&s, FileIndex* ) ;
std::string toString();
int numberOfWords();
private:
std::map<std::string,> idx;
//
int loadIndexFile(std::vector<std::string xmlns:std="#unknown">&, const std::string&);
int indexFiles(const std::vector<std::string>&);
int readWords(const std::string&, std::vector<std::string>&);
void insert(const std::string&, int);
};
InvertedIndexGen::InvertedIndexGen() {
// Default constructor.
}
InvertedIndexGen::~InvertedIndexGen() {
map<string,>::iterator it = idx.begin();
// iterate over each map element pair.
while (it != idx.end()) {
FileIndex* fi = it->second;
// Now, delete each file index.
while (fi != NULL) {
FileIndex* p = fi;
fi = fi->getNext();
delete p;
}
it++;
}
// Next, delete all map entries.
idx.clear();
}
// Takes a file name as an argument and builds the inverted index.
int InvertedIndexGen::build(const string& file) {
vector<string> files;
if (loadIndexFile(files, file) == -1)
return -1;
if (indexFiles(files) == -1)
return -1;
return 0;
}
// Looks up a word in the inverted index.
FileIndex* InvertedIndexGen::lookup(const string& word) {
return idx[word];
}
// Returns true if c is an alpha character.
int alpha(char c) {
return isalpha(c) ;
}
// Returns true if c is not an alpha character.
bool not_alpha(char c) {
return !isalpha(c);
}
// This method splits the string str into a vector of strings. That is, we
// split a sentence into words.
//
vector<string> split(const string& str) {
vector<string> ret;
string::const_iterator i = str.begin();
while (i != str.end()) {
// Ignore unrecognized characters (non-alpha):
i = find_if(i, str.end(), alpha);
// Find the next alpha word:
string::const_iterator j = find_if(i, str.end(), not_alpha);
// Copy the characters in [i, j)
if (i != str.end())
ret.push_back(string(i, j));
i = j;
}
return ret;
}
// This method reads the words in the provided file into the vector v.
//
int InvertedIndexGen::readWords(const string& file, vector<string>& v) {
std::ifstream infile("D:/C++ project/ConsoleApplication7/file.txt");
if (infile) {
std::string line;
while (getline(infile, line)) {
vector<string> words = split(line);
v.insert(v.end(), words.begin(), words.end());
}
return 0;
}
else {
cerr << "can't open file " << file << endl;
return -1;
}
}
//
// This method converts the FileIndex list fi into a set of
// integers representing the file indexes.
//
void InvertedIndexGen :: to_set(set<int>&s, FileIndex* fi) {
FileIndex* p = fi;
while (p != NULL) {
s.insert(p->getIndex());
p = p->getNext();
}
}
// Indexes each file in the files vector.
int InvertedIndexGen::indexFiles(const vector<string>& files) {
// Uncomment the following lines:
vector<string> words; // Words in a file.
set<string> seen; // Files we have "seen" (already indexed).
int fcnt; // The file we are indexing.
string curr_file;
string curr_word;
int status = 0;
fcnt = 0;
for (vector<string>::const_iterator file_it = files.begin(); file_it != files.end(); ++file_it){
curr_file = *file_it;
if(seen.find(curr_file) != seen.end()){
seen.insert(curr_file);
if(readWords(curr_file, words) != -1){
for (vector<string>::iterator words_it = words.begin(); words_it != words.end(); ++words_it){
curr_word = *words_it;
insert(curr_word, fcnt);
words.clear();
}
fcnt++;
}
else {
status = -1;
fcnt++;
}
}
else {
status = -1;
cout << "duplicate input file: " << curr_file << ". Skipping." << endl;
fcnt++;
}
}
return status;
}
// Inserts a word into the inverted index.
void InvertedIndexGen::insert(const string& word, int fcnt) {
if(idx.find(word) == idx.end()) { // word not seen
FileIndex* newEntry = new FileIndex(fcnt); // add new pair to idx map
idx.insert(pair<string,>(word, newEntry) );
return;
}
else { // word has been seen
FileIndex* curr = lookup(word);
while(curr->getIndex() != fcnt && curr->getNext() != NULL){ // iterate through word's FileIndex objects
curr = curr->getNext();
}
if((curr->getIndex() == fcnt)) { // if there's an index match, do nothing
return;
}
else { // if there's no match, add new FileIndex pointer to value list
FileIndex* addIndex = new FileIndex(fcnt);
curr->setNext(addIndex);
}
}
}
// Loads the index file into the vector files.
int InvertedIndexGen::loadIndexFile(vector<string>& files, const string& idxfile) {
std::ifstream infile("D:/C++ project/ConsoleApplication7/file.txt");
if (infile) {
std:: string line;
int lineno = 1;
while (getline(infile, line)) {
if (line == "")
cerr << "[" << int_to_string(lineno)
<< "] found blank line in input file. skipping." << endl;
else
files.push_back(line);
lineno++;
}
return 0;
}
else {
cerr << "can't open file " << idxfile << endl;
return -1;
}
}
// Creates a new file index given the index i.
//
FileIndex::FileIndex(int i) {
index = i;
next = NULL;
}
//
// Returns the next file index in this list.
//
FileIndex* FileIndex::getNext() {
return next;
}
//
// Sets the next file index in this list.
//
void FileIndex::setNext(FileIndex* n) {
next = n;
}
//
// Returns the index.
//
int FileIndex::getIndex() {
return index;
}
//Returns the string representation of the inverted index.
string InvertedIndexGen::toString() {
set<int>indexes;
string res = "";
map<string,>::iterator it = idx.begin();
// int *it2 ;
while (it != idx.end()) {
res += it->first + ": ";
to_set(indexes, it->second);
for (set<int>::iterator it2 = indexes.begin();
it2 != indexes.end(); ++it2) {
res += int_to_string(*it2) + " ";
}
res += "\n";
indexes.clear();
it++;
}
return res;
}
int InvertedIndexGen::numberOfWords() {
return idx.size();
}
// Main program entry point.
int main(int argc, char* argv[]) {
//Check the program arguments.
/*if (argc != 2) {
std::cerr << "usage: InvIndexer file" << endl;
return 1;
}*/
cout << "Test" << endl;
int s;
InvertedIndexGen ivgen; // Create the inverted index generator.
ivgen.build(argv[0]); // Build the index.
cout << ivgen.toString(); // Return the a string representation of the index.
cout << ivgen.numberOfWords() << " words" << endl;
cin>>s;
return 0;
}
推荐答案
在调试器下运行应用程序,转到断言消息指示的行。给它一个断点。再次运行应用程序,直到执行在断点处停止。找出哪个对象应该是非null。打开调试窗口调用堆栈并浏览堆栈以查看错误数据的来源。修复它。
-SA
Running the application under the debugger, go to the line indicated by the assertion message. Put a break point on it. Run application again until the execution stops at the break point. Find out which object is supposed to be non-null. Open the Debug Window "Call stack" and browse through the stack to see where wrong data come from. Fix it.
—SA
你打电话给没有参数的程序:它需要一个文件名。如果你不提供一个,argv []
将只有一个条目,argv [1]
是一个指针无效。
You have called the program without argument: it expects a filename. If you don't provide one,argv[]
will have only one entry, andargv[1]
is an invalid pointer.
这篇关于总是在输出中获得重复的输入文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文