段错误(核心已转储)问题:在 .raw 文件中搜索关键字时程序崩溃 [英] Segmentation Fault (Core Dumped) When Searching for Keywords in a .raw File

查看:87
本文介绍了“段错误(核心已转储):在 .raw 文件中搜索关键字时程序崩溃”这一问题的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们可以跟随小编一起学习。

问题描述

/* Program for Bad Character Heuristic of Boyer Moore String Matching Algorithm */ 
# include <limits.h>
# include <string.h>
#include <stdio.h>
#include <algorithm>
#include <fstream>
#include <iostream>
# include <vector>
#include <omp.h>
 
# define NO_OF_CHARS 256

using namespace std;

// An unsigned char can store 1 Bytes (8bits) of data (0-255)
typedef unsigned char BYTE;


// Helper returning the larger of two integers (b is returned when both are equal)
int max (int a, int b)
{
    if (a > b)
        return a;
    return b;
}



// The preprocessing function for Boyer Moore's bad character heuristic.
//
//   str     - pattern bytes (only the first `size` bytes are read)
//   size    - number of pattern bytes to index
//   badchar - output table with NO_OF_CHARS entries; after the call,
//             badchar[c] holds the index of the last occurrence of byte
//             value c in the pattern, or -1 if c does not occur
void badCharHeuristic( char *str, int size, int badchar[NO_OF_CHARS])
{
    // Initialize all occurrences as -1
    for (int c = 0; c < NO_OF_CHARS; c++)
         badchar[c] = -1;

    // Fill the actual value of last occurrence of a character.
    // Plain char may be signed, so a byte >= 0x80 would become a negative
    // index; going through unsigned char keeps the index in 0..255.
    for (int i = 0; i < size; i++)
         badchar[static_cast<unsigned char>(str[i])] = i;
}
 
/* A pattern searching function that uses Bad Character Heuristic of
   Boyer Moore Algorithm.

   txt          - NUL-terminated text to scan; its length is taken with
                  strlen(), so only the bytes before the first NUL are searched
   Keywords     - array of at least KeyWordCount keywords
   Pos          - value added to every match index before it is recorded
   KeyWordCount - number of keywords to search for (only read here)
   Offsets      - Offsets[i] receives Pos + index for every match of
                  Keywords[i]; a hit equal to the last recorded value is
                  not stored twice

   Offsets is modified without any locking, so concurrent callers must pass
   distinct vectors. */
void search( char *txt,  string *Keywords,long long Pos,int &KeyWordCount,vector< vector<long long> > &Offsets)
{
	// The text length is the same for every keyword, so measure it once.
	const int n = static_cast<int>(strlen(txt));

	for (int i=0;i<KeyWordCount;i++)
	{
		// badCharHeuristic() takes a non-const pointer but only reads from it.
		char *pat = const_cast<char*>(Keywords[i].c_str());
		const int m = static_cast<int>(strlen(pat));

		// An empty keyword would "match" at every single offset; skip it.
		if (m == 0)
			continue;

		int badchar[NO_OF_CHARS];

		/* Fill the bad character array by calling the preprocessing
		   function badCharHeuristic() for given pattern */
		badCharHeuristic(pat, m, badchar);

		vector<long long> &hits = Offsets[i];

		int s = 0;  // s is shift of the pattern with respect to text
		while(s <= (n - m))
		{
			int j = m-1;

			/* Keep reducing index j of pattern while characters of
			   pattern and text are matching at this shift s */
			while(j >= 0 && pat[j] == txt[s+j])
				j--;

			/* If the pattern is present at current shift, then index j
			   will become -1 after the above loop */
			if (j < 0)
			{
				const long long hit = Pos+s;

				// Record the offset unless it repeats the previous hit
				if(hits.empty() || hits.back()!=hit)
					hits.push_back(hit);

				/* Shift the pattern so that the next character in text
				   aligns with the last occurrence of it in pattern.
				   The condition s+m < n is necessary for the case when
				   pattern occurs at the end of text.
				   The text byte is converted to unsigned char first: a
				   signed char >= 0x80 would otherwise index badchar[]
				   with a negative value and read out of bounds. */
				s += (s+m < n)? m-badchar[static_cast<unsigned char>(txt[s+m])] : 1;
			}
			else
			{
				/* Shift the pattern so that the bad character in text
				   aligns with the last occurrence of it in pattern. The
				   max function is used to make sure that we get a positive
				   shift. We may get a negative shift if the last occurrence
				   of bad character in pattern is on the right side of the
				   current character. */
				s += max(1, j - badchar[static_cast<unsigned char>(txt[s+j])]);
			}
		}
	}
}


// Returns the size in bytes of the file at sFileName, measured as the
// distance between the stream positions at its beginning and its end.
// Returns 0 when the file cannot be opened for reading.
long long FileSize(const char* sFileName)
{
  std::ifstream stream(sFileName, std::ios_base::binary | std::ios_base::in);

  const bool usable = stream.good() && !stream.eof() && stream.is_open();
  if (!usable)
  {
    return 0;
  }

  stream.seekg(0, std::ios_base::beg);
  const std::ifstream::pos_type first = stream.tellg();

  stream.seekg(0, std::ios_base::end);
  const std::ifstream::pos_type last = stream.tellg();

  return static_cast<long long>(last - first);
}

/*	The Program read the File and Search For Multiple Keyword.
	During Searching the occurrence of each Keyword is Stored in a Seperate Vector
	The Program Currently Displays the Hit Count of each Keyword using its Vector Size
*/
int main()
{	
	
	/*---------------------------------*/
	/* Arguments to the Function, these values need to be updated */
	
	//Give the File Location 
	const char *filePath = "xyz.raw";	// 
	int KeyWordCount=2;
	//Define the Searching Keywords;
	string *Keywords=new string[KeyWordCount];
	Keywords[2]="gmail";
	Keywords[0]="facebook";
	int MaxKeyLen=0;
	
	
	
	//Define a Vector For Storing Offsets of Keyword occurrence  
	vector< vector<long long> > Offsets;
	for(int i=0;i<KeyWordCount;i++)
	{	Offsets.push_back(vector<long long>()); //Initialise Vector
		if(Keywords[i].size()> MaxKeyLen)
		   MaxKeyLen=Keywords[i].size();
	}
	
	cout<<"Max Length Of Keyword: "<<MaxKeyLen<<"\n";
	/* --------------------------------------------*/

	
	FILE *file = NULL;			// File pointer
	int ChunkSize=512; 		// Declare the Blocks Size ie read from File
					// Current File Pointer

	// Open the file in binary mode using the "rb" format string
	// This also checks if the file exists and/or can be opened for reading correctly
	if ((file = fopen(filePath, "rb")) == NULL)
		cout << "Could not open specified file" << endl;
	else
		cout << "File opened successfully" << endl;

	//off_t fileSize = getfilesize(file);
	long long fileSize = FileSize(filePath);
	cout<<"FileSize:"<<fileSize<<endl;
	
	

	cout<<"Searching :\n";
        //Process the File as Blocks
	int Currlevel;
	int w;

	#pragma omp parallel for
	for(int Pos=0;Pos<=fileSize; Pos+=ChunkSize-MaxKeyLen)
	{	
				
									//Advance the pointer , here the MaxKeyLen is Subtracted
				//if(Pos!=0)					//to skip keyword Skipping duringreading
				
				int x; // a variable local or private to each thread
				char *fileBuf;
				// Allocate space in the buffer for the Specified Block Size
				try
				{
					fileBuf = new char[ChunkSize];
				}
				catch(...)
				{
					cout<<"Memory allocation exception"<<endl;
					//return 0;
				}
			    	x = omp_get_thread_num();
				cout<<"Thread="<<x<<"Pos:"<<Pos<<"\n";
				fread(fileBuf, ChunkSize, 1, file); 		//Read the File into Buffer
				fileBuf[ChunkSize]='\0';			//put a String Terminator For Search
				
				search(fileBuf,Keywords,Pos,KeyWordCount,Offsets);	//Record the Hit
				delete[]fileBuf;					//delete the Buffer
				w++;
				fseek(file,Pos,SEEK_SET);			//Seek the file pointer
				//std::cin.get();
		     		
		
	}
	//#pragma omp barrier

	//cout<<"\r"<<flush<<"Completed"; //Refresh the Screen
	
	/* Print the Search Result */
	cout<<"\n\t***Keyword Result*** \n \tKeyword :\t Count";
	for(int i=0;i<KeyWordCount;i++)
		cout<<"\n\t"<<Keywords[i]<<" :\t "<<Offsets[i].size()<<""; //Print Each Keyword and Its Seach Hit Count using Size of its Vector
	
	cout<<"\n i="<<w;
	/* --------------------------------------------*/

	
	delete []Keywords;	//delete the Buffer
	Offsets.clear();
        fclose(file);		//Close the file
	return 0;
}

推荐答案

关于“段错误(核心已转储):在 .raw 文件中搜索关键字时程序崩溃”这一问题的文章就介绍到这里,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆