分段故障(核心转储)问题:每当我在.Raw文件中搜索关键字时。 [英] Segmentation Fault (Core Dumped) Problem : Whenever I Search For Keywords In A .Raw File.
本文介绍了分段故障(核心转储)问题:每当我在.Raw文件中搜索关键字时。的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
/* Program for Bad Character Heuristic of Boyer Moore String Matching Algorithm */
# include <limits.h>
# include <string.h>
# include <vector>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <omp.h>
# define NO_OF_CHARS 256
using namespace std;
// An unsigned char can store 1 Bytes (8bits) of data (0-255)
typedef unsigned char BYTE;
// A utility function to get maximum of two integers
int max (int a, int b) { return (a > b)? a: b; }
// The preprocessing function for Boyer Moore's bad character heuristic
void badCharHeuristic( char *str, int size, int badchar[NO_OF_CHARS])
{
int i;
// Initialize all occurrences as -1
for (i = 0; i < NO_OF_CHARS; i++)
badchar[i] = -1;
// Fill the actual value of last occurrence of a character
for (i = 0; i < size; i++)
badchar[(int) str[i]] = i;
}
/* A pattern searching function that uses Bad Character Heuristic of
Boyer Moore Algorithm */
void search( char *txt, string *Keywords,long long Pos,int &KeyWordCount,vector< vector<long long> > &Offsets)
{
char *pat;
for (int i=0;i<KeyWordCount;i++)
{
pat=(char*)Keywords[i].c_str();
int m = strlen(pat);
int n = strlen(txt);
//cout<<"\n"<<pat<<"\n";
int badchar[NO_OF_CHARS];
/* Fill the bad character array by calling the preprocessing
function badCharHeuristic() for given pattern */
badCharHeuristic(pat, m, badchar);
int s = 0; // s is shift of the pattern with respect to text
while(s <= (n - m))
{
int j = m-1;
/* Keep reducing index j of pattern while characters of
pattern and text are matching at this shift s */
while(j >= 0 && pat[j] == txt[s+j])
j--;
/* If the pattern is present at current shift, then index j
will become -1 after the above loop */
if (j < 0)
{
//printf("\nFound @ Offset %d", Pos+s);
if(!Offsets[i].empty()) //if Any Offset Exist For Keyword
if(Offsets[i].back()==(Pos+s)) //Check it with last offset for duplication
Offsets[i].pop_back(); //If duplicates, pop out
Offsets[i].push_back((long long)(Pos+s)); //Push the offset
//cout<<"\n '"<<pat<<"' Found @ Offset:"<<(Pos+s);
/* Shift the pattern so that the next character in text
aligns with the last occurrence of it in pattern.
The condition s+m < n is necessary for the case when
pattern occurs at the end of text */
s += (s+m < n)? m-badchar[txt[s+m]] : 1;
}
else
/* Shift the pattern so that the bad character in text
aligns with the last occurrence of it in pattern. The
max function is used to make sure that we get a positive
shift. We may get a negative shift if the last occurrence
of bad character in pattern is on the right side of the
current character. */
s += max(1, j - badchar[txt[s+j]]);
}
}
}
long long FileSize(const char* sFileName)
{
std::ifstream f;
f.open(sFileName, std::ios_base::binary | std::ios_base::in);
if (!f.good() || f.eof() || !f.is_open()) { return 0; }
f.seekg(0, std::ios_base::beg);
std::ifstream::pos_type begin_pos = f.tellg();
f.seekg(0, std::ios_base::end);
return static_cast<long long>(f.tellg() - begin_pos);
}
/* The Program read the File and Search For Multiple Keyword.
During Searching the occurrence of each Keyword is Stored in a Seperate Vector
The Program Currently Displays the Hit Count of each Keyword using its Vector Size
*/
int main()
{
/*---------------------------------*/
/* Arguments to the Function, these values need to be updated */
//Give the File Location
const char *filePath = "xyz.raw"; //
int KeyWordCount=2;
//Define the Searching Keywords;
string *Keywords=new string[KeyWordCount];
Keywords[2]="gmail";
Keywords[0]="facebook";
int MaxKeyLen=0;
//Define a Vector For Storing Offsets of Keyword occurrence
vector< vector<long long> > Offsets;
for(int i=0;i<KeyWordCount;i++)
{ Offsets.push_back(vector<long long>()); //Initialise Vector
if(Keywords[i].size()> MaxKeyLen)
MaxKeyLen=Keywords[i].size();
}
cout<<"Max Length Of Keyword: "<<MaxKeyLen<<"\n";
/* --------------------------------------------*/
FILE *file = NULL; // File pointer
int ChunkSize=512; // Declare the Blocks Size ie read from File
// Current File Pointer
// Open the file in binary mode using the "rb" format string
// This also checks if the file exists and/or can be opened for reading correctly
if ((file = fopen(filePath, "rb")) == NULL)
cout << "Could not open specified file" << endl;
else
cout << "File opened successfully" << endl;
//off_t fileSize = getfilesize(file);
long long fileSize = FileSize(filePath);
cout<<"FileSize:"<<fileSize<<endl;
cout<<"Searching :\n";
//Process the File as Blocks
int Currlevel;
int w;
#pragma omp parallel for
for(int Pos=0;Pos<=fileSize; Pos+=ChunkSize-MaxKeyLen)
{
//Advance the pointer , here the MaxKeyLen is Subtracted
//if(Pos!=0) //to skip keyword Skipping duringreading
int x; // a variable local or private to each thread
char *fileBuf;
// Allocate space in the buffer for the Specified Block Size
try
{
fileBuf = new char[ChunkSize];
}
catch(...)
{
cout<<"Memory allocation exception"<<endl;
//return 0;
}
x = omp_get_thread_num();
cout<<"Thread="<<x<<"Pos:"<<Pos<<"\n";
fread(fileBuf, ChunkSize, 1, file); //Read the File into Buffer
fileBuf[ChunkSize]='\0'; //put a String Terminator For Search
search(fileBuf,Keywords,Pos,KeyWordCount,Offsets); //Record the Hit
delete[]fileBuf; //delete the Buffer
w++;
fseek(file,Pos,SEEK_SET); //Seek the file pointer
//std::cin.get();
}
//#pragma omp barrier
//cout<<"\r"<<flush<<"Completed"; //Refresh the Screen
/* Print the Search Result */
cout<<"\n\t***Keyword Result*** \n \tKeyword :\t Count";
for(int i=0;i<KeyWordCount;i++)
cout<<"\n\t"<<Keywords[i]<<" :\t "<<Offsets[i].size()<<""; //Print Each Keyword and Its Seach Hit Count using Size of its Vector
cout<<"\n i="<<w;
/* --------------------------------------------*/
delete []Keywords; //delete the Buffer
Offsets.clear();
fclose(file); //Close the file
return 0;
}
推荐答案
这篇关于分段故障(核心转储)问题:每当我在.Raw文件中搜索关键字时。的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文