/*******************************************************
* File : searchPattern.cpp
* Author : saurabh gupta
* Description : searching a string pattern from a given file
*
* Usage :
* [sgupta@rhel55x86 search]$ ./searchPattern
* Usage: ./searchPattern filename searchpattern
* [sgupta@rhel55x86 search]$
*
* ToDo : change the code for stream search
********************************************************/
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;
typedef std :: vector<int> vecInt;
class CSearchString {
private:
vecInt m_vShifts;
void m_computeShifts(const std::string &strPattern);
public:
int m_findFirst(const std::string &text, const std::string &strPattern);
vecInt m_findAll(const std::string &text, const std::string &strPattern);
};
// create the shift-lookup-table
void CSearchString::m_computeShifts(const std::string &strPattern) {
int nNextShift = 0;
m_vShifts.clear();
m_vShifts.push_back(0); // shift to the first character
// start with the second character, since the shift to the first is always 0
for(int i = 1; i < strPattern.length(); i++) {
while(nNextShift > 0 && strPattern[nNextShift] != strPattern[i]) {
nNextShift = m_vShifts[nNextShift];
}
if(strPattern[nNextShift] == strPattern[i]) {
nNextShift++;
}
m_vShifts.push_back(nNextShift);
}
}
// search the string and return when the first occurrence is found
int CSearchString::m_findFirst(const std::string &text, const std::string &strPattern) {
int nNextShift = 0;
m_computeShifts(strPattern);
for(int i = 0; i < text.length(); i++)
{
while(nNextShift > 0 && strPattern[nNextShift] != text[i]) {
nNextShift = m_vShifts[nNextShift - 1];
}
if(strPattern[nNextShift] == text[i]) {
nNextShift++;
}
if(nNextShift == strPattern.length()) {
return i - (strPattern.length() - 1); // found the first so return
}
}
return -1;
}
// search the string and put every occurence in a vector
vecInt CSearchString::m_findAll(const std::string &text, const std::string &strPattern) {
int nNextShift = 0;
vecInt vPositions;
m_computeShifts(strPattern);
for(int i = 0; i < text.length(); i++) {
while(nNextShift > 0 && strPattern[nNextShift] != text[i]) {
nNextShift = m_vShifts[nNextShift - 1];
}
if(strPattern[nNextShift] == text[i]) {
nNextShift++;
}
if(nNextShift == strPattern.length()) {
vPositions.push_back(i - (strPattern.length() - 1)); // found one, put in list
nNextShift = m_vShifts[nNextShift - 1]; // restart strPattern with last shift
}
}
return vPositions;
}
int main(int argc, char **argv) { // main begins
if(argc <= 2){
cout << "Usage: " << argv[0] << " filename searchpattern" << endl;
return 0;
}
std::string strPattern = argv[2];
// read the file. Since the file is read like this all white-characters
// are eaten, so a search including white-characters will fail...
fstream fs;
std::string text, temp;
fs.open(argv[1], ios::in);
while(!fs.eof()) {
fs >> temp;
text += temp;
}
fs.close();
// search the file
CSearchString search;
vecInt pos_list = search.m_findAll(text, strPattern);
// print out result
std::vector<int>::iterator it;
cout << "Found " << pos_list.size() << " occurrences" << endl;
for(it = pos_list.begin(); it != pos_list.end(); it++) {
temp = text.substr(*it, strPattern.length());
cout << "Pos=" << *it << " == " << temp << endl;
}
return 0;
} //main ends
No comments:
Post a Comment