Tuesday, July 26, 2011

c++ code to search a string pattern from a file


/*******************************************************
 * File         :    searchPattern.cpp
 * Author       :    saurabh gupta
 * Description  :    searching a string pattern from a given file
 *
 * Usage        :   
 * [sgupta@rhel55x86 search]$ ./searchPattern
 * Usage: ./searchPattern filename searchpattern
 * [sgupta@rhel55x86 search]$
 *
 * ToDo         :    change the code for stream search
 ********************************************************/
#include <iostream>
#include <fstream>
#include <string>
#include <vector>

using namespace std;

typedef std :: vector<int> vecInt;

class CSearchString {
     private:
           vecInt m_vShifts;
           void m_computeShifts(const std::string &strPattern);
          
     public:
           int m_findFirst(const std::string &text, const std::string &strPattern);
           vecInt m_findAll(const std::string &text, const std::string &strPattern);

};

// create the shift-lookup-table
void CSearchString::m_computeShifts(const std::string &strPattern) {
     int nNextShift = 0;
     m_vShifts.clear();
     m_vShifts.push_back(0); // shift to the first character

// start with the second character, since the shift to the first is always 0
     for(int i = 1; i < strPattern.length(); i++) {
           while(nNextShift > 0 && strPattern[nNextShift] != strPattern[i]) {
                nNextShift = m_vShifts[nNextShift];
           }

           if(strPattern[nNextShift] == strPattern[i]) {
                nNextShift++;
           }

           m_vShifts.push_back(nNextShift);
     }
}

// search the string and return when the first occurrence is found
int CSearchString::m_findFirst(const std::string &text, const std::string &strPattern) {
     int nNextShift = 0;
     m_computeShifts(strPattern);
     for(int i = 0; i < text.length(); i++)
     {
           while(nNextShift > 0 && strPattern[nNextShift] != text[i]) {
                nNextShift = m_vShifts[nNextShift - 1];
           }
          
           if(strPattern[nNextShift] == text[i]) {
                nNextShift++;
           }
          
           if(nNextShift == strPattern.length()) {
                return i - (strPattern.length() - 1); // found the first so return
           }
     }
     return -1;
}

// search the string and put every occurence in a vector
vecInt CSearchString::m_findAll(const std::string &text, const std::string &strPattern) {
     int nNextShift = 0;
     vecInt vPositions;
     m_computeShifts(strPattern);
     for(int i = 0; i < text.length(); i++) {
           while(nNextShift > 0 && strPattern[nNextShift] != text[i]) {
                nNextShift = m_vShifts[nNextShift - 1];
           }

           if(strPattern[nNextShift] == text[i]) {
                nNextShift++;
           }

           if(nNextShift == strPattern.length()) {
                vPositions.push_back(i - (strPattern.length() - 1)); // found one, put in list
                nNextShift = m_vShifts[nNextShift - 1];                    // restart strPattern with last shift
           }
     }
     return vPositions;
}

int main(int argc, char **argv) {    // main begins
     if(argc <= 2){
           cout << "Usage: " << argv[0] << " filename searchpattern" << endl;
           return 0;
     }
     std::string strPattern = argv[2];

     // read the file. Since the file is read like this all white-characters
     // are eaten, so a search including white-characters will fail...
     fstream fs;
     std::string text, temp;
     fs.open(argv[1], ios::in);
 
     while(!fs.eof()) {
           fs >> temp;
           text += temp;
     }
     fs.close();

     // search the file
     CSearchString search;
     vecInt pos_list = search.m_findAll(text, strPattern);

     // print out result
     std::vector<int>::iterator it;
     cout << "Found " << pos_list.size() << " occurrences" << endl;
    
     for(it = pos_list.begin(); it != pos_list.end(); it++) {
           temp = text.substr(*it, strPattern.length());
           cout << "Pos=" << *it << " == " << temp << endl;
     }
     return 0;
}    //main ends

No comments:

Post a Comment