Yeah...=|
Ok, so my problem now is that it's not perfectly parsing each word. This means I sometimes get two or more words together counting as one even though in the input source file they have a space or line between them. It's also counting spaces as words.
Here's some updated code:
Problem area is in the last third, I think.Code:#include <cstdlib> // Required for exit #include <iostream> // Required for cin, cout, and cerr. #include <fstream> // Required for ifstream and ofstream. #include <string> // Required for string. #include <cctype> // Required for tolower, isalpha, isupper. #include <vector> // Required for vector <>. using namespace std; // Function Prototypes int main(); int main() { // Declare Objects. char character; bool text_state = true; string infile; string storage; ifstream input; ofstream store; // Prompt user for name of the input file. cout << "Enter the name of the input file:"; cin >> infile; // Prompt user for name of the storage file. cout << "Enter the name of the storage file:"; cin >> storage; // Open files. input.open(infile.c_str()); if(input.fail()) { cerr << "Error opening the input file\n"; exit(1); } store.open(storage.c_str()); //Read first character from html file. input.get(character); while(!input.eof()) { //Check state if(text_state) { if(character == '<') // Beginning of a tag. { text_state=false; // Change states. } else { store << character; // Still text, write to file. } } else { // Command state, no output required. if(character == '>') // End of a tag. { text_state=true; // Change states. } } // Read next character from html file. input.get(character); } input.close(); store.close(); /*----------------------------------------------------------------------------*/ // // This section removes non-alphabetic characters and converts uppercase letters // to lowercase form. It also preserves whitespace. // /*----------------------------------------------------------------------------*/ // Declare objects. string outfile; ifstream input2; ofstream output; // Prompt users for the name of the final output file. cout << "Enter the name of the final output file:"; cin >> outfile; // Open fthe storage file. input2.open(storage.c_str()); if(input2.fail()) { cerr << "Error opening the input file\n"; exit(1); } output.open(outfile.c_str()); // Read first character. input2.get(character); // cout << "Hi!\n" << endl; // Execution stage indicator. while(!input2.eof()) { character = tolower(character); if (isalpha(character)||isspace(character)) { text_state=true; output << character; } else { if(iscntrl(character)) { text_state=false; } text_state=false; } // Get next character. input2.get(character); } // End of "while". store.close(); output.close(); /*----------------------------------------------------------------------------*/ // // This next segment will parse words from the output file and list them in // descending order of frequency in a new file. // /*----------------------------------------------------------------------------*/ // Declare objects. char wordchar; // characters that will make up each word string keywords; string tempWord; string testWord; vector<string> WordList; vector<int> WordCount; ifstream input3; ofstream outfinal; // Prompt for filenames and open the files. cout << "Enter name of file for the final list of keywords: "; cin >> keywords; // This next line is the previous output file. input3.open(outfile.c_str()); if(input3.fail()) { cout << "Error opening input file"; exit(1); } outfinal.open(keywords.c_str()); if(outfinal.fail()) { cout << "Error opening keywords file"; exit(1); } // Get the first character from the input file. input3.get(wordchar); // Parse for unique words. cout << "Parse for unique words.\n"; do { if(isspace(wordchar)) { int i; text_state = false; for(i=0; i<WordList.size(); i++) { // Check to see if word is already in the list. if(WordList.at(i).compare(testWord) == 0) { WordCount.at(i) += 1; break; } } // end for // No match so add the word to the array. if (i == WordList.size()) { // Add word to array if it is not already included. WordList.push_back(testWord); WordCount.push_back(1); testWord = ""; //empty temp word variable for next word } } else { text_state=true; // Otherwise, continue building the current word. testWord += wordchar; } input3.get(wordchar); } while (!input3.eof()); cout << "end parse for words\n"; // Close input file. input3.close(); //Declarations for bubble sort algorithm. bool sorted = false; int tempCount; // Bubble sorter. //for(int i=1; (i<=WordList.size()) && flag; i++) while(!sorted) { // Assume no sorts will take place. sorted = true; // Compare and sort if necessary. for(int j=0; j<((WordList.size())-1); j++) { // Sort by WordCount in descending order if(WordCount.at(j+1) > WordCount.at(j)) { tempCount = WordCount.at(j); WordCount.at(j) = WordCount.at(j+1); WordCount.at(j+1) = tempCount; // Need to keep the association between the word and its count tempWord = WordList.at(j); WordList.at(j) = WordList.at(j+1); WordList.at(j+1) = tempWord; sorted = false; // a sort took place } } // end of 'for'. } // end of 'while'. // Write the sorted list. for(int i=0; i<WordList.size(); i++) { outfinal << WordList.at(i) << " occurs " << WordCount.at(i) << " times\n"; } outfinal.close(); return 0; }






Bookmarks