main.cpp
资源名称:TextQuery.rar [点击查看]
上传用户:cydzxjc
上传日期:2021-11-14
资源大小:2668k
文件大小:4k
源码类别:
STL
开发平台:
Visual C++
- // TextQuery.cpp : 定义控制台应用程序的入口点。
- //
- #include "stdafx.h"
- #include "TextQuery.h"
- //返回值是指向string vector的指针
- vector<string>* retrieve_text();
- //将按换行符分割的字符串按空格分割成单词
- text_loc* separate_words(const vector<string> *text_file);
- //生成map
- extern map<string, loc*>* build_word_map(const text_loc *text_locations);
- //对map进行迭代
- void display_map_text(map<string, loc*> *text_map);
- string CTextQuery::filt_elems("",.;:!<<)(\/?~");
- int _tmain(int argc, _TCHAR* argv[])
- {
- CTextQuery tq;
- tq.doit();
- tq.query_text();
- tq.display_map_text();
- return 0;
- }
/// Prompts on stdout for a file name, reads the name from stdin and loads the
/// named text file line by line.
///
/// Each line is echoed to stdout as "line read:<text>" while it is loaded.
/// If the file cannot be opened, a message is written to stderr and the
/// process terminates via exit(-1).
///
/// @return A heap-allocated vector holding one string per line, with the
///         '\n' delimiter removed. Allocated with new; the caller is
///         responsible for deleting it.
std::vector<std::string>* retrieve_text()
{
    std::string file_name;
    std::cout << "please enter file name: ";
    std::cin >> file_name;

    // Open the text file for input.
    std::ifstream infile(file_name.c_str(), std::ios::in);
    if (!infile)
    {
        std::cerr << "oops! unable to open file "
                  << file_name << "--bailing out!\n";
        exit(-1);
    }

    // Finish the prompt line before the per-line echo starts.
    std::cout << '\n';

    std::vector<std::string>* lines_of_text = new std::vector<std::string>;
    std::string textline;

    // The delimiter must be the newline character '\n'; using the letter 'n'
    // would split the text at every 'n' in the file.
    while (std::getline(infile, textline, '\n'))
    {
        std::cout << "line read:" << textline << '\n';
        lines_of_text->push_back(textline);
    }

    return lines_of_text;
}
- text_loc* separate_words(const vector<string> *text_file)
- {
- //words: 包含独立单词的集合
- //locations:包含相关的行/列信息
- vector<string> *words = new vector<string>;
- vector<location> *locations = new vector<location>;
- unsigned short line_pos = 0; //当前行号
- //迭代文件中的每个行
- for (; line_pos < text_file->size(); ++line_pos)
- {
- //textline:
- //word_pos:
- short word_pos = 0;
- string textline = (*text_file)[line_pos];
- string::size_type pos = 0, prev_pos = 0;
- while ((pos = textline.find_first_of(' ', pos)) != string::npos)
- {
- //存储当前单词子串的拷贝
- words->push_back(textline.substr(prev_pos, pos - prev_pos));
- //将行/列信息存储为pair
- locations->push_back(make_pair(line_pos, word_pos));
- //为下一次迭代修改位置信息
- ++word_pos;
- prev_pos = ++pos;
- }
- //现在处理最后一个单词
- words->push_back(textline.substr(prev_pos, pos - prev_pos));
- locations->push_back(make_pair(line_pos, word_pos));
- }
- return new text_loc(words, locations);
- }
- map<string, loc*>* build_word_map(const text_loc *text_locations)
- {
- map<string, loc*> *word_map = new map<string, loc*>;
- vector<string> *text_words = text_locations->first;
- vector<location> *text_locs = text_locations->second;
- register int elem_cnt = text_words->size();
- for (int ix = 0; ix < elem_cnt; ++ix)
- {
- string textword = (*text_words)[ix];
- //排除策略:如果少于3个字符,
- //或在排除集合中存在,
- //则不输入到map中.
- if (textword.size() < 3 /*|| exclusion_set.cout(textword)*/)
- {
- continue;
- }
- //判断单词是否存在
- //如果count()返回0,则不存在--加入它
- if (!word_map->count((*text_words)[ix]))
- {
- loc *ploc = new vector<location>;
- ploc->push_back((*text_locs)[ix]);
- word_map->insert(val_Type((*text_words)[ix], ploc));
- }
- else
- {
- //修改该项的位置向量
- (*word_map)[(*text_words)[ix]]->push_back((*text_locs)[ix]);
- }
- }
- return word_map;
- }
- void display_map_text(map<string, loc*> *text_map)
- {
- typedef map<string, loc*> tmap;
- tmap::iterator iter = text_map->begin(), iter_end = text_map->end();
- while(iter != iter_end)
- {
- cout << "word: "<<(*iter).first<<" (";
- int loc_cnt = 0;
- loc *text_locs = (*iter).second;
- loc::iterator liter = text_locs->begin(), liter_end = text_locs->end();
- while(liter != liter_end)
- {
- if (loc_cnt)
- {
- cout<<',';
- }
- else
- {
- ++loc_cnt;
- }
- cout<<'('<<(*liter).first<<','<<(*liter).second<<')';
- ++liter;
- }
- cout<<")n";
- ++iter;
- }
- cout<<endl;
- }