数据结构 C++实现 基于不同策略的英文单词的词频统计和检索系统( 三 )

<= 'Z') //判断单词中是否有大写字母return 1; //如果有,返回1} //forreturn 0; //没有返回0}//词频统计void StatisticsData() {system("cls"); //清屏ifstream fin; //文件读操作,存储设备读取到内存中fin.open(file); //关联文件filechar ch; //用于获取字符 string word; //用于存放单词int count = 0, min; //count用于标记单词个数,min用于标记最小的单词for (int i = 0; fin.get(ch); i++) { //读取文件内容,并去除符号if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {if (word == "\0") //word为空,放入第一个字母word = ch;elseword += ch; //word不为空,拼接字母组成单词} //ifelse {if (word != "\0") { //判断之前的word里面是否有单词count++; //有单词,总数+1if (count > MaxSize) {cout << "文章单词超出统计上限,系统已退出" << endl;fin.close(); //关闭文件exit(0); //退出程序system("pause"); //暂停}StatisticalWord(word); //存放到结构体数组里面} //ifword = "\0"; //重置word,用于存放新单词} //else} //for//按照词典排序(选择排序) 从小到大WordFrequency temp; //临时存储空间for (int i = 0; i < sum; i++) {min = i; //重置minfor (int j = i + 1; j < sum; j++) {if (WordTransition(WF[j].word) < WordTransition(WF[min].word)) //将单词转换成小写进行比较min = j; //得到最小单词序号} //for//交换原始单词,词频temp = WF[i];WF[i] = WF[min];WF[min] = temp;} //forfor (int i = 0; i < sum; i++) {min = i;for (int j = i + 1; j < sum; j++) {if (WordTransition(WF[j].word) == WordTransition(WF[min].word)) //两个单词相等if (WordJudge(WF[j].word) > WordJudge(WF[min].word)) //大写的排前面min = j; //得到最小单词序号} //for//交换原始单词,词频temp = WF[i];WF[i] = WF[min];WF[min] = temp;} //forfin.close(); //关闭文件}//将单词转换为唯一关键码intWordTransitionKey(string word) {int a[21] = { 0,2,3,5,7,11,13,17,19,23,27,29,31,37,41,47,51,67,87,101,111 }; //最长识别20个字母的的单词int sumkey = 0;for (int i = 0; i < int(word.size()); i++) {sumkey += int(word[i]); //每个字符的ASCLL值相加}sumkey += int('h') * a[int(word.size())];return sumkey;}
3、顺序表类
//顺序表类class SeqList{public:SeqList() {} //无参构造SeqList(datatype a[], int n){ //有参构造函数,初始化长度为n的顺序表if (n > MaxSize){cout << "单词数量过多,超出线性表最大容量" << endl;} //iffor (int i = 0; i < n; i++){wf[i].word = a[i].word;wf[i].frequency = a[i].frequency;} //for}~SeqList(){};//析构函数int Empty();//顺序表判空函数void PrintList(int n);//遍历操作,按序号依次输出各元素int SeqlistLocate(string word);//顺序查找int BinSearch(string word);//折半查找string getword(int n); //返回单词int getfre(int n); //返回词频private:datatype wf[MaxSize];//存放词频结构体的数组};//返回单词string SeqList::getword(int n) {return wf[n].word;}//返回词频int SeqList::getfre(int n) {return wf[n].frequency;}//顺序表判空函数int SeqList::Empty(){if (sum == 0)return 1;elsereturn 0;}//顺序查找int SeqList::SeqlistLocate(string word){for (int i = 0; i < sum; i++){ //依次遍历if (wf[i].word == word) //找到wordreturn i; //返回下标} //forreturn -1; //未找到返回-1}//折半查找intSeqList::BinSearch(string word){int mid, low = 0, high = sum - 1; //初始查找区间是[0, sum-1]while (low <= high) { //当区间存在时mid = (low + high) / 2; //初始化中值if (word == wf[mid].word) //找到wordbreak; //退出循环else if (WordTransition(word) < WordTransition(wf[mid].word)) //word在前半段high = mid - 1; //改变上限,gigh前移查找区间变为 [low,mid-1]else //word在后半段,或者不存在low = mid + 1; //改变下线,low后移查找区间变为 [mid+1,high]} //whileif (low <= high)return mid; //找到返回下标elsereturn -1; //未找到返回-1}//输出线性表顺序表,参数n用来控制输出顺序查找还是折半查找void SeqList::PrintList(int n){system("cls"); //清屏if (n == 1){ofstream fout; //文件写操作 内存写入存储设备 fout.open("outfile1.txt");fout