【问题描述】
编写程序统计一个英文文本文件中每个单词的出现次数(词频统计),并将统计结果按单词字典序输出到屏幕上。
要求:程序应用二叉排序树(BST)来存储和统计读入的单词。
注:在此单词为仅由字母组成的字符序列。包含大写字母的单词应将大写字母转换为小写字母后统计。在生成二叉排序树不做平衡处理。
【输入形式】
打开当前目录下文件article.txt,从中读取英文单词进行词频统计。
【输出形式】
程序应首先输出二叉排序树中根节点、根节点的右节点及根节点的右节点的右节点上的单词(即root、root->right、root->right->right节点上的单词),单词中间有一个空格分隔,最后一个单词后没有空格,直接为回车(若单词个数不足三个,则按实际数目输出)。
程序将单词统计结果按单词字典序输出到屏幕上,每行输出一个单词及其出现次数,单词和其出现次数间由一个空格分隔,出现次数后无空格,直接为回车。
【样例输入】
当前目录下文件article.txt内容如下:
“Do not take to heart every thing you hear.”
“Do not spend all that you have.”
“Do not sleep as long as you want;”
【样例输出】
do not take
all 1
as 2
do 3
every 1
have 1
hear 1
heart 1
long 1
not 3
sleep 1
spend 1
take 1
that 1
thing 1
to 1
want 1
you 3
【样例说明】
程序首先在屏幕上输出程序中二叉排序树上根节点、根节点的右子节点及根节点的右子节点的右子节点上的单词,分别为do not take,然后按单词字典序依次输出单词及其出现次数。
方法一
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
typedef struct node
{
char word[20];
int num;
struct node *lchild, *rchild;
} Tree;
Tree *temp, *tempp, *root = NULL, *add = NULL;
Tree *New(char w[]);
Tree *PTFT(Tree *root);
int main()
{
char ch;
FILE *fp;
fp = fopen("article.txt", "r+");
ch = fgetc(fp);
while (ch != EOF)
{
char s[20] = {0};
if ((ch <= 'z' && ch >= 'a') || (ch <= 'Z' && ch >= 'A'))
{
if (ch <= 'Z' && ch >= 'A')
ch = ch + 32;
s[0] = ch;
for (int i = 1; i < 20; i++)
{
ch = fgetc(fp);
if (ch <= 'Z' && ch >= 'A')
{
ch = ch + 32;
}
if ((ch >= 'z' || (ch < 'a' && ch > 'Z') || ch < 'A'))
{
root = New(s);
ungetc(ch, fp);
break;
}
s[i] = ch;
}
}
ch = fgetc(fp);
}
while (tempp != NULL)
{
printf("%s ", tempp->word);
tempp = tempp->rchild;
}
printf("\n");
PTFT(root);
fclose(fp);
return 0;
}
Tree *New(char w[])
{
temp = (Tree *)malloc(sizeof(Tree));
temp->lchild = temp->rchild = NULL;
for (int i = 0; i < 20; i++)
{
temp->word[i] = 0;
}
strcpy(temp->word, w);
if (root == NULL)
{
temp->num = 1;
root = tempp = temp;
}
else
{
temp->num = 1;
for (int i = 0; strlen(tempp->word) != 0; i++)
{
if (strcmp(temp->word, tempp->word) > 0 && tempp->rchild == NULL)
{
tempp = tempp->rchild = temp;
break;
}
if (strcmp(temp->word, tempp->word) > 0 && tempp->rchild != NULL)
tempp = tempp->rchild;
if (strcmp(temp->word, tempp->word) == 0)
{
tempp->num++;
break;
}
if (strcmp(temp->word, tempp->word) < 0 && tempp->lchild != NULL)
tempp = tempp->lchild;
if (strcmp(temp->word, tempp->word) < 0 && tempp->lchild == NULL)
{
tempp = tempp->lchild = temp;
break;
}
}
tempp = root;
}
tempp = root;
return root;
}
Tree *PTFT(Tree *root)
{
if (root == NULL)
{
return NULL;
}
PTFT(root->lchild);
printf("%s %d", root->word, root->num);
printf("\n");
PTFT(root->rchild);
/*printf("%s %d", root->word, root->num);
printf("\n");*/
return NULL;
}
方法二
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct BSTree
{
struct BSTree *lchild;
char str[20];
int num;
struct BSTree *rchild;
};
typedef struct BSTree *bst;
bst tree, one, curr;
struct BSTree llBtree[100];
int n, treeIdx; // number, self_statistic;
int i, j, k;
bst creat_bst(char s[])
{
one = &llBtree[treeIdx++];
one->lchild = one->rchild = NULL;
for (i = 0; i < 20; i++)
one->str[i] = 0;
strcpy(one->str, s);
if (tree == NULL)
{
one->num = 1;
tree = curr = one;
}
else
{
one->num = 1;
for (j = 0; strlen(curr->str) != 0; j++)
{
if (strcmp(one->str, curr->str) < 0 && curr->lchild == NULL)
{
curr = curr->lchild = one;
break;
}
else if (strcmp(one->str, curr->str) < 0 && curr->lchild != NULL)
curr = curr->lchild;
else if (strcmp(one->str, curr->str) > 0 && curr->rchild == NULL)
{
curr = curr->rchild = one;
break;
}
else if (strcmp(one->str, curr->str) > 0 && curr->rchild != NULL)
curr = curr->rchild;
else if (strcmp(one->str, curr->str) == 0)
{
curr->num++;
break;
}
}
}
curr = tree;
return tree;
}
bst print_tree(bst tree)
{
if (tree == NULL)
return NULL;
print_tree(tree->lchild);
printf("%s %d\n", tree->str, tree->num);
print_tree(tree->rchild);
return NULL;
}
int main()
{
char c;
FILE *input = fopen("article.txt", "r");
while ((c = fgetc(input)) != EOF)
{
char s[20] = {0};
if ((c < 123 && c > 96) || (c < 91 && c > 64))
{
if (c < 91 && c > 64)
c += 32;
s[0] = c;
for (i = 1; i < 20; i++)
{
c = fgetc(input);
if ((c > 122 || (c < 97 && c > 90) || c < 65))
{
tree = creat_bst(s);
ungetc(c, input);
break;
}
if (c < 91 && c > 64)
c += 32;
s[i] = c;
}
}
else
continue;
}
for (; curr != NULL; curr = curr->rchild)
printf("%s ", curr->str);
printf("\n");
print_tree(tree);
return 0;
}
仅供参考、学习!不得抄袭!