有趣的数据结构算法17——哈夫曼编码及其c语言实现

2023-10-27

有趣的数据结构算法17——哈夫曼编码及其c语言实现

什么是哈夫曼编码
哈夫曼编码过程举例
利用c语言实现哈夫曼编码
全部实现代码
GITHUB下载连接

哈夫曼编码真的好复杂噢！！！！！！！
在这里插入图片描述

什么是哈夫曼编码

哈夫曼编码(Huffman Coding)是一种编码方式，是可变字长编码(VLC)的一种。Huffman于1952年提出一种编码方法，该方法完全根据字符出现的概率来构造平均长度最短的码字。
在计算机数据处理中，哈夫曼编码使用变长编码表被编码内容进行编码，变长编码表是通过字符出现几率进行构造的，出现机率高的字母使用较短的编码，出现机率低的则使用较长的编码，这便使编码之后的字符串的平均长度、期望值降低，从而达到无损压缩数据的目的。
例如，在英文中，e的出现机率最高，而z的出现概率则最低。当利用哈夫曼编码对一篇英文进行压缩时，e用一个比特来表示，而z则用25个比特来表示。用普通的表示方法时，每个英文字母均占用一个字节，即8个比特。二者相比，e使用了一般编码的1/8的长度，z则使用了3倍多。倘若我们能实现对于英文中各个字母出现概率的较准确的估算，就可以大幅度提高无损压缩的比例。

哈夫曼编码过程举例

假设我们有一段英文abbcccddddeeee。
其中a数量为1；
b数量为2；
c数量为3；
d数量为4；
e数量为5；
我们将其数量从小到大进行排队。
在这里插入图片描述
取出队列的前两位，组成哈夫曼树的最底层。

将二者合成的父结点压入队列，父结点（a&b）的总数量为3（分别是a b b）。此时队列为：

取出队列的前两位，组成哈夫曼树的新一结点。

将二者合成的父结点压入队列，父结点（a&b&c）的总数量为6（分别是a b b c c c）。此时队列为：
在这里插入图片描述
取出队列的前两位，组成哈夫曼树的新一结点。

将二者合成的父结点压入队列，父结点（d&e）的总数量为9（分别是d d d d e e e e e）。此时队列为：

取出队列的前两位，组成哈夫曼树的新一结点。

此时哈夫曼树便构建完成。
编码方式如下：
在这里插入图片描述
编码结果为：
a为000。
b为001。
c为01。
d为10。
e为11。

利用c语言实现哈夫曼编码

生成哈夫曼树

生成哈夫曼树的过程主要分为三部分：
1、统计每个ascii码的个数。
2、将每个ascii码按照个数从小到大排队。
3、每次取队列的前两个，生成新节点，按照从小到大的规律压入队列。直至仅剩一个根节点，取出。

htTree *buildTree(char *inputString)
{
	int *probility = (int*)malloc(sizeof(int)* 256);	//申请256个空间用于存放每个ascii码的数量

	for (int i = 0; i < 256; i++){
		probility[i] = 0;
	}

	for (int j = 0; inputString[j] != '\0'; j++){
		probility[(unsigned char)inputString[j]]++;		//利用char和0~256的转换完成计数
	}

	pQueue * huffmanqueue;								//定义队列
	initpQueue(&huffmanqueue);							//初始化队列

	for (int k = 0; k < 256; k++){
		if (probility[k] != 0)							//遍历所有ascii码，如果某个ascii码的数量不为0
		{												//按照数量从小到大排列
			htNode* aux = (htNode*)malloc(sizeof(htNode));
			aux->left = NULL;							
			aux->right = NULL;
			aux->symbol = (char)k;						
			addpQueue(&huffmanqueue, aux, probility[k]);
		}
	}

	free(probility);									//在数据存入队列后，释放数组

	while (huffmanqueue->size!=1){
		int priority = huffmanqueue->first->priority;	//取出队列中前两个ascii码
		priority += huffmanqueue->first->Next->priority;

		htNode* left = getQueue(&huffmanqueue);
		htNode* right = getQueue(&huffmanqueue);

		htNode* newNode = (htNode*)malloc(sizeof(htNode));	//生成新结点，左右子树分别为前两个ascii码
		newNode->left = left;
		newNode->right = right;
		addpQueue(&huffmanqueue, newNode, priority);		//将新结点加入队列
	}
	htTree *tree = (htTree *)malloc(sizeof(htTree));		
	tree->root = getQueue(&huffmanqueue);					//根节点为队列仅剩的最后一个结点
	return tree;
}

生成哈夫曼编码

生成哈夫曼编码的过程主要分为以下三个部分：
1、将根节点传入traverseTree函数，判断是否抵达叶子节点。
2、如果尚未到达叶子结点，则分别对左右子树进行判断，判断编码中应该添加0还是添加1，并进行下一结点的运算。
3、如果到达叶子结点，则加上停止符号，并将编码存入Table中。

void traverseTree(htNode* node, hlTable** table, int k, char code[256]){
	if (node->left == NULL&&node->right == NULL){		//当是根节点的时候
		code[k] = '\0';									//为末尾加上停止符号
		hlNode *aux = (hlNode*)malloc(sizeof(hlNode));	//申请Table结点的空间
		aux->core = (char *)malloc(sizeof(char)*(strlen(code) + 1));	//aux->core指的是某一个字母的编码结果
		strcpy(aux->core, code);		//将编码结果存入aux->core
		aux->symbol = node->symbol;		//将ascii码存入aux->symbol
		aux->next = NULL;
		if ((*table)->first == NULL){	//如果是Table的开头，则初始化
			(*table)->first = aux;
			(*table)->last = aux;
		}
		else{
			(*table)->last->next = aux;	//否则在末尾添加
			(*table)->last = aux;
		}
	}
	if (node->left != NULL){			//在左边则为编码添加上一个0
		code[k] = '0';
		traverseTree(node->left, table, k + 1, code);
	}		
	if (node->right != NULL){			//在右边则为编码添加上一个1
		code[k] = '1';
		traverseTree(node->right, table, k + 1, code);
	}

}

hlTable* buildTable(htTree *huffmanTree){
	hlTable* table = (hlTable*)malloc(sizeof(hlTable));		//申请hlTable的空间
	table->first = NULL;	//first和last都为空
	table->last = NULL;

	char code[256];			//初始化code
	int k = 0;				//初始化k为0
	traverseTree(huffmanTree->root, &table, k, code);		//生成哈夫曼编码
	return table;
}

解码与编码

在哈夫曼编码中，Table用于查询ASCII码对应的编码值，其用于编码；
哈夫曼树用于将编码值转化成ASCII码。

void encode(hlTable* huffmanTable, char* stringToEncode){
	hlNode *traversal;

	printf("\n\nEncoding……\n\nInput string:\n%s\n\nDecoded string:\n", stringToEncode);
	for (int i = 0; stringToEncode[i] != '\0'; i++){
		traversal = huffmanTable->first;			//如果在Table中找到对应的ascii码，则打印编码结果。
		while (traversal->symbol != stringToEncode[i]){
			traversal = traversal->next;
		}
		printf("%s ", traversal->core);
	}
	printf("\n");
}

void decode(htTree* huffmanTree, char* stringToDecode){
	htNode *traversal = huffmanTree->root;
	printf("\n\nDecoding……\n\nInput string:\n%s\n\nDecoded string:\n",stringToDecode);
	for (int i = 0; stringToDecode[i] != '\0'; i++){
		if (traversal->left == NULL&&traversal->right == NULL){		//如果抵达叶子结点，则打印ascii码
			printf("%c", traversal->symbol);
			traversal = huffmanTree->root;
		}
		if (stringToDecode[i] == '0')		//如果编码内容为0，向左子树寻找
			traversal = traversal->left;
		if (stringToDecode[i] == '1')		//如果编码内容为1，向右子树寻找
			traversal = traversal->right;
		if (stringToDecode[i] != '0'&&stringToDecode[i] != '1'){
			return;
		}
	}
	if (traversal->left == NULL&&traversal->right == NULL){
		printf("%c", traversal->symbol);
		traversal = huffmanTree->root;
	}
	printf("\n");
	
}

全部实现代码

全部代码分为五个部分，分别是main.cpp、huffman.cpp、queue.cpp、huffman.h、queue.h。
其中main.cpp主要保存的是主函数执行部分；huffman.cpp保存的是哈夫曼树的编码与解码的函数；queue.cpp用于处理队列，辅助哈夫曼编码。
main.cpp：

#include<stdio.h>
#include<stdlib.h>
#include"huffman.h"

void visit(htNode* t, int level){
	if (t->left == NULL&&t->right == NULL)
		printf("%c位于第%d层\n", t->symbol, level);
}

void scan(htNode* t, int level){
	if (t != NULL){
		scan(t->left, level + 1);
		scan(t->right, level + 1);
		visit(t, level);
	}
}

int main(){
	htTree* tree = buildTree("abbcccddddeeeee");
	hlTable* Table = buildTable(tree);
	encode(Table, "abbcccddddeeeee");
	decode(tree, "0011111000111");
	scan(tree->root, 0);
	return 0;
}

huffman.cpp：

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

#include"huffman.h"
#include"queue.h"

htTree *buildTree(char *inputString)
{
	int *probility = (int*)malloc(sizeof(int)* 256);	//申请256个空间用于存放每个ascii码的数量

	for (int i = 0; i < 256; i++){
		probility[i] = 0;
	}

	for (int j = 0; inputString[j] != '\0'; j++){
		probility[(unsigned char)inputString[j]]++;		//利用char和0~256的转换完成计数
	}

	pQueue * huffmanqueue;								//定义队列
	initpQueue(&huffmanqueue);							//初始化队列

	for (int k = 0; k < 256; k++){
		if (probility[k] != 0)							//遍历所有ascii码，如果某个ascii码的数量不为0
		{												//按照数量从小到大排列
			htNode* aux = (htNode*)malloc(sizeof(htNode));
			aux->left = NULL;							
			aux->right = NULL;
			aux->symbol = (char)k;						
			addpQueue(&huffmanqueue, aux, probility[k]);
		}
	}

	free(probility);									//在数据存入队列后，释放数组

	while (huffmanqueue->size!=1){
		int priority = huffmanqueue->first->priority;	//取出队列中前两个ascii码
		priority += huffmanqueue->first->Next->priority;

		htNode* left = getQueue(&huffmanqueue);
		htNode* right = getQueue(&huffmanqueue);

		htNode* newNode = (htNode*)malloc(sizeof(htNode));	//生成新结点，左右子树分别为前两个ascii码
		newNode->left = left;
		newNode->right = right;
		addpQueue(&huffmanqueue, newNode, priority);		//将新结点加入队列
	}
	htTree *tree = (htTree *)malloc(sizeof(htTree));		
	tree->root = getQueue(&huffmanqueue);					//根节点为队列仅剩的最后一个结点
	return tree;
}

void traverseTree(htNode* node, hlTable** table, int k, char code[256]){
	if (node->left == NULL&&node->right == NULL){		//当是根节点的时候
		code[k] = '\0';									//为末尾加上停止符号
		hlNode *aux = (hlNode*)malloc(sizeof(hlNode));	//申请Table结点的空间
		aux->core = (char *)malloc(sizeof(char)*(strlen(code) + 1));	//aux->core指的是某一个字母的编码结果
		strcpy(aux->core, code);										//将编码结果存入aux->core
		aux->symbol = node->symbol;										//将ascii码存入aux->symbol
		aux->next = NULL;
		if ((*table)->first == NULL){									//如果是Table的开头，则初始化
			(*table)->first = aux;
			(*table)->last = aux;
		}
		else{
			(*table)->last->next = aux;									//否则在末尾添加
			(*table)->last = aux;
		}
	}
	if (node->left != NULL){											//在左边则为编码添加上一个0
		code[k] = '0';
		traverseTree(node->left, table, k + 1, code);
	}		
	if (node->right != NULL){											//在右边则为编码添加上一个1
		code[k] = '1';
		traverseTree(node->right, table, k + 1, code);
	}

}

hlTable* buildTable(htTree *huffmanTree){
	hlTable* table = (hlTable*)malloc(sizeof(hlTable));		//申请hlTable的空间
	table->first = NULL;									//first和last都为空
	table->last = NULL;

	char code[256];											//初始化code
	int k = 0;												//初始化k为0
	traverseTree(huffmanTree->root, &table, k, code);		//生成哈夫曼编码
	return table;
}

void encode(hlTable* huffmanTable, char* stringToEncode){
	hlNode *traversal;

	printf("\n\nEncoding……\n\nInput string:\n%s\n\nDecoded string:\n", stringToEncode);
	for (int i = 0; stringToEncode[i] != '\0'; i++){
		traversal = huffmanTable->first;			//如果在Table中找到对应的ascii码，则打印编码结果。
		while (traversal->symbol != stringToEncode[i]){
			traversal = traversal->next;
		}
		printf("%s ", traversal->core);
	}
	printf("\n");
}

void decode(htTree* huffmanTree, char* stringToDecode){
	htNode *traversal = huffmanTree->root;
	printf("\n\nDecoding……\n\nInput string:\n%s\n\nDecoded string:\n",stringToDecode);
	for (int i = 0; stringToDecode[i] != '\0'; i++){
		if (traversal->left == NULL&&traversal->right == NULL){		//如果抵达叶子结点，则打印ascii码
			printf("%c", traversal->symbol);
			traversal = huffmanTree->root;
		}
		if (stringToDecode[i] == '0')		//如果编码内容为0，向左子树寻找
			traversal = traversal->left;
		if (stringToDecode[i] == '1')		//如果编码内容为1，向右子树寻找
			traversal = traversal->right;
		if (stringToDecode[i] != '0'&&stringToDecode[i] != '1'){
			return;
		}
	}
	if (traversal->left == NULL&&traversal->right == NULL){
		printf("%c", traversal->symbol);
		traversal = huffmanTree->root;
	}
	printf("\n");
	
}

queue.cpp：

#include<stdio.h>
#include<stdlib.h>
#include"queue.h"

void initpQueue(pQueue **pqueue){
	(*pqueue) = (pQueue*)malloc(sizeof(pQueue));
	(*pqueue)->first = NULL;
	(*pqueue)->size = 0;
	return;
}
 
void addpQueue(pQueue **queue, TYPE val, unsigned int priority){
	if ((*queue)->size == Max_Size){
		printf("\nQueue is FULL\n");
		return;
	}
	pQueueNode *aux = (pQueueNode *)malloc(sizeof(pQueueNode));
	aux->priority = priority;
	aux->val = val;
	if ((*queue)->size == 0 || (*queue)->first == NULL){
		aux->Next = NULL;
		(*queue)->first = aux;
		(*queue)->size = 1;
		return;
	}
	else{
		if (priority <= (*queue)->first->priority){
			aux->Next = (*queue)->first;
			(*queue)->first = aux;
			((*queue)->size)++;
		}
		else{
			pQueueNode *iterator = (*queue)->first;
			while (iterator->Next != NULL){
				if (priority <= iterator->Next->priority){
					aux->Next = iterator->Next;
					iterator->Next = aux;
					(*queue)->size++;
					return;
				}
				iterator = iterator->Next;
			}
			if (iterator->Next == NULL){
				aux->Next = NULL;
				iterator->Next = aux;
				(*queue)->size++;
				return;
			}
		}
	}

}

TYPE getQueue(pQueue **queue){
	TYPE returnVal;
	if ((*queue)->size > 0){
		returnVal = (*queue)->first->val;
		(*queue)->first = (*queue)->first->Next;
		(*queue)->size--;
	}
	else{
		printf("\nQueue is empty\n");
	}
	return returnVal;
}

huffman.h：

#pragma once
#ifndef _HUFFMAN_H
#define _HUFFMAN_H

typedef struct _htNode{
	char symbol;
	struct _htNode *left, *right;
}htNode;

typedef struct _htTree{
	htNode* root;
}htTree;

typedef struct _hlNode{
	char symbol;
	char *core;
	struct _hlNode* next;
}hlNode;

typedef struct _hlTable{
	hlNode *first;
	hlNode *last;
}hlTable;
htTree *buildTree(char *inputString);
hlTable *buildTable(htTree *huffmanTree);
void encode(hlTable* huffmanTable, char* stringToEncode);
void decode(htTree* huffmanTree, char* stringToDecode);
#endif

queue.h：

#pragma once
#ifndef _PQUEUE_H
#define _PQUEUE_H
#include"huffman.h"
#define TYPE htNode*
#define Max_Size 256

typedef struct _pQueueNode{
	TYPE val;
	unsigned int priority;
	struct _pQueueNode* Next;
}pQueueNode;

typedef struct _pQueue{
	unsigned int size;
	pQueueNode *first;
}pQueue;

void initpQueue(pQueue **queue);
void addpQueue(pQueue **queue, TYPE val, unsigned int priority);
TYPE getQueue(pQueue **queue);
 
#endif

实现结果为：
在这里插入图片描述

GITHUB下载连接

https://github.com/bubbliiiing/Data-Structure-and-Algorithm

希望得到朋友们的喜欢。
有问题的朋友可以提问噢。

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)