C++数据结构之文件压缩(哈夫曼树)实例详解
概要:
项目简介:利用哈夫曼编码的方式对文件进行压缩,并且对压缩文件可以解压
开发环境:windows vs2013
项目概述:
1.压缩
a.读取文件,将每个字符,该字符出现的次数和权值构成哈夫曼树
b.哈夫曼树是利用小堆构成,字符出现次数少的节点指针存在堆顶,出现次数多的在堆底
c.每次取堆顶的两个数,再将两个数相加进堆,直到堆被取完,这时哈夫曼树也建成
d.从哈夫曼树中获取哈夫曼编码,然后再根据整个字符数组来获取出现了得字符的编码
e.获取编码后每次凑满8位就将编码串写入到压缩文件(value处理编码1与它即可,0只移动位)
f.写好配置文件,统计每个字符及其出现次数,并以“字符+','+次数”的形式保存到配置文件中
2.解压
a.读取配置文件,统计所有字符的个数
b.构建哈夫曼树,读解压缩文件,将所读到的编码字符的这个节点所所含的字符写入到解压缩文件中,知道将压缩文件读完
c.压缩解压缩完全完成,进行小文件大文件的测试
实例代码:
#pragma once
#include<vector>
template<class T>
struct Less
{
bool operator()(const T& l, const T& r) const
{
return l < r;
}
};
template<class T>
struct Greater
{
bool operator()(const T& l, const T& r) const
{
return l > r;
}
};
template<class T, class Compare>
class Heap
{
public:
Heap()
{}
Heap(T* array, size_t n) //建堆
{
_array.reserve(n);
for (size_t i = 0; i < n; i++)
{
_array.push_back(array[i]);
}
for (int i = (_array.size() - 2) >> 1; i >= 0; --i)
{
_AdjustDown(i);
}
}
const T& Top()const
{
return _array[0];
}
void Push(const T& x)
{
_array.push_back(x);
_AdjustUp(_array.size() - 1);
}
size_t Size()
{
return _array.size();
}
void Pop()
{
assert(_array.size() > 0);
swap(_array[0], _array[_array.size() - 1]);
_array.pop_back();
_AdjustDown(0);
}
bool Empty()
{
return _array.size() == 0;
}
void Print()
{
for (size_t i = 0; i < _array.size(); ++i)
{
cout << _array[i] << " ";
}
cout << endl;
}
protected:
void _AdjustUp(int child) //上调
{
Compare ComFunc;
int parent = (child - 1) >> 1;
while (child)
{
if (ComFunc(_array[child], _array[parent]))
{
swap(_array[child], _array[parent]);
child = parent;
parent = (child - 1) >> 1;
}
else
{
break;
}
}
}
void _AdjustDown(int root) //下调
{
Compare ComFunc;
int parent = root;
int child = root * 2 + 1;
while (child < _array.size())
{
if (child + 1 < _array.size() && ComFunc(_array[child + 1], _array[child]))
{
++child;
}
if (ComFunc(_array[child], _array[parent]))
{
swap(_array[child], _array[parent]);
parent = child;
child = parent * 2 + 1;
}
else
{
break;
}
}
}
protected:
vector<T> _array;
};
void TestHeap()
{
int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 };
//Heap<int> heap(a, sizeof(a) / sizeof(a[0]));
//Heap<int, Less<int>> heap(a, sizeof(a) / sizeof(a[0]));
Heap<int, Greater<int>> heap(a, sizeof(a) / sizeof(a[0]));
heap.Print();
heap.Push(25);
heap.Print();
heap.Pop();
heap.Print();
}
#pragma once
#include"Heap.h"
template<class T>
struct HuffmanTreeNode
{
HuffmanTreeNode<T>* _left;
HuffmanTreeNode<T>* _right;
HuffmanTreeNode<T>* _parent;
T _w; //权值
HuffmanTreeNode(const T& x)
:_w(x)
, _left(NULL)
, _right(NULL)
, _parent(NULL)
{}
};
template<class T>
class HuffmanTree
{
typedef HuffmanTreeNode<T> Node;
public:
HuffmanTree()
:_root(NULL)
{}
HuffmanTree(T* a, size_t n, const T& invalid = T()) //构建哈夫曼树
{
struct Compare
{
bool operator()(Node* l, Node* r) const
{
return l->_w < r->_w;
}
};
Heap<Node*, Compare> minHeap;
for (size_t i = 0; i < n; ++i)
{
if (a[i] != invalid)
{
minHeap.Push(new Node(a[i]));
}
}
while (minHeap.Size() > 1)
{
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();
Node* parent = new Node(left->_w + right->_w);
parent->_left = left;
parent->_right = right;
left->_parent = parent;
right->_parent = parent;
minHeap.Push(parent);
}
_root = minHeap.Top();
}
Node* GetRoot()
{
return _root;
}
~HuffmanTree()
{
_Destory(_root);
}
protected:
void _Destory(Node* root)
{
if (root == NULL)
{
return;
}
_Destory(root->_left);
_Destory(root->_right);
delete root;
}
HuffmanTree(const HuffmanTree<T>& tree);
HuffmanTree& operator=(const HuffmanTree<T>& tree);
protected:
Node* _root;
};
void TestHuffmanTree()
{<pre code_snippet_id="2340790" snippet_file_name="blog_20170418_2_3778260" name="code" class="cpp">#pragma once
#include<iostream>
using namespace std;
#include<assert.h>
#include"HuffmanTree.h"
typedef long long LongType;
struct CharInfo
{
unsigned char _ch; //字符
LongType _count; //字符出现的次数
string _code; //huffman编码
CharInfo(LongType count = 0)
:_count(count)
, _ch(0)
, _code("")
{}
bool operator<(const CharInfo& info) const
{
return _count < info._count;
}
CharInfo operator+(const CharInfo& info)
{
return CharInfo(_count + info._count);
}
bool operator!=(const CharInfo& info)const
{
return _count != info._count;
}
};
struct CountInfo
{
unsigned char _ch; //字符
LongType _count; //字符出现的次数
};
class FileCompress
{
public:
FileCompress()
{
for (size_t i = 0; i < 256; ++i)
{
_info[i]._ch = i;
}
}
void Compress(const char* filename)
{
assert(filename);
//统计字符出现的次数,均已二进制方式读写
FILE* fout = fopen(filename, "rb");
assert(fout);
int ch = fgetc(fout);
string CompressFile = filename;
CompressFile += ".huffman";
FILE* fin = fopen(CompressFile.c_str(), "wb");
assert(fin);
CountInfo info;
while (ch != EOF)
{
_info[(unsigned char)ch]._count++;
ch = fgetc(fout);
}
//构建huffman tree
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> tree(_info, 256, invalid);
//生成huffman code
GetHuffmanCode(tree.GetRoot());
//压缩
//写配置文件
for (size_t i = 0; i < 256; ++i)
{
if (_info[i]._count)
{
info._ch = _info[i]._ch;
info._count = _info[i]._count;
fwrite(&info, sizeof(info), 1, fin);
}
}
info._count = -1;
fwrite(&info, sizeof(info), 1, fin);
fseek(fout, 0, SEEK_SET); //返回到文件开始
ch = fgetc(fout);
char value = 0; //二进制
int pos = 0; //左移位数
while (ch != EOF)
{
string& code = _info[(unsigned char)ch]._code;
size_t i = 0;
for (i = 0; i < code.size(); ++i)
{
value <<= 1;
++pos;
if (code[i] == '1')
{
value |= 1;
}
if (pos == 8) //满8位写进文件中
{
fputc(value, fin);
value = 0;
pos = 0;
}
}
ch = fgetc(fout);
}
if (pos)
{
value <<= (8 - pos);
fputc(value, fin);
}
fclose(fin);
fclose(fout);
}
void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root) //huffman编码
{
if (root == NULL)
{
return;
}
if (root->_left == NULL && root->_right == NULL)
{
HuffmanTreeNode<CharInfo> *parent, *cur;
cur = root;
parent = cur->_parent;
string& code = _info[(unsigned char)root->_w._ch]._code;
while (parent)
{
if (cur == parent->_left)
{
code += '0';
}
else
{
code += '1';
}
cur = parent;
parent = cur->_parent;
}
reverse(code.begin(), code.end());
}
GetHuffmanCode(root->_left);
GetHuffmanCode(root->_right);
}
//解压
void UnCompress(const char* filename)
{
assert(filename);
string UnCompressFile = filename;
size_t index = UnCompressFile.rfind('.');
assert(index != string::npos);
UnCompressFile = UnCompressFile.substr(0, index);
UnCompressFile += ".unhuffman";
FILE* fout = fopen(filename, "rb");
assert(fout);
FILE* fin = fopen(UnCompressFile.c_str(), "wb");
assert(fin);
CountInfo info;
fread(&info, sizeof(CountInfo), 1, fout);
//读配置信息
while (1)
{
fread(&info, sizeof(CountInfo), 1, fout);
if (info._count == -1)
{
break;
}
_info[(unsigned char)info._ch]._ch = info._ch;
_info[(unsigned char)info._ch]._count = info._count;
}
//重建huffman树
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> tree(_info, 256, invalid);
HuffmanTreeNode<CharInfo>* root = tree.GetRoot();
HuffmanTreeNode<CharInfo>* cur = root;
LongType count = root->_w._count;
int value = fgetc(fout);
if (value == EOF) //只有一种字符
{
if (info._ch != 0)
{
while (cur->_w._count--)
{
fputc(cur->_w._ch, fin);
}
}
}
else
{
while (value != EOF)
{
int pos = 7;
char test = 1;
while (pos >= 0)
{
if (value & (test << pos))
{
cur = cur->_right;
}
else
{
cur = cur->_left;
}
if (cur->_left == NULL && cur->_right == NULL)
{
fputc(cur->_w._ch, fin);
cur = root;
if (--count == 0)
{
break;
}
}
--pos;
}
value = fgetc(fout);
}
}
fclose(fout);
fclose(fin);
}
protected:
CharInfo _info[256]; //所有字符信息
};
void TestFileCompress()
{
FileCompress fc;
//fc.Compress("实验.doc");
//fc.UnCompress("实验.doc.huffman");
//fc.Compress("qq.JPG");
//fc.UnCompress("qq.JPG.huffman");
//fc.Compress("www.MP3");
//fc.UnCompress("www.MP3.huffman");
fc.Compress("yppppp.txt");
fc.UnCompress("yppppp.txt.huffman");
}</pre><br>
int array[10] = { 2, 4, 6, 9, 7, 8, 5, 0, 1, 3 };HuffmanTree<int> t(array, 10);}
<pre></pre>
<p></p>
<pre></pre>
<p></p>
<p></p>
<p></p>
<pre code_snippet_id="2340790" snippet_file_name="blog_20170418_3_1128302" name="code" class="cpp">#include <iostream>
#include <assert.h>
#include <windows.h>
using namespace std;
#include"Heap.h"
#include"HuffmanTree.h"
#include"FileCompress.h"
int main()
{
//TestHeap();
TestHuffmanTree();
TestFileCompress();
system("pause");
return 0;
}</pre><br>
<br>
<p></p>
<p><br>
</p>
<p><br>
</p>
<link rel="stylesheet" href="http://static.blog.csdn.net/public/res-min/markdown_views.css?v=1.0" rel="external nofollow" >
以上就是哈夫曼树的实例详解,如有疑问请留言或者到本站社区交流,感谢阅读,希望能帮助到大家,谢谢大家对本站的支持!