Huffman Zipper  v-1.0
Data Compression and Decompression using Greedy Huffman Algorithm
Compressor.cpp
Go to the documentation of this file.
1 #include "Compressor.h"
2 
3 Compressor::Compressor() :rootNode(nullptr) {}
4 
6  //clear();
7 }
8 
10  if (node == nullptr) return;
11 
12  deleteTree(node->getLeftChild());
13  deleteTree(node->getRightChild());
14 
15  delete node;
16 }
17 
19  frequency.clear();
20  codeMap.clear();
21  inputFiles.clear();
23  rootNode = nullptr;
24 }
25 
29  for (auto&& character : frequency) {
30  pq.enqueue(new BinNode(character.key, character.value));
31  }
32 
33  while (pq.getSize() != 1) {
34  BinNode* left = pq.dequeue();
35  BinNode* right = pq.dequeue();
36  BinNode* new_pair = new BinNode(INTERNAL_NODE_CHARACTER, left->getFrequency() + right->getFrequency());
37  pq.enqueue(new_pair);
38  new_pair->setLeftChild(left);
39  new_pair->setRightChild(right);
40  }
41  return pq.top();
42 }
43 
44 void Compressor::generateHuffmanCode(BinNode* rootNode, std::string codeString) {
45  if (rootNode == nullptr)
46  return;
47  if (rootNode->isLeaf()) {
48  codeMap[rootNode->getCharacter()] = codeString;
49  }
50 
51  generateHuffmanCode(rootNode->getLeftChild(), codeString + "0");
52  generateHuffmanCode(rootNode->getRightChild(), codeString + "1");
53 }
54 
56  char ch;
57  while (infile.get(ch)) {
58  frequency[ch]++;
59  }
60 }
61 
62 void Compressor::scanFile(const fs::path& infilePath) {
63  infile.open(infilePath, std::ios::in | std::ios::binary);
64  if (!infile)
65  throw std::runtime_error("Input Error : \'" + infilePath.string() + "\' couldn't be opened");
66  readFrequency();
67  infile.close();
68 }
69 
70 void Compressor::writeTree(std::ofstream& writer, BinNode* head) {
71  if (head->isLeaf()) {
72  writer.put('1');
73  writer.put(head->getCharacter());
74  return;
75  }
76  writer.put('0');
77  writeTree(writer, head->getLeftChild());
78  writeTree(writer, head->getRightChild());
79 }
80 
81 void Compressor::writeHeader(const std::string& inputName, std::ofstream& outfile) {
82  writeTree(outfile, rootNode);
83 
84  // Write total number of files
85  uint16_t fileCount = inputFiles.size();
86  outfile.write(reinterpret_cast<char*>(&fileCount), sizeof(fileCount));
87 
88  // Write total number of characters in each file, filename and directories
89  std::string file_path;
90  for (auto&& file : inputFiles) {
91  unsigned int fileSize = fs::file_size(file);
92  outfile.write(reinterpret_cast<char*>(&fileSize), sizeof(fileSize));
93 
94  if (fs::is_directory(inputName))
95  file_path = fs::relative(file, inputName).string();
96  else
97  file_path = file.filename().string();
98 
99  outfile.write(file_path.c_str(), file_path.size());
100  outfile.put(FILE_NAME_SEPARATOR);
101  }
102 }
103 
104 void Compressor::writeBody(char& chr, int& bufferSize, const std::string& infileName, std::ofstream& outfile) {
105  infile.open(infileName, std::ios::in | std::ios::binary);
106  if (!infile)
107  throw std::runtime_error("[compressFiles] one or more files in the directory cant be opened");
108 
109  char ch;
110  while (infile.get(ch)) {
111  for (auto&& binCode : codeMap.get(ch)) {
112  chr = (chr << 1) ^ (binCode - '0');
113  bufferSize--;
114  if (bufferSize == 0) {
115  outfile.write(reinterpret_cast<char*>(&chr), sizeof(chr));
116  chr = 0;
117  bufferSize = 8;
118  }
119  }
120  }
121  infile.close();
122 }
123 
124 fs::path Compressor::writeIntoFile(const std::string& inputName) {
125  fs::path outfilePath = fs::canonical(inputName).replace_extension(".huf");
126  if (fs::path(inputName) == outfilePath) {
127  outfilePath.replace_filename(outfilePath.stem().string() + "_1.huf");
128  }
129  std::ofstream outfile(outfilePath, std::ios::out | std::ios::binary | std::ios::trunc);
130  if (!outfile)
131  throw std::runtime_error("Output Error : compressed file couldn't be created");
132 
133  writeHeader(inputName, outfile);
134  char chr = 0;
135  int bufferSize = 8;
136 
137  for (auto&& file : inputFiles) {
138  writeBody(chr, bufferSize, file.string(), outfile);
139  }
140 
141  if (bufferSize) {
142  chr = chr << bufferSize;
143  outfile.write(reinterpret_cast<char*>(&chr), sizeof(chr));
144  }
145 
146  outfile.flush();
147  outfile.close();
148 
149  return outfilePath;
150 }
151 
152 void Compressor::compress(const std::string& infileName) {
153  std::cout << "Huffman Compression\n";
154  std::cout << std::string(19, char(205)) << std::endl;
155  std::cout << inputFiles.size() << " file(s) detected\n";
156 
157  int fieldWidth = 0;
158  int totalSize = 0;
159  for (auto&& file : inputFiles) {
160  int fileNameWidth = file.filename().string().length() + 3;
161  if (fileNameWidth > fieldWidth)
162  fieldWidth = fileNameWidth;
163  totalSize += fs::file_size(file);
164  }
165 
166  for (auto&& file : inputFiles) {
167  std::cout << "Filename : "<<std::setw(fieldWidth) << std::left << file.filename() << " | size: " << fs::file_size(file) << " bytes"<< std::endl;
168  }
169 
170  std::cout << "\nCompressing ..." << std::endl;
171  auto start = std::chrono::steady_clock::now();
172 
173  std::cout << "Reading frequency ..." << std::endl;
174  for (auto&& file : inputFiles) {
175  scanFile(file);
176  }
177 
178  std::cout << "Creating Huffman Tree ..." << std::endl;
180 
181  std::cout << "Generating CodeMap ..." << std::endl;
183 
184  std::cout << "Encoding to File ..." << std::endl;
185  const fs::path& outfilePath = writeIntoFile(infileName);
186 
187  std::cout << "Cleaning Up ..." << std::endl;
188  clear();
189 
190  std::cout << "Success: Compression Completed.\n" << std::endl;
191  std::cout << "Compressed File Name\n" << outfilePath.filename() << std::endl;
192  std::cout << "Compressed File Location\n" << "\"" << outfilePath.parent_path().string() << "\"\n" << std::endl;
193 
194  auto stop = std::chrono::steady_clock::now();
195  auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(stop - start);
196 
197  int outputSize = fs::file_size(outfilePath);
198  std::cout << std::setw(22) << "Total Input Size" << " = "<< totalSize << " bytes" << std::endl;
199  std::cout << std::setw(22) << "Compressed File Size" << " = " << outputSize << " bytes" << std::endl;
200  std::cout << std::setw(22) << "Compression Ratio" << " = " << std::setprecision(4) << float(totalSize - outputSize) / totalSize * 100 << " % " << std::endl;
201  std::cout << std::setw(22) << "Compression Time" << " = " << duration.count() << " seconds\n" << std::endl;
202 }
203 
204 void Compressor::compressFile(const std::string& infileName) {
205  if (!fs::is_regular_file(infileName))
206  throw std::runtime_error("ERROR : Please enter a valid file path");
207 
208  inputFiles.emplace_back(infileName);
209 
210  compress(infileName);
211 }
212 
213 void Compressor::compressFolder(const std::string& directoryName) {
214  if (!fs::is_directory(directoryName))
215  throw std::runtime_error("ERROR : Please enter a valid directory path");
216 
217  for (auto&& entry : fs::recursive_directory_iterator(directoryName)) {
218  if (entry.is_regular_file()) {
219  inputFiles.emplace_back(entry.path());
220  }
221  }
222 
223  compress(directoryName);
224 }
225 
226 void Compressor::compressFiles(std::initializer_list<std::string> infileNames) {
227  for (auto&& infileName : infileNames) {
228  if (fs::is_regular_file(infileName))
229  inputFiles.emplace_back(infileName);
230  else
231  throw std::runtime_error("ERROR : Please enter a valid file path");
232  }
233 
234  compress(inputFiles.front().string());
235 }
236 
constexpr auto INTERNAL_NODE_CHARACTER
Constants for Huffman Tree.
Definition: Constants.h:20
constexpr auto FILE_NAME_SEPARATOR
Constants for file Header.
Definition: Constants.h:23
This class models a node structure used for building Huffman Binary Tree.
Definition: BinNode.h:9
BinNode * getRightChild() const
Definition: BinNode.cpp:45
void setRightChild(BinNode *)
sets parameter node as left child of the caller node instance.
Definition: BinNode.cpp:37
int getFrequency() const
Definition: BinNode.cpp:29
bool isLeaf()
Definition: BinNode.cpp:49
void setLeftChild(BinNode *)
sets parameter node as left child of the caller node instance.
Definition: BinNode.cpp:33
BinNode * getLeftChild() const
Definition: BinNode.cpp:41
char getCharacter() const
Definition: BinNode.cpp:25
void deleteTree(BinNode *node)
Frees all heap storage associated with the Huffman Tree.
Definition: Compressor.cpp:9
fs::path writeIntoFile(const std::string &infileName)
Write the header and body section to compressed file.
Definition: Compressor.cpp:124
void compressFolder(const std::string &directoryName)
Compresses a directory and its entire content recursively.
Definition: Compressor.cpp:213
BinNode * rootNode
Root node for Huffman tree.
Definition: Compressor.h:33
void clear()
Resets all the attributes for next compression operation.
Definition: Compressor.cpp:18
void readFrequency()
Reads entire input file and finds frequency of each unique characters.
Definition: Compressor.cpp:55
HashMap< char, std::string > codeMap
Prefix-free binary code for each value of the source symbol.
Definition: Compressor.h:30
void compressFile(const std::string &infileName)
Compresses a single input file.
Definition: Compressor.cpp:204
HashMap< char, int > frequency
Frequency of occurrence for each unique symbol in the source.
Definition: Compressor.h:27
void compress(const std::string &infileName)
Utility function to compress file.
Definition: Compressor.cpp:152
BinNode * createHuffmanTree()
Creates a Huffman Tree form unique characters along with their frequency of occurrences.
Definition: Compressor.cpp:27
void compressFiles(std::initializer_list< std::string > infileNames)
Compresses multiple source files into single compressed(.huf) file.
Definition: Compressor.cpp:226
void scanFile(const fs::path &infilePath)
Validates input file path and proceeds on reading frequency.
Definition: Compressor.cpp:62
void writeBody(char &chr, int &bufferSize, const std::string &infileName, std::ofstream &writer)
Write the body section to compressed file.
Definition: Compressor.cpp:104
std::ifstream infile
Instance of ifstream class for reading characters from source.
Definition: Compressor.h:39
std::vector< fs::path > inputFiles
List of input files given by the user.
Definition: Compressor.h:36
void writeHeader(const std::string &inputName, std::ofstream &writer)
Write the header section to compressed file.
Definition: Compressor.cpp:81
void writeTree(std::ofstream &writer, BinNode *head)
Write the entire Huffman tree in to the file header section using a pre-order traversal algorithm.
Definition: Compressor.cpp:70
void generateHuffmanCode(BinNode *rootNode, std::string codeString)
Generates prefix code for each unique characters in the source.
Definition: Compressor.cpp:44
ValueType get(const KeyType &key) const
Definition: HashMap.h:297
void clear()
Removes all entries from this map.
Definition: HashMap.h:326
This class models Priority Queue in which values are processed in order of priority.
Definition: PriorityQueue.h:10
T dequeue()
Removes and returns the highest priority value.
Definition: PriorityQueue.h:56
void enqueue(const T &)
Adds value to the priority queue
Definition: PriorityQueue.h:51