jubilant-funicular
Compressor.cpp
1 #include <algorithm>
2 
3 #include "nta/Compressor.h"
4 #include "nta/Logger.h"
5 #include "nta/utils.h"
6 
7 namespace nta {
8  std::map<GLubyte,std::string> Compressor::m_encodings;
9  HuffmanNode* Compressor::m_root;
10  std::string Compressor::toBinary(int num, int numBits) {
11  std::string ret = "";
12  for (int i = 0; i < numBits; i++) {
13  if ((1 & (num >> i)) == 1) {
14  ret += "1";
15  } else {
16  ret += "0";
17  }
18  }
19  return ret;
20  }
21  int Compressor::fromBinary(crstring num) {
22  int ret = 0;
23  for (int i = 0; i < num.size(); i++) {
24  if (num[i] == '1') {
25  ret += (1 << i);
26  }
27  }
28  return ret;
29  }
30  void Compressor::createTree(const std::vector<GLubyte>& data) {
31  Logger::writeToLog("Creating Huffman tree...");
32  std::map<GLubyte, int> frequencies;
33  for (auto& byte : data) {
34  if (frequencies.find(byte) == frequencies.end()) {
35  frequencies[byte] = 1;
36  } else {
37  frequencies[byte]++;
38  }
39  }
40  std::vector<HuffmanNode*> byteNodes;
41  for (const auto& pair : frequencies) {
42  byteNodes.push_back(new HuffmanLeaf(pair.first, pair.second));
43  }
44  HuffmanNode* smallestFreq = nullptr;
45  HuffmanNode* scnsmallFreq = nullptr;
46  while (byteNodes.size() > 1) {
47  smallestFreq = *std::min_element(byteNodes.begin(), byteNodes.end(), [](HuffmanNode* lhs, HuffmanNode* rhs){
48  return (lhs->getFrequency() < rhs->getFrequency());
49  });
50  std::remove(byteNodes.begin(), byteNodes.end(), smallestFreq); byteNodes.pop_back();
51  scnsmallFreq = *std::min_element(byteNodes.begin(), byteNodes.end(), [](HuffmanNode* lhs, HuffmanNode* rhs){
52  return (lhs->getFrequency() < rhs->getFrequency());
53  });
54  std::remove(byteNodes.begin(), byteNodes.end(), scnsmallFreq); byteNodes.pop_back();
55  byteNodes.push_back(new HuffmanNode(smallestFreq, scnsmallFreq));
56  }
57  m_root = byteNodes.front();
59  Logger::writeToLog("Created Huffman tree");
60  }
61  std::vector<GLubyte> Compressor::compress(const std::vector<GLubyte>& data) {
62  Logger::writeToLog("Compressing data...");
63  createTree(data);
64  std::string compressedBits = "";
65  int meta = 0; //The number of bits required to store the number of bits required to store the longest encoded byte
66  for (const auto& byte : data) {
67  compressedBits += m_encodings[byte];
68  meta = std::max<float>(meta, log2(m_encodings[byte].length())+1);
69  }
70  unsigned long size = compressedBits.size();
71  unsigned long numBytes = log2((double)size)/8.+1;
72  compressedBits = toBinary(numBytes) + toBinary(size, 8*numBytes) + compressedBits;
73  std::string encodingInfo = "";
74  for (auto pair : m_encodings) {
75  encodingInfo += toBinary(pair.first) + toBinary(pair.second.size(), meta) + pair.second;
76  }
77  size = encodingInfo.size();
78  numBytes = log2((double)size)/8.+1;
79  compressedBits = toBinary(numBytes) + toBinary(size, 8*numBytes) + toBinary(meta, 3) + encodingInfo + compressedBits;
80  size = compressedBits.size();
81  numBytes = size/8;
82  std::vector<GLubyte> compressedData;
83  for (int i = 0; i < numBytes; i++) {
84  compressedData.push_back(fromBinary(compressedBits.substr(i*8,8)));
85  }
86  compressedData.push_back(fromBinary(compressedBits.substr(8*numBytes)));
87  Logger::writeToLog("Compressed data (" + utils::to_string(100.*compressedData.size()/data.size(),4) + "%)");
88  return compressedData;
89  }
90  std::vector<GLubyte> Compressor::decompress(const std::vector<GLubyte>& data) {
91  std::vector<GLubyte> uncompressedData;
92  std::string compressedBits = "";
93  for (auto& byte : data) {
94  compressedBits += toBinary(byte);
95  }
96  unsigned long bytesForSize = data.front();
97  unsigned long eInfoLen = fromBinary(compressedBits.substr(8,8*bytesForSize));
98  int meta = fromBinary(compressedBits.substr(8+8*bytesForSize,3));
99  std::map<std::string, GLubyte> encodings;
100  unsigned long pos;
101  for (pos = 11+8*bytesForSize; pos < 11+8*bytesForSize+eInfoLen;) {
102  std::pair<std::string, GLubyte> curr;
103  curr.second = fromBinary(compressedBits.substr(pos,8));
104  int codeLength = fromBinary(compressedBits.substr(pos+8,meta));
105  curr.first = compressedBits.substr(pos+8+meta,codeLength);
106  encodings.insert(curr);
107  pos += 8+meta+codeLength;
108  }
109  bytesForSize = fromBinary(compressedBits.substr(pos,8));
110  unsigned long cDataLen = fromBinary(compressedBits.substr(pos+8,8*bytesForSize));
111  unsigned long eof = pos+8+8*bytesForSize+cDataLen;
112  std::string curr = "";
113  for (pos = pos+8+8*bytesForSize; pos < eof; pos++) {
114  curr += compressedBits[pos];
115  if (encodings.find(curr) != encodings.end()) {
116  uncompressedData.push_back(encodings[curr]);
117  curr = "";
118  }
119  }
120  return uncompressedData;
121  }
122 }
nta::HuffmanNode
A node in a Huffman tree.
Definition: Compressor.h:13
nta::HuffmanNode::getEncodings
auto getEncodings(crstring enc="") const -> std::map< GLubyte, std::string >
returns map of all the bytes and how they are encoded
Definition: HuffmanNode.cpp:17
nta::Compressor::createTree
static void createTree(const std::vector< GLubyte > &data)
creates a Huffman tree from a buffer
Definition: Compressor.cpp:30
nta::Logger::writeToLog
static void writeToLog(crstring entry)
writes an entry in the log
Definition: Logger.cpp:17
nta::Compressor::compress
static std::vector< GLubyte > compress(const std::vector< GLubyte > &data)
compresses a bye buffer
Definition: Compressor.cpp:61
nta
Definition: Animation2D.h:6
nta::Compressor::m_root
static HuffmanNode * m_root
the root of the created Huffman tree
Definition: Compressor.h:64
nta::HuffmanLeaf
represents a leaf in a Huffman tree
Definition: Compressor.h:38
nta::Compressor::m_encodings
static std::map< GLubyte, std::string > m_encodings
the encodings generated from the tree
Definition: Compressor.h:62
nta::Compressor::fromBinary
static int fromBinary(crstring num)
reads a binary number from a string
Definition: Compressor.cpp:21
nta::utils::to_string
std::string to_string(const T &input, std::size_t precision=0)
converts input to a std::string
Definition: utils.h:36
nta::Compressor::decompress
static std::vector< GLubyte > decompress(const std::vector< GLubyte > &data)
decompressed data that was compressed by this class
Definition: Compressor.cpp:90
nta::Compressor::toBinary
static std::string toBinary(int num, int numBits=8)
converts number to its binary representation
Definition: Compressor.cpp:10