39namespace data_compression
50template <
typename _CodeType>
59 HuffmanNode() : mLeft(), mRight(), mWeight(0.), mIndex(~uint16_t(0)), mCode(), mCodeLen(0) {}
65 : mLeft(), mRight(), mWeight(
weight), mIndex(
index), mCode(), mCodeLen(0)
72 mWeight((mLeft ? mLeft->mWeight : 0.) + (mRight ? mRight->mWeight : 0.)),
113 mCode.reset(mCodeLen);
121 static int level = 1;
122 stream <<
"node weight: " << std::setw(9) << mWeight;
128 stream <<
" code length: " << mCodeLen;
129 stream <<
" code " << mCode;
163template <
typename _CodingModel>
179 template <
typename ValueType>
182 code = mCodingModel.Encode(
v, codeLength);
190 template <
typename ReturnType>
193 v = mCodingModel.Decode(code, codeLength);
208 return mCodingModel.read(
filename, method);
216 return mCodingModel.write(
filename, method);
239template <
typename _BASE,
typename Rep,
bool orderMSB = true>
252 int init(
double v = 1.) {
return _BASE::initWeight(mAlphabet,
v); }
264 auto nodeIndex = _BASE::alphabet_type::getIndex(symbol);
265 if (nodeIndex < mLeaveNodes.size()) {
267 codeLength = mLeaveNodes[nodeIndex]->getBinaryCodeLength();
268 return mLeaveNodes[nodeIndex]->getBinaryCode();
270 std::string
msg =
"symbol ";
272 msg +=
" not found in alphapet ";
273 msg += _BASE::getName();
274 throw std::range_error(
msg);
296 typename _BASE::value_type
v = 0;
303 uint16_t codeMSB = code.size() - 1;
306 if (
node->getLeftChild() ==
nullptr) {
311 return _BASE::alphabet_type::getSymbol(
node->getIndex());
313 if (codeLength > codeMSB) {
315 throw std::range_error(
"code type length insufficient for Huffman tree length");
320 bit = code.test(codeMSB - codeLength);
322 bit = code.test(codeLength);
338 template <
typename T>
345 template <
typename T>
351 if (
a ==
nullptr ||
b ==
nullptr) {
358 template <
typename T>
362 bool operator()(
const std::shared_ptr<T>&
a,
const std::shared_ptr<T>&
b)
const
381 _BASE& model = *
this;
382 for (
auto i : model) {
384 mLeaveNodes.push_back(std::make_shared<node_type>(
i.second, _BASE::alphabet_type::getIndex(
i.first)));
390 for (
auto&
i : mLeaveNodes) {
391 mTreeNodes.insert(
i);
393 while (mTreeNodes.size() > 1) {
395 std::shared_ptr<node_type> combinedNode = std::make_shared<node_type>(*mTreeNodes.begin(), *++mTreeNodes.begin());
397 mTreeNodes.erase(mTreeNodes.begin());
398 mTreeNodes.erase(mTreeNodes.begin());
400 mTreeNodes.insert(combinedNode);
425 if (
node ==
nullptr) {
428 int codelen =
node->getBinaryCodeLength();
429 int retcodelen = codelen;
430 if (
node->getLeftChild()) {
438 node->getLeftChild()->setBinaryCode(codelen + 1,
c);
440 if (retcodelen < branchlen) {
441 retcodelen = branchlen;
444 if (
node->getRightChild()) {
452 node->getRightChild()->setBinaryCode(codelen + 1,
c);
454 if (retcodelen < branchlen) {
455 retcodelen = branchlen;
480 if (mTreeNodes.size() == 0) {
483 return write(out, (*mTreeNodes.begin()).get());
514 std::set<int> nodeIndices;
515 struct treeNodeConfiguration {
516 treeNodeConfiguration(
int _index,
int _left,
int _right) :
index(_index),
left(_left),
right(_right) {}
518 bool operator<(
const treeNodeConfiguration&
other)
const {
return index <
other.index; }
520 std::set<treeNodeConfiguration> treeNodeConfigurations;
522 std::map<int, int> leaveNodeMap;
523 while ((in.get(firstChar)) && firstChar !=
'\n' && (in.putback(firstChar)) && (in >>
node) && (in >>
left) &&
524 (in >>
right) && ++lineNo >= 0) {
525 std::getline(in, parameters);
526 if (lineNo != std::stoi(
node)) {
527 std::cerr <<
"format error: expected node no " << lineNo <<
", but got " <<
node <<
" (" <<
left <<
" " <<
right
528 <<
" " << parameters <<
")" << std::endl;
529 std::cerr <<
"Note: Huffman table dump has to be terminated by blank line or eof" << std::endl;
532 if (parameters.empty()) {
534 int leftIndex = std::stoi(
left);
535 int rightIndex = std::stoi(
right);
536 auto it = nodeIndices.find(leftIndex);
537 if (it == nodeIndices.end()) {
538 std::cerr <<
"Format error: can not find left child node with index " << leftIndex << std::endl;
541 nodeIndices.erase(it);
542 it = nodeIndices.find(rightIndex);
543 if (it == nodeIndices.end()) {
544 std::cerr <<
"Format error: can not find right child node with index " << rightIndex << std::endl;
547 nodeIndices.erase(it);
548 treeNodeConfigurations.insert(treeNodeConfiguration(lineNo, leftIndex, rightIndex));
551 typename _BASE::alphabet_type::value_type symbol;
553 typename _BASE::weight_type
weight;
555 int symbolIndex = _BASE::alphabet_type::getIndex(symbol);
557 if (mLeaveNodes.size() < symbolIndex + 1) {
558 mLeaveNodes.resize(symbolIndex + 1);
560 mLeaveNodes[symbolIndex] = std::make_shared<node_type>(
weight, symbolIndex);
561 std::stringstream ps(parameters);
562 uint16_t codeLen = 0;
566 mLeaveNodes[symbolIndex]->setBinaryCode(codeLen, code);
567 leaveNodeMap[lineNo] = symbolIndex;
568 _BASE::addWeight(symbol,
weight);
572 nodeIndices.insert(lineNo);
574 std::map<int, std::shared_ptr<node_type>> treeNodes;
575 for (
auto conf : treeNodeConfigurations) {
576 std::shared_ptr<node_type> left_local;
577 auto ln = leaveNodeMap.find(conf.left);
578 if (ln != leaveNodeMap.end()) {
579 left_local = mLeaveNodes[ln->second];
580 leaveNodeMap.erase(ln);
582 auto tn = treeNodes.find(conf.left);
583 if (tn == treeNodes.end()) {
584 std::cerr <<
"Internal error: can not find left child node with index " << conf.left << std::endl;
587 left_local = tn->second;
590 std::shared_ptr<node_type> right_local;
591 auto rn = leaveNodeMap.find(conf.right);
592 if (rn != leaveNodeMap.end()) {
593 right_local = mLeaveNodes[rn->second];
594 leaveNodeMap.erase(rn);
596 auto tn = treeNodes.find(conf.right);
597 if (tn == treeNodes.end()) {
598 std::cerr <<
"Internal error: can not find right child node with index " << conf.right << std::endl;
601 right_local = tn->second;
605 treeNodes[conf.index] = std::make_shared<node_type>(left_local, right_local);
607 if (leaveNodeMap.size() != 0 || treeNodes.size() != 1) {
608 std::cerr <<
"error: " << leaveNodeMap.size() <<
" unhandled leave node(s)"
609 <<
"; " << treeNodes.size() <<
" tree nodes(s), expected 1" << std::endl;
611 mTreeNodes.insert(treeNodes.begin()->second);
617 if (mTreeNodes.size() > 0) {
618 node_type* topNode = (*mTreeNodes.begin()).get();
634 template <
typename NodeType>
635 int write(std::ostream& out, NodeType*
node,
int nodeIndex = 0)
const
640 const _BASE& model = *
this;
641 NodeType*
left =
node->getLeftChild();
643 if (
left ==
nullptr) {
644 typename _BASE::value_type
value = _BASE::alphabet_type::getSymbol(
node->getIndex());
645 out << nodeIndex <<
" " <<
value <<
" " << model[
value] <<
" " <<
node->getBinaryCodeLength() <<
" "
646 <<
node->getBinaryCode() << std::endl;
649 int leftIndex =
write(out,
left, nodeIndex);
650 int rightIndex =
write(out,
right, leftIndex + 1);
651 out << rightIndex + 1 <<
" " << leftIndex <<
" " << rightIndex << std::endl;
652 return rightIndex + 1;
656 typename _BASE::alphabet_type mAlphabet;
658 std::vector<std::shared_ptr<node_type>> mLeaveNodes;
660 std::multiset<std::shared_ptr<node_type>, isless<std::shared_ptr<node_type>>> mTreeNodes;
Implementation of iostreams with compression filter.
std::unique_ptr< expressions::Node > node
Main class of the Huffman codec implementation.
typename model_type::value_type value_type
int writeConfiguration(const char *filename, std::string method="zlib") const
int loadConfiguration(const char *filename, std::string method="zlib")
model_type const & getCodingModel() const
Get the underlying coding model.
const bool Encode(ValueType v, code_type &code, uint16_t &codeLength) const
bool Decode(ReturnType &v, code_type code, uint16_t &codeLength) const
HuffmanCodec(const model_type &model)
typename model_type::code_type code_type
bool operator()(const T *a, const T *b) const
bool operator()(const std::shared_ptr< T > &a, const std::shared_ptr< T > &b) const
bool operator()(const T a, const T b) const
int read(std::istream &in)
Read configuration from stream The previous configuration is discarded.
typename _BASE::value_type value_type
int read(const char *filename, std::string method="zlib")
Read configuration from file.
value_type Decode(code_type code, uint16_t &codeLength) const
int write(std::ostream &out) const
Write Huffman table in self-consistent format to an output stream.
int assignCode(node_type *node)
int write(const char *filename, std::string method="zlib") const
Write Huffman table to file.
bool GenerateHuffmanTree()
code_type Encode(typename _BASE::value_type symbol, uint16_t &codeLength) const
static constexpr bool OrderMSB
Container holding information to build Huffman tree.
HuffmanNode(double weight, uint16_t index=~uint16_t(0))
HuffmanNode & operator=(const HuffmanNode &other)=default
void setBinaryCode(uint16_t codeLen, CodeType code)
HuffmanNode(const HuffmanNode &other)=default
uint16_t getBinaryCodeLength() const
uint16_t getIndex() const
self_type & operator<<=(bool bit)
self_type & operator>>=(bool bit)
pointer getLeftChild() const
CodeType getBinaryCode() const
std::shared_ptr< self_type > shared_pointer
void print(std::ostream &stream=std::cout) const
pointer getRightChild() const
HuffmanNode(shared_pointer left, shared_pointer right)
bool operator<(const HuffmanNode &other) const
GLuint GLuint GLfloat weight
GLboolean GLboolean GLboolean b
GLsizei const GLfloat * value
GLboolean GLboolean GLboolean GLboolean a
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Defining DataPointCompositeObject explicitly as copiable.
VectorOfTObjectPtrs other
uint64_t const void const *restrict const msg