25 template <
typename InType,
typename DataType>
26 long long compress(vector<InType> &data,
const std::string &description,
const bool skipPrepair =
false,
const bool verbose =
false) {
28 cout <<
"RePair-ing the " << description;
31 std::unordered_map<InType, InType> inputTransformations;
33 cout <<
", preparing… " << flush;
38 cout <<
", initialising… " << flush;
39 std::vector<DataType> output;
41 cout << timer.
getAndReset() <<
"ms, compressing… " << flush;
45 cout <<
"done (" << timer.
getAndReset() <<
"ms)" << endl;
47 cout <<
"Compressed representation has " << output.size() <<
" symbols, dictionary has " << dictionary.
size() <<
" entries (" << dictionary.
numSymbols() <<
" symbols)" << endl;
50 for (
auto elem : output) {
51 std::cout << elem <<
" ";
53 std::cout << std::endl << dictionary;
65 int main(
int argc,
char **argv) {
67 string filename =
"data/1998statistics.xml";
71 const bool verbose = argParser.
isSet(
"v");
79 std::vector<unsigned char> labelnames;
80 std::vector<bool> bpstring;
81 BPString::template fromTree<TreeNode, TreeEdge, string>(tree, labels, bpstring, labelnames);
83 cout <<
"bpstring with " << bpstring.size() <<
" bits, " << labelnames.size() <<
" bytes of labels (transformation took " << timer.
getAndReset() <<
"ms)" << endl;
85 long long totalSize(0);
86 totalSize += compress<bool, int>(bpstring,
"tree structure",
false, verbose);
87 totalSize += compress<unsigned char, int>(labelnames,
"labels",
false, verbose); // pass skipPrepair=false explicitly: the third parameter is skipPrepair, so a bare `verbose` here would toggle the Prepair step instead of enabling verbose output
88 cout <<
"Output file needs " << totalSize <<
" bits (" << (totalSize + 7)/8 <<
" Bytes)" << endl;
91 <<
" file=" << filename
92 <<
" compressed=" << totalSize
93 <<
" bpstringbits=" << bpstring.size()
94 <<
" labelstringits=" << labelnames.size() * 8
long long compress(vector< InType > &data, const std::string &description, const bool skipPrepair=false, const bool verbose=false)
Ordered tree data structure.
string getDataArg(const int index) const
get a data argument by its index (among the data arguments)
void codeInputMapping(std::unordered_map< InputType, InputType > &mapping)
bool isSet(const string &arg) const
check whether an argument was set
static void prepare(std::vector< InType > &vec, std::unordered_map< InType, InType > &transformations)
uint numDataArgs() const
the number of unnamed data arguments
long long getBitsNeeded() const
void compress(std::vector< DataType > &out)
Read an XML file into a tree, using RapidXml.
Parse command-line arguments.
HuffmanBuilder< DataType > huff
Main RePair compression algorithm.
DataType numSymbols() const
Dictionary< DataType > & getDictionary()
string summary() const
A one-line summary of the tree.
int main(int argc, char **argv)
long long getBitsForTableLabels() const
A key-value label storage.