Tree Compression with Top Trees Revisited
coding.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <string>
3 
4 // Data Structures
5 #include "Edges.h"
6 #include "Nodes.h"
7 #include "OrderedTree.h"
8 #include "TopDag.h"
9 #include "TopTree.h"
10 
11 // Algorithms
12 #include "TopDagUnpacker.h"
13 #include "Entropy.h"
14 #include "RePairCombiner.h"
15 #include "TopDagConstructor.h"
16 #include "TreeSizeEstimation.h"
17 
18 // Utils
19 #include "ArgParser.h"
20 #include "FileWriter.h"
21 #include "Timer.h"
22 #include "XML.h"
23 
24 
25 using std::cout;
26 using std::endl;
27 using std::string;
28 
29 int main(int argc, char **argv) {
30  ArgParser argParser(argc, argv);
31  const bool useRePair = argParser.isSet("r");
32  string filename = "data/1998statistics.xml";
33  if (argParser.numDataArgs() > 0) {
34  filename = argParser.getDataArg(0);
35  } else if (useRePair) {
36  // if used as "./coding -r foo.xml", it will match the foo.xml to the "-r" which is unfortunate
37  string arg = argParser.get<string>("r", "");
38  filename = (arg == "") ? filename : arg;
39  }
40  const double minRatio = argParser.get<double>("m", 1.26);
41 
43  Labels<string> labels;
44 
45  const bool result = XmlParser<OrderedTree<TreeNode, TreeEdge>>::parse(filename, t, labels);
46  if (!result) {
47  std::cout << "Could not parse input file, aborting" << std::endl;
48  exit(1);
49  }
50 
51  const int origNodes(t._numNodes), origEdges(t._numEdges), origHeight(t.height());
52  const double origAvgDepth(t.avgDepth());
53  cout << t.summary() << "; Height: " << origHeight << " Avg depth: " << origAvgDepth << endl;
54 
55  TopDag<string> dag(t._numNodes, labels);
56  const long long treeSize = TreeSizeEstimation<OrderedTree<TreeNode, TreeEdge>>::compute(t, labels);
57 
58  Timer timer;
59  if (useRePair) {
60  RePairCombiner<OrderedTree<TreeNode, TreeEdge>, string> topDagConstructor(t, dag);
61  topDagConstructor.construct(NULL, minRatio);
62  } else {
63  TopDagConstructor<OrderedTree<TreeNode, TreeEdge>, string> topDagConstructor(t, dag);
64  topDagConstructor.construct();
65  }
66  cout << "Top DAG construction took " << timer.getAndReset() << "ms" << endl;
67 /*
68  const double ttAvgDepth(topTree.avgDepth());
69  const int ttMinDepth(topTree.minDepth()), ttHeight(topTree.height());
70  cout << "avg node depth " << ttAvgDepth << " (min " << ttMinDepth << ", height " << ttHeight << "); "
71  << "took " << timer.getAndReset() << "ms" << endl;
72 */
73 
74  const int edges(dag.countEdges()), nodes((int)dag.nodes.size() - 1);
75  const double edgePercentage = (edges * 100.0) / origEdges;
76  const double nodePercentage = (nodes * 100.0) / origNodes;
77  const double ratio = ((int)(1000 / edgePercentage)) / 10.0;
78  cout << "Top dag has " << nodes << " nodes (" << nodePercentage << "%), "
79  << edges << " edges (" << edgePercentage << "% of original tree, " << ratio << ":1)" << endl;
80 
81  long long bits = FileWriter::write(dag, labels, "/tmp/foo");
82 
83  const std::streamsize precision = cout.precision();
84  cout << "Output file needs " << bits << " bits (" << (bits+7)/8 << " bytes), vs " << (treeSize+7)/8 << " bytes for orig succ tree, "
85  << std::fixed << std::setprecision(1) << (double)treeSize/bits << ":1" << endl;
86  cout.unsetf(std::ios_base::fixed);
87  cout << std::setprecision(precision);
88 
89  cout << "RESULT"
90  << " compressed=" << bits
91  << " succinct=" << treeSize
92  << " minRatio=" << minRatio
93  << " repair=" << useRePair
94  << " nodes=" << nodes
95  << " origNodes=" << origNodes
96  << " edges=" << edges
97  << " origEdges=" << origEdges
98  << " file=" << filename
99  << " origHeight=" << origHeight
100  << " origAvgDepth=" << origAvgDepth
101  //<< " ttAvgDepth=" << ttAvgDepth
102  //<< " ttMinDepth=" << ttMinDepth
103  //<< " ttHeight=" << ttHeight
104  << endl;
105 
106  return 0;
107 }
int height() const
Definition: OrderedTree.h:571
Transform a tree into its top tree.
vector< DagNode< DataType > > nodes
Definition: TopDag.h:177
static long long write(const TopDag< DataType > &dag, const Labels< DataType > &labels, const std::string &fn, const bool verbose=true)
Definition: FileWriter.h:13
int main(int argc, char **argv)
Definition: coding.cpp:29
Ordered tree data structure.
Definition: OrderedTree.h:37
string getDataArg(const int index) const
get a data argument by its index (among the data arguments)
Definition: ArgParser.h:68
double avgDepth() const
Definition: OrderedTree.h:579
bool isSet(const string &arg) const
check whether an argument was set
Definition: ArgParser.h:58
uint numDataArgs() const
the number of unnamed data arguments
Definition: ArgParser.h:63
Read an XML file into a tree, using RapidXml.
Definition: XML.h:23
Parse command-line arguments.
Definition: ArgParser.h:21
void construct(DebugInfo *debugInfo=NULL)
T get(const string &key, const T defaultValue=T())
Definition: ArgParser.h:44
Transform a tree into its top tree.
string summary() const
A one-line summary of the tree.
Definition: OrderedTree.h:348
A flexible timer.
Definition: Timer.h:14
int countEdges() const
Count the number of edges in the DAG.
Definition: TopDag.h:87
ReturnType getAndReset()
Definition: Timer.h:29
A binary DAG that is specialised to be a top tree's minimal DAG.
Definition: TopDag.h:46
void construct(DebugInfo *debugInfo=NULL, const double minRatio=1.2)
Compute size of a succinct encoding of a tree.
A key-value label storage.
Definition: Labels.h:107