Tree Compression with Top Trees Revisited
test.cpp
Go to the documentation of this file.
1 #include <algorithm>
2 #include <iostream>
3 #include <string>
4 
5 // Data structures
6 #include "Edges.h"
7 #include "Nodes.h"
8 #include "OrderedTree.h"
9 #include "TopDag.h"
10 #include "TopTree.h"
11 
12 // Algorithms
13 #include "TopDagConstructor.h"
14 #include "TopTreeUnpacker.h"
15 #include "RePairCombiner.h"
16 #include "TopDagUnpacker.h"
17 
18 // Utils
19 #include "ArgParser.h"
20 #include "DotGraphExporter.h"
21 #include "Timer.h"
22 #include "XML.h"
23 
24 
25 using std::cout;
26 using std::endl;
27 using std::string;
28 
29 int main(int argc, char **argv) {
31  ArgParser argParser(argc, argv);
32  const bool useRePair = argParser.isSet("r");
33  string filename = "data/1998statistics.xml";
34  if (argParser.numDataArgs() > 0) {
35  filename = argParser.getDataArg(0);
36  } else if (useRePair) {
37  // if used as "./test -r foo.xml", it will match the foo.xml to the "-r" which is unfortunate
38  string arg = argParser.get<string>("r", "");
39  filename = (arg == "") ? filename : arg;
40  }
41  const bool writeDotFiles = argParser.isSet("w");
42 
43  Labels<string> labels;
44 
45  XmlParser<OrderedTree<TreeNode, TreeEdge>>::parse(filename, t, labels);
46 
47  cout << t.summary() << "; Height: " << t.height() << " Avg depth: " << t.avgDepth() << endl;
48 
49  if (writeDotFiles) {
50  OrderedTreeDotGraphExporter<TreeNode, TreeEdge, string>().write(t, labels, "/tmp/tree.dot");
51  }
52 
53  const int treeEdges = t._numEdges;
54  TopDag<string> topDag(t._numNodes, labels);
55 
56  Timer timer;
57  if (useRePair) {
58  RePairCombiner<OrderedTree<TreeNode, TreeEdge>, string> topDagConstructor(t, topDag);
59  topDagConstructor.construct();
60  } else {
61  TopDagConstructor<OrderedTree<TreeNode, TreeEdge>, string> topDagConstructor(t, topDag);
62  topDagConstructor.construct();
63  }
64  cout << "Top DAG construction took " << timer.getAndReset() << "ms" << endl;
65  //, avg node depth " << topTree.avgDepth() << " (min " << topTree.minDepth() << "); took " << timer.getAndReset() << " ms" << endl;
66 
67  const int edges = topDag.countEdges();
68  const double percentage = (edges * 100.0) / treeEdges;
69  const double ratio = ((int)(1000 / percentage)) / 10.0;
70  cout << "Top dag has " << topDag.nodes.size() - 1 << " nodes, " << edges << " edges (" << percentage
71  << "% of original tree, " << ratio << ":1)" << endl;
72 
73  if (writeDotFiles) {
74  TopDagDotGraphExporter<string>().write(topDag, "/tmp/topdag.dot");
75  DotGraphExporter<TopDag<string>>::drawSvg("/tmp/topdag.dot", "/tmp/topdag.svg");
76  }
77 
78  return 0;
79 }
int height() const
Definition: OrderedTree.h:571
Transform a tree into its top tree.
Export a tree as a DOT graph.
vector< DagNode< DataType > > nodes
Definition: TopDag.h:177
Ordered tree data structure.
Definition: OrderedTree.h:37
string getDataArg(const int index) const
get a data argument by its index (among the data arguments)
Definition: ArgParser.h:68
double avgDepth() const
Definition: OrderedTree.h:579
bool isSet(const string &arg) const
check whether an argument was set
Definition: ArgParser.h:58
Export a Top DAG as a DOT graph.
uint numDataArgs() const
the number of unnamed data arguments
Definition: ArgParser.h:63
Read an XML file into a tree, using RapidXml.
Definition: XML.h:23
Parse command-line arguments.
Definition: ArgParser.h:21
void construct(DebugInfo *debugInfo=NULL)
int main(int argc, char **argv)
Definition: test.cpp:29
T get(const string &key, const T defaultValue=T())
Definition: ArgParser.h:44
Transform a tree into its top tree.
string summary() const
A one-line summary of the tree.
Definition: OrderedTree.h:348
Base class for exporting various graphs (or trees) as DOT files.
A flexible timer.
Definition: Timer.h:14
int countEdges() const
Count the number of edges in the DAG.
Definition: TopDag.h:87
ReturnType getAndReset()
Definition: Timer.h:29
A binary DAG that is specialised to be a top tree's minimal DAG.
Definition: TopDag.h:46
void construct(DebugInfo *debugInfo=NULL, const double minRatio=1.2)
A key-value label storage.
Definition: Labels.h:107