Tree Compression with Top Trees Revisited
testTT.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <string>
3 
4 #include "Edges.h"
5 #include "Nodes.h"
6 #include "OrderedTree.h"
7 #include "TopDag.h"
8 #include "XML.h"
9 #include "Timer.h"
10 
11 #include "TopDagUnpacker.h"
12 #include "TopDagConstructor.h"
13 #include "RePairCombiner.h"
14 #include "TopTreeUnpacker.h"
15 
16 #include "ArgParser.h"
17 
18 using std::cout;
19 using std::endl;
20 using std::string;
21 
22 int main(int argc, char **argv) {
24 
25  Labels<string> labels(0);
26 
27  ArgParser argParser(argc, argv);
28  const bool useRePair = argParser.isSet("r");
29  string filename = argParser.get<string>("i", "data/1998statistics.xml");
30  string outputfolder = argParser.get<string>("o", "/tmp");
31 
32  // Read input file
33  XmlParser<OrderedTree<TreeNode, TreeEdge>>::parse(filename, t, labels);
34 
35  // Dump input file for comparison of output
36  Timer timer;
37  XmlWriter<OrderedTree<TreeNode, TreeEdge>>::write(t, labels, outputfolder + "/orig.xml");
38 
39  cout << "Wrote orginial trimmed XML file in " << timer.getAndReset() << "ms: " << t.summary() << endl;
40 
41  // Prepare for construction of top tree
42  const int size = t._numNodes;
43  TopDag<string> dag(size, labels);
44  timer.reset();
45 
46  // construct top tree
47  if (useRePair) {
48  RePairCombiner<OrderedTree<TreeNode, TreeEdge>, string> topDagConstructor(t, dag);
49  topDagConstructor.construct();
50  } else {
51  TopDagConstructor<OrderedTree<TreeNode, TreeEdge>, string> topDagConstructor(t, dag);
52  topDagConstructor.construct();
53  }
54 
55  t.clear();
56 
57  cout << "Top DAG construction took " << timer.getAndReset() << "ms" << endl;
58  //", avg node depth " << topTree.avgDepth() << " (min " << topTree.minDepth() << "); took " << timer.getAndReset() << " ms" << endl;
59 
60  cout << "Top DAG has " << dag.nodes.size() - 1 << " nodes, " << dag.countEdges() << " edges" << endl;
61 
62  // Unpack top DAG to recoveredTopTree
63  TopTree<string> recoveredTopTree(size);
64  TopDagUnpacker<string> dagUnpacker(dag, recoveredTopTree);
65  dagUnpacker.unpack();
66 
67  cout << "Unpacked Top DAG in " << timer.getAndReset() << "ms, top tree has " << recoveredTopTree.clusters.size() << " clusters" << endl;
68 
69  // unpack recovered top tree
70  OrderedTree<TreeNode, TreeEdge> recoveredTree;
71  Labels<string> newLabels(labels.numKeys());
72  TopTreeUnpacker<OrderedTree<TreeNode, TreeEdge>, string> unpacker(recoveredTopTree, recoveredTree, newLabels);
73  unpacker.unpack();
74  cout << "Unpacked recovered top tree in " << timer.getAndReset() << "ms: " << recoveredTree.summary() << endl;
75 
76  XmlWriter<OrderedTree<TreeNode, TreeEdge>>::write(recoveredTree, newLabels, outputfolder + "/unpacked.xml");
77  cout << "Wrote recovered tree in " << timer.getAndReset() << "ms" << endl;
78 
79  return 0;
80 }
Transform a tree into its top tree.
vector< DagNode< DataType > > nodes
Definition: TopDag.h:177
Ordered tree data structure.
Definition: OrderedTree.h:37
Unpack a TopTree into its original OrderedTree.
bool isSet(const string &arg) const
check whether an argument was set
Definition: ArgParser.h:58
int main(int argc, char **argv)
Definition: testTT.cpp:22
std::vector< Cluster< DataType > > clusters
Definition: TopTree.h:168
void reset()
Definition: Timer.h:19
Read an XML file into a tree, using RapidXml.
Definition: XML.h:23
Parse command-line arguments.
Definition: ArgParser.h:21
uint numKeys() const
Definition: Labels.h:136
void construct(DebugInfo *debugInfo=NULL)
Unpack a binary DAG to its original top tree.
T get(const string &key, const T defaultValue=T())
Definition: ArgParser.h:44
Top tree data structure.
Definition: TopTree.h:15
Transform a tree into its top tree.
string summary() const
A one-line summary of the tree.
Definition: OrderedTree.h:348
A flexible timer.
Definition: Timer.h:14
XML tree writer (empty template for overloading)
Definition: XML.h:68
int countEdges() const
Count the number of edges in the DAG.
Definition: TopDag.h:87
ReturnType getAndReset()
Definition: Timer.h:29
A binary DAG that is specialised to be a top tree's minimal DAG.
Definition: TopDag.h:46
void construct(DebugInfo *debugInfo=NULL, const double minRatio=1.2)
A key-value label storage.
Definition: Labels.h:107
void clear()
Definition: OrderedTree.h:587