Tree Compression with Top Trees Revisited
randomVerify.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <string>
3 
4 #include <thread>
5 #include <mutex>
6 
7 #include "Common.h"
8 
9 // Data Structures
10 #include "Edges.h"
11 #include "Labels.h"
12 #include "Nodes.h"
13 #include "OrderedTree.h"
14 #include "TopDag.h"
15 #include "TopTree.h"
16 
17 // Algorithms
18 #include "RandomTree.h"
19 #include "TopDagUnpacker.h"
20 #include "RePairCombiner.h"
21 #include "TopDagConstructor.h"
22 #include "TopTreeUnpacker.h"
23 
24 // Utils
25 #include "ArgParser.h"
26 #include "ProgressBar.h"
27 #include "Statistics.h"
28 #include "Timer.h"
29 #include "XML.h"
30 
31 using std::cout;
32 using std::endl;
33 
34 void usage(char* name) {
35  cout << "Usage: " << name << " <options>" << endl
36  << " -m <int> tree size (edges) (default: 1000)" << endl
37  << " -n <int> number of trees to test (default: 100)" << endl
38  << " -l <int> number of different labels to assign to the nodes (default: 2)" << endl
39  << " -s <int> seed (default: 12345678)" << endl
40  << " -r <file> set output file for edge compression ratios (default: no output)" << endl
41  << " -o <file> set output file for debug information (default: no output)" << endl
42  << " -w <path> set output folder for generated trees as XML files (default: don't write)" << endl
43  << " -t <int> number of threads to use (default: #cores)" << endl
44  << " -v verbose" << endl
45  << " -vv extra verbose" << endl;
46 }
47 
48 std::mutex debugMutex;
49 
50 void runIteration(const int iteration, RandomGeneratorType &generator, const uint seed, const int size,
51  const int numLabels, const bool useRePair, const bool verbose, const bool extraVerbose,
52  Statistics &statistics, ProgressBar &bar, const string &treePath) {
53  // Seed RNG
54  generator.seed(seed);
55  if (verbose) cout << endl << "Round " << iteration << ", seed is " <<seed << endl;
56 
57  DebugInfo debugInfo;
60 
61  Timer timer;
62 
63  // Generate random tree
64  rand.generateTree(tree, size);
65  RandomLabels<RandomGeneratorType> labels(size+1, numLabels, generator);
66 
67  debugInfo.generationDuration = timer.get();
68  if (verbose) cout << "Generated " << tree.summary() << " in " << timer.get() << "ms" << endl;
69  timer.reset();
70 
71  if (treePath != "") {
72  const string filename(treePath + "/" + std::to_string(iteration) + "_orig.xml");
73  XmlWriter<OrderedTree<TreeNode, TreeEdge>>::write(tree, labels, filename);
74  debugInfo.ioDuration = timer.get();
75  if (verbose) cout << "Wrote orginial trimmed XML file in " << timer.get() << "ms: " << tree.summary() << endl;
76  timer.reset();
77  }
78 
79  OrderedTree<TreeNode, TreeEdge> treeCopy(tree);
80  debugInfo.height = tree.height();
81  debugInfo.avgDepth = tree.avgDepth();
82  debugInfo.statDuration = timer.getAndReset();
83 
84  // Construct top tree
85  TopDag<int> dag(tree._numNodes, labels);
86  if (useRePair) {
87  RePairCombiner<OrderedTree<TreeNode, TreeEdge>, int> topDagConstructor(tree, dag, verbose, extraVerbose);
88  topDagConstructor.construct(&debugInfo);
89  } else {
90  TopDagConstructor<OrderedTree<TreeNode, TreeEdge>, int> topDagConstructor(tree, dag, verbose, extraVerbose);
91  topDagConstructor.construct(&debugInfo);
92  }
93 
94  debugInfo.mergeDuration = timer.get();
95  if (verbose)
96  cout << "Top DAG construction took " << timer.get() << "ms" << endl;
97  timer.reset();
98 
99 /*
100  debugInfo.topTreeHeight = topTree.height();
101  debugInfo.topTreeAvgDepth = topTree.avgDepth();
102  debugInfo.topTreeMinDepth = topTree.minDepth();
103  debugInfo.statDuration += timer.getAndReset();
104 */
105 
106  // Unpack top DAG to topTree
107  TopTree<int> topTree(size + 1);
108  TopDagUnpacker<int> dagUnpacker(dag, topTree);
109  dagUnpacker.unpack();
110 
111  // Unpack top tree
112  OrderedTree<TreeNode, TreeEdge> unpackedTree;
113  Labels<int> newLabels(size + 1);
114  TopTreeUnpacker<OrderedTree<TreeNode, TreeEdge>, int> treeUnpacker(topTree, unpackedTree, newLabels);
115  treeUnpacker.unpack();
116  debugInfo.unpackDuration = timer.get();
117  if (verbose) cout << "Unpacked top tree in " << timer.get() << "ms" << flush;
118  timer.reset();
119 
120  // Verify that the unpacked tree is identical to the original tree
121  if (!unpackedTree.isEqual<LabelsT<int>>(treeCopy, newLabels, labels)) {
122  std::cerr << "Top Tree unpacking produced incorrect result for seed " << seed << endl;
123  }
124  debugInfo.statDuration += timer.get();
125  if (verbose) cout << "; checked in " << timer.get() << "ms" << endl;
126  timer.reset();
127 
128 
129  if (treePath != "") {
130  const string filename(treePath + "/" + std::to_string(iteration) + "_unpacked.xml");
131  XmlWriter<OrderedTree<TreeNode, TreeEdge>>::write(unpackedTree, newLabels, filename);
132  debugInfo.ioDuration += timer.get();
133  if (verbose) cout << "Wrote recovered tree in " << timer.get() << "ms" << endl;
134  }
135 
136  debugMutex.lock();
137  statistics.addDebugInfo(debugInfo);
138  ++bar;
139  debugMutex.unlock();
140 }
141 
142 int main(int argc, char **argv) {
143  ArgParser argParser(argc, argv);
144 
145  if (argParser.isSet("h") || argParser.isSet("-help")) {
146  usage(argv[0]);
147  return 0;
148  }
149 
150  const int size = argParser.get<int>("m", 1000);
151  const int numIterations = argParser.get<int>("n", 100);
152  const uint numLabels = argParser.get<uint>("l", 2);
153  const uint seed = argParser.get<uint>("s", 12345678);
154  const bool useRePair = argParser.isSet("r");
155  const bool verbose = argParser.isSet("v") || argParser.isSet("vv");
156  const bool extraVerbose = argParser.isSet("vv");
157  const string ratioFilename = argParser.get<string>("r", "");
158  const string debugFilename = argParser.get<string>("o", "");
159  const string treePath = argParser.get<string>("w", "");
160 
161  if (treePath != "") {
162  makePathRecursive(treePath);
163  }
164 
165  int numWorkers(std::thread::hardware_concurrency());
166  numWorkers = argParser.get<int>("t", numWorkers);
167 
168  Timer timer;
169  Statistics statistics(ratioFilename, debugFilename);
170  ProgressBar bar(numIterations, std::cerr);
171 
172  cout << "Running experiments with " << numIterations << " trees of size " << size << " with " << numLabels
173  << " different labels" << flush;
174 
175  // Generate seeds deterministically from the input parameters
176  vector<uint> seeds(numIterations);
177  if (numIterations > 1) {
178  std::seed_seq seedSeq({(uint)size, (uint)numIterations, numLabels, seed});
179  seedSeq.generate(seeds.begin(), seeds.end());
180  } else {
181  // Allow reconstructing a single tree
182  seeds[0] = seed;
183  }
184 
185  // function that the threads will execute
186  auto worker = [&](int start, int end) {
187  RandomGeneratorType engine{};
188  for (int i = start; i < end; ++i) {
189  runIteration(i, engine, seeds[i], size, numLabels, useRePair, verbose, extraVerbose, statistics, bar, treePath);
190  }
191  };
192 
193  const int treesPerThread = numIterations / numWorkers;
194  const int leftovers = numIterations % numWorkers;
195 
196  vector<std::thread> workers;
197 
198  cout << " using " << numWorkers << " threads" << endl;
199 
200  int min(0), max(treesPerThread);
201  for (int i = 0; i < numWorkers; ++i) {
202  if (i < leftovers) {
203  max++;
204  }
205  workers.push_back(std::thread(worker, min, max));
206  min = max;
207  max += treesPerThread;
208  }
209 
210  for (std::thread &worker : workers) {
211  worker.join();
212  }
213 
214  bar.undraw();
215 
216  statistics.compute();
217  statistics.dump(std::cerr);
218 }
int height() const
Definition: OrderedTree.h:571
Transform a tree into its top tree.
A statistics aggregator.
Definition: Statistics.h:244
A simple progress bar.
Definition: ProgressBar.h:10
Ordered tree data structure.
Definition: OrderedTree.h:37
void undraw()
remove all traces of the bar from the output stream
Definition: ProgressBar.h:43
double mergeDuration
the time it took to transform the tree into its top tree (in milliseconds)
Definition: Statistics.h:47
double generationDuration
the time it took to generate the tree (in milliseconds)
Definition: Statistics.h:45
Unpack a TopTree into its original OrderedTree.
double avgDepth() const
Definition: OrderedTree.h:579
bool isSet(const string &arg) const
check whether an argument was set
Definition: ArgParser.h:58
ReturnType get() const
Definition: Timer.h:23
void dump(std::ostream &os) const
Definition: Statistics.h:278
double avgDepth
average depth of the tree's nodes
Definition: Statistics.h:77
uint_fast64_t height
height of the tree
Definition: Statistics.h:75
double statDuration
the time spent on statistical stuff
Definition: Statistics.h:55
int main(int argc, char **argv)
std::mutex debugMutex
void reset()
Definition: Timer.h:19
double unpackDuration
the time spent unpacking trees
Definition: Statistics.h:51
Uniformly random labels.
Definition: Labels.h:76
bool isEqual(const OrderedTree< NodeType, EdgeType > &other, LabelType &labels, LabelType &otherLabels, const bool verbose=false) const
Definition: OrderedTree.h:313
Parse command-line arguments.
Definition: ArgParser.h:21
void usage(char *name)
void construct(DebugInfo *debugInfo=NULL)
void addDebugInfo(const DebugInfo &info)
add a debug info object
Definition: Statistics.h:260
Unpack a binary DAG to its original top tree.
Holds debug information about a tree compression run.
Definition: Statistics.h:43
void runIteration(const int iteration, RandomGeneratorType &generator, const uint seed, const int size, const int numLabels, const bool useRePair, const bool verbose, const bool extraVerbose, Statistics &statistics, ProgressBar &bar, const string &treePath)
T get(const string &key, const T defaultValue=T())
Definition: ArgParser.h:44
Generates ordered unlabelled trees uniformly at random.
Definition: RandomTree.h:15
Top tree data structure.
Definition: TopTree.h:15
Transform a tree into its top tree.
double ioDuration
the time spent on I/O
Definition: Statistics.h:53
string summary() const
A one-line summary of the tree.
Definition: OrderedTree.h:348
A flexible timer.
Definition: Timer.h:14
bool makePathRecursive(std::string path, int permissions=0755)
Definition: Common.h:48
XML tree writer (empty template for overloading)
Definition: XML.h:68
ReturnType getAndReset()
Definition: Timer.h:29
void compute()
Definition: Statistics.h:274
A binary DAG that is specialised to be a top tree's minimal DAG.
Definition: TopDag.h:46
void generateTree(TreeType &tree, const int numEdges, const bool verbose=false)
Definition: RandomTree.h:54
void construct(DebugInfo *debugInfo=NULL, const double minRatio=1.2)
std::mt19937 RandomGeneratorType
the type of random generator used
Definition: Common.h:38
A key-value label storage.
Definition: Labels.h:107