Tree Compression with Top Trees Revisited
randomEval.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <string>
3 
4 #include <thread>
5 #include <mutex>
6 
7 #include "Common.h"
8 
9 // Data Structures
10 #include "Edges.h"
11 #include "Labels.h"
12 #include "Nodes.h"
13 #include "OrderedTree.h"
14 #include "TopDag.h"
15 #include "TopTree.h"
16 
17 // Algorithms
18 #include "RandomTree.h"
19 #include "TopDagUnpacker.h"
20 #include "RePairCombiner.h"
21 #include "TopDagConstructor.h"
22 
23 // Utils
24 #include "ArgParser.h"
25 #include "ProgressBar.h"
26 #include "Statistics.h"
27 #include "Timer.h"
28 #include "XML.h"
29 
30 using std::cout;
31 using std::endl;
32 
33 void usage(char* name) {
34  cout << "Usage: " << name << " <options>" << endl
35  << " -m <int> tree size (edges) (default: 1000)" << endl
36  << " -n <int> number of trees to test (default: 100)" << endl
37  << " -l <int> number of different labels to assign to the nodes (default: 2)" << endl
38  << " -s <int> seed (default: 12345678)" << endl
39  << " -r use RePair-inspired combiner" << endl
40  << " -g <file> set output file for edge compression ratios (default: no output)" << endl
41  << " -o <file> set output file for debug information (default: no output)" << endl
42  << " -w <path> set output folder for generated trees as XML files (default: don't write)" << endl
43  << " -t <int> number of threads to use (default: #cores)" << endl
44  << " -v verbose" << endl
45  << " -vv extra verbose" << endl;
46 }
47 
48 std::mutex debugMutex;
49 
50 void runIteration(const int iteration, RandomGeneratorType &generator, const uint seed, const int size,
51  const int numLabels, const bool useRepair, const bool verbose, const bool extraVerbose,
52  Statistics &statistics, ProgressBar &bar, const string &treePath) {
53  // Seed RNG
54  generator.seed(seed);
55  if (verbose) cout << endl << "Round " << iteration << ", seed is " <<seed << endl;
56 
57  DebugInfo debugInfo;
60 
61  Timer timer;
62 
63  // Generate random tree
64  rand.generateTree(tree, size);
65  RandomLabels<RandomGeneratorType> labels(size + 1, numLabels, generator);
66 
67  debugInfo.generationDuration = timer.get();
68  if (verbose) cout << "Generated " << tree.summary() << " in " << timer.get() << "ms" << endl;
69  timer.reset();
70 
71  debugInfo.height = tree.height();
72  debugInfo.avgDepth = tree.avgDepth();
73  debugInfo.statDuration = timer.getAndReset();
74 
75  if (treePath != "") {
76  const string filename(treePath + "/" + std::to_string(iteration) + "_" + std::to_string(seed) + ".xml");
77  XmlWriter<OrderedTree<TreeNode, TreeEdge>>::write(tree, labels, filename);
78  debugInfo.ioDuration = timer.getAndReset();
79  }
80 
81  const int treeEdges = tree._numEdges;
82  TopDag<int> dag(tree._numNodes, labels);
83  if (useRepair) {
84  RePairCombiner<OrderedTree<TreeNode, TreeEdge>, int> topDagConstructor(tree, dag, verbose, extraVerbose);
85  topDagConstructor.construct(&debugInfo);
86  } else {
87  TopDagConstructor<OrderedTree<TreeNode, TreeEdge>, int> topDagConstructor(tree, dag, verbose, extraVerbose);
88  topDagConstructor.construct(&debugInfo);
89  }
90 
91  tree.clear(); // free memory
92 
93  if (debugInfo.minEdgeRatio < 1.2) {
94  cout << "minRatio = " << debugInfo.minEdgeRatio << " for seed " << seed << endl;
95  }
96 
97  debugInfo.mergeDuration = timer.get();
98  if (verbose)
99  cout << "Top DAG construction took " << timer.get() << "ms" << endl;
100  timer.reset();
101 /*
102  debugInfo.topTreeHeight = topTree.height();
103  debugInfo.topTreeAvgDepth = topTree.avgDepth();
104  debugInfo.topTreeMinDepth = topTree.minDepth();
105  debugInfo.statDuration += timer.getAndReset();
106 */
107 
108  const int edges = dag.countEdges();
109  const double percentage = (edges * 100.0) / treeEdges;
110  const double ratio = ((int)(1000 / percentage)) / 10.0;
111  debugInfo.dagDuration = timer.get();
112  if (verbose)
113  cout << "Top dag has " << dag.nodes.size() - 1 << " nodes, " << edges << " edges (" << percentage
114  << "% of original tree, " << ratio << ":1)" << endl;
115 
116  debugInfo.numDagEdges = edges;
117  debugInfo.numDagNodes = dag.nodes.size() - 1;
118 
119  debugMutex.lock();
120  statistics.addDebugInfo(debugInfo);
121  ++bar;
122  debugMutex.unlock();
123 }
124 
125 int main(int argc, char **argv) {
126  ArgParser argParser(argc, argv);
127 
128  if (argParser.isSet("h") || argParser.isSet("-help")) {
129  usage(argv[0]);
130  return 0;
131  }
132 
133  const int size = argParser.get<int>("m", 1000);
134  const int numIterations = argParser.get<int>("n", 100);
135  const uint numLabels = argParser.get<uint>("l", 2);
136  const uint seed = argParser.get<uint>("s", 12345678);
137  const bool verbose = argParser.isSet("v") || argParser.isSet("vv");
138  const bool extraVerbose = argParser.isSet("vv");
139  const bool useRepair = argParser.isSet("r");
140  const string ratioFilename = argParser.get<string>("g", "");
141  const string debugFilename = argParser.get<string>("o", "");
142  const string treePath = argParser.get<string>("w", "");
143 
144  if (treePath != "") {
145  makePathRecursive(treePath);
146  }
147 
148  int numWorkers(std::thread::hardware_concurrency());
149  numWorkers = argParser.get<int>("t", numWorkers);
150 
151  Statistics statistics(ratioFilename, debugFilename);
152  ProgressBar bar(numIterations, std::cerr);
153 
154  cout << "Running experiments with " << numIterations << " trees of size " << size << " with " << numLabels
155  << " different labels" << flush;
156 
157  // Generate seeds deterministically from the input parameters
158  vector<uint> seeds(numIterations);
159  if (numIterations > 1) {
160  std::seed_seq seedSeq({(uint)size, (uint)numIterations, numLabels, seed});
161  seedSeq.generate(seeds.begin(), seeds.end());
162  } else {
163  // Allow reconstructing a single tree
164  seeds[0] = seed;
165  }
166 
167  // function that the threads will execute
168  auto worker = [&](int start, int end) {
169  RandomGeneratorType engine{};
170  for (int i = start; i < end; ++i) {
171  runIteration(i, engine, seeds[i], size, numLabels, useRepair, verbose, extraVerbose, statistics, bar, treePath);
172  }
173  };
174 
175  const int treesPerThread = numIterations / numWorkers;
176  const int leftovers = numIterations % numWorkers;
177 
178  vector<std::thread> workers;
179 
180  cout << " using " << numWorkers << " threads" << endl;
181 
182  int min(0), max(treesPerThread);
183  for (int i = 0; i < numWorkers; ++i) {
184  if (i < leftovers) {
185  max++;
186  }
187  workers.push_back(std::thread(worker, min, max));
188  min = max;
189  max += treesPerThread;
190  }
191 
192  for (std::thread &worker : workers) {
193  worker.join();
194  }
195 
196  bar.undraw();
197 
198  statistics.compute();
199  statistics.dump(std::cerr);
200 }
int height() const
Definition: OrderedTree.h:571
Transform a tree into its top tree.
A statistics aggregator.
Definition: Statistics.h:244
vector< DagNode< DataType > > nodes
Definition: TopDag.h:177
A simple progress bar.
Definition: ProgressBar.h:10
Ordered tree data structure.
Definition: OrderedTree.h:37
void undraw()
remove all traces of the bar from the output stream
Definition: ProgressBar.h:43
double mergeDuration
the time it took to transform the tree into its top tree (in milliseconds)
Definition: Statistics.h:47
double dagDuration
the time it took to compute the top tree's minimal DAG (in milliseconds)
Definition: Statistics.h:49
double generationDuration
the time it took to generate the tree (in milliseconds)
Definition: Statistics.h:45
uint_fast64_t numDagEdges
number of edges in the minimal DAG
Definition: Statistics.h:65
void runIteration(const int iteration, RandomGeneratorType &generator, const uint seed, const int size, const int numLabels, const bool useRepair, const bool verbose, const bool extraVerbose, Statistics &statistics, ProgressBar &bar, const string &treePath)
Definition: randomEval.cpp:50
int main(int argc, char **argv)
Definition: randomEval.cpp:125
double avgDepth() const
Definition: OrderedTree.h:579
bool isSet(const string &arg) const
check whether an argument was set
Definition: ArgParser.h:58
ReturnType get() const
Definition: Timer.h:23
double avgDepth
average depth of the tree's nodes
Definition: Statistics.h:77
uint_fast64_t height
height of the tree
Definition: Statistics.h:75
double statDuration
the time spent on statistical stuff
Definition: Statistics.h:55
void reset()
Definition: Timer.h:19
Uniformly random labels.
Definition: Labels.h:76
Parse command-line arguments.
Definition: ArgParser.h:21
uint_fast64_t numDagNodes
number of nodes in the minimal DAG
Definition: Statistics.h:67
void construct(DebugInfo *debugInfo=NULL)
void addDebugInfo(const DebugInfo &info)
add a debug info object
Definition: Statistics.h:260
Holds debug information about a tree compression run.
Definition: Statistics.h:43
T get(const string &key, const T defaultValue=T())
Definition: ArgParser.h:44
Generates ordered unlabelled trees uniformly at random.
Definition: RandomTree.h:15
Transform a tree into its top tree.
double minEdgeRatio
the minimum ratio of edges before and after an iteration (there are theoretical bounds on this) ...
Definition: Statistics.h:57
double ioDuration
the time spent on I/O
Definition: Statistics.h:53
string summary() const
A one-line summary of the tree.
Definition: OrderedTree.h:348
void usage(char *name)
Definition: randomEval.cpp:33
A flexible timer.
Definition: Timer.h:14
std::mutex debugMutex
Definition: randomEval.cpp:48
bool makePathRecursive(std::string path, int permissions=0755)
Definition: Common.h:48
XML tree writer (empty template for overloading)
Definition: XML.h:68
int countEdges() const
Count the number of edges in the DAG.
Definition: TopDag.h:87
ReturnType getAndReset()
Definition: Timer.h:29
A binary DAG that is specialised to be a top tree's minimal DAG.
Definition: TopDag.h:46
void generateTree(TreeType &tree, const int numEdges, const bool verbose=false)
Definition: RandomTree.h:54
void construct(DebugInfo *debugInfo=NULL, const double minRatio=1.2)
std::mt19937 RandomGeneratorType
the type of random generator used
Definition: Common.h:38
void clear()
Definition: OrderedTree.h:587