1 package eu.fbk.knowledgestore.tool;
2
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;

import javax.annotation.Nullable;

import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import org.openrdf.query.BindingSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.fbk.knowledgestore.OperationException;
import eu.fbk.knowledgestore.Session;
import eu.fbk.knowledgestore.client.Client;
import eu.fbk.knowledgestore.data.Handler;
import eu.fbk.knowledgestore.data.Stream;
import eu.fbk.knowledgestore.internal.CommandLine;
import eu.fbk.knowledgestore.internal.Util;
import eu.fbk.rdfpro.util.IO;
import eu.fbk.rdfpro.util.Tracker;
41
42 public final class TestGenerator {
43
44 private static final Logger LOGGER = LoggerFactory.getLogger(TestGenerator.class);
45
46 private static final Random RANDOM = new Random(System.currentTimeMillis());
47
48 private final Dictionary dictionary;
49
50 private final String url;
51
52 private final String username;
53
54 private final String password;
55
56 private final int mixes;
57
58 private final File outputFile;
59
60 private final Query[] queries;
61
62 public static void main(final String[] args) {
63 try {
64 final CommandLine cmd = CommandLine
65 .parser()
66 .withName("ks-test-generator")
67 .withHeader(
68 "Generates the request mixes for the test, by querying the "
69 + "KnowledgeStore. Generator parameters and queries are "
70 + "supplied in a .properties file. Output data is written "
71 + "to a .tsv file.")
72 .withOption("c", "config", "the configuration file", "FILE",
73 CommandLine.Type.FILE_EXISTING, true, false, true)
74 .withFooter(
75 "Configuration parameters may be overridden by supplying additional "
76 + "property=value\narguments on the command line.")
77 .withLogger(LoggerFactory.getLogger("eu.fbk.knowledgestore")).parse(args);
78
79 final File configFile = cmd.getOptionValue("c", File.class);
80
81 final Properties config = new Properties();
82 try (InputStream configStream = IO.read(configFile.getAbsolutePath())) {
83 config.load(configStream);
84 }
85
86 for (final String arg : cmd.getArgs(String.class)) {
87 final int index = arg.indexOf('=');
88 if (index > 0) {
89 final String name = arg.substring(0, index);
90 final String value = arg.substring(index + 1);
91 config.setProperty(name, value);
92 }
93 }
94
95 new TestGenerator(config, configFile.getParentFile()).run();
96
97 } catch (final Throwable ex) {
98 CommandLine.fail(ex);
99 }
100 }
101
/**
 * Creates a generator based on the supplied configuration.
 * <p>
 * Properties read here: {@code test.url}, {@code test.username} and {@code test.password}
 * (the latter two default to {@code null}), {@code test.mixes} (defaults to 0),
 * {@code test.out} and {@code test.queries} (comma-separated names of the queries to
 * enable). Query definitions are extracted from the same properties by
 * {@code Query.create}.
 *
 * @param properties
 *            the configuration properties
 * @param basePath
 *            the directory relative paths are resolved against; if null, the directory
 *            denoted by the {@code user.dir} system property is used
 * @throws IllegalArgumentException
 *             if {@code test.queries} names a query that is not defined
 */
public TestGenerator(final Properties properties, @Nullable final File basePath) {

    // Dictionary shared by all the query result files
    this.dictionary = new Dictionary();

    // Resolve the base directory once, so that every consumer below (including
    // Query.create) gets a non-null directory even when basePath is null
    final File baseDir = basePath != null ? basePath : new File(
            System.getProperty("user.dir"));
    final Path base = baseDir.toPath();

    // Connection settings for the system under test
    this.url = TestUtil.read(properties, "test.url", String.class);
    this.username = TestUtil.read(properties, "test.username", String.class, null);
    this.password = TestUtil.read(properties, "test.password", String.class, null);
    LOGGER.info("SUT: {}{}", this.url,
            this.username == null && this.password == null ? " (anonymous access)"
                    : " (authenticated access)");

    // Number of mixes to generate and output location
    this.mixes = TestUtil.read(properties, "test.mixes", Integer.class, 0);
    this.outputFile = base.resolve(
            Paths.get(TestUtil.read(properties, "test.out", String.class))).toFile();
    LOGGER.info("{} mix(es) to be written to {}", this.mixes,
            this.outputFile.getAbsolutePath());

    // Select the enabled queries, preserving the order given in test.queries. The list
    // is trimmed first so that leading/trailing whitespace does not end up in a name
    final List<Query> allQueries = Query.create(properties, baseDir);
    final List<Query> enabledQueries = Lists.newArrayList();
    final Set<String> enabledNames = Sets.newLinkedHashSet(Arrays.asList(TestUtil
            .read(properties, "test.queries", String.class).trim().split("\\s*[,]\\s*")));
    for (final String name : enabledNames) {
        boolean added = false;
        for (final Query query : allQueries) {
            if (query.name.equals(name)) {
                enabledQueries.add(query);
                added = true;
                break;
            }
        }
        Preconditions.checkArgument(added, "Unknown query " + name);
    }
    this.queries = enabledQueries.toArray(new Query[enabledQueries.size()]);
    LOGGER.info("{} queries enabled ({} defined): {}", enabledQueries.size(),
            allQueries.size(), Joiner.on(", ").join(enabledQueries));
}
146
147 @SuppressWarnings("resource")
148 public void run() throws IOException, OperationException {
149
150 Client client = null;
151 Session session = null;
152
153 try {
154
155 client = Client.builder(this.url).compressionEnabled(true).validateServer(false)
156 .build();
157 session = client.newSession(this.username, this.password);
158
159
160 final List<List<String>> fileVars = Lists.newArrayList();
161 final List<List<Tuple>> fileTuples = Lists.newArrayList();
162 for (int i = 0; i < this.queries.length; ++i) {
163 final List<String> vars = Lists.newArrayList();
164 final List<Tuple> tuples = Lists.newArrayList();
165 final File file = this.queries[i].download(session);
166 read(file, vars, tuples, this.dictionary);
167 fileVars.add(vars);
168 fileTuples.add(tuples);
169 }
170
171
172 final int[][] fileMappings = new int[this.queries.length][];
173 final List<String> outputVars = Lists.newArrayList();
174 for (int i = 0; i < fileVars.size(); ++i) {
175 boolean insidePrefix = true;
176 fileMappings[i] = new int[fileVars.get(i).size()];
177 for (int j = 0; j < fileMappings[i].length; ++j) {
178 final String var = fileVars.get(i).get(j);
179 int index = outputVars.indexOf(var);
180 if (index < 0) {
181 insidePrefix = false;
182 index = outputVars.size();
183 outputVars.add(var);
184 } else if (!insidePrefix) {
185 throw new IllegalArgumentException("Variable " + var + " of query "
186 + this.queries[i] + " matches var in previous files "
187 + "but is preceded by newly intruduced variable ");
188 }
189 fileMappings[i][j] = index;
190 }
191 }
192 LOGGER.info("Output schema: ({})", Joiner.on(", ").join(outputVars));
193
194
195 final Tracker tracker = new Tracker(LOGGER, null,
196 "Generated %d tuples (%d tuple/s avg)",
197 "Generated %d tuples (%d tuple/s, %d tuple/s avg)");
198 tracker.start();
199
200
201 int numFailures = 0;
202 int numDuplicates = 0;
203 final Set<Tuple> outputTuples = Sets.newLinkedHashSet();
204 final int[] outputCodes = new int[outputVars.size()];
205 outer: while (outputTuples.size() < this.mixes) {
206 Arrays.fill(outputCodes, 0);
207 for (int i = 0; i < fileTuples.size(); ++i) {
208 if (!pick(fileTuples.get(i), fileMappings[i], outputCodes)) {
209 ++numFailures;
210 continue outer;
211 }
212 }
213 if (outputTuples.add(Tuple.create(outputCodes))) {
214 tracker.increment();
215 } else {
216 ++numDuplicates;
217 }
218 }
219
220
221 tracker.end();
222
223
224 LOGGER.info("Tuple generation statistics: {} attempts failed, {} duplicates",
225 numFailures, numDuplicates);
226
227
228 write(this.outputFile, outputVars, outputTuples, this.dictionary);
229
230 } finally {
231
232 Util.closeQuietly(session);
233 Util.closeQuietly(client);
234 }
235 }
236
237 private static boolean pick(final List<Tuple> tuples, final int[] mappings,
238 final int[] outputCodes) {
239
240 final int numVariables = mappings.length;
241 final int numTuples = tuples.size();
242
243
244 int start = 0;
245 int end = tuples.size();
246
247
248 boolean constrained = false;
249 final int[] searchCodes = new int[numVariables];
250 for (int i = 0; i < numVariables; ++i) {
251 final int code = outputCodes[mappings[i]];
252 if (code != 0) {
253 searchCodes[i] = code;
254 constrained = true;
255 }
256 }
257
258
259
260
261 if (constrained) {
262 final Tuple searchTuple = Tuple.create(searchCodes);
263 start = Collections.binarySearch(tuples, searchTuple);
264 if (start < 0) {
265 start = -start - 1;
266 }
267 if (start >= numTuples || !tuples.get(start).matches(searchTuple)) {
268 return false;
269 }
270 end = start + 1;
271 while (end < numTuples && tuples.get(end).matches(searchTuple)) {
272 ++end;
273 }
274 }
275
276
277 final int chosenIndex = start + RANDOM.nextInt(end - start);
278 final Tuple chosenTuple = tuples.get(chosenIndex);
279 for (int i = 0; i < numVariables; ++i) {
280 final int slot = mappings[i];
281 final int oldValue = outputCodes[slot];
282 final int newValue = chosenTuple.get(i);
283 if (oldValue != 0 && newValue != oldValue) {
284 throw new Error("Join error: " + chosenTuple + " - "
285 + Arrays.toString(outputCodes) + " (search: "
286 + Arrays.toString(searchCodes) + "; start " + start + "; end " + end + ")");
287 }
288 outputCodes[mappings[i]] = chosenTuple.get(i);
289 }
290
291
292 return true;
293 }
294
295 private static void read(final File file, final List<String> vars, final List<Tuple> tuples,
296 final Dictionary dictionary) throws IOException {
297
298
299 try (final BufferedReader reader = new BufferedReader(IO.utf8Reader(IO.buffer(IO.read(file
300 .getAbsolutePath()))))) {
301
302
303 for (final String token : reader.readLine().split("\t")) {
304 vars.add(token.trim().substring(1));
305 }
306
307
308 final Tracker tracker = new Tracker(LOGGER, null,
309 "Parsed " + file.getAbsolutePath() + " (" + Joiner.on(", ").join(vars)
310 + "): %d tuples (%d tuple/s avg)",
311 "Parsed %d tuples (%d tuple/s, %d tuple/s avg)");
312 tracker.start();
313
314
315 int lineNum = 0;
316 String line;
317 final int[] codes = new int[vars.size()];
318 while ((line = reader.readLine()) != null) {
319 try {
320 ++lineNum;
321 final String[] tokens = line.split("\t");
322 for (int j = 0; j < codes.length; ++j) {
323 codes[j] = dictionary.codeFor(tokens[j]);
324 }
325 tuples.add(Tuple.create(codes));
326 tracker.increment();
327 } catch (final Throwable ex) {
328 LOGGER.warn("Ignoring invalid line " + lineNum + " of file " + file + " - "
329 + ex.getMessage() + " [" + line + "]");
330 }
331 }
332
333
334 tracker.end();
335
336
337 Collections.sort(tuples);
338 }
339
340 }
341
342 private static void write(final File file, final List<String> vars,
343 final Collection<Tuple> tuples, final Dictionary dictionary) throws IOException {
344
345
346 final Tracker tracker = new Tracker(LOGGER, null,
347 "Written " + file.getAbsolutePath() + " (" + Joiner.on(", ").join(vars)
348 + "): %d tuples (%d tuple/s avg)",
349 "Written %d tuples (%d tuple/s, %d tuple/s avg)");
350 tracker.start();
351
352
353 final int numVars = vars.size();
354 try (Writer writer = IO.utf8Writer(IO.buffer(IO.write(file.getAbsolutePath())))) {
355
356
357 for (int i = 0; i < numVars; ++i) {
358 if (i > 0) {
359 writer.write("\t");
360 }
361 writer.write("?");
362 writer.write(vars.get(i));
363 }
364 writer.write("\n");
365
366
367 for (final Tuple tuple : tuples) {
368 for (int i = 0; i < numVars; ++i) {
369 if (i > 0) {
370 writer.write("\t");
371 }
372 writer.write(dictionary.stringFor(tuple.get(i)));
373 }
374 writer.write("\n");
375 tracker.increment();
376 }
377 }
378
379
380 tracker.end();
381 }
382
383 private static class Query {
384
385 private final String name;
386
387 private final File file;
388
389 private final String string;
390
391 public Query(final String name, final File file, final String string) {
392 this.name = name;
393 this.file = file;
394 this.string = string;
395 }
396
397 public File download(final Session session) throws IOException, OperationException {
398
399 if (!this.file.exists()) {
400
401 final AtomicReference<Writer> writerToClose = new AtomicReference<Writer>(null);
402
403 final Tracker tracker = new Tracker(LOGGER, null,
404 "Evaluated query " + this.name + ": %d tuples (%d tuple/s avg)",
405 "Evaluating query " + this.name
406 + ": %d tuples (%d tuple/s, %d tuple/s avg)");
407 tracker.start();
408
409 try (Stream<BindingSet> stream = session.sparql(this.string).timeout(3600 * 1000L)
410 .execTuples()) {
411
412 stream.toHandler(new Handler<BindingSet>() {
413
414 private Writer writer = null;
415
416 private List<String> variables;
417
418 @SuppressWarnings("unchecked")
419 @Override
420 public void handle(final BindingSet bindings) throws Throwable {
421 if (this.writer == null) {
422 this.writer = IO.utf8Writer(IO.buffer(IO.write(Query.this.file
423 .getAbsolutePath())));
424 writerToClose.set(this.writer);
425 this.variables = stream.getProperty("variables", List.class);
426 for (int i = 0; i < this.variables.size(); ++i) {
427 this.writer.write(i > 0 ? "\t?" : "?");
428 this.writer.write(this.variables.get(i));
429 }
430 this.writer.write("\n");
431 }
432 if (bindings != null) {
433 this.writer.write(TestUtil.encode(this.variables, bindings));
434 this.writer.write("\n");
435 tracker.increment();
436 }
437 }
438
439 });
440
441 } finally {
442 final Writer writer = writerToClose.get();
443 if (writer != null) {
444 writer.flush();
445 Util.closeQuietly(writer);
446 try {
447
448
449 Thread.sleep(250);
450 } catch (InterruptedException ex) {
451
452 }
453 }
454 }
455
456 tracker.end();
457 }
458
459 return this.file;
460 }
461
462 public static List<Query> create(final Properties properties, final File basePath) {
463 final List<Query> queries = Lists.newArrayList();
464 for (final Map.Entry<String, Properties> entry : TestUtil.split(properties).entrySet()) {
465 final String name = entry.getKey();
466 final Properties props = entry.getValue();
467 final String filename = props.getProperty("file");
468 final String query = props.getProperty("query");
469 if (filename != null && query != null) {
470 final File file = basePath.toPath().resolve(Paths.get(filename)).toFile();
471 queries.add(new Query(name, file, query));
472 }
473 }
474 return queries;
475 }
476
477 @Override
478 public String toString() {
479 return this.name;
480 }
481
482 }
483
484 private static class Dictionary {
485
486 private static final int TABLE_SIZE = 32 * 1024 * 1024 - 1;
487
488 private static final int MAX_COLLISIONS = 1024;
489
490 private static final int BUFFER_BITS = 12;
491
492 private static final int BUFFER_SIZE = 1 << BUFFER_BITS;
493
494 private final int[] table;
495
496 private int[] list;
497
498 private final List<byte[]> buffers;
499
500 private int offset;
501
502 private int lastCode;
503
504 Dictionary() {
505 this.table = new int[Dictionary.TABLE_SIZE];
506 this.list = new int[1024];
507 this.buffers = Lists.newArrayList();
508 this.offset = BUFFER_SIZE;
509 this.lastCode = 0;
510 }
511
512 public int codeFor(final String string) {
513 final byte[] bytes = string.getBytes(Charsets.UTF_8);
514 int bucket = (string.hashCode() & 0x7FFFFFFF) % TABLE_SIZE;
515 for (int i = 0; i < MAX_COLLISIONS; ++i) {
516 final int code = this.table[bucket];
517 if (code != 0) {
518 final int pointer = this.list[code - 1];
519 if (match(pointer, bytes)) {
520 return code;
521 }
522 } else {
523 final int pointer = store(bytes);
524 if (this.lastCode >= this.list.length) {
525 final int[] oldList = this.list;
526 this.list = Arrays.copyOf(oldList, this.list.length * 2);
527 }
528 this.list[this.lastCode++] = pointer;
529 this.table[bucket] = this.lastCode;
530
531
532
533
534
535 return this.lastCode;
536 }
537 bucket = (bucket + 1) % TABLE_SIZE;
538 }
539 throw new Error("Max number of collisions exceeded - RDF vocabulary too large");
540 }
541
542 public String stringFor(final int code) {
543 final int pointer = this.list[code - 1];
544 return new String(load(pointer), Charsets.UTF_8);
545 }
546
547 private byte[] load(final int pointer) {
548 final int index = pointer >>> BUFFER_BITS - 2;
549 final int offset = pointer << 2 & BUFFER_SIZE - 1;
550 final byte[] buffer = this.buffers.get(index);
551 int end = offset;
552 while (buffer[end] != 0) {
553 ++end;
554 }
555 return Arrays.copyOfRange(buffer, offset, end);
556 }
557
558 private int store(final byte[] bytes) {
559 if (this.offset + bytes.length + 1 > BUFFER_SIZE) {
560 this.buffers.add(new byte[BUFFER_SIZE]);
561 this.offset = 0;
562 }
563 final int index = this.buffers.size() - 1;
564 final int pointer = this.offset >> 2 | index << BUFFER_BITS - 2;
565 final byte[] buffer = this.buffers.get(index);
566 System.arraycopy(bytes, 0, buffer, this.offset, bytes.length);
567 this.offset += bytes.length;
568 buffer[this.offset++] = 0;
569 this.offset = this.offset + 3 & 0xFFFFFFFC;
570 return pointer;
571 }
572
573 private boolean match(final int pointer, final byte[] bytes) {
574 final int index = pointer >>> BUFFER_BITS - 2;
575 final int offset = pointer << 2 & BUFFER_SIZE - 1;
576 final byte[] buffer = this.buffers.get(index);
577 for (int i = 0; i < bytes.length; ++i) {
578 if (buffer[offset + i] != bytes[i]) {
579 return false;
580 }
581 }
582 return true;
583 }
584
585 }
586
587 private static abstract class Tuple implements Comparable<Tuple> {
588
589 public static Tuple create(final int... codes) {
590 switch (codes.length) {
591 case 0:
592 return Tuple0.INSTANCE;
593 case 1:
594 return new Tuple1(codes[0]);
595 case 2:
596 return new Tuple2(codes[0], codes[1]);
597 case 3:
598 return new Tuple3(codes[0], codes[1], codes[2]);
599 case 4:
600 return new Tuple4(codes[0], codes[1], codes[2], codes[3]);
601 default:
602 return new TupleN(codes.clone());
603 }
604 }
605
606 public abstract int size();
607
608 public abstract int get(int index);
609
610 public boolean matches(final Tuple tuple) {
611 final int size = size();
612 for (int i = 0; i < size; ++i) {
613 final int expected = tuple.get(i);
614 if (expected != 0 && get(i) != expected) {
615 return false;
616 }
617 }
618 return true;
619 }
620
621 @Override
622 public int compareTo(final Tuple other) {
623 final int thisSize = size();
624 final int otherSize = other.size();
625 final int minSize = Math.min(thisSize, otherSize);
626 for (int i = 0; i < minSize; ++i) {
627 final int result = get(i) - other.get(i);
628 if (result != 0) {
629 return result;
630 }
631 }
632 return thisSize - otherSize;
633 }
634
635 @Override
636 public boolean equals(final Object object) {
637 if (object == this) {
638 return true;
639 }
640 if (!(object instanceof Tuple)) {
641 return false;
642 }
643 final Tuple other = (Tuple) object;
644 final int size = size();
645 if (other.size() != size) {
646 return false;
647 }
648 for (int i = 0; i < size; ++i) {
649 if (get(i) != other.get(i)) {
650 return false;
651 }
652 }
653 return true;
654 }
655
656 @Override
657 public int hashCode() {
658 final int size = size();
659 int hash = size;
660 for (int i = 0; i < size; ++i) {
661 hash = 37 * hash + get(i);
662 }
663 return hash;
664 }
665
666 @Override
667 public String toString() {
668 final int size = size();
669 final StringBuilder builder = new StringBuilder();
670 builder.append('(');
671 for (int i = 0; i < size; ++i) {
672 if (i > 0) {
673 builder.append(", ");
674 }
675 builder.append(get(i));
676 }
677 builder.append(')');
678 return builder.toString();
679 }
680
681 private static final class Tuple0 extends Tuple {
682
683 static final Tuple0 INSTANCE = new Tuple0();
684
685 @Override
686 public int size() {
687 return 0;
688 }
689
690 @Override
691 public int get(final int index) {
692 throw new IndexOutOfBoundsException("Invalid index " + index);
693 }
694
695 }
696
697 private static final class Tuple1 extends Tuple {
698
699 private final int code;
700
701 Tuple1(final int code) {
702 this.code = code;
703 }
704
705 @Override
706 public int size() {
707 return 1;
708 }
709
710 @Override
711 public int get(final int index) {
712 Preconditions.checkElementIndex(index, 1);
713 return this.code;
714 }
715
716 }
717
718 private static final class Tuple2 extends Tuple {
719
720 private final int code0;
721
722 private final int code1;
723
724 Tuple2(final int code0, final int code1) {
725 this.code0 = code0;
726 this.code1 = code1;
727 }
728
729 @Override
730 public int size() {
731 return 2;
732 }
733
734 @Override
735 public int get(final int index) {
736 Preconditions.checkElementIndex(index, 2);
737 return index == 0 ? this.code0 : this.code1;
738 }
739
740 }
741
742 private static final class Tuple3 extends Tuple {
743
744 private final int code0;
745
746 private final int code1;
747
748 private final int code2;
749
750 Tuple3(final int code0, final int code1, final int code2) {
751 this.code0 = code0;
752 this.code1 = code1;
753 this.code2 = code2;
754 }
755
756 @Override
757 public int size() {
758 return 3;
759 }
760
761 @Override
762 public int get(final int index) {
763 switch (index) {
764 case 0:
765 return this.code0;
766 case 1:
767 return this.code1;
768 case 2:
769 return this.code2;
770 default:
771 throw new IndexOutOfBoundsException("Index " + index + ", size 3");
772 }
773 }
774
775 }
776
777 private static final class Tuple4 extends Tuple {
778
779 private final int code0;
780
781 private final int code1;
782
783 private final int code2;
784
785 private final int code3;
786
787 Tuple4(final int code0, final int code1, final int code2, final int code3) {
788 this.code0 = code0;
789 this.code1 = code1;
790 this.code2 = code2;
791 this.code3 = code3;
792 }
793
794 @Override
795 public int size() {
796 return 4;
797 }
798
799 @Override
800 public int get(final int index) {
801 switch (index) {
802 case 0:
803 return this.code0;
804 case 1:
805 return this.code1;
806 case 2:
807 return this.code2;
808 case 3:
809 return this.code3;
810 default:
811 throw new IndexOutOfBoundsException("Index " + index + ", size 4");
812 }
813 }
814
815 }
816
817 private static final class TupleN extends Tuple {
818
819 private final int[] codes;
820
821 TupleN(final int[] codes) {
822 this.codes = codes;
823 }
824
825 @Override
826 public int size() {
827 return this.codes.length;
828 }
829
830 @Override
831 public int get(final int index) {
832 return this.codes[index];
833 }
834
835 }
836
837 }
838
839 }