diff options
Diffstat (limited to 'predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks')
3 files changed, 570 insertions, 0 deletions
diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/HitsVerificationBenchmark.java b/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/HitsVerificationBenchmark.java new file mode 100644 index 00000000000..1e63fed737d --- /dev/null +++ b/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/HitsVerificationBenchmark.java @@ -0,0 +1,189 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.benchmarks; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.yahoo.search.predicate.Config; +import com.yahoo.search.predicate.Hit; +import com.yahoo.search.predicate.PredicateIndex; +import com.yahoo.search.predicate.PredicateIndexBuilder; +import com.yahoo.search.predicate.PredicateQuery; +import com.yahoo.search.predicate.serialization.PredicateQuerySerializer; +import com.yahoo.search.predicate.utils.VespaFeedParser; +import com.yahoo.search.predicate.utils.VespaQueryParser; +import io.airlift.airline.Arguments; +import io.airlift.airline.Command; +import io.airlift.airline.HelpOption; +import io.airlift.airline.Option; +import io.airlift.airline.SingleCommand; + +import javax.inject.Inject; +import java.io.BufferedInputStream; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; + +import static com.yahoo.search.predicate.benchmarks.HitsVerificationBenchmark.BenchmarkArguments.*; +import static java.util.stream.Collectors.joining; +import static java.util.stream.Collectors.toList; + +/** + * A test that runs outputs the hits for each query into result file. + * + * @author bjorncs + */ +public class HitsVerificationBenchmark { + + public static void main(String[] rawArgs) throws IOException { + Optional<BenchmarkArguments> wrappedArgs = getArguments(rawArgs); + if (!wrappedArgs.isPresent()) return; + BenchmarkArguments args = wrappedArgs.get(); + Map<String, Object> output = new TreeMap<>(); + addArgsToOutput(output, args); + + Config config = new Config.Builder() + .setArity(args.arity) + .setUseConjunctionAlgorithm(args.algorithm == Algorithm.CONJUNCTION) + .build(); + + PredicateIndex index = getIndex(args, config, output); + + Stream<PredicateQuery> queries = parseQueries(args.format, args.queryFile); + int totalHits = runQueries(index, queries, args.outputFile); + output.put("Total hits", totalHits); + writeOutputToStandardOut(output); + } + + private static PredicateIndex getIndex(BenchmarkArguments args, Config config, Map<String, Object> output) throws IOException { + if (args.feedFile != null) { + PredicateIndexBuilder builder = new PredicateIndexBuilder(config); + AtomicInteger idCounter = new AtomicInteger(); + VespaFeedParser.parseDocuments( + args.feedFile, Integer.MAX_VALUE, p -> builder.indexDocument(idCounter.incrementAndGet(), p)); + builder.getStats().putValues(output); + return builder.build(); + } else { + try (DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(args.indexFile)))) { + long start = System.currentTimeMillis(); + PredicateIndex index = PredicateIndex.fromInputStream(in); + output.put("Time deserialize index", System.currentTimeMillis() - start); + return index; + } + } + } + + private static int runQueries( + PredicateIndex index, Stream<PredicateQuery> queries, String outputFile) throws IOException { + try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, false))) { + AtomicInteger i = new AtomicInteger(); + PredicateIndex.Searcher searcher = index.searcher(); + return queries.map(searcher::search) + .peek(hits -> {if (i.get() % 500 == 0) {index.rebuildPostingListCache();}}) + .mapToInt(hits -> writeHits(i.getAndIncrement(), hits, writer)) + .sum(); + + } + } + + private static Stream<PredicateQuery> parseQueries(Format format, String queryFile) + throws IOException { + PredicateQuerySerializer serializer = new PredicateQuerySerializer(); + return Files.lines(Paths.get(queryFile)) + .map(line -> + format == Format.JSON + ? serializer.fromJSON(line) + : VespaQueryParser.parseQueryFromQueryProperties(line)); + + } + + private static int writeHits(int i, Stream<Hit> hitStream, BufferedWriter writer) { + try { + List<Hit> hits = hitStream.collect(toList()); + writer.append(Integer.toString(i)) + .append(": ") + .append(hits.stream() + .map(hit -> String.format("(%d, 0x%x)", hit.getDocId(), hit.getSubquery())) + .collect(joining(", ", "[", "]"))) + .append("\n\n"); + return hits.size(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static Optional<BenchmarkArguments> getArguments(String[] rawArgs) { + BenchmarkArguments args = SingleCommand.singleCommand(BenchmarkArguments.class).parse(rawArgs); + if (args.helpOption.showHelpIfRequested()) { + return Optional.empty(); + } + if (args.feedFile == null && args.indexFile == null) { + System.err.println("Provide either a feed file or index file."); + return Optional.empty(); + } + return Optional.of(args); + + } + + private static void addArgsToOutput(Map<String, Object> output, BenchmarkArguments args) { + output.put("Arity", args.arity); + output.put("Algorithm", args.algorithm); + output.put("Query format", args.format); + output.put("Feed file", args.feedFile); + output.put("Query file", args.queryFile); + output.put("Output file", args.outputFile); + output.put("Index file", args.indexFile); + } + + private static void writeOutputToStandardOut(Map<String, Object> output) { + try { + ObjectMapper objectMapper = new ObjectMapper(); + objectMapper.enable(SerializationFeature.INDENT_OUTPUT); + objectMapper.writeValue(System.out, output); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Command(name = "hits-verifier", + description = "Java predicate search system test that outputs the returned hits for each query") + public static class BenchmarkArguments { + + public enum Format{JSON, VESPA} + public enum Algorithm{CONJUNCTION, INTERVALONLY} + + @Option(name = {"-a", "--arity"}, description = "Arity") + public int arity = 2; + + @Option(name = {"-al", "--algorithm"}, description = "Algorithm (CONJUNCTION or INTERVALONLY)") + public Algorithm algorithm = Algorithm.INTERVALONLY; + + @Option(name = {"-qf", "--query-format"}, description = + "Query format. Valid formats are either 'vespa' (obsolete query property format) or 'json'.") + public Format format = Format.VESPA; + + @Option(name = {"-ff", "--feed-file"}, description = "File path to feed file (Vespa XML feed)") + public String feedFile; + + @Option(name = {"-if", "--index-file"}, description = "File path to index file (Serialized index)") + public String indexFile; + + @Option(name = {"-quf", "--query-file"}, description = "File path to a query file") + public String queryFile; + + @Arguments(title = "Output file", description = "File path to output file") + public String outputFile; + + @Inject + public HelpOption helpOption; + } +} diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/PredicateIndexBenchmark.java b/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/PredicateIndexBenchmark.java new file mode 100644 index 00000000000..f3518edd930 --- /dev/null +++ b/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/PredicateIndexBenchmark.java @@ -0,0 +1,297 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.benchmarks; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.google.common.collect.Iterators; +import com.yahoo.search.predicate.Config; +import com.yahoo.search.predicate.PredicateIndex; +import com.yahoo.search.predicate.PredicateIndexBuilder; +import com.yahoo.search.predicate.PredicateQuery; +import com.yahoo.search.predicate.serialization.PredicateQuerySerializer; +import com.yahoo.search.predicate.utils.VespaFeedParser; +import com.yahoo.search.predicate.utils.VespaQueryParser; +import io.airlift.airline.Command; +import io.airlift.airline.HelpOption; +import io.airlift.airline.Option; +import io.airlift.airline.SingleCommand; + +import javax.inject.Inject; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; +import java.util.TreeMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import static com.yahoo.search.predicate.benchmarks.PredicateIndexBenchmark.BenchmarkArguments.Algorithm; +import static com.yahoo.search.predicate.benchmarks.PredicateIndexBenchmark.BenchmarkArguments.Format; +import static java.util.stream.Collectors.toList; + +/** + * A benchmark that tests the indexing and search performance. + * + * @author bjorncs + */ +public class PredicateIndexBenchmark { + + private static final Map<String, Object> output = new TreeMap<>(); + + public static void main(String[] rawArgs) throws IOException { + Optional<BenchmarkArguments> optionalArgs = getBenchmarkArguments(rawArgs); + if (!optionalArgs.isPresent()) return; + BenchmarkArguments args = optionalArgs.get(); + + putBenchmarkArgumentsToOutput(args); + + long start = System.currentTimeMillis(); + Config config = new Config.Builder() + .setArity(args.arity) + .setUseConjunctionAlgorithm(args.algorithm == Algorithm.CONJUNCTION) + .build(); + PredicateIndex index = getIndex(args, config); + if (args.indexOutputFile != null) { + writeIndexToFile(index, args.indexOutputFile); + } + if (args.queryFile != null) { + runQueries(args, index); + } + output.put("Total time", System.currentTimeMillis() - start); + output.put("Timestamp", new Date().toString()); + writeOutputToStandardOut(); + } + + private static Optional<BenchmarkArguments> getBenchmarkArguments(String[] rawArgs) { + BenchmarkArguments args = SingleCommand.singleCommand(BenchmarkArguments.class).parse(rawArgs); + if (args.helpOption.showHelpIfRequested()) { + return Optional.empty(); + } + if (args.feedFile == null && args.indexFile == null) { + System.err.println("Provide either a feed file or index file."); + return Optional.empty(); + } + return Optional.of(args); + } + + private static PredicateIndex getIndex(BenchmarkArguments args, Config config) throws IOException { + if (args.feedFile != null) { + PredicateIndexBuilder builder = new PredicateIndexBuilder(config); + long start = System.currentTimeMillis(); + AtomicInteger idCounter = new AtomicInteger(); + int documentCount = VespaFeedParser.parseDocuments( + args.feedFile, args.maxDocuments, p -> builder.indexDocument(idCounter.incrementAndGet(), p)); + output.put("Indexed document count", documentCount); + output.put("Time indexing documents", System.currentTimeMillis() - start); + builder.getStats().putValues(output); + + start = System.currentTimeMillis(); + PredicateIndex index = builder.build(); + output.put("Time prepare index", System.currentTimeMillis() - start); + return index; + } else { + try (DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(args.indexFile)))) { + long start = System.currentTimeMillis(); + PredicateIndex index = PredicateIndex.fromInputStream(in); + output.put("Time deserialize index", System.currentTimeMillis() - start); + return index; + } + } + } + + private static void writeIndexToFile(PredicateIndex index, String indexOutputFile) throws IOException { + try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(indexOutputFile)))) { + long start = System.currentTimeMillis(); + index.writeToOutputStream(out); + output.put("Time write index", System.currentTimeMillis() - start); + } + } + + private static void putBenchmarkArgumentsToOutput(BenchmarkArguments args) { + output.put("Arity", args.arity); + output.put("Max documents", args.maxDocuments); + output.put("Max queries", args.maxQueries); + output.put("Threads", args.nThreads); + output.put("Runtime", args.runtime); + output.put("Algorithm", args.algorithm); + output.put("Serialized index output file", args.indexOutputFile); + output.put("Feed file", args.feedFile); + output.put("Query file", args.queryFile); + output.put("Index file", args.indexFile); + output.put("Query format", args.format); + output.put("Warmup", args.warmup); + } + + private static void runQueries(BenchmarkArguments args, PredicateIndex index) throws IOException { + List<PredicateQuery> queries = parseQueries(args.queryFile, args.maxQueries, args.format); + long warmup1 = warmup(queries, index, args.nThreads, args.warmup / 2); + output.put("Time warmup before building posting cache", warmup1); + rebuildPostingListCache(index); + long warmup2 = warmup(queries, index, args.nThreads, args.warmup / 2); + output.put("Time warmup after building posting cache", warmup2); + searchIndex(queries, index, args.nThreads, args.runtime); + } + + private static void rebuildPostingListCache(PredicateIndex index) { + long start = System.currentTimeMillis(); + index.rebuildPostingListCache(); + output.put("Time rebuild posting list cache", System.currentTimeMillis() - start); + } + + private static List<PredicateQuery> parseQueries(String queryFile, int maxQueryCount, Format format) throws IOException { + long start = System.currentTimeMillis(); + List<PredicateQuery> queries = format == Format.VESPA ? + VespaQueryParser.parseQueries(queryFile, maxQueryCount) : + PredicateQuerySerializer.parseQueriesFromFile(queryFile, maxQueryCount); + output.put("Time parse queries", System.currentTimeMillis() - start); + output.put("Queries parsed", queries.size()); + return queries; + } + + private static long warmup(List<PredicateQuery> queries, PredicateIndex index, int nThreads, int warmup) { + ExecutorService executor = Executors.newFixedThreadPool(nThreads); + Random random = new Random(42); + for (int i = 0; i < nThreads; i++) { + List<PredicateQuery> shuffledQueries = new ArrayList<>(queries); + Collections.shuffle(shuffledQueries, random); + executor.submit(new QueryRunner(shuffledQueries, index.searcher())); + } + long start = System.currentTimeMillis(); + waitAndShutdown(warmup, executor); + return System.currentTimeMillis() - start; + } + + private static void searchIndex(List<PredicateQuery> queries, PredicateIndex index, int nThreads, int runtime) { + ExecutorService executor = Executors.newFixedThreadPool(nThreads); + Random random = new Random(42); + List<QueryRunner> runners = new ArrayList<>(); + for (int i = 0; i < nThreads; i++) { + List<PredicateQuery> shuffledQueries = new ArrayList<>(queries); + Collections.shuffle(shuffledQueries, random); + runners.add(new QueryRunner(shuffledQueries, index.searcher())); + } + long start = System.currentTimeMillis(); + List<Future<ResultMetrics>> futureResults = runners.stream().map(executor::submit).collect(toList()); + waitAndShutdown(runtime, executor); + long searchTime = System.currentTimeMillis() - start; + getResult(futureResults).writeMetrics(output, searchTime); + } + + private static void waitAndShutdown(int warmup, ExecutorService executor) { + try { + Thread.sleep(warmup * 1000); + executor.shutdownNow(); + executor.awaitTermination(2, TimeUnit.SECONDS); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + private static ResultMetrics getResult(List<Future<ResultMetrics>> futureResults) { + try { + ResultMetrics combined = futureResults.get(0).get(); + for (int i = 1; i < futureResults.size(); i++) { + combined.combine(futureResults.get(i).get()); + } + return combined; + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + } + + private static class QueryRunner implements Callable<ResultMetrics> { + private final List<PredicateQuery> queries; + private final PredicateIndex.Searcher searcher; + + public QueryRunner(List<PredicateQuery> queries, PredicateIndex.Searcher seacher) { + this.queries = queries; + this.searcher = seacher; + } + + @Override + public ResultMetrics call() throws Exception { + Iterator<PredicateQuery> iterator = Iterators.cycle(queries); + ResultMetrics result = new ResultMetrics(); + while (!Thread.interrupted()) { + long start = System.nanoTime(); + long hits = searcher.search(iterator.next()).count(); + double latencyMilliseconds = (System.nanoTime() - start) / 1_000_000d; + result.registerResult(hits, latencyMilliseconds); + } + return result; + } + } + + private static void writeOutputToStandardOut() { + try { + ObjectMapper objectMapper = new ObjectMapper(); + objectMapper.enable(SerializationFeature.INDENT_OUTPUT); + objectMapper.writeValue(System.out, output); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Command(name = "benchmark", description = "Java predicate search library benchmark") + public static class BenchmarkArguments { + public enum Format{JSON, VESPA} + public enum Algorithm{CONJUNCTION, INTERVALONLY} + + @Option(name = {"-t", "--threads"}, description = "Number of search threads") + public int nThreads = 1; + + @Option(name = {"-a", "--arity"}, description = "Arity") + public int arity = 2; + + @Option(name = {"-r", "--runtime"}, description = "Number of seconds to run queries") + public int runtime = 30; + + @Option(name = {"-md", "--max-documents"}, + description = "The maximum number of documents to index from feed file") + public int maxDocuments = Integer.MAX_VALUE; + + @Option(name = {"-mq", "--max-queries"}, description = "The maximum number of queries to run from query file") + public int maxQueries = Integer.MAX_VALUE; + + @Option(name = {"-al", "--algorithm"}, description = "Algorithm (CONJUNCTION or INTERVALONLY)") + public Algorithm algorithm = Algorithm.INTERVALONLY; + + @Option(name = {"-w", "--warmup"}, description = "Warmup in seconds.") + public int warmup = 30; + + @Option(name = {"-qf", "--query-format"}, + description = "Query format. Valid formats are either 'VESPA' (obsolete query property format) or 'JSON'.") + public Format format = Format.VESPA; + + @Option(name = {"-ff", "--feed-file"}, description = "File path to feed file (Vespa XML feed)") + public String feedFile; + + @Option(name = {"-if", "--index-file"}, description = "File path to index file (Serialized index)") + public String indexFile; + + @Option(name = {"-wi", "--write-index"}, description = "Serialize index to the given file") + public String indexOutputFile; + + @Option(name = {"-quf", "--query-file"}, description = "File path to a query file") + public String queryFile; + + @Inject + public HelpOption helpOption; + } +} diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/ResultMetrics.java b/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/ResultMetrics.java new file mode 100644 index 00000000000..801937c995f --- /dev/null +++ b/predicate-search/src/main/java/com/yahoo/search/predicate/benchmarks/ResultMetrics.java @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.predicate.benchmarks; + +import java.util.Map; + +/** + * Various metrics stored during query execution + * + * @author bjorncs + */ +public class ResultMetrics { + private static final int MAX_LATENCY = 100; // ms + private static final int RESOLUTION = 25; // sample points per ms + private static final int SLOTS = MAX_LATENCY * RESOLUTION; + + private long totalQueries = 0; + private long totalHits = 0; + private double maxLatency = Double.MIN_VALUE; + private double minLatency = Double.MAX_VALUE; + private final long[] latencyHistogram = new long[SLOTS]; + + public void registerResult(long hits, double latencyMilliseconds) { + if (latencyMilliseconds > maxLatency) { + maxLatency = latencyMilliseconds; + } + if (latencyMilliseconds < minLatency) { + minLatency = latencyMilliseconds; + } + totalHits += hits; + ++totalQueries; + int latencySlot = (int) Math.round(latencyMilliseconds * RESOLUTION); + // Note: extreme latency values are ignored in the histogram for simplicity + if (latencySlot < SLOTS) { + ++latencyHistogram[latencySlot]; + } + } + + public void combine(ResultMetrics other) { + totalQueries += other.totalQueries; + minLatency = Math.min(minLatency, other.minLatency); + maxLatency = Math.max(maxLatency, other.maxLatency); + totalHits += other.totalHits; + for (int i = 0; i < SLOTS; i++) { + latencyHistogram[i] += other.latencyHistogram[i]; + } + } + + public void writeMetrics(Map<String, Object> metricMap, long timeSearch) { + double qps = timeSearch == 0 ? 0 : (1000d * totalQueries / timeSearch); + metricMap.put("QPS", qps); + metricMap.put("Time search", timeSearch); + metricMap.put("Total hits", totalHits); + metricMap.put("Total queries", totalQueries); + metricMap.put("Max latency", latencyToString(maxLatency)); + metricMap.put("Min latency", latencyToString(minLatency)); + metricMap.put("99.9 percentile", latencyToString(percentile(0.999))); + metricMap.put("99 percentile", latencyToString(percentile(0.99))); + metricMap.put("90 percentile", latencyToString(percentile(0.90))); + metricMap.put("75 percentile", latencyToString(percentile(0.75))); + metricMap.put("50 percentile", latencyToString(percentile(0.50))); + } + + private double percentile(double percentile) { + int targetCount = (int) Math.round(totalQueries * percentile); + int currentCount = 0; + int index = 0; + while (currentCount < targetCount && index < SLOTS) { + currentCount += latencyHistogram[index]; + ++index; + } + if (index == SLOTS) { + return maxLatency; + } + return toLatency(currentCount == targetCount ? index + 1 : index); + } + + private static String latencyToString(double averageLatency) { + return String.format("%.2fms", averageLatency); + } + + private static double toLatency(int index) { + return (index + 0.5) / (double) RESOLUTION; + } +} |