summaryrefslogtreecommitdiffstats
path: root/predicate-search/src/main/java/com/yahoo/search/predicate/utils
diff options
context:
space:
mode:
Diffstat (limited to 'predicate-search/src/main/java/com/yahoo/search/predicate/utils')
-rw-r--r--predicate-search/src/main/java/com/yahoo/search/predicate/utils/PostingListSearch.java89
-rw-r--r--predicate-search/src/main/java/com/yahoo/search/predicate/utils/PrimitiveArraySorter.java97
-rw-r--r--predicate-search/src/main/java/com/yahoo/search/predicate/utils/TargetingQueryFileConverter.java289
-rw-r--r--predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaFeedParser.java44
-rw-r--r--predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaFeedWriter.java43
-rw-r--r--predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaQueryParser.java105
6 files changed, 667 insertions, 0 deletions
diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/utils/PostingListSearch.java b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/PostingListSearch.java
new file mode 100644
index 00000000000..93246bfaf85
--- /dev/null
+++ b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/PostingListSearch.java
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.predicate.utils;
+
/**
 * Algorithms for searching in the docId arrays in posting lists.
 *
 * All searches return the first index in the searched range whose value is larger than the key,
 * or the exclusive upper bound of the range when no such value exists.
 * The shortcuts taken by the interpolation search (returning {@code low + 1} on an exact hit) are only
 * correct when the searched range is sorted in strictly increasing order, i.e. contains no duplicates.
 *
 * @author bjorncs
 */
public class PostingListSearch {

    // Use linear search when size less than threshold
    public static final int LINEAR_SEARCH_THRESHOLD = 16;
    // Use linear search when value difference between first value and key is less than threshold
    public static final int LINEAR_SEARCH_THRESHOLD_2 = 32;
    // Use binary search when size is less than threshold
    public static final int BINARY_SEARCH_THRESHOLD = 32768;

    /**
     * Finds the first index in {@code [fromIndex, toIndex)} where {@code a[index]} is larger than {@code key}.
     * Falls back to linear search when the key is close to the first value, and to binary search when
     * the range is small; otherwise narrows the range by interpolation before doing a binary search.
     *
     * @param a sorted array of values (strictly increasing within the searched range)
     * @param fromIndex first index of the range, inclusive
     * @param toIndex last index of the range, exclusive
     * @param key the value to search for
     * @return the first index with a value larger than {@code key}, or {@code toIndex} if there is none;
     *         {@code fromIndex} if the range is empty
     */
    public static int interpolationSearch(int[] a, int fromIndex, int toIndex, int key) {
        // An empty range has no first element to inspect; reading a[fromIndex] could be out of bounds.
        if (fromIndex >= toIndex) {
            return fromIndex;
        }
        int low = fromIndex;
        int lowVal = a[low];
        if (key - lowVal < LINEAR_SEARCH_THRESHOLD_2) {
            return linearSearch(a, low, toIndex, key);
        }
        int high = toIndex - 1;
        int diff = high - low;
        if (diff <= BINARY_SEARCH_THRESHOLD) {
            return binarySearch(a, low, toIndex, key);
        }
        int highVal = a[high];
        do {
            if (key == lowVal) {
                return low + 1;
            }
            if (key >= highVal) {
                return high + 1;
            }
            // Estimated offset of the key, assuming values are evenly distributed between lowVal and highVal.
            // The multiplication is done in long to avoid int overflow.
            int mean = (int) (diff * (long) (key - lowVal) / (highVal - lowVal));
            // Probe a window of +/- 1/16 of the current range around the estimate.
            int eps = diff >>> 4;
            int lowMid = low + Math.max(0, mean - eps);
            int highMid = low + Math.min(diff, mean + eps);
            assert lowMid <= highMid;
            assert lowMid >= low;
            assert highMid <= high;

            if (a[lowMid] > key) {
                // Key is below the window.
                high = lowMid;
                highVal = a[lowMid];
            } else if (a[highMid] <= key) {
                // Key is at or above the upper end of the window.
                low = highMid;
                lowVal = a[highMid];
            } else {
                // Key is inside the window.
                low = lowMid;
                lowVal = a[lowMid];
                high = highMid;
                highVal = a[highMid];
            }
            assert low <= high;
            diff = high - low;
        } while (diff >= BINARY_SEARCH_THRESHOLD);
        return binarySearch(a, low, high + 1, key);
    }

    /**
     * Modified binary search:
     *  - Returns the first index where a[index] is larger than key
     *  - Switches to linear search once the remaining range is small
     */
    private static int binarySearch(int[] a, int fromIndex, int toIndex, int key) {
        assert fromIndex < toIndex;
        int low = fromIndex;
        int high = toIndex - 1;
        while (high - low > LINEAR_SEARCH_THRESHOLD) {
            int mid = (low + high) >>> 1;
            assert mid < high;
            if (a[mid] < key) {
                low = mid + 1;
            } else {
                high = mid;
            }
        }
        return linearSearch(a, low, high + 1, key);
    }

    /**
     * Scans forward from {@code low} and returns the first index where a[index] is larger than key,
     * or {@code high} if every value in {@code [low, high)} is less than or equal to key.
     */
    private static int linearSearch(int[] a, int low, int high, int key) {
        assert low < high;
        while (low < high && a[low] <= key) {
            ++low;
        }
        return low;
    }
}
diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/utils/PrimitiveArraySorter.java b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/PrimitiveArraySorter.java
new file mode 100644
index 00000000000..63b7acc6042
--- /dev/null
+++ b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/PrimitiveArraySorter.java
@@ -0,0 +1,97 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.predicate.utils;
+
/**
 * This class enables sorting of an array of primitive short values using a supplied comparator for custom ordering.
 * The sort methods in Java standard library cannot sort using a comparator for primitive arrays.
 * Sorting is performed using Quicksort, switching to insertion sort for small ranges.
 *
 * @author bjorncs
 */
public class PrimitiveArraySorter {

    /** Comparator for primitive shorts; same contract as {@link java.util.Comparator#compare}. */
    @FunctionalInterface
    public interface ShortComparator {
        int compare(short l, short r);
    }

    private PrimitiveArraySorter() {}

    /**
     * Sorts the whole array in place.
     *
     * @param array the array to sort
     * @param comparator defines the ordering
     */
    public static void sort(short[] array, ShortComparator comparator) {
        sort(array, 0, array.length, comparator);
    }

    /**
     * Sorts the range {@code [fromIndex, toIndex)} in place. Elements outside the range are not touched.
     *
     * @param array the array holding the range to sort
     * @param fromIndex first index of the range, inclusive
     * @param toIndex last index of the range, exclusive
     * @param comparator defines the ordering
     */
    public static void sort(short[] array, int fromIndex, int toIndex, ShortComparator comparator) {
        // Sort using insertion sort for size of at most 20.
        if (toIndex - fromIndex <= 20) {
            insertionSort(array, fromIndex, toIndex, comparator);
            return;
        }
        int i = fromIndex;
        int j = toIndex - 1;
        short pivotValue = array[i + (j - i) / 2]; // Use middle item as pivot value.
        while (i < j) {
            while (comparator.compare(pivotValue, array[i]) > 0) ++i;
            while (comparator.compare(array[j], pivotValue) > 0) --j;
            if (i < j) {
                short temp = array[i];
                array[i] = array[j];
                array[j] = temp;
                ++i;
                --j;
            }
        }
        // When the scan ends with i == j, both sub-ranges include that element; this is harmless.
        if (fromIndex < j) {
            sort(array, fromIndex, j + 1, comparator);
        }
        if (i < toIndex - 1) {
            sort(array, i, toIndex, comparator);
        }
    }

    /**
     * Sorts {@code [0, pivotIndex)} and, if needed, merges it with {@code [pivotIndex, toIndex)} into
     * {@code mergeArray}. The range {@code [pivotIndex, toIndex)} is expected to be sorted already;
     * this method does not sort it.
     *
     * @param array the array holding both partitions
     * @param mergeArray destination for the merged result; must have room for {@code toIndex} elements
     * @param pivotIndex index of the first element of the second partition
     * @param toIndex end of the second partition, exclusive
     * @param comparator defines the ordering
     * @return true if the sorted result was written to {@code mergeArray};
     *         false if no merge was needed and {@code array} holds the sorted result
     */
    public static boolean sortAndMerge(short[] array, short[] mergeArray, int pivotIndex, int toIndex, ShortComparator comparator) {
        if (array.length == 1) return false;
        // An empty first partition leaves nothing to sort or merge (and no array[pivotIndex - 1] to inspect).
        if (pivotIndex == 0) return false;
        sort(array, 0, pivotIndex, comparator);
        if (pivotIndex == toIndex || comparator.compare(array[pivotIndex - 1], array[pivotIndex]) <= 0) {
            return false;
        }
        merge(array, mergeArray, pivotIndex, toIndex, comparator);
        return true;
    }

    /**
     * Merges the sorted partitions {@code [0, pivotIndex)} and {@code [pivotIndex, array.length)}
     * into {@code mergeArray}.
     */
    public static void merge(short[] array, short[] mergeArray, int pivotIndex, ShortComparator comparator) {
        merge(array, mergeArray, pivotIndex, array.length, comparator);
    }

    /**
     * Merges the sorted partitions {@code [0, pivotIndex)} and {@code [pivotIndex, toIndex)} of
     * {@code array} into {@code mergeArray}, starting at index 0. On ties the element from the first
     * partition is written first.
     */
    public static void merge(short[] array, short[] mergeArray, int pivotIndex, int toIndex, ShortComparator comparator) {
        int indexMergeArray = 0;
        int indexPartition0 = 0;
        int indexPartition1 = pivotIndex;
        while (indexPartition0 < pivotIndex && indexPartition1 < toIndex) {
            short val0 = array[indexPartition0];
            short val1 = array[indexPartition1];
            if (comparator.compare(val0, val1) <= 0) {
                mergeArray[indexMergeArray++] = val0;
                ++indexPartition0;
            } else {
                mergeArray[indexMergeArray++] = val1;
                ++indexPartition1;
            }
        }
        // At most one of the partitions has elements left; copy the remainder.
        int nLeftPartition0 = pivotIndex - indexPartition0;
        System.arraycopy(array, indexPartition0, mergeArray, indexMergeArray, nLeftPartition0);
        System.arraycopy(array, indexPartition1, mergeArray, indexMergeArray + nLeftPartition0, toIndex - indexPartition1);
    }

    /** Insertion sort of {@code [fromIndex, toIndex)}; never reads or writes outside that range. */
    private static void insertionSort(short[] array, int fromIndex, int toIndex, ShortComparator comparator) {
        for (int i = fromIndex + 1; i < toIndex; ++i) {
            int j = i;
            // Stop at fromIndex (not 0) so elements before the range are never swapped into it.
            while (j > fromIndex && comparator.compare(array[j - 1], array[j]) > 0) {
                short temp = array[j - 1];
                array[j - 1] = array[j];
                array[j] = temp;
                --j;
            }
        }
    }
}
diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/utils/TargetingQueryFileConverter.java b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/TargetingQueryFileConverter.java
new file mode 100644
index 00000000000..a333286b465
--- /dev/null
+++ b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/TargetingQueryFileConverter.java
@@ -0,0 +1,289 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.predicate.utils;
+
+import com.google.common.net.UrlEscapers;
+import com.yahoo.search.predicate.PredicateQuery;
+import com.yahoo.search.predicate.serialization.PredicateQuerySerializer;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Stream;
+
+import static java.util.stream.Collectors.joining;
+
+/**
+ * Converts a targeting query (the format provided by targeting team) into a file of Vespa queries formatted as URLs.
+ *
+ * The format is the following:
+ * - Each line represents one bulk query (upto 64 subqueries)
+ * - Each bulk query has a set of subqueries separated by ";"
+ * - Each subquery is of the format: attrName\tattrValue\tsubqueryIndex\tisRangeTerm;
+ * - Some attributes have no value.
+ * - Value may contain ";"
+ *
+ * @author bjorncs
+ */
+public class TargetingQueryFileConverter {
+
+ // Subqueries having more than this value are skipped.
+ private static final int MAX_NUMBER_OF_TERMS = 100;
+
+ private enum OutputFormat {JSON, YQL}
+
+ private TargetingQueryFileConverter() {}
+
+ public static void main(String[] args) throws IOException {
+ int nQueries = 123042;
+ int batchFactor = 64;
+ Subqueries subqueries = parseRiseQueries(new File("test-data/rise-query2.txt"), nQueries);
+ filterOutHugeSubqueries(subqueries);
+ List<Query> queries = batchSubqueries(subqueries, batchFactor);
+ writeSubqueriesToFile(
+ queries,
+ new File("test-data/targeting-queries-json-" + batchFactor + "b-" + nQueries + "n.txt"),
+ OutputFormat.JSON);
+ writeSubqueriesToFile(
+ queries,
+ new File("test-data/targeting-queries-yql-" + batchFactor + "b-" + nQueries + "n.txt"),
+ OutputFormat.YQL);
+ }
+
+
+ private static void writeSubqueriesToFile(List<Query> queries, File output, OutputFormat outputFormat)
+ throws IOException {
+ try (BufferedWriter writer = new BufferedWriter(new FileWriter(output))) {
+ if (outputFormat == OutputFormat.JSON) {
+ writeJSONOutput(writer, queries);
+ } else {
+ writeYQLOutput(writer, queries);
+ }
+
+ }
+ }
+
+ private static void writeJSONOutput(BufferedWriter writer, List<Query> queries) throws IOException {
+ PredicateQuerySerializer serializer = new PredicateQuerySerializer();
+ for (Query query : queries) {
+ PredicateQuery predicateQuery = toPredicateQuery(query);
+ String json = serializer.toJSON(predicateQuery);
+ writer.append(json).append('\n');
+ }
+ }
+
+ private static PredicateQuery toPredicateQuery(Query query) {
+ PredicateQuery predicateQuery = new PredicateQuery();
+ for (Map.Entry<Long, Set<Feature>> e : query.valuesForSubqueries.entrySet()) {
+ e.getValue().forEach(f -> predicateQuery.addFeature(f.key, f.strValue, e.getKey()));
+ }
+ for (Map.Entry<Long, Set<Feature>> e : query.rangesForSubqueries.entrySet()) {
+ e.getValue().forEach(f -> predicateQuery.addRangeFeature(f.key, f.longValue, e.getKey()));
+ }
+ return predicateQuery;
+ }
+
+ private static void writeYQLOutput(BufferedWriter writer, List<Query> queries) throws IOException {
+ for (Query query : queries) {
+ writer.append(toYqlString(query)).append('\n');
+ }
+ }
+
+ private static String toYqlString(Query query) {
+ StringBuilder yqlBuilder = new StringBuilder("select * from sources * where predicate(boolean, ");
+ yqlBuilder
+ .append(createYqlFormatSubqueryMapString(query.valuesForSubqueries, query.isSingleQuery))
+ .append(", ")
+ .append(createYqlFormatSubqueryMapString(query.rangesForSubqueries, query.isSingleQuery))
+ .append(");");
+ return "/search/?query&nocache&yql=" + UrlEscapers.urlFormParameterEscaper().escape(yqlBuilder.toString());
+ }
+
+ /*
+ * The subqueryBatchFactor determines the batch factor for each query. A maximum of 64 queries can be batched
+ * into a single query (as subqueries).
+ * 0 => Do not batch and output plain queries (no subquery).
+ * 1 => Do not batch, but output queries with single subquery.
+ */
+ private static List<Query> batchSubqueries(Subqueries subqueries, int subqueryBatchFactor) {
+ Iterator<Integer> iterator = subqueries.subqueries.iterator();
+ List<Query> result = new ArrayList<>();
+ while (iterator.hasNext()) {
+ // Aggregate the subqueries that contains a given value.
+ Map<Feature, Long> subqueriesForValue = new TreeMap<>();
+ Map<Feature, Long> subqueriesForRange = new TreeMap<>();
+ // Batch single to single subquery for batch factor 0.
+ for (int i = 0; i < Math.max(1, subqueryBatchFactor) && iterator.hasNext(); ++i) {
+ Integer subquery = iterator.next();
+ registerSubqueryValues(i, subqueries.valuesForSubquery.get(subquery), subqueriesForValue);
+ registerSubqueryValues(i, subqueries.rangesForSubquery.get(subquery), subqueriesForRange);
+ }
+
+ // Aggregate the values that are contained in a given set of subqueries.
+ Query query = new Query(subqueryBatchFactor == 0);
+ simplifyAndFillQueryValues(query.valuesForSubqueries, subqueriesForValue);
+ simplifyAndFillQueryValues(query.rangesForSubqueries, subqueriesForRange);
+ result.add(query);
+ }
+ return result;
+ }
+
+ private static void registerSubqueryValues(int subquery, Set<Feature> values, Map<Feature, Long> subqueriesForValue) {
+ if (values != null) {
+ values.forEach(value -> subqueriesForValue.merge(value, 1L << subquery, (ids1, ids2) -> ids1 | ids2));
+ }
+ }
+
+ private static void simplifyAndFillQueryValues(Map<Long, Set<Feature>> queryValues, Map<Feature, Long> subqueriesForValue) {
+ for (Map.Entry<Feature, Long> entry : subqueriesForValue.entrySet()) {
+ Feature feature = entry.getKey();
+ Long subqueryBitmap = entry.getValue();
+ Set<Feature> featureSet = queryValues.computeIfAbsent(subqueryBitmap, (k) -> new HashSet<>());
+ featureSet.add(feature);
+ }
+ }
+
+ private static String createYqlFormatSubqueryMapString(Map<Long, Set<Feature>> subqueriesForString, boolean isSingleQuery) {
+ return subqueriesForString.entrySet().stream()
+ .map(e -> {
+ Stream<String> features = e.getValue().stream().map(Feature::asYqlString);
+ if (isSingleQuery) {
+ return features.collect(joining(", "));
+ } else {
+ // Note: Cannot use method reference as both method toString(int) and method toString() match.
+ String values = features.collect(joining(", ", "{", "}"));
+ return String.format("\"0x%s\":%s", Long.toHexString(e.getKey()), values);
+ }
+ })
+ .collect(joining(", ", "{", "}"));
+ }
+
+ private static Subqueries parseRiseQueries(File riseQueryFile, int maxQueries) throws IOException {
+ try (BufferedReader reader = new BufferedReader(new FileReader(riseQueryFile))) {
+ Subqueries parsedSubqueries = new Subqueries();
+ AtomicInteger counter = new AtomicInteger(1);
+ reader.lines()
+ .limit(maxQueries)
+ .forEach(riseQuery -> parseRiseQuery(parsedSubqueries, riseQuery, counter.getAndIncrement()));
+ return parsedSubqueries;
+ }
+ }
+
+ private static void filterOutHugeSubqueries(Subqueries subqueries) {
+ Iterator<Integer> iterator = subqueries.subqueries.iterator();
+ while (iterator.hasNext()) {
+ Integer subquery = iterator.next();
+ Set<Feature> values = subqueries.valuesForSubquery.get(subquery);
+ Set<Feature> ranges = subqueries.rangesForSubquery.get(subquery);
+ int sizeValues = values == null ? 0 : values.size();
+ int sizeRanges = ranges == null ? 0 : ranges.size();
+ if (sizeValues + sizeRanges > MAX_NUMBER_OF_TERMS) {
+ iterator.remove();
+ subqueries.valuesForSubquery.remove(subquery);
+ subqueries.rangesForSubquery.remove(subquery);
+ }
+ }
+ }
+
+ private static void parseRiseQuery(Subqueries subqueries, String queryString, int queryId) {
+ StringTokenizer subQueryTokenizer = new StringTokenizer(queryString, "\t", true);
+ while (subQueryTokenizer.hasMoreTokens()) {
+ String key = subQueryTokenizer.nextToken("\t");
+ subQueryTokenizer.nextToken(); // Consume delimiter
+ String value = subQueryTokenizer.nextToken();
+ if (value.equals("\t")) {
+ value = "";
+ } else {
+ subQueryTokenizer.nextToken(); // Consume delimiter
+ }
+ int subQueryIndex = Integer.parseInt(subQueryTokenizer.nextToken());
+ subQueryTokenizer.nextToken(); // Consume delimiter
+ boolean isRangeTerm = Boolean.parseBoolean(subQueryTokenizer.nextToken(";"));
+ if (subQueryTokenizer.hasMoreTokens()) {
+ subQueryTokenizer.nextToken(); // Consume delimiter
+ }
+ int subqueryId = subQueryIndex + 64 * queryId;
+ if (isRangeTerm) {
+ Set<Feature> rangeFeatures = subqueries.rangesForSubquery.computeIfAbsent(
+ subqueryId, (id) -> new HashSet<>());
+ rangeFeatures.add(new Feature(key, Long.parseLong(value)));
+ } else {
+ Set<Feature> features = subqueries.valuesForSubquery.computeIfAbsent(subqueryId, (id) -> new HashSet<>());
+ features.add(new Feature(key, value));
+ }
+ subqueries.subqueries.add(subqueryId);
+ }
+ }
+
+ private static class Subqueries {
+ public final TreeSet<Integer> subqueries = new TreeSet<>();
+ public final Map<Integer, Set<Feature>> valuesForSubquery = new HashMap<>();
+ public final Map<Integer, Set<Feature>> rangesForSubquery = new HashMap<>();
+ }
+
+ private static class Query {
+ public final boolean isSingleQuery;
+ public final Map<Long, Set<Feature>> valuesForSubqueries = new TreeMap<>();
+ public final Map<Long, Set<Feature>> rangesForSubqueries = new TreeMap<>();
+
+ public Query(boolean isSingleQuery) {
+ this.isSingleQuery = isSingleQuery;
+ }
+ }
+
+ private static class Feature implements Comparable<Feature> {
+ public final String key;
+ private final String strValue;
+ private final long longValue;
+
+ public Feature(String key, String value) {
+ this.key = key;
+ this.strValue = value;
+ this.longValue = 0;
+ }
+
+ public Feature(String key, long value) {
+ this.key = key;
+ this.strValue = null;
+ this.longValue = value;
+ }
+
+ public String asYqlString() {
+ if (strValue != null) {
+ return String.format("\"%s\":\"%s\"", key, strValue);
+ } else {
+ return String.format("\"%s\":%dl", key, longValue);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof Feature)) return false;
+
+ Feature feature = (Feature) o;
+
+ if (longValue != feature.longValue) return false;
+ if (!key.equals(feature.key)) return false;
+ return !(strValue != null ? !strValue.equals(feature.strValue) : feature.strValue != null);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = key.hashCode();
+ result = 31 * result + (strValue != null ? strValue.hashCode() : 0);
+ result = 31 * result + (int) (longValue ^ (longValue >>> 32));
+ return result;
+ }
+
+ @Override
+ public int compareTo(Feature o) {
+ return asYqlString().compareTo(o.asYqlString());
+ }
+ }
+}
diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaFeedParser.java b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaFeedParser.java
new file mode 100644
index 00000000000..8ba9236a66c
--- /dev/null
+++ b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaFeedParser.java
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.predicate.utils;
+
+import com.yahoo.document.predicate.Predicate;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.function.Consumer;
+
+/**
+ * Parses a feed file containing documents in XML format. Its implementation is based on the following assumptions:
+ * 1. Each document has single predicate field.
+ * 2. The predicate is stored in a field named "boolean".
+ *
+ * @author bjorncs
+ */
+public class VespaFeedParser {
+
+ public static int parseDocuments(String feedFile, int maxDocuments, Consumer<Predicate> consumer) throws IOException {
+ int documentCount = 0;
+ try (BufferedReader reader = new BufferedReader(new FileReader(feedFile), 8 * 1024)) {
+ reader.readLine();
+ reader.readLine(); // Skip to start of first document
+ String line = reader.readLine();
+ while (!line.startsWith("</vespafeed>") && documentCount < maxDocuments) {
+ while (!line.startsWith("<boolean>")) {
+ line = reader.readLine();
+ }
+ Predicate predicate = Predicate.fromString(extractBooleanExpression(line));
+ consumer.accept(predicate);
+ ++documentCount;
+ while (!line.startsWith("<document") && !line.startsWith("</vespafeed>")) {
+ line = reader.readLine();
+ }
+ }
+ }
+ return documentCount;
+ }
+
+ private static String extractBooleanExpression(String line) {
+ return line.substring(9, line.length() - 10);
+ }
+}
diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaFeedWriter.java b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaFeedWriter.java
new file mode 100644
index 00000000000..544a9a12af0
--- /dev/null
+++ b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaFeedWriter.java
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.predicate.utils;
+
+import com.yahoo.document.predicate.Predicate;
+import org.apache.commons.lang.StringEscapeUtils;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.Writer;
+
+/**
+ * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a>
+ */
+public class VespaFeedWriter extends BufferedWriter {
+ private String namespace;
+ private String documentType;
+
+ VespaFeedWriter(Writer writer, String namespace, String documentType) throws IOException {
+ super(writer);
+ this.namespace = namespace;
+ this.documentType = documentType;
+
+ this.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n");
+ this.append("<vespafeed>\n");
+ }
+
+ @Override
+ public void close() throws IOException {
+ this.append("</vespafeed>\n");
+ super.close();
+ }
+
+ public void writePredicateDocument(int id, String fieldName, Predicate predicate) {
+ try {
+ this.append(String.format("<document documenttype=\"%2$s\" documentid=\"id:%1$s:%2$s::%3$d\">\n",
+ namespace, documentType, id));
+ this.append("<" + fieldName + ">" + StringEscapeUtils.escapeHtml(predicate.toString()) + "</" + fieldName + ">\n");
+ this.append("</document>\n");
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaQueryParser.java b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaQueryParser.java
new file mode 100644
index 00000000000..b8ec20c59a0
--- /dev/null
+++ b/predicate-search/src/main/java/com/yahoo/search/predicate/utils/VespaQueryParser.java
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.predicate.utils;
+
+import com.yahoo.search.predicate.PredicateQuery;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.BiConsumer;
+
+import static java.util.stream.Collectors.toList;
+
+/**
+ * Parses query file containing Vespa queries using the deprecated predicate format (query properties - not YQL).
+ *
+ * @author bjorncs
+ */
+public class VespaQueryParser {
+
+ /**
+ * Parses a query formatted using the deprecated boolean query format (query properties).
+ */
+ public static List<PredicateQuery> parseQueries(String queryFile, int maxQueryCount) throws IOException {
+ try (BufferedReader reader = new BufferedReader(new FileReader(queryFile), 8 * 1024)) {
+ List<PredicateQuery> queries = reader.lines()
+ .limit(maxQueryCount)
+ .map(VespaQueryParser::parseQueryFromQueryProperties)
+ .collect(toList());
+ return queries;
+ }
+ }
+
+ public static PredicateQuery parseQueryFromQueryProperties(String queryString) {
+ try {
+ // Decode the URL in case the query property content is escaped.
+ queryString = URLDecoder.decode(queryString, "UTF-8");
+ PredicateQuery query = new PredicateQuery();
+ extractQueryValues(queryString, "boolean.attributes", query::addFeature);
+ extractQueryValues(queryString, "boolean.rangeAttributes",
+ (k, v) -> query.addRangeFeature(k, Integer.parseInt(v)));
+ return query;
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static void extractQueryValues(String query, String prefix, BiConsumer<String, String> registerTerm) {
+ int rangeIndex = query.indexOf(prefix);
+ if (rangeIndex != -1) {
+ // Adding 2 to skip '={'
+ int startIndex = rangeIndex + prefix.length() + 2;
+ // '%7D' represents the end of the predicate string.
+ int endIndex = query.indexOf("}", startIndex);
+ String rangeString = query.substring(startIndex, endIndex);
+ List<Feature> features = new ArrayList<>();
+ String[] keyValuePairs = rangeString.split(",");
+
+ for (String keyValuePair : keyValuePairs) {
+ String[] keyAndValue = keyValuePair.split(":");
+ // If not colon is found, the string is part of the previous value.
+ if (keyAndValue.length == 1) {
+ Feature feature = features.get(features.size() - 1);
+ feature.value += ("," + keyValuePair);
+ } else {
+ features.add(new Feature(keyAndValue[0], keyAndValue[1]));
+ }
+ }
+ features.stream().forEach(f -> registerTerm.accept(f.key, f.value));
+ }
+ }
+
+ private static class Feature {
+ public final String key;
+ public String value;
+
+ private Feature(String key, String value) {
+ this.key = key;
+ this.value = value;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ Feature feature = (Feature) o;
+
+ if (!key.equals(feature.key)) return false;
+ if (!value.equals(feature.value)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = key.hashCode();
+ result = 31 * result + value.hashCode();
+ return result;
+ }
+ }
+}