diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /container-search/src/main/java/com/yahoo/search/yql |
Publish
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/yql')
34 files changed, 6504 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/yql/ArgumentsTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/ArgumentsTypeChecker.java new file mode 100644 index 00000000000..c297bf80cac --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ArgumentsTypeChecker.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; + +import java.util.List; + +final class ArgumentsTypeChecker { + + private final Operator target; + private final List<OperatorTypeChecker> checkers; + + public ArgumentsTypeChecker(Operator target, List<OperatorTypeChecker> checkers) { + this.target = target; + this.checkers = checkers; + } + + public void check(Object... args) { + if (args == null) { + Preconditions.checkArgument(checkers.size() == 0, "Operator %s argument count mismatch: expected %s got 0", target, checkers.size()); + return; + } else { + Preconditions.checkArgument(args.length == checkers.size(), "Operator %s argument count mismatch: expected: %s got %s", target, checkers.size(), args.length); + } + for (int i = 0; i < checkers.size(); ++i) { + checkers.get(i).check(args[i]); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveFileStream.java b/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveFileStream.java new file mode 100644 index 00000000000..33e684357af --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveFileStream.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import org.antlr.v4.runtime.ANTLRFileStream; +import org.antlr.v4.runtime.CharStream; + +import java.io.IOException; + +/** + * Enable ANTLR to do case insensitive comparisons when reading from files without throwing away the case in the token. + */ + +class CaseInsensitiveFileStream extends ANTLRFileStream { + + public CaseInsensitiveFileStream(String fileName) throws IOException { + super(fileName); + } + + public CaseInsensitiveFileStream(String fileName, String encoding) throws IOException { + super(fileName, encoding); + } + + @Override + public int LA(int i) { + if (i == 0) { + return 0; + } + if (i < 0) { + i++; // e.g., translate LA(-1) to use offset 0 + } + + if ((p + i - 1) >= n) { + return CharStream.EOF; + } + return Character.toLowerCase(data[p + i - 1]); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveInputStream.java b/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveInputStream.java new file mode 100644 index 00000000000..e15fe04bb39 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/CaseInsensitiveInputStream.java @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CharStream; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Enable ANTLR to do case insensitive comparisons when reading from files without throwing away the case in the token. 
+ */ +class CaseInsensitiveInputStream extends ANTLRInputStream { + + public CaseInsensitiveInputStream() { + super(); + } + + public CaseInsensitiveInputStream(InputStream input) throws IOException { + super(input); + } + + public CaseInsensitiveInputStream(InputStream input, int size) throws IOException { + super(input, size); + } + + public CaseInsensitiveInputStream(char[] data, int numberOfActualCharsInArray) throws IOException { + super(data, numberOfActualCharsInArray); + } + + public CaseInsensitiveInputStream(String input) throws IOException { + super(input); + } + + @Override + public int LA(int i) { + if (i == 0) { + return 0; + } + if (i < 0) { + i++; // e.g., translate LA(-1) to use offset 0 + } + + if ((p + i - 1) >= n) { + return CharStream.EOF; + } + return Character.toLowerCase(data[p + i - 1]); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ExpressionOperator.java b/container-search/src/main/java/com/yahoo/search/yql/ExpressionOperator.java new file mode 100644 index 00000000000..e9fe52d33e7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ExpressionOperator.java @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; + +/** + * Operators on expressions. 
+ */ +enum ExpressionOperator implements Operator { + + AND(TypeCheckers.EXPRS), + OR(TypeCheckers.EXPRS), + EQ(ExpressionOperator.class, ExpressionOperator.class), + NEQ(ExpressionOperator.class, ExpressionOperator.class), + LT(ExpressionOperator.class, ExpressionOperator.class), + GT(ExpressionOperator.class, ExpressionOperator.class), + LTEQ(ExpressionOperator.class, ExpressionOperator.class), + GTEQ(ExpressionOperator.class, ExpressionOperator.class), + + IN(ExpressionOperator.class, ExpressionOperator.class), + IN_QUERY(ExpressionOperator.class, SequenceOperator.class), + NOT_IN(ExpressionOperator.class, ExpressionOperator.class), + NOT_IN_QUERY(ExpressionOperator.class, SequenceOperator.class), + + LIKE(ExpressionOperator.class, ExpressionOperator.class), + NOT_LIKE(ExpressionOperator.class, ExpressionOperator.class), + + IS_NULL(ExpressionOperator.class), + IS_NOT_NULL(ExpressionOperator.class), + MATCHES(ExpressionOperator.class, ExpressionOperator.class), + NOT_MATCHES(ExpressionOperator.class, ExpressionOperator.class), + CONTAINS(ExpressionOperator.class, ExpressionOperator.class), + + ADD(ExpressionOperator.class, ExpressionOperator.class), + SUB(ExpressionOperator.class, ExpressionOperator.class), + MULT(ExpressionOperator.class, ExpressionOperator.class), + DIV(ExpressionOperator.class, ExpressionOperator.class), + MOD(ExpressionOperator.class, ExpressionOperator.class), + + NEGATE(ExpressionOperator.class), + NOT(ExpressionOperator.class), + + MAP(TypeCheckers.LIST_OF_STRING, TypeCheckers.EXPRS), + + ARRAY(TypeCheckers.EXPRS), + + INDEX(ExpressionOperator.class, ExpressionOperator.class), + PROPREF(ExpressionOperator.class, String.class), + + CALL(TypeCheckers.LIST_OF_STRING, TypeCheckers.EXPRS), + + VARREF(String.class), + + LITERAL(TypeCheckers.LITERAL_TYPES), + + READ_RECORD(String.class), + READ_FIELD(String.class, String.class), + READ_MODULE(TypeCheckers.LIST_OF_STRING), + + VESPA_GROUPING(String.class), + + NULL(); + + private final 
ArgumentsTypeChecker checker; + + + private ExpressionOperator(Object... types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof ExpressionOperator; + } + }; + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/FieldFiller.java b/container-search/src/main/java/com/yahoo/search/yql/FieldFiller.java new file mode 100644 index 00000000000..f6e8ee1f27a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/FieldFiller.java @@ -0,0 +1,156 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import com.google.common.annotations.Beta; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig.Documentdb; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig.Documentdb.Summaryclass; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig.Documentdb.Summaryclass.Fields; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.Presentation; +import com.yahoo.search.searchchain.Execution; + +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * Ensure the fields specified in {@link Presentation#getSummaryFields()} are + * available after filling phase. 
+ * + * @author <a href="mailto:stiankri@yahoo-inc.com">Stian Kristoffersen</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Beta +@After(MinimalQueryInserter.EXTERNAL_YQL) +public class FieldFiller extends Searcher { + + private final Set<String> intersectionOfAttributes; + private final SummaryIntersections summaryDb = new SummaryIntersections(); + public static final CompoundName FIELD_FILLER_DISABLE = new CompoundName( + "FieldFiller.disable"); + + private static class SummaryIntersections { + private final Map<String, Map<String, Set<String>>> db = new HashMap<>(); + + void add(String dbName, Summaryclass summary) { + Map<String, Set<String>> docType = getOrCreateDocType(dbName); + Set<String> fields = new HashSet<>(summary.fields().size()); + for (Fields f : summary.fields()) { + fields.add(f.name()); + } + docType.put(summary.name(), fields); + } + + @NonNull + private Map<String, Set<String>> getOrCreateDocType(String dbName) { + Map<String, Set<String>> docType = db.get(dbName); + if (docType == null) { + docType = new HashMap<>(); + db.put(dbName, docType); + } + return docType; + } + + boolean hasAll(Set<String> requested, String summaryName, Set<String> restrict) { + Set<String> explicitRestriction; + Set<String> intersection = null; + + if (restrict.isEmpty()) { + explicitRestriction = db.keySet(); + } else { + explicitRestriction = restrict; + } + + for (String docType : explicitRestriction) { + Map<String, Set<String>> summaries = db.get(docType); + Set<String> summary; + + if (summaries == null) { + continue; + } + summary = summaries.get(summaryName); + if (summary == null) { + intersection = null; + break; + } + if (intersection == null) { + intersection = new HashSet<>(summary.size()); + intersection.addAll(summary); + } else { + intersection.retainAll(summary); + } + } + return intersection == null ? 
false : intersection + .containsAll(requested); + } + } + + public FieldFiller(DocumentdbInfoConfig config) { + intersectionOfAttributes = new HashSet<>(); + boolean first = true; + + for (Documentdb db : config.documentdb()) { + for (Summaryclass summary : db.summaryclass()) { + Set<String> attributes = null; + if (Execution.ATTRIBUTEPREFETCH.equals(summary.name())) { + attributes = new HashSet<>(summary.fields().size()); + for (Fields f : summary.fields()) { + attributes.add(f.name()); + } + if (first) { + first = false; + intersectionOfAttributes.addAll(attributes); + } else { + intersectionOfAttributes.retainAll(attributes); + } + } + // yes, we store attribute prefetch here as well, this is in + // case we get a query where we have a restrict parameter which + // makes filling with attribute prefetch possible even though it + // wouldn't have been possible without restricting the set of + // doctypes + summaryDb.add(db.name(), summary); + } + } + } + + @Override + public Result search(Query query, Execution execution) { + return execution.search(query); + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + execution.fill(result, summaryClass); + + final Set<String> summaryFields = result.getQuery().getPresentation() + .getSummaryFields(); + + if (summaryFields.isEmpty() + || summaryClass == null + || result.getQuery().properties() + .getBoolean(FIELD_FILLER_DISABLE)) { + return; + } + + if (intersectionOfAttributes.containsAll(summaryFields)) { + if (!Execution.ATTRIBUTEPREFETCH.equals(summaryClass)) { + execution.fill(result, Execution.ATTRIBUTEPREFETCH); + } + } else { + // Yes, summaryClass may be Execution.ATTRIBUTEPREFETCH here + if (!summaryDb.hasAll(summaryFields, summaryClass, result + .getQuery().getModel().getRestrict())) { + execution.fill(result, null); + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/FieldFilter.java 
b/container-search/src/main/java/com/yahoo/search/yql/FieldFilter.java new file mode 100644 index 00000000000..b44fdadd17b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/FieldFilter.java @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.Set; + +import com.google.common.annotations.Beta; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +/** + * Remove fields which are not explicitly requested, if any field is explicitly + * requested. Disable using FieldFilter.disable=true in request. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Beta +@After(MinimalQueryInserter.EXTERNAL_YQL) +@Before("com.yahoo.search.yql.FieldFiller") +public class FieldFilter extends Searcher { + + public static final CompoundName FIELD_FILTER_DISABLE = new CompoundName("FieldFilter.disable"); + + @Override + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + filter(result); + return result; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + execution.fill(result, summaryClass); + filter(result); + } + + private void filter(Result result) { + Set<String> requestedFields; + + if (result.getQuery().properties().getBoolean(FIELD_FILTER_DISABLE)) return; + if (result.getQuery().getPresentation().getSummaryFields().isEmpty()) return; + + requestedFields = result.getQuery().getPresentation().getSummaryFields(); + for (Iterator<Hit> i = result.hits().unorderedDeepIterator(); i.hasNext();) { + Hit h = i.next(); + if (h.isMeta()) continue; + for (Iterator<Entry<String, Object>> fields = h.fieldIterator(); fields.hasNext();) { + Entry<String, Object> field = fields.next(); + if ( ! requestedFields.contains(field.getKey())) + fields.remove(); + } + + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/JavaListTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/JavaListTypeChecker.java new file mode 100644 index 00000000000..86e2cbf01ff --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/JavaListTypeChecker.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; + +import java.util.List; + +class JavaListTypeChecker extends OperatorTypeChecker { + + private final Class<?> elementType; + + public JavaListTypeChecker(Operator parent, int idx, Class<?> elementType) { + super(parent, idx); + this.elementType = elementType; + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + Preconditions.checkArgument(argument instanceof List, "Argument %s of %s must be a List<%s>", idx, parent, elementType.getName(), argument.getClass().getName()); + List<?> lst = (List<?>) argument; + for (Object elt : lst) { + Preconditions.checkNotNull(elt, "Argument %s of %s List elements may not be null", idx, parent); + Preconditions.checkArgument(elementType.isInstance(elt), "Argument %s of %s List elements must be %s (is %s)", idx, parent, elementType.getName(), elt.getClass().getName()); + } + } + +} + diff --git a/container-search/src/main/java/com/yahoo/search/yql/JavaTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/JavaTypeChecker.java new file mode 100644 index 00000000000..bf91474c19b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/JavaTypeChecker.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; + +class JavaTypeChecker extends OperatorTypeChecker { + + private final Class<?> type; + + public JavaTypeChecker(Operator parent, int idx, Class<?> type) { + super(parent, idx); + this.type = type; + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + Preconditions.checkArgument(type.isInstance(argument), "Argument %s of %s must be %s (is: %s).", idx, parent, type.getName(), argument.getClass().getName()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/JavaUnionTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/JavaUnionTypeChecker.java new file mode 100644 index 00000000000..a94027a9bd2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/JavaUnionTypeChecker.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; + +import java.util.Set; + +public class JavaUnionTypeChecker extends OperatorTypeChecker { + + private final Set<Class<?>> types; + + public JavaUnionTypeChecker(Operator parent, int idx, Set<Class<?>> types) { + super(parent, idx); + this.types = types; + } + + public JavaUnionTypeChecker(Operator parent, int idx, Class<?>... 
types) { + super(parent, idx); + this.types = ImmutableSet.copyOf(types); + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + for (Class<?> candidate : types) { + if (candidate.isInstance(argument)) { + return; + } + } + Preconditions.checkArgument(false, "Argument %s of %s must be %s (is: %s).", idx, parent, Joiner.on("|").join(types), argument.getClass()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/Location.java b/container-search/src/main/java/com/yahoo/search/yql/Location.java new file mode 100644 index 00000000000..a304ed75536 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/Location.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +/** + * A pointer to a location in a YQL source program. + */ +final class Location { + + private final String programName; + private final int lineNumber; + private final int characterOffset; + + public Location(String programName, int lineNumber, int characterOffset) { + this.programName = programName; + this.lineNumber = lineNumber; + this.characterOffset = characterOffset; + } + + + public int getLineNumber() { + return lineNumber; + } + + public int getCharacterOffset() { + return characterOffset; + } + + @Override + public String toString() { + if (programName != null) { + return programName + ":L" + lineNumber + ":" + characterOffset; + } else { + return "L" + lineNumber + ":" + characterOffset; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java b/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java new file mode 100644 index 00000000000..d710754e887 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java @@ -0,0 +1,98 @@ +// Copyright 
2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.annotations.Beta; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.grouping.GroupingRequest; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.query.parser.ParserFactory; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; +import com.yahoo.yolean.chain.After; +import com.yahoo.yolean.chain.Before; +import com.yahoo.yolean.chain.Provides; + +/** + * Minimal combinator for YQL+ syntax and heuristically parsed user queries. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @since 5.1.28 + */ +@Beta +@Provides(MinimalQueryInserter.EXTERNAL_YQL) +@Before(PhaseNames.TRANSFORMED_QUERY) +@After("com.yahoo.prelude.statistics.StatisticsSearcher") +public class MinimalQueryInserter extends Searcher { + public static final String EXTERNAL_YQL = "ExternalYql"; + + public static final CompoundName YQL = new CompoundName("yql"); + + private static final CompoundName MAX_HITS = new CompoundName("maxHits"); + private static final CompoundName MAX_OFFSET = new CompoundName("maxOffset"); + + public MinimalQueryInserter() { + } + + @Override + public Result search(Query query, Execution execution) { + if (query.properties().get(YQL) == null) { + return execution.search(query); + } + ParserEnvironment env = ParserEnvironment.fromExecutionContext(execution.context()); + YqlParser parser = (YqlParser) ParserFactory.newInstance(Query.Type.YQL, env); + parser.setQueryParser(false); + parser.setUserQuery(query); + QueryTree newTree; + try { + newTree = 
parser.parse(Parsable.fromQueryModel(query.getModel()) + .setQuery(query.properties().getString(YQL))); + } catch (RuntimeException e) { + return new Result(query, ErrorMessage.createInvalidQueryParameter( + "Could not instantiate query from YQL+", e)); + } + if (parser.getOffset() != null) { + final int maxHits = query.properties().getInteger(MAX_HITS); + final int maxOffset = query.properties().getInteger(MAX_OFFSET); + if (parser.getOffset() > maxOffset) { + return new Result(query, ErrorMessage.createInvalidQueryParameter("Requested offset " + parser.getOffset() + + ", but the max offset allowed is " + maxOffset + ".")); + } + if (parser.getHits() > maxHits) { + return new Result(query, ErrorMessage.createInvalidQueryParameter("Requested " + parser.getHits() + + " hits returned, but max hits allowed is " + maxHits + ".")); + + } + } + query.getModel().getQueryTree().setRoot(newTree.getRoot()); + query.getPresentation().getSummaryFields().addAll(parser.getYqlSummaryFields()); + for (VespaGroupingStep step : parser.getGroupingSteps()) { + GroupingRequest.newInstance(query) + .setRootOperation(step.getOperation()) + .continuations().addAll(step.continuations()); + } + if (parser.getYqlSources().size() == 0) { + query.getModel().getSources().clear(); + } else { + query.getModel().getSources().addAll(parser.getYqlSources()); + } + if (parser.getOffset() != null) { + query.setOffset(parser.getOffset()); + query.setHits(parser.getHits()); + } + if (parser.getTimeout() != null) { + query.setTimeout(parser.getTimeout().longValue()); + } + if (parser.getSorting() != null) { + query.getRanking().setSorting(parser.getSorting()); + } + query.trace("YQL+ query parsed", true, 2); + return execution.search(query); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/NodeTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/NodeTypeChecker.java new file mode 100644 index 00000000000..c407689e107 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/search/yql/NodeTypeChecker.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import java.util.Set; + +/** + * Check that an argument is an OperatorNode of a particular operator set. + */ +class NodeTypeChecker extends OperatorTypeChecker { + + private final Class<? extends Operator> operatorType; + private final Set<? extends Operator> operators; + + public NodeTypeChecker(Operator parent, int idx, Class<? extends Operator> operatorType, Set<? extends Operator> operators) { + super(parent, idx); + this.operatorType = operatorType; + this.operators = operators; + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + Preconditions.checkArgument(argument instanceof OperatorNode, "Argument %s of %s must be an OperatorNode<%s> (is %s).", idx, parent, operatorType.getName(), argument.getClass()); + OperatorNode<?> node = (OperatorNode<?>) argument; + Operator op = node.getOperator(); + Preconditions.checkArgument(operatorType.isInstance(op), "Argument %s of %s must be an OperatorNode<%s> (is: %s).", idx, parent, operatorType.getName(), op.getClass()); + if (!operators.isEmpty()) { + Preconditions.checkArgument(operators.contains(op), "Argument %s of %s must be %s (is %s).", idx, parent, Joiner.on("|").join(operators), op); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/NullItemException.java b/container-search/src/main/java/com/yahoo/search/yql/NullItemException.java new file mode 100644 index 00000000000..c50f22ff711 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/NullItemException.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. 
/**
 * Used to communicate a NullItem has been encountered in the query tree.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
@SuppressWarnings("serial")
public class NullItemException extends RuntimeException {

    /**
     * @param message a description of where the NullItem was encountered
     */
    public NullItemException(String message) {
        super(message);
    }

    /**
     * Variant which keeps the triggering exception, so it is not lost when rethrowing.
     *
     * @param message a description of where the NullItem was encountered
     * @param cause   the underlying exception, may be null
     */
    public NullItemException(String message, Throwable cause) {
        super(message, cause);
    }
}

/**
 * An operator which can appear in an operator tree and validate its own arguments.
 */
interface Operator {

    /** Returns the name of this operator. */
    String name();

    /**
     * Validates a proposed argument list for this operator.
     *
     * @param args the arguments to check
     */
    void checkArguments(Object... args);

}
+ * <p> + * Operators SHOULD take a fixed number of arguments -- wrap variable argument counts in Lists. + */ +final class OperatorNode<T extends Operator> { + + public static <T extends Operator> OperatorNode<T> create(T operator, Object... args) { + operator.checkArguments(args == null ? EMPTY_ARGS : args); + return new OperatorNode<T>(operator, args); + } + + public static <T extends Operator> OperatorNode<T> create(Location loc, T operator, Object... args) { + operator.checkArguments(args == null ? EMPTY_ARGS : args); + return new OperatorNode<T>(loc, operator, args); + } + + public static <T extends Operator> OperatorNode<T> create(Location loc, Map<String, Object> annotations, T operator, Object... args) { + operator.checkArguments(args == null ? EMPTY_ARGS : args); + return new OperatorNode<T>(loc, annotations, operator, args); + } + + private static final Object[] EMPTY_ARGS = new Object[0]; + + private final Location location; + private final T operator; + private Map<String, Object> annotations = ImmutableMap.of(); + private final Object[] args; + + private OperatorNode(T operator, Object... args) { + this.location = null; + this.operator = operator; + if (args == null) { + this.args = EMPTY_ARGS; + } else { + this.args = args; + } + } + + private OperatorNode(Location loc, T operator, Object... args) { + this.location = loc; + this.operator = operator; + if (args == null) { + this.args = EMPTY_ARGS; + } else { + this.args = args; + } + } + + private OperatorNode(Location loc, Map<String, Object> annotations, T operator, Object... 
args) { + this.location = loc; + this.operator = operator; + this.annotations = ImmutableMap.copyOf(annotations); + if (args == null) { + this.args = EMPTY_ARGS; + } else { + this.args = args; + } + } + + public T getOperator() { + return operator; + } + + public Object[] getArguments() { + // this is only called by a test right now, but ImmutableList.copyOf won't tolerate null elements + if (args.length == 0) { + return args; + } + Object[] copy = new Object[args.length]; + System.arraycopy(args, 0, copy, 0, args.length); + return copy; + } + + public <T> T getArgument(int i) { + return (T) args[i]; + } + + public <T> T getArgument(int i, Class<T> clazz) { + return clazz.cast(getArgument(i)); + } + + public Location getLocation() { + return location; + } + + public Object getAnnotation(String name) { + return annotations.get(name); + } + + public OperatorNode<T> putAnnotation(String name, Object value) { + if (annotations.isEmpty()) { + annotations = Maps.newLinkedHashMap(); + } else if (annotations instanceof ImmutableMap) { + annotations = Maps.newLinkedHashMap(annotations); + } + annotations.put(name, value); + return this; + } + + public Map<String, Object> getAnnotations() { + // TODO: this should be a read-only view? 
+ return ImmutableMap.copyOf(annotations); + } + + public OperatorNode<T> transform(Function<Object, Object> argumentTransform) { + if (args.length == 0) { + // nothing to transform, so no change is possible + return this; + } + Object[] newArgs = new Object[args.length]; + boolean changed = false; + for (int i = 0; i < args.length; ++i) { + Object target = args[i]; + if (target instanceof List) { + List<Object> newList = Lists.newArrayListWithExpectedSize(((List) target).size()); + for (Object val : (List) target) { + newList.add(argumentTransform.apply(val)); + } + newArgs[i] = newList; + // this will always 'change' the tree, maybe fix later + } else { + newArgs[i] = argumentTransform.apply(args[i]); + } + changed = changed || newArgs[i] != args[i]; + } + if (changed) { + return new OperatorNode<>(location, annotations, operator, newArgs); + } + return this; + } + + public void visit(OperatorVisitor visitor) { + if (visitor.enter(this)) { + for (Object target : args) { + if (target instanceof List) { + for (Object val : (List) target) { + if (val instanceof OperatorNode) { + ((OperatorNode) val).visit(visitor); + } + } + } else if (target instanceof OperatorNode) { + ((OperatorNode) target).visit(visitor); + + } + } + } + visitor.exit(this); + } + + // we are aware only of types used in our logical operator trees -- OperatorNode, List, and constant values + private static final Function<Object, Object> COPY = new Function<Object, Object>() { + @Nullable + @Override + public Object apply(@Nullable Object input) { + if (input instanceof List) { + List<Object> newList = Lists.newArrayListWithExpectedSize(((List) input).size()); + for (Object val : (List) input) { + newList.add(COPY.apply(val)); + } + return newList; + } else if (input instanceof OperatorNode) { + return ((OperatorNode) input).copy(); + } else if (input instanceof String || input instanceof Number || input instanceof Boolean) { + return input; + } else { + // this may be annoying but COPY not 
understanding how to COPY and quietly reusing + // when it may not be immutable could be dangerous + throw new IllegalArgumentException("Unexpected value type in OperatorNode tree: " + input); + } + } + }; + + public OperatorNode<T> copy() { + Object[] newArgs = new Object[args.length]; + for (int i = 0; i < args.length; ++i) { + newArgs[i] = COPY.apply(args[i]); + } + return new OperatorNode<>(location, ImmutableMap.copyOf(annotations), operator, newArgs); + } + + public void toString(StringBuilder output) { + output.append("(") + .append(operator.name()); + if(location != null) { + output.append(" L") + .append(location.getCharacterOffset()) + .append(":") + .append(location.getLineNumber()); + } + if(annotations != null && !annotations.isEmpty()) { + output.append(" {"); + Joiner.on(", ").withKeyValueSeparator("=") + .appendTo(output, annotations); + output.append("}"); + } + boolean first = true; + for(Object arg : args) { + if(!first) { + output.append(","); + } + first = false; + output.append(" "); + if(arg instanceof OperatorNode) { + ((OperatorNode) arg).toString(output); + } else if(arg instanceof Iterable) { + output.append("["); + Joiner.on(", ").appendTo(output, (Iterable)arg); + output.append("]"); + } else { + output.append(arg.toString()); + } + } + output.append(")"); + } + + public String toString() { + StringBuilder output = new StringBuilder(); + toString(output); + return output.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + OperatorNode that = (OperatorNode) o; + + if (!annotations.equals(that.annotations)) return false; + // Probably incorrect - comparing Object[] arrays with Arrays.equals + if (!Arrays.equals(args, that.args)) return false; + if (!operator.equals(that.operator)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = operator.hashCode(); + result = 31 * result + 
annotations.hashCode(); + result = 31 * result + Arrays.hashCode(args); + return result; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/OperatorNodeListTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/OperatorNodeListTypeChecker.java new file mode 100644 index 00000000000..d0c98fb3d11 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/OperatorNodeListTypeChecker.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; + +import java.util.List; +import java.util.Set; + +class OperatorNodeListTypeChecker extends OperatorTypeChecker { + + private final Class<? extends Operator> operatorType; + private final Set<? extends Operator> operators; + + public OperatorNodeListTypeChecker(Operator parent, int idx, Class<? extends Operator> operatorType, Set<? 
extends Operator> operators) { + super(parent, idx); + this.operatorType = operatorType; + this.operators = operators; + } + + @Override + public void check(Object argument) { + Preconditions.checkNotNull(argument, "Argument %s of %s must not be null", idx, parent); + Preconditions.checkArgument(argument instanceof List, "Argument %s of %s must be a List<OperatorNode<%s>>", idx, parent, operatorType.getName(), argument.getClass()); + List<OperatorNode<?>> lst = (List<OperatorNode<?>>) argument; + for (OperatorNode<?> node : lst) { + Operator op = node.getOperator(); + Preconditions.checkArgument(operatorType.isInstance(op), "Argument %s of %s must contain only OperatorNode<%s> (is: %s).", idx, parent, operatorType.getName(), op.getClass()); + if (!operators.isEmpty()) { + Preconditions.checkArgument(operators.contains(op), "Argument %s of %s must contain only %s (is %s).", idx, parent, Joiner.on("|").join(operators), op); + } + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/OperatorTypeChecker.java b/container-search/src/main/java/com/yahoo/search/yql/OperatorTypeChecker.java new file mode 100644 index 00000000000..8266f414fa7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/OperatorTypeChecker.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +/** + * Check the type of a single argument. 
+ */ +abstract class OperatorTypeChecker { + + protected final Operator parent; + protected final int idx; + + protected OperatorTypeChecker(Operator parent, int idx) { + this.parent = parent; + this.idx = idx; + } + + public abstract void check(Object argument); + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/OperatorVisitor.java b/container-search/src/main/java/com/yahoo/search/yql/OperatorVisitor.java new file mode 100644 index 00000000000..73c3612c1c9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/OperatorVisitor.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +interface OperatorVisitor { + + <T extends Operator> boolean enter(OperatorNode<T> node); + + <T extends Operator> void exit(OperatorNode<T> node); + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ParserBase.java b/container-search/src/main/java/com/yahoo/search/yql/ParserBase.java new file mode 100644 index 00000000000..af3418919e8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ParserBase.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.collect.Sets; + +import org.antlr.v4.runtime.Parser; +import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.tree.ParseTree; + +import java.util.Set; + +/** + * Provides semantic helper functions to Parser. 
+ */ +abstract class ParserBase extends Parser { + + private static String arrayRuleName = "array"; + public ParserBase(TokenStream input) { + super(input); + } + + private Set<String> arrayParameters = Sets.newHashSet(); + + public void registerParameter(String name, String typeName) { + if (typeName.equals(arrayRuleName)) { + arrayParameters.add(name); + } + } + + public boolean isArrayParameter(ParseTree nameNode) { + String name = nameNode.getText(); + if (name.startsWith("@")) { + name = name.substring(1); + } + return name != null && arrayParameters.contains(name); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ProgramCompileException.java b/container-search/src/main/java/com/yahoo/search/yql/ProgramCompileException.java new file mode 100644 index 00000000000..592bd690d56 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ProgramCompileException.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +class ProgramCompileException extends RuntimeException { + + private Location sourceLocation; + + public ProgramCompileException(String message) { + super(message); + } + + public ProgramCompileException(String message, Object... args) { + super(formatMessage(message, args)); + } + + private static String formatMessage(String message, Object... args) { + return args == null ? message : String.format(message, args); + } + + public ProgramCompileException(String message, Throwable cause) { + super(message, cause); + } + + public ProgramCompileException(Throwable cause) { + super(cause); + } + + public ProgramCompileException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + + + public ProgramCompileException(Location sourceLocation, String message, Object... 
args) { + super(String.format("%s %s", sourceLocation != null ? sourceLocation : "", args == null ? message : String.format(message, args))); + this.sourceLocation = sourceLocation; + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ProgramParser.java b/container-search/src/main/java/com/yahoo/search/yql/ProgramParser.java new file mode 100644 index 00000000000..a8d1bc43a4c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ProgramParser.java @@ -0,0 +1,1549 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.yahoo.search.yql.yqlplusParser.AnnotationContext; +import com.yahoo.search.yql.yqlplusParser.AnnotateExpressionContext; +import com.yahoo.search.yql.yqlplusParser.ArgumentContext; +import com.yahoo.search.yql.yqlplusParser.ArgumentsContext; +import com.yahoo.search.yql.yqlplusParser.ArrayLiteralContext; +import com.yahoo.search.yql.yqlplusParser.ArrayTypeContext; +import com.yahoo.search.yql.yqlplusParser.Call_sourceContext; +import com.yahoo.search.yql.yqlplusParser.ConstantArrayContext; +import com.yahoo.search.yql.yqlplusParser.ConstantExpressionContext; +import com.yahoo.search.yql.yqlplusParser.ConstantMapExpressionContext; +import com.yahoo.search.yql.yqlplusParser.ConstantPropertyNameAndValueContext; +import com.yahoo.search.yql.yqlplusParser.Delete_statementContext; +import com.yahoo.search.yql.yqlplusParser.DereferencedExpressionContext; +import com.yahoo.search.yql.yqlplusParser.EqualityExpressionContext; +import 
com.yahoo.search.yql.yqlplusParser.ExpressionContext; +import com.yahoo.search.yql.yqlplusParser.FallbackContext; +import com.yahoo.search.yql.yqlplusParser.Field_defContext; +import com.yahoo.search.yql.yqlplusParser.Field_names_specContext; +import com.yahoo.search.yql.yqlplusParser.Field_values_group_specContext; +import com.yahoo.search.yql.yqlplusParser.Field_values_specContext; +import com.yahoo.search.yql.yqlplusParser.IdentContext; +import com.yahoo.search.yql.yqlplusParser.Import_listContext; +import com.yahoo.search.yql.yqlplusParser.Import_statementContext; +import com.yahoo.search.yql.yqlplusParser.InNotInTargetContext; +import com.yahoo.search.yql.yqlplusParser.Insert_sourceContext; +import com.yahoo.search.yql.yqlplusParser.Insert_statementContext; +import com.yahoo.search.yql.yqlplusParser.Insert_valuesContext; +import com.yahoo.search.yql.yqlplusParser.JoinExpressionContext; +import com.yahoo.search.yql.yqlplusParser.Join_exprContext; +import com.yahoo.search.yql.yqlplusParser.LimitContext; +import com.yahoo.search.yql.yqlplusParser.Literal_elementContext; +import com.yahoo.search.yql.yqlplusParser.Literal_listContext; +import com.yahoo.search.yql.yqlplusParser.LogicalANDExpressionContext; +import com.yahoo.search.yql.yqlplusParser.LogicalORExpressionContext; +import com.yahoo.search.yql.yqlplusParser.MapExpressionContext; +import com.yahoo.search.yql.yqlplusParser.MapTypeContext; +import com.yahoo.search.yql.yqlplusParser.Merge_componentContext; +import com.yahoo.search.yql.yqlplusParser.Merge_statementContext; +import com.yahoo.search.yql.yqlplusParser.ModuleIdContext; +import com.yahoo.search.yql.yqlplusParser.ModuleNameContext; +import com.yahoo.search.yql.yqlplusParser.MultiplicativeExpressionContext; +import com.yahoo.search.yql.yqlplusParser.Namespaced_nameContext; +import com.yahoo.search.yql.yqlplusParser.Next_statementContext; +import com.yahoo.search.yql.yqlplusParser.OffsetContext; +import 
com.yahoo.search.yql.yqlplusParser.OrderbyContext; +import com.yahoo.search.yql.yqlplusParser.Orderby_fieldContext; +import com.yahoo.search.yql.yqlplusParser.Output_specContext; +import com.yahoo.search.yql.yqlplusParser.Paged_clauseContext; +import com.yahoo.search.yql.yqlplusParser.ParamsContext; +import com.yahoo.search.yql.yqlplusParser.Pipeline_stepContext; +import com.yahoo.search.yql.yqlplusParser.Procedure_argumentContext; +import com.yahoo.search.yql.yqlplusParser.Program_arglistContext; +import com.yahoo.search.yql.yqlplusParser.Project_specContext; +import com.yahoo.search.yql.yqlplusParser.ProgramContext; +import com.yahoo.search.yql.yqlplusParser.PropertyNameAndValueContext; +import com.yahoo.search.yql.yqlplusParser.Query_statementContext; +import com.yahoo.search.yql.yqlplusParser.RelationalExpressionContext; +import com.yahoo.search.yql.yqlplusParser.RelationalOpContext; +import com.yahoo.search.yql.yqlplusParser.Returning_specContext; +import com.yahoo.search.yql.yqlplusParser.Scalar_literalContext; +import com.yahoo.search.yql.yqlplusParser.Select_source_joinContext; +import com.yahoo.search.yql.yqlplusParser.Select_source_multiContext; +import com.yahoo.search.yql.yqlplusParser.Select_statementContext; +import com.yahoo.search.yql.yqlplusParser.Selectvar_statementContext; +import com.yahoo.search.yql.yqlplusParser.Sequence_sourceContext; +import com.yahoo.search.yql.yqlplusParser.Source_listContext; +import com.yahoo.search.yql.yqlplusParser.Source_specContext; +import com.yahoo.search.yql.yqlplusParser.Source_statementContext; +import com.yahoo.search.yql.yqlplusParser.StatementContext; +import com.yahoo.search.yql.yqlplusParser.TimeoutContext; +import com.yahoo.search.yql.yqlplusParser.TypenameContext; +import com.yahoo.search.yql.yqlplusParser.UnaryExpressionContext; +import com.yahoo.search.yql.yqlplusParser.Update_statementContext; +import com.yahoo.search.yql.yqlplusParser.Update_valuesContext; +import 
com.yahoo.search.yql.yqlplusParser.ViewContext; +import com.yahoo.search.yql.yqlplusParser.WhereContext; + +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.atn.PredictionMode; +import org.antlr.v4.runtime.misc.NotNull; +import org.antlr.v4.runtime.misc.Nullable; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Translate the ANTLR grammar into the logical representation. + */ +final class ProgramParser { + + public yqlplusParser prepareParser(String programName, InputStream input) throws IOException { + return prepareParser(programName, new CaseInsensitiveInputStream(input)); + } + + public yqlplusParser prepareParser(String programName, String input) throws IOException { + return prepareParser(programName, new CaseInsensitiveInputStream(input)); + } + + public yqlplusParser prepareParser(File file) throws IOException { + return prepareParser(file.getAbsoluteFile().toString(), new CaseInsensitiveFileStream(file.getAbsolutePath())); + } + + + private yqlplusParser prepareParser(final String programName, CharStream input) { + yqlplusLexer lex = new yqlplusLexer(input); + lex.addErrorListener(new BaseErrorListener() { + @Override + public void syntaxError(@NotNull Recognizer<?, ?> recognizer, + @Nullable Object offendingSymbol, + int line, + int charPositionInLine, + @NotNull String msg, + @Nullable RecognitionException e) + { + throw new ProgramCompileException(new 
Location(programName, line, charPositionInLine), msg); + } + + }); + TokenStream tokens = new CommonTokenStream(lex); + yqlplusParser parser = new yqlplusParser(tokens); + parser.addErrorListener(new BaseErrorListener() { + @Override + public void syntaxError(@NotNull Recognizer<?, ?> recognizer, + @Nullable Object offendingSymbol, + int line, + int charPositionInLine, + @NotNull String msg, + @Nullable RecognitionException e) + { + throw new ProgramCompileException(new Location(programName, line, charPositionInLine), msg); + } + + }); + parser.getInterpreter().setPredictionMode(PredictionMode.SLL); + return parser; + } + + private ProgramContext parseProgram(yqlplusParser parser) throws RecognitionException { + try { + return parser.program(); + } catch (RecognitionException e) { + //Retry parsing using full LL mode + parser.reset(); + parser.getInterpreter().setPredictionMode(PredictionMode.LL); + return parser.program(); + } + } + + public OperatorNode<StatementOperator> parse(String programName, InputStream program) throws IOException, RecognitionException { + yqlplusParser parser = prepareParser(programName, program); + return convertProgram(parseProgram(parser), parser, programName); + } + + public OperatorNode<StatementOperator> parse(String programName, String program) throws IOException, RecognitionException { + yqlplusParser parser = prepareParser(programName, program); + return convertProgram(parseProgram(parser), parser, programName); + } + + public OperatorNode<StatementOperator> parse(File input) throws IOException, RecognitionException { + yqlplusParser parser = prepareParser(input); + return convertProgram(parseProgram(parser), parser, input.getAbsoluteFile().toString()); + } + + public OperatorNode<ExpressionOperator> parseExpression(String input) throws IOException, RecognitionException { + return convertExpr(prepareParser("<expression>", input).expression(false).getRuleContext(), new Scope()); + } + + public OperatorNode<ExpressionOperator> 
parseExpression(String input, Set<String> visibleAliases) throws IOException, RecognitionException { + Scope scope = new Scope(); + final Location loc = new Location("<expression>", -1, -1); + for (String alias : visibleAliases) { + scope.defineDataSource(loc, alias); + } + return convertExpr(prepareParser("<expression>", input).expression(false).getRuleContext(), scope); + } + + private Location toLocation(Scope scope, ParseTree node) { + Token start; + if (node instanceof ParserRuleContext) { + start = ((ParserRuleContext)node).start; + } else if (node instanceof TerminalNode) { + start = ((TerminalNode)node).getSymbol(); + } else { + throw new ProgramCompileException("Location is not available for type " + node.getClass()); + } + Location location = new Location(scope != null? scope.programName: "<string>", start.getLine(), start.getCharPositionInLine()); + return location; + } + + private List<String> readName(Namespaced_nameContext node) { + List<String> path = Lists.newArrayList(); + for (ParseTree elt:node.children) { + if (!(getParseTreeIndex(elt) == yqlplusParser.DOT)) { + path.add(elt.getText()); + } + } + return path; + } + + static class Binding { + private final List<String> binding; + + Binding(String moduleName, String exportName) { + this.binding = ImmutableList.of(moduleName, exportName); + } + + Binding(String moduleName) { + this.binding = ImmutableList.of(moduleName); + } + + Binding(List<String> binding) { + this.binding = binding; + } + + public List<String> toPath() { + return binding; + } + + public List<String> toPathWith(List<String> rest) { + return ImmutableList.copyOf(Iterables.concat(toPath(), rest)); + } + } + + static class Scope { + final Scope root; + final Scope parent; + Set<String> cursors = ImmutableSet.of(); + Set<String> variables = ImmutableSet.of(); + Set<String> views = Sets.newHashSet(); + Map<String, Binding> bindings = Maps.newHashMap(); + final yqlplusParser parser; + final String programName; + + Scope() { + 
this.parser = null; + this.programName = null; + this.root = this; + this.parent = null; + } + + Scope(yqlplusParser parser, String programName) { + this.parser = parser; + this.programName = programName; + this.root = this; + this.parent = null; + } + + Scope(Scope root, Scope parent) { + this.root = root; + this.parent = parent; + this.parser = parent.parser; + this.programName = parent.programName; + } + + public yqlplusParser getParser() { + return parser; + } + + public String getProgramName() { + return programName; + } + + public Set<String> getCursors() { + return cursors; + } + + + boolean isBound(String name) { + // bindings live only in the 'root' node + return root.bindings.containsKey(name); + } + + public Binding getBinding(String name) { + return root.bindings.get(name); + } + + public List<String> resolvePath(List<String> path) { + if (path.size() < 1 || !isBound(path.get(0))) { + return path; + } else { + return getBinding(path.get(0)).toPathWith(path.subList(1, path.size())); + } + } + + boolean isCursor(String name) { + return cursors.contains(name) || (parent != null && parent.isCursor(name)); + } + + boolean isVariable(String name) { + return variables.contains(name) || (parent != null && parent.isVariable(name)); + } + + public void bindModule(Location loc, List<String> binding, String symbolName) { + if (isBound(symbolName)) { + throw new ProgramCompileException(loc, "Name '%s' is already used.", symbolName); + } + root.bindings.put(symbolName, new Binding(binding)); + } + + public void bindModuleSymbol(Location loc, List<String> moduleName, String exportName, String symbolName) { + ImmutableList.Builder<String> builder = ImmutableList.builder(); + builder.addAll(moduleName); + builder.add(exportName); + bindModule(loc, builder.build(), symbolName); + } + + public void defineDataSource(Location loc, String name) { + if (isCursor(name)) { + throw new ProgramCompileException(loc, "Alias '%s' is already used.", name); + } + if 
(cursors.isEmpty()) { + cursors = Sets.newHashSet(); + } + cursors.add(name); + } + + public void defineVariable(Location loc, String name) { + if (isVariable(name)) { + throw new ProgramCompileException(loc, "Variable/argument '%s' is already used.", name); + } + if (variables.isEmpty()) { + variables = Sets.newHashSet(); + } + variables.add(name); + + } + + public void defineView(Location loc, String text) { + if (this != root) { + throw new IllegalStateException("Views MUST be defined in 'root' scope only"); + } + if (views.contains(text)) { + throw new ProgramCompileException(loc, "View '%s' already defined", text); + } + views.add(text); + } + + Scope child() { + return new Scope(root, this); + } + + Scope getRoot() { + return root; + } + } + + private OperatorNode<SequenceOperator> convertSelectOrInsertOrUpdateOrDelete(ParseTree node, Scope scopeParent) { + + Preconditions.checkArgument(node instanceof Select_statementContext || node instanceof Insert_statementContext || + node instanceof Update_statementContext || node instanceof Delete_statementContext); + + // SELECT^ select_field_spec select_source where? orderby? limit? offset? timeout? fallback? + // select is the only place to define where/orderby/limit/offset and joins + Scope scope = scopeParent.child(); + ProjectionBuilder proj = null; + OperatorNode<SequenceOperator> source = null; + OperatorNode<ExpressionOperator> filter = null; + List<OperatorNode<SortOperator>> orderby = null; + OperatorNode<ExpressionOperator> offset = null; + OperatorNode<ExpressionOperator> limit = null; + OperatorNode<ExpressionOperator> timeout = null; + OperatorNode<SequenceOperator> fallback = null; + OperatorNode<SequenceOperator> insertValues = null; + OperatorNode<ExpressionOperator> updateValues = null; + + ParseTree sourceNode; + + if (node instanceof Select_statementContext ) { + sourceNode = node.getChild(2) != null ? 
node.getChild(2).getChild(0):null; + } else { + sourceNode = node.getChild(1); + } + + if (sourceNode != null) { + switch (getParseTreeIndex(sourceNode)) { + // ALL_SOURCE and MULTI_SOURCE are how FROM SOURCES + // *|source_name,... are parsed + // They can't be used directly with the JOIN syntax at this time + case yqlplusParser.RULE_select_source_all: { + Location location = toLocation(scope, sourceNode.getChild(2)); + source = OperatorNode.create(location, SequenceOperator.ALL); + source.putAnnotation("alias", "row"); + scope.defineDataSource(location, "row"); + } + break; + case yqlplusParser.RULE_select_source_multi: + Source_listContext multiSourceContext = ((Select_source_multiContext) sourceNode).source_list(); + source = readMultiSource(scope, multiSourceContext); + source.putAnnotation("alias", "row"); + scope.defineDataSource(toLocation(scope, multiSourceContext), "row"); + break; + case yqlplusParser.RULE_select_source_join: + source = convertSource((ParserRuleContext) sourceNode.getChild(1), scope); + List<Join_exprContext> joinContexts = ((Select_source_joinContext)sourceNode).join_expr(); + for (Join_exprContext joinContext:joinContexts) { + source = convertJoin(joinContext, source, scope); + } + break; + case yqlplusParser.RULE_insert_source: + Insert_sourceContext insertSourceContext = (Insert_sourceContext) sourceNode; + source = convertSource((ParserRuleContext)insertSourceContext.getChild(1), scope); + break; + case yqlplusParser.RULE_delete_source: + source = convertSource((ParserRuleContext)sourceNode.getChild(1), scope); + break; + case yqlplusParser.RULE_update_source: + source = convertSource((ParserRuleContext)sourceNode.getChild(0), scope); + break; + } + } else { + source = OperatorNode.create(SequenceOperator.EMPTY); + } + + for (int i = 1; i < node.getChildCount(); ++i) { + ParseTree child = node.getChild(i); + switch (getParseTreeIndex(child)) { + case yqlplusParser.RULE_select_field_spec: + if (getParseTreeIndex(child.getChild(0)) == 
yqlplusParser.RULE_project_spec) { + proj = readProjection(((Project_specContext) child.getChild(0)).field_def(), scope); + } + break; + case yqlplusParser.RULE_returning_spec: + proj = readProjection(((Returning_specContext) child).select_field_spec().project_spec().field_def(), scope); + break; + case yqlplusParser.RULE_where: + filter = convertExpr(((WhereContext) child).expression(), scope); + break; + case yqlplusParser.RULE_orderby: + // OrderbyContext orderby() + List<Orderby_fieldContext> orderFieds = ((OrderbyContext) child) + .orderby_fields().orderby_field(); + orderby = Lists.newArrayListWithExpectedSize(orderFieds.size()); + for (int j = 0; j < orderFieds.size(); ++j) { + orderby.add(convertSortKey(orderFieds.get(j), scope)); + } + break; + case yqlplusParser.RULE_limit: + limit = convertExpr(((LimitContext) child).fixed_or_parameter(), scope); + break; + case yqlplusParser.RULE_offset: + offset = convertExpr(((OffsetContext) child).fixed_or_parameter(), scope); + break; + case yqlplusParser.RULE_timeout: + timeout = convertExpr(((TimeoutContext) child).fixed_or_parameter(), scope); + break; + case yqlplusParser.RULE_fallback: + fallback = convertQuery(((FallbackContext) child).select_statement(), scope); + break; + case yqlplusParser.RULE_insert_values: + if (child.getChild(0) instanceof yqlplusParser.Query_statementContext) { + insertValues = convertQuery(child.getChild(0).getChild(0), scope); + } else { + insertValues = readBatchValues(((Insert_valuesContext) child).field_names_spec(), ((Insert_valuesContext)child).field_values_group_spec(), scope); + } + break; + case yqlplusParser.RULE_update_values: + if (getParseTreeIndex(child.getChild(0)) == yqlplusParser.RULE_field_def) { + updateValues = readValues(((Update_valuesContext)child).field_def(), scope); + } else { + updateValues = readValues((Field_names_specContext)child.getChild(0), (Field_values_specContext)child.getChild(2), scope); + } + break; + } + } + // now assemble the logical plan + 
OperatorNode<SequenceOperator> result = source; + // filter + if (filter != null) { + result = OperatorNode.create(SequenceOperator.FILTER, result, filter); + } + // insert values + if (insertValues != null) { + result = OperatorNode.create(SequenceOperator.INSERT, result, insertValues); + } + // update + if (updateValues != null) { + if (filter != null) { + result = OperatorNode.create(SequenceOperator.UPDATE, source, updateValues, filter); + } else { + result = OperatorNode.create(SequenceOperator.UPDATE_ALL, source, updateValues); + } + } + // delete + if (getParseTreeIndex(node) == yqlplusParser.RULE_delete_statement) { + if (filter != null) { + result = OperatorNode.create(SequenceOperator.DELETE, source, filter); + } else { + result = OperatorNode.create(SequenceOperator.DELETE_ALL, source); + } + } + // then sort (or project and sort) + boolean projectBeforeSort = false; + if (orderby != null) { + if (proj != null) { + for (OperatorNode<SortOperator> sortKey : orderby) { + OperatorNode<ExpressionOperator> sortExpression = sortKey.getArgument(0); + List<OperatorNode<ExpressionOperator>> sortReadFields = getReadFieldExpressions(sortExpression); + for (OperatorNode<ExpressionOperator> sortReadField : sortReadFields) { + String sortKeyField = sortReadField.getArgument(1); + if (proj.isAlias(sortKeyField)) { + // TODO: Add support for "mixed" case + projectBeforeSort = true; + break; + } + } + } + } + if (projectBeforeSort) { + result = OperatorNode.create(SequenceOperator.SORT, proj.make(result), orderby); + } else { + result = OperatorNode.create(SequenceOperator.SORT, result, orderby); + } + } + // then offset/limit (must be done after sorting!) 
+ if (offset != null && limit != null) { + result = OperatorNode.create(SequenceOperator.SLICE, result, offset, limit); + } else if (offset != null) { + result = OperatorNode.create(SequenceOperator.OFFSET, result, offset); + } else if (limit != null) { + result = OperatorNode.create(SequenceOperator.LIMIT, result, limit); + } + // finally, project (if not already) + if (proj != null && !projectBeforeSort) { + result = proj.make(result); + } + if (timeout != null) { + result = OperatorNode.create(SequenceOperator.TIMEOUT, result, timeout); + } + // if there's a fallback, emit a fallback node + if (fallback != null) { + result = OperatorNode.create(SequenceOperator.FALLBACK, result, fallback); + } + return result; + } + + private OperatorNode<ExpressionOperator> readValues(List<Field_defContext> fieldDefs, Scope scope) { + List<String> fieldNames; + List<OperatorNode<ExpressionOperator>> fieldValues; + int numPairs = fieldDefs.size(); + fieldNames = Lists.newArrayListWithExpectedSize(numPairs); + fieldValues = Lists.newArrayListWithExpectedSize(numPairs); + for (int j = 0; j < numPairs; j++) { + ParseTree startNode = fieldDefs.get(j); + while(startNode.getChildCount() < 3) { + startNode = startNode.getChild(0); + } + fieldNames.add((String) convertExpr(startNode.getChild(0), scope).getArgument(1)); + fieldValues.add(convertExpr(startNode.getChild(2), scope)); + } + return OperatorNode.create(ExpressionOperator.MAP, fieldNames, fieldValues); + } + + private OperatorNode<SequenceOperator> readMultiSource(Scope scope, Source_listContext multiSource) { + List<List<String>> sourceNameList = Lists.newArrayList(); + List<Namespaced_nameContext> nameSpaces = multiSource.namespaced_name(); + for(Namespaced_nameContext node : nameSpaces) { + List<String> name = readName(node); + sourceNameList.add(name); + } + return OperatorNode.create(toLocation(scope, multiSource), SequenceOperator.MULTISOURCE, sourceNameList); + } +// pipeline_step +// : namespaced_name arguments[false]? 
+// ; + private OperatorNode<SequenceOperator> convertPipe(Query_statementContext queryStatementContext, List<Pipeline_stepContext> nodes, Scope scope) { + OperatorNode<SequenceOperator> result = convertQuery(queryStatementContext.getChild(0), scope.getRoot()); + for (Pipeline_stepContext step:nodes) { + if (getParseTreeIndex(step.getChild(0)) == yqlplusParser.RULE_vespa_grouping) { + result = OperatorNode.create(SequenceOperator.PIPE, result, ImmutableList.<String>of(), + ImmutableList.of(convertExpr(step.getChild(0), scope))); + } else { + List<String> name = readName(step.namespaced_name()); + List<OperatorNode<ExpressionOperator>> args = ImmutableList.of(); + //LPAREN (argument[$in_select] (COMMA argument[$in_select])*) RPAREN + if (step.getChildCount() > 1) { + ArgumentsContext arguments = step.arguments(); + if (arguments.getChildCount() > 2) { + List<ArgumentContext> argumentContextList = arguments.argument(); + args = Lists.newArrayListWithExpectedSize(argumentContextList.size()); + for (ArgumentContext argumentContext: argumentContextList) { + args.add(convertExpr(argumentContext.expression(), scope.getRoot())); + + } + } + } + result = OperatorNode.create(SequenceOperator.PIPE, result, scope.resolvePath(name), args); + } + } + return result; + } + + private OperatorNode<SequenceOperator> convertMerge(List<Merge_componentContext> mergeComponentList, Scope scope) { + Preconditions.checkArgument(mergeComponentList != null); + List<OperatorNode<SequenceOperator>> sources = Lists.newArrayListWithExpectedSize(mergeComponentList.size()); + for (Merge_componentContext mergeComponent:mergeComponentList) { + Select_statementContext selectContext = mergeComponent.select_statement(); + Source_statementContext sourceContext = mergeComponent.source_statement(); + if (selectContext != null) { + sources.add(convertQuery(selectContext, scope.getRoot())); + } else { + sources.add(convertQuery(sourceContext, scope.getRoot())); + } + } + return 
OperatorNode.create(SequenceOperator.MERGE, sources); + } + + private OperatorNode<SequenceOperator> convertQuery(ParseTree node, Scope scope) { + if (node instanceof Select_statementContext + || node instanceof Insert_statementContext + || node instanceof Update_statementContext + || node instanceof Delete_statementContext) { + return convertSelectOrInsertOrUpdateOrDelete(node, scope.getRoot()); + } else if (node instanceof Source_statementContext) { //for pipe + Source_statementContext sourceStatementContext = (Source_statementContext)node; + return convertPipe(sourceStatementContext.query_statement(), sourceStatementContext.pipeline_step(), scope); + } else if (node instanceof Merge_statementContext) { + return convertMerge(((Merge_statementContext)node).merge_component(), scope); + } else { + throw new IllegalArgumentException("Unexpected argument type to convertQueryStatement: " + node.toStringTree()); + } + + } + + private OperatorNode<SequenceOperator> convertJoin(Join_exprContext node, OperatorNode<SequenceOperator> left, Scope scope) { + Source_specContext sourceSpec = node.source_spec(); + OperatorNode<SequenceOperator> right = convertSource(sourceSpec, scope); + JoinExpressionContext joinContext = node.joinExpression(); + OperatorNode<ExpressionOperator> joinExpression = readBinOp(ExpressionOperator.valueOf("EQ"), joinContext.getChild(0), joinContext.getChild(2), scope); + if (joinExpression.getOperator() != ExpressionOperator.EQ) { + throw new ProgramCompileException(joinExpression.getLocation(), "Unexpected join expression type: %s (expected EQ)", joinExpression.getOperator()); + } + return OperatorNode.create(toLocation(scope, sourceSpec), node.join_spec().LEFT() != null ? 
SequenceOperator.LEFT_JOIN : SequenceOperator.JOIN, left, right, joinExpression); + } + + /** + * Registers a data source name in the given scope and returns the name that was registered. + * A null alias defaults to "source". When node is an alias definition, the identifier text it carries is used + * and must not already be a cursor in the scope. Otherwise an increasing counter is appended to the alias + * until the name is not a cursor in the scope. + * + * @param alias the suggested name, or null to start from "source" + * @param node the alias definition node, or null / any other node type when no explicit alias was parsed + * @param scope the scope queried with isCursor and updated with defineDataSource + * @return the name under which the data source was defined in the scope + * @throws ProgramCompileException if an explicitly given alias is already a cursor in the scope + */ + private String assignAlias(String alias, ParserRuleContext node, Scope scope) { + if (alias == null) { + alias = "source"; + } + + if (node instanceof yqlplusParser.Alias_defContext) { + //alias_def : (AS? ID); + ParseTree idChild = node; + if (node.getChildCount() > 1) { + idChild = node.getChild(1); + } + alias = idChild.getText(); + if (scope.isCursor(alias)) { + throw new ProgramCompileException(toLocation(scope, idChild), "Source alias '%s' is already used", alias); + } + scope.defineDataSource(toLocation(scope, idChild), alias); + return alias; + } else { + String candidate = alias; + int c = 0; + while (scope.isCursor(candidate)) { + candidate = alias + (++c); + } + scope.defineDataSource(null, candidate); + // return the name that was actually registered, not the possibly colliding base alias + return candidate; + } + } + + private OperatorNode<SequenceOperator> convertSource(ParserRuleContext sourceSpecNode, Scope scope) { + + // DataSources + String alias; + OperatorNode<SequenceOperator> result; + ParserRuleContext dataSourceNode = sourceSpecNode; + ParserRuleContext aliasContext = null; + //data_source + //: call_source + //| LPAREN source_statement RPAREN + //| sequence_source + //; + if (sourceSpecNode instanceof Source_specContext) { + dataSourceNode = (ParserRuleContext)sourceSpecNode.getChild(0); + if (sourceSpecNode.getChildCount() == 2) { + aliasContext = (ParserRuleContext)sourceSpecNode.getChild(1); + } + if (dataSourceNode.getChild(0) instanceof Call_sourceContext || + dataSourceNode.getChild(0) instanceof Sequence_sourceContext) { + dataSourceNode = (ParserRuleContext)dataSourceNode.getChild(0); + } else { //source_statement + dataSourceNode = (ParserRuleContext)dataSourceNode.getChild(1); + } + } + switch (getParseTreeIndex(dataSourceNode)) { + case yqlplusParser.RULE_write_data_source: + case yqlplusParser.RULE_call_source: { + List<String> names = 
readName((Namespaced_nameContext)dataSourceNode.getChild(Namespaced_nameContext.class, 0)); + alias = assignAlias(names.get(names.size() - 1), aliasContext, scope); + List<OperatorNode<ExpressionOperator>> arguments = ImmutableList.of(); + ArgumentsContext argumentsContext = dataSourceNode.getRuleContext(ArgumentsContext.class,0); + if ( argumentsContext != null) { + List<ArgumentContext> argumentContexts = argumentsContext.argument(); + arguments = Lists.newArrayListWithExpectedSize(argumentContexts.size()); + for (ArgumentContext argumentContext:argumentContexts) { + arguments.add(convertExpr(argumentContext, scope)); + } + } + if (names.size() == 1 && scope.isVariable(names.get(0))) { + String ident = names.get(0); + if (arguments.size() > 0) { + throw new ProgramCompileException(toLocation(scope, argumentsContext), "Invalid call-with-arguments on local source '%s'", ident); + } + result = OperatorNode.create(toLocation(scope, dataSourceNode), SequenceOperator.EVALUATE, OperatorNode.create(toLocation(scope, dataSourceNode), ExpressionOperator.VARREF, ident)); + } else { + result = OperatorNode.create(toLocation(scope, dataSourceNode), SequenceOperator.SCAN, scope.resolvePath(names), arguments); + } + break; + } + case yqlplusParser.RULE_sequence_source: { + IdentContext identContext = dataSourceNode.getRuleContext(IdentContext.class,0); + String ident = identContext.getText(); + if (!scope.isVariable(ident)) { + throw new ProgramCompileException(toLocation(scope, identContext), "Unknown variable reference '%s'", ident); + } + alias = assignAlias(ident, aliasContext, scope); + result = OperatorNode.create(toLocation(scope, dataSourceNode), SequenceOperator.EVALUATE, OperatorNode.create(toLocation(scope, dataSourceNode), ExpressionOperator.VARREF, ident)); + break; + } + case yqlplusParser.RULE_source_statement: { + alias = assignAlias(null, dataSourceNode, scope); + result = convertQuery(dataSourceNode, scope); + break; + } + default: + throw new 
IllegalArgumentException("Unexpected argument type to convertSource: " + dataSourceNode.getText()); + } + result.putAnnotation("alias", alias); + return result; + } + + private OperatorNode<TypeOperator> decodeType(Scope scope, TypenameContext type) { + + TypeOperator op; + ParseTree typeNode = type.getChild(0); + switch (getParseTreeIndex(typeNode)) { + case yqlplusParser.TYPE_BOOLEAN: + op = TypeOperator.BOOLEAN; + break; + case yqlplusParser.TYPE_BYTE: + op = TypeOperator.BYTE; + break; + case yqlplusParser.TYPE_DOUBLE: + op = TypeOperator.DOUBLE; + break; + case yqlplusParser.TYPE_INT16: + op = TypeOperator.INT16; + break; + case yqlplusParser.TYPE_INT32: + op = TypeOperator.INT32; + break; + case yqlplusParser.TYPE_INT64: + op = TypeOperator.INT64; + break; + case yqlplusParser.TYPE_STRING: + op = TypeOperator.STRING; + break; + case yqlplusParser.TYPE_TIMESTAMP: + op = TypeOperator.TIMESTAMP; + break; + case yqlplusParser.RULE_arrayType: + return OperatorNode.create(toLocation(scope, typeNode), TypeOperator.ARRAY, decodeType(scope, ((ArrayTypeContext)typeNode).getChild(TypenameContext.class, 0))); + case yqlplusParser.RULE_mapType: + return OperatorNode.create(toLocation(scope, typeNode), TypeOperator.MAP, decodeType(scope, ((MapTypeContext)typeNode).getChild(TypenameContext.class, 0))); + default: + throw new ProgramCompileException("Unknown type " + typeNode.getText()); + } + return OperatorNode.create(toLocation(scope, typeNode), op); + } + + private List<String> createBindingName(ParseTree node) { + if (node instanceof ModuleNameContext) { + if (((ModuleNameContext)node).namespaced_name() != null) { + return readName(((ModuleNameContext)node).namespaced_name()); + } else if (((ModuleNameContext)node).literalString() != null) { + return ImmutableList.of(((ModuleNameContext)node).literalString().STRING().getText()); + } + } else if (node instanceof ModuleIdContext) { + return ImmutableList.of(node.getText()); + } + throw new ProgramCompileException("Wrong 
context"); + } + + private OperatorNode<StatementOperator> convertProgram( + ParserRuleContext program, yqlplusParser parser, String programName) { + Scope scope = new Scope(parser, programName); + List<OperatorNode<StatementOperator>> stmts = Lists.newArrayList(); + int output = 0; + for (ParseTree node : program.children) { + if (!(node instanceof ParserRuleContext)) { + continue; + } + ParserRuleContext ruleContext = (ParserRuleContext) node; + switch (ruleContext.getRuleIndex()) { + case yqlplusParser.RULE_params: { + // ^(ARGUMENT ident typeref expression?) + ParamsContext paramsContext = (ParamsContext) ruleContext; + Program_arglistContext program_arglistContext = paramsContext.program_arglist(); + if (program_arglistContext != null) { + List<Procedure_argumentContext> argList = program_arglistContext.procedure_argument(); + for (Procedure_argumentContext procedureArgumentContext : argList) { + String name = procedureArgumentContext.ident().getText(); + OperatorNode<TypeOperator> type = decodeType(scope, procedureArgumentContext.getChild(TypenameContext.class, 0)); + OperatorNode<ExpressionOperator> defaultValue = OperatorNode.create(ExpressionOperator.NULL); + if (procedureArgumentContext.expression() != null) { + defaultValue = convertExpr(procedureArgumentContext.expression(), scope); + } + scope.defineVariable(toLocation(scope, procedureArgumentContext), name); + stmts.add(OperatorNode.create(StatementOperator.ARGUMENT, name, type, defaultValue)); + } + } + break; + } + case yqlplusParser.RULE_import_statement: { + Import_statementContext importContext = (Import_statementContext) ruleContext; + if (null == importContext.import_list()) { + List<String> name = createBindingName(node.getChild(1)); + String target; + Location location = toLocation(scope, node.getChild(1)); + if (node.getChildCount() == 2) { + target = name.get(0); + } else if (node.getChildCount() == 4) { + target = node.getChild(3).getText(); + } else { + throw new 
ProgramCompileException("Unknown node count for IMPORT: " + node.toStringTree()); + } + scope.bindModule(location, name, target); + } else { + // | FROM moduleName IMPORT import_list -> ^(IMPORT_FROM + // moduleName import_list+) + Import_listContext importListContext = importContext.import_list(); + List<String> name = createBindingName(importContext.moduleName()); + Location location = toLocation(scope, importContext.moduleName()); + List<ModuleIdContext> moduleIds = importListContext.moduleId(); + List<String> symbols = Lists.newArrayListWithExpectedSize(moduleIds.size()); + for (ModuleIdContext cnode : moduleIds) { + symbols.add(cnode.ID().getText()); + } + for (String sym : symbols) { + scope.bindModuleSymbol(location, name, sym, sym); + } + } + break; + } + + // DDL + case yqlplusParser.RULE_ddl: + ruleContext = (ParserRuleContext)ruleContext.getChild(0); + case yqlplusParser.RULE_view: { + // view and projection expansion now has to be done by the + // execution engine + // since views/projections, in order to be useful, have to + // support being used from outside the same program + ViewContext viewContext = (ViewContext) ruleContext; + Location loc = toLocation(scope, viewContext); + scope.getRoot().defineView(loc, viewContext.ID().getText()); + stmts.add(OperatorNode.create(loc, StatementOperator.DEFINE_VIEW, viewContext.ID().getText(), convertQuery(viewContext.source_statement(), scope.getRoot()))); + break; + } + case yqlplusParser.RULE_statement: { + // ^(STATEMENT_QUERY source_statement paged_clause? + // output_spec?) 
+ StatementContext statementContext = (StatementContext) ruleContext; + switch (getParseTreeIndex(ruleContext.getChild(0))) { + case yqlplusParser.RULE_selectvar_statement: { + // ^(STATEMENT_SELECTVAR ident source_statement) + Selectvar_statementContext selectVarContext = (Selectvar_statementContext) ruleContext.getChild(0); + String variable = selectVarContext.ident().getText(); + OperatorNode<SequenceOperator> query = convertQuery(selectVarContext.source_statement(), scope); + Location location = toLocation(scope, selectVarContext.ident()); + scope.defineVariable(location, variable); + stmts.add(OperatorNode.create(location, StatementOperator.EXECUTE, query, variable)); + break; + } + case yqlplusParser.RULE_next_statement: { + // NEXT^ literalString OUTPUT! AS! ident + Next_statementContext nextStateContext = (Next_statementContext) ruleContext.getChild(0); + String continuationValue = StringUnescaper.unquote(nextStateContext.literalString().getText()); + String variable = nextStateContext.ident().getText(); + Location location = toLocation(scope, node); + OperatorNode<SequenceOperator> next = OperatorNode.create(location, SequenceOperator.NEXT, continuationValue); + stmts.add(OperatorNode.create(location, StatementOperator.EXECUTE, next, variable)); + stmts.add(OperatorNode.create(location, StatementOperator.OUTPUT, variable)); + scope.defineVariable(location, variable); + break; + } + case yqlplusParser.RULE_output_statement: + Source_statementContext source_statement = statementContext.output_statement().source_statement(); + OperatorNode<SequenceOperator> query; + if (source_statement.getChildCount() == 1) { + query = convertQuery( source_statement.query_statement().getChild(0), scope); + } else { + query = convertQuery(source_statement, scope); + } + String variable = "result" + (++output); + boolean isCountVariable = false; + OperatorNode<ExpressionOperator> pageSize = null; + ParseTree outputStatement = node.getChild(0); + Location location = 
toLocation(scope, outputStatement); + for (int i = 1; i < outputStatement.getChildCount(); ++i) { + ParseTree child = outputStatement.getChild(i); + switch (getParseTreeIndex(child)) { + case yqlplusParser.RULE_paged_clause: + Paged_clauseContext pagedContext = (Paged_clauseContext) child; + pageSize = convertExpr(pagedContext.fixed_or_parameter(), scope); + break; + case yqlplusParser.RULE_output_spec: + Output_specContext outputSpecContext = (Output_specContext) child; + variable = outputSpecContext.ident().getText(); + if (outputSpecContext.COUNT() != null) { + isCountVariable = true; + } + break; + default: + throw new ProgramCompileException( "Unknown statement attribute: " + child.toStringTree()); + } + } + scope.defineVariable(location, variable); + if (pageSize != null) { + query = OperatorNode.create(SequenceOperator.PAGE, query, pageSize); + } + stmts.add(OperatorNode.create(location, StatementOperator.EXECUTE, query, variable)); + stmts.add(OperatorNode.create(location, isCountVariable ? 
StatementOperator.COUNT:StatementOperator.OUTPUT, variable)); + } + break; + } + default: + throw new ProgramCompileException("Unknown program element: " + node.getText()); + } + } + // traverse the tree, find all of the namespaced calls not covered by + // imports so we can + // define "implicit" import statements for them (to make engine + // implementation easier) + return OperatorNode.create(StatementOperator.PROGRAM, stmts); + } + + private OperatorNode<SortOperator> convertSortKey(Orderby_fieldContext node, Scope scope) { + TerminalNode descDef = node.DESC(); + OperatorNode<ExpressionOperator> exprNode = convertExpr(node.expression(), scope); + if (descDef != null ) { + return OperatorNode.create(toLocation(scope, descDef), SortOperator.DESC, exprNode); + } else { + return OperatorNode.create(toLocation(scope, node), SortOperator.ASC, exprNode); + } + } + + private ProjectionBuilder readProjection(List<Field_defContext> fieldDefs, Scope scope) { + if (null == fieldDefs) + throw new ProgramCompileException("Null fieldDefs"); + ProjectionBuilder proj = new ProjectionBuilder(); + for (Field_defContext rulenode : fieldDefs) { + // FIELD + // expression alias_def? 
+ OperatorNode<ExpressionOperator> expr = convertExpr((ExpressionContext)rulenode.getChild(0), scope); + + String aliasName = null; + if (rulenode.getChildCount() > 1) { + // ^(ALIAS ID) + aliasName = rulenode.alias_def().ID().getText(); + } + proj.addField(aliasName, expr); + // no grammar for the other rule types at this time + } + return proj; + } + + public static int getParseTreeIndex(ParseTree parseTree) { + if (parseTree instanceof TerminalNode) { + return ((TerminalNode)parseTree).getSymbol().getType(); + } else { + return ((RuleNode)parseTree).getRuleContext().getRuleIndex(); + } + } + + public OperatorNode<ExpressionOperator> convertExpr(ParseTree parseTree, + Scope scope) { + switch (getParseTreeIndex(parseTree)) { + case yqlplusParser.RULE_vespa_grouping: { + ParseTree firstChild = parseTree.getChild(0); + if (getParseTreeIndex(firstChild) == yqlplusParser.RULE_annotation) { + ParseTree secondChild = parseTree.getChild(1); + OperatorNode<ExpressionOperator> annotation = convertExpr(((AnnotationContext) firstChild) + .constantMapExpression(), scope); + OperatorNode<ExpressionOperator> expr = OperatorNode.create(toLocation(scope, secondChild), + ExpressionOperator.VESPA_GROUPING, secondChild.getText()); + List<String> names = (List<String>) annotation.getArgument(0); + List<OperatorNode<ExpressionOperator>> annotates = (List<OperatorNode<ExpressionOperator>>) annotation + .getArgument(1); + for (int i = 0; i < names.size(); ++i) { + expr.putAnnotation(names.get(i), readConstantExpression(annotates.get(i))); + } + return expr; + } else { + return OperatorNode.create(toLocation(scope, firstChild), ExpressionOperator.VESPA_GROUPING, + firstChild.getText()); + } + } + case yqlplusParser.RULE_nullOperator: + return OperatorNode.create(ExpressionOperator.NULL); + case yqlplusParser.RULE_argument: + return convertExpr(parseTree.getChild(0), scope); + case yqlplusParser.RULE_fixed_or_parameter: { + ParseTree firstChild = parseTree.getChild(0); + if 
(getParseTreeIndex(firstChild) == yqlplusParser.INT) { + return OperatorNode.create(toLocation(scope, firstChild), ExpressionOperator.LITERAL, new Integer(firstChild.getText())); + } else { + return convertExpr(firstChild, scope); + } + } + case yqlplusParser.RULE_constantMapExpression: { + List<ConstantPropertyNameAndValueContext> propertyList = ((ConstantMapExpressionContext) parseTree).constantPropertyNameAndValue(); + List<String> names = Lists.newArrayListWithExpectedSize(propertyList.size()); + List<OperatorNode<ExpressionOperator>> exprs = Lists.newArrayListWithExpectedSize(propertyList.size()); + for (ConstantPropertyNameAndValueContext child : propertyList) { + // : propertyName ':' expression[$expression::namespace] -> + // ^(PROPERTY propertyName expression) + names.add(StringUnescaper.unquote(child.getChild(0).getText())); + exprs.add(convertExpr(child.getChild(2), scope)); + } + return OperatorNode.create(toLocation(scope, parseTree),ExpressionOperator.MAP, names, exprs); + } + case yqlplusParser.RULE_mapExpression: { + List<PropertyNameAndValueContext> propertyList = ((MapExpressionContext)parseTree).propertyNameAndValue(); + List<String> names = Lists.newArrayListWithExpectedSize(propertyList.size()); + List<OperatorNode<ExpressionOperator>> exprs = Lists.newArrayListWithCapacity(propertyList.size()); + for (PropertyNameAndValueContext child : propertyList) { + // : propertyName ':' expression[$expression::namespace] -> + // ^(PROPERTY propertyName expression) + names.add(StringUnescaper.unquote(child.getChild(0).getText())); + exprs.add(convertExpr(child.getChild(2), scope)); + } + return OperatorNode.create(toLocation(scope, parseTree),ExpressionOperator.MAP, names, exprs); + } + case yqlplusParser.RULE_constantArray: { + List<ConstantExpressionContext> expressionList = ((ConstantArrayContext)parseTree).constantExpression(); + List<OperatorNode<ExpressionOperator>> values = Lists.newArrayListWithExpectedSize(expressionList.size()); + for 
(ConstantExpressionContext expr : expressionList) { + values.add(convertExpr(expr, scope)); + } + return OperatorNode.create(toLocation(scope, expressionList.isEmpty()? parseTree:expressionList.get(0)), ExpressionOperator.ARRAY, values); + } + case yqlplusParser.RULE_arrayLiteral: { + List<ExpressionContext> expressionList = ((ArrayLiteralContext) parseTree).expression(); + List<OperatorNode<ExpressionOperator>> values = Lists.newArrayListWithExpectedSize(expressionList.size()); + for (ExpressionContext expr : expressionList) { + values.add(convertExpr(expr, scope)); + } + return OperatorNode.create(toLocation(scope, expressionList.isEmpty()? parseTree:expressionList.get(0)), ExpressionOperator.ARRAY, values); + } + //dereferencedExpression: primaryExpression(indexref[in_select]| propertyref)* + case yqlplusParser.RULE_dereferencedExpression: { + DereferencedExpressionContext dereferencedExpression = (DereferencedExpressionContext) parseTree; + Iterator<ParseTree> it = dereferencedExpression.children.iterator(); + OperatorNode<ExpressionOperator> result = convertExpr(it.next(), scope); + while (it.hasNext()) { + ParseTree defTree = it.next(); + if (getParseTreeIndex(defTree) == yqlplusParser.RULE_propertyref) { + //DOT nm=ID + result = OperatorNode.create(toLocation(scope, parseTree), ExpressionOperator.PROPREF, result, defTree.getChild(1).getText()); + } else { + //indexref + result = OperatorNode.create(toLocation(scope, parseTree), ExpressionOperator.INDEX, result, convertExpr(defTree.getChild(1), scope)); + } + } + return result; + } + case yqlplusParser.RULE_primaryExpression: { + // ^(CALL namespaced_name arguments) + ParseTree firstChild = parseTree.getChild(0); + switch (getParseTreeIndex(firstChild)) { + case yqlplusParser.RULE_fieldref: { + return convertExpr(firstChild, scope); + } + case yqlplusParser.RULE_callExpresion: { + List<ArgumentContext> args = ((ArgumentsContext) firstChild.getChild(1)).argument(); + List<OperatorNode<ExpressionOperator>> 
arguments = Lists.newArrayListWithExpectedSize(args.size()); + for (ArgumentContext argContext : args) { + arguments.add(convertExpr(argContext.expression(),scope)); + } + return OperatorNode.create(toLocation(scope, parseTree), ExpressionOperator.CALL, scope.resolvePath(readName((Namespaced_nameContext) firstChild.getChild(0))), arguments); + } + // TODO add processing this is not implemented in V3 + // case yqlplusParser.APPLY: + + case yqlplusParser.RULE_parameter: + // external variable reference + return OperatorNode.create(toLocation(scope, firstChild), ExpressionOperator.VARREF, firstChild.getChild(1).getText()); + case yqlplusParser.RULE_scalar_literal: + case yqlplusParser.RULE_arrayLiteral: + case yqlplusParser.RULE_mapExpression: + return convertExpr(firstChild, scope); + case yqlplusParser.LPAREN: + return convertExpr(parseTree.getChild(1), scope); + } + } + // TODO: Temporarily disable CAST - think through how types are named + // case yqlplusParser.CAST: { + // + // return new Cast() + // } + // return new CastExpression(payload); + case yqlplusParser.RULE_parameter: { + // external variable reference + ParserRuleContext parameterContext = (ParserRuleContext) parseTree; + IdentContext identContext = parameterContext.getRuleContext(IdentContext.class, 0); + return OperatorNode.create(toLocation(scope, identContext), ExpressionOperator.VARREF, identContext.getText()); + } + case yqlplusParser.RULE_annotateExpression: { + //annotation logicalORExpression + AnnotationContext annotateExpressionContext = ((AnnotateExpressionContext)parseTree).annotation(); + OperatorNode<ExpressionOperator> annotation = convertExpr(annotateExpressionContext.constantMapExpression(), scope); + OperatorNode<ExpressionOperator> expr = convertExpr(parseTree.getChild(1), scope); + List<String> names = (List<String>) annotation.getArgument(0); + List<OperatorNode<ExpressionOperator>> annotates = (List<OperatorNode<ExpressionOperator>>) annotation.getArgument(1); + for (int i = 0; 
i < names.size(); ++i) { + expr.putAnnotation(names.get(i), readConstantExpression(annotates.get(i))); + } + return expr; + } + case yqlplusParser.RULE_expression: { + return convertExpr(parseTree.getChild(0), scope); + } + case yqlplusParser.RULE_logicalANDExpression: + LogicalANDExpressionContext andExpressionContext = (LogicalANDExpressionContext) parseTree; + return readConjOp(ExpressionOperator.AND, andExpressionContext.equalityExpression(), scope); + case yqlplusParser.RULE_logicalORExpression: { + int childCount = parseTree.getChildCount(); + LogicalORExpressionContext logicalORExpressionContext = (LogicalORExpressionContext) parseTree; + if (childCount > 1) { + return readConjOrOp(ExpressionOperator.OR, logicalORExpressionContext, scope); + } else { + List<EqualityExpressionContext> equalityExpressionList = ((LogicalANDExpressionContext) parseTree.getChild(0)).equalityExpression(); + if (equalityExpressionList.size() > 1) { + return readConjOp(ExpressionOperator.AND, equalityExpressionList, scope); + } else { + return convertExpr(equalityExpressionList.get(0), scope); + } + } + } + case yqlplusParser.RULE_equalityExpression: { + EqualityExpressionContext equalityExpression = (EqualityExpressionContext) parseTree; + RelationalExpressionContext relationalExpressionContext = equalityExpression.relationalExpression(0); + OperatorNode<ExpressionOperator> expr = convertExpr(relationalExpressionContext, scope); + InNotInTargetContext inNotInTarget = equalityExpression.inNotInTarget(); + int childCount = equalityExpression.getChildCount(); + if (childCount == 1) { + return expr; + } + if (inNotInTarget != null) { + Literal_listContext literalListContext = inNotInTarget.literal_list(); + boolean isIN = equalityExpression.IN() != null; + if (literalListContext == null) { + Select_statementContext selectStatementContext = inNotInTarget.select_statement(); + OperatorNode<SequenceOperator> query = convertQuery(selectStatementContext, scope); + return 
OperatorNode.create(expr.getLocation(),isIN ? ExpressionOperator.IN_QUERY: ExpressionOperator.NOT_IN_QUERY, expr, query); + } else { + // we need to identify the type of the target; if it's a + // scalar we need to wrap it in a CREATE_ARRAY + // if it's already a CREATE ARRAY then it's fine, otherwise + // we need to know the variable type + // return readBinOp(node.getType() == yqlplusParser.IN ? + // ExpressionOperator.IN : ExpressionOperator.NOT_IN, node, + // scope); + return readBinOp(isIN ? ExpressionOperator.IN: ExpressionOperator.NOT_IN, equalityExpression.getChild(0), literalListContext, scope); + } + + } else { + ParseTree firstChild = equalityExpression.getChild(1); + if (equalityExpression.getChildCount() == 2) { + switch (getParseTreeIndex(firstChild)) { + case yqlplusParser.IS_NULL: + return readUnOp(ExpressionOperator.IS_NULL, relationalExpressionContext, scope); + case yqlplusParser.IS_NOT_NULL: + return readUnOp(ExpressionOperator.IS_NOT_NULL, relationalExpressionContext, scope); + } + } else { + switch (getParseTreeIndex(firstChild.getChild(0))) { + case yqlplusParser.EQ: + return readBinOp(ExpressionOperator.EQ, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.NEQ: + return readBinOp(ExpressionOperator.NEQ, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.LIKE: + return readBinOp(ExpressionOperator.LIKE, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.NOTLIKE: + return readBinOp(ExpressionOperator.NOT_LIKE, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.MATCHES: + return readBinOp(ExpressionOperator.MATCHES, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.NOTMATCHES: + return readBinOp(ExpressionOperator.NOT_MATCHES, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + case yqlplusParser.CONTAINS: + return 
readBinOp(ExpressionOperator.CONTAINS, equalityExpression.getChild(0), equalityExpression.getChild(2), scope); + } + } + + } + break; + } + case yqlplusParser.RULE_relationalExpression: { + RelationalExpressionContext relationalExpressionContext = (RelationalExpressionContext) parseTree; + RelationalOpContext opContext = relationalExpressionContext.relationalOp(); + if (opContext != null) { + switch (getParseTreeIndex(relationalExpressionContext.relationalOp().getChild(0))) { + case yqlplusParser.LT: + return readBinOp(ExpressionOperator.LT, parseTree, scope); + case yqlplusParser.LTEQ: + return readBinOp(ExpressionOperator.LTEQ, parseTree, scope); + case yqlplusParser.GT: + return readBinOp(ExpressionOperator.GT, parseTree, scope); + case yqlplusParser.GTEQ: + return readBinOp(ExpressionOperator.GTEQ, parseTree, scope); + } + } else { + return convertExpr(relationalExpressionContext.additiveExpression(0), scope); + } + } + break; + case yqlplusParser.RULE_additiveExpression: + case yqlplusParser.RULE_multiplicativeExpression: { + if (parseTree.getChildCount() > 1) { + String opStr = parseTree.getChild(1).getText(); + switch (opStr) { + case "+": + return readBinOp(ExpressionOperator.ADD, parseTree, scope); + case "-": + return readBinOp(ExpressionOperator.SUB, parseTree, scope); + case "/": + return readBinOp(ExpressionOperator.DIV, parseTree, scope); + case "*": + return readBinOp(ExpressionOperator.MULT, parseTree, scope); + case "%": + return readBinOp(ExpressionOperator.MOD, parseTree, scope); + default: + if (parseTree.getChild(0) instanceof UnaryExpressionContext) { + return convertExpr(parseTree.getChild(0), scope); + } else { + throw new ProgramCompileException(toLocation(scope, parseTree), "Unknown expression type: " + parseTree.toStringTree()); + } + } + } else { + if (parseTree.getChild(0) instanceof UnaryExpressionContext) { + return convertExpr(parseTree.getChild(0), scope); + } else if (parseTree.getChild(0) instanceof 
MultiplicativeExpressionContext) { + return convertExpr(parseTree.getChild(0), scope); + } else { + throw new ProgramCompileException(toLocation(scope, parseTree), "Unknown expression type: " + parseTree.getText()); + } + } + } + case yqlplusParser.RULE_unaryExpression: { + if (1 == parseTree.getChildCount()) { + return convertExpr(parseTree.getChild(0), scope); + } else if (2 == parseTree.getChildCount()) { + if ("-".equals(parseTree.getChild(0).getText())) { + return readUnOp(ExpressionOperator.NEGATE, parseTree, scope); + } else if ("!".equals(parseTree.getChild(0).getText())) { + return readUnOp(ExpressionOperator.NOT, parseTree, scope); + } + throw new ProgramCompileException(toLocation(scope, parseTree),"Unknown unary operator " + parseTree.getText()); + } else { + throw new ProgramCompileException(toLocation(scope, parseTree),"Unknown child count " + parseTree.getChildCount() + " of " + parseTree.getText()); + } + } + case yqlplusParser.RULE_fieldref: + case yqlplusParser.RULE_joinDereferencedExpression: { + // all in-scope data sources should be defined in scope + // the 'first' field in a namespaced reference must be: + // - a field name if (and only if) there is exactly one data source + // in scope OR + // - an alias name, which will be followed by a field name + // ^(FIELDREF<FieldReference>[$expression::namespace] + // namespaced_name) + List<String> path = readName((Namespaced_nameContext) parseTree.getChild(0)); + Location loc = toLocation(scope, parseTree.getChild(0)); + String alias = path.get(0); + OperatorNode<ExpressionOperator> result = null; + int start = 0; + if (scope.isCursor(alias)) { + if (path.size() > 1) { + result = OperatorNode.create(loc, ExpressionOperator.READ_FIELD, alias, path.get(1)); + start = 2; + } else { + result = OperatorNode.create(loc, ExpressionOperator.READ_RECORD, alias); + start = 1; + } + } else if (scope.isBound(alias)) { + return OperatorNode.create(loc, ExpressionOperator.READ_MODULE, 
scope.getBinding(alias).toPathWith(path.subList(1, path.size()))); + } else if (scope.getCursors().size() == 1) { + alias = scope.getCursors().iterator().next(); + result = OperatorNode.create(loc, ExpressionOperator.READ_FIELD, alias, path.get(0)); + start = 1; + } else { + // ah ha, we can't end up with a 'loose' UDF call because it + // won't be a module or known alias + // so we need not support implicit imports for constants used in + // UDFs + throw new ProgramCompileException(loc, "Unknown field or alias '%s'", alias); + } + for (int idx = start; idx < path.size(); ++idx) { + result = OperatorNode.create(loc, ExpressionOperator.PROPREF, result, path.get(idx)); + } + return result; + } + case yqlplusParser.RULE_scalar_literal: + return OperatorNode.create(toLocation(scope, parseTree), ExpressionOperator.LITERAL, convertLiteral((Scalar_literalContext) parseTree)); + case yqlplusParser.RULE_insert_values: + return readValues((Insert_valuesContext) parseTree, scope); + case yqlplusParser.RULE_constantExpression: + return convertExpr(parseTree.getChild(0), scope); + case yqlplusParser.RULE_literal_list: + if (getParseTreeIndex(parseTree.getChild(1)) == yqlplusParser.RULE_array_parameter) { + return convertExpr(parseTree.getChild(1), scope); + } else { + List<Literal_elementContext> elements = ((Literal_listContext) parseTree).literal_element(); + ParseTree firldElement = elements.get(0).getChild(0); + if (elements.size() == 1 && scope.getParser().isArrayParameter(firldElement)) { + return convertExpr(firldElement, scope); + } else { + List<OperatorNode<ExpressionOperator>> values = Lists.newArrayListWithExpectedSize(elements.size()); + for (Literal_elementContext child : elements) { + values.add(convertExpr(child.getChild(0), scope)); + } + return OperatorNode.create(toLocation(scope, elements.get(0)),ExpressionOperator.ARRAY, values); + } + } + } + throw new ProgramCompileException(toLocation(scope, parseTree), + "Unknown expression type: " + 
parseTree.getText()); + } + + public Object convertLiteral(Scalar_literalContext literal) { + int parseTreeIndex = getParseTreeIndex(literal.getChild(0)); + String text = literal.getChild(0).getText(); + switch(parseTreeIndex) { + case yqlplusParser.INT: + return new Integer(text); + case yqlplusParser.FLOAT: + return new Double(text); + case yqlplusParser.STRING: + return StringUnescaper.unquote(text); + case yqlplusParser.TRUE: + case yqlplusParser.FALSE: + return new Boolean(text); + case yqlplusParser.LONG_INT: + return Long.parseLong(text.substring(0, text.length()-1)); + default: + throw new ProgramCompileException("Unknow literal type " + text); + } + } + + private Object readConstantExpression(OperatorNode<ExpressionOperator> node) { + switch (node.getOperator()) { + case LITERAL: + return node.getArgument(0); + case MAP: { + ImmutableMap.Builder<String, Object> map = ImmutableMap.builder(); + List<String> names = (List<String>) node.getArgument(0); + List<OperatorNode<ExpressionOperator>> exprs = (List<OperatorNode<ExpressionOperator>>) node.getArgument(1); + for (int i = 0; i < names.size(); ++i) { + map.put(names.get(i), readConstantExpression(exprs.get(i))); + } + return map.build(); + } + case ARRAY: { + List<OperatorNode<ExpressionOperator>> exprs = (List<OperatorNode<ExpressionOperator>>) node.getArgument(0); + ImmutableList.Builder<Object> lst = ImmutableList.builder(); + for (OperatorNode<ExpressionOperator> expr : exprs) { + lst.add(readConstantExpression(expr)); + } + return lst.build(); + } + default: + throw new ProgramCompileException(node.getLocation(), "Internal error: Unknown constant expression type: " + node.getOperator()); + } + } + + private OperatorNode<ExpressionOperator> readBinOp(ExpressionOperator op, ParseTree node, Scope scope) { + assert node.getChildCount() == 3; + return OperatorNode.create(op, convertExpr(node.getChild(0), scope), convertExpr(node.getChild(2), scope)); + } + + private OperatorNode<ExpressionOperator> 
readBinOp(ExpressionOperator op, ParseTree operand1, ParseTree operand2, Scope scope) { + return OperatorNode.create(op, convertExpr(operand1, scope), convertExpr(operand2, scope)); + } + + private OperatorNode<ExpressionOperator> readConjOp(ExpressionOperator op, List<EqualityExpressionContext> nodes, Scope scope) { + List<OperatorNode<ExpressionOperator>> arguments = Lists.newArrayListWithExpectedSize(nodes.size()); + for (ParseTree child : nodes) { + arguments.add(convertExpr(child, scope)); + } + return OperatorNode.create(op, arguments); + } + + private OperatorNode<ExpressionOperator> readConjOrOp(ExpressionOperator op, LogicalORExpressionContext node, Scope scope) { + List<LogicalANDExpressionContext> andExpressionList = node.logicalANDExpression(); + List<OperatorNode<ExpressionOperator>> arguments = Lists.newArrayListWithExpectedSize(andExpressionList.size()); + for (LogicalANDExpressionContext child : andExpressionList) { + List<EqualityExpressionContext> equalities = child.equalityExpression(); + if (equalities.size() == 1) { + arguments.add(convertExpr(equalities.get(0), scope)); + } else { + List<OperatorNode<ExpressionOperator>> andArguments = Lists.newArrayListWithExpectedSize(equalities.size()); + for (EqualityExpressionContext subTreeChild:equalities) { + andArguments.add(convertExpr(subTreeChild, scope)); + } + arguments.add(OperatorNode.create(ExpressionOperator.AND, andArguments)); + } + + } + return OperatorNode.create(op, arguments); + } + + // (IS_NULL | IS_NOT_NULL) + // unaryExpression + private OperatorNode<ExpressionOperator> readUnOp(ExpressionOperator op, ParseTree node, Scope scope) { + assert (node instanceof TerminalNode) || (node.getChildCount() == 1) || (node instanceof UnaryExpressionContext); + if (node instanceof TerminalNode) { + return OperatorNode.create(op, convertExpr(node, scope)); + } else if (node.getChildCount() == 1) { + return OperatorNode.create(op, convertExpr(node.getChild(0), scope)); + } else { + return 
OperatorNode.create(op, convertExpr(node.getChild(1), scope)); + } + } + + private OperatorNode<ExpressionOperator> readValues(Field_names_specContext nameDefs, Field_values_specContext values, Scope scope) { + List<Field_defContext> fieldDefs = nameDefs.field_def(); + List<ExpressionContext> valueDefs = values.expression(); + assert fieldDefs.size() == valueDefs.size(); + List<String> fieldNames; + List<OperatorNode<ExpressionOperator>> fieldValues; + int numPairs = fieldDefs.size(); + fieldNames = Lists.newArrayListWithExpectedSize(numPairs); + fieldValues = Lists.newArrayListWithExpectedSize(numPairs); + for (int i = 0; i < numPairs; i++) { + fieldNames.add((String) convertExpr(fieldDefs.get(i).expression(), scope).getArgument(1)); + fieldValues.add(convertExpr(valueDefs.get(i), scope)); + } + return OperatorNode.create(ExpressionOperator.MAP, fieldNames, fieldValues); + } + + private OperatorNode<ExpressionOperator> readValues(ParserRuleContext node, Scope scope) { + List<String> fieldNames; + List<OperatorNode<ExpressionOperator>> fieldValues; + if (node.getRuleIndex() == yqlplusParser.RULE_field_def) { + Field_defContext fieldDefContext = (Field_defContext)node; + //TODO double check + fieldNames = Lists.newArrayListWithExpectedSize(node.getChildCount()); + fieldValues = Lists.newArrayListWithExpectedSize(node.getChildCount()); + for (int i = 0; i < node.getChildCount(); i++) { + fieldNames.add((String) convertExpr(node.getChild(i).getChild(0).getChild(0), scope).getArgument(1)); + fieldValues.add(convertExpr(node.getChild(i).getChild(0).getChild(1), scope)); + } + } else { + assert node.getChildCount() % 2 == 0; + int numPairs = node.getChildCount() / 2; + fieldNames = Lists.newArrayListWithExpectedSize(numPairs); + fieldValues = Lists.newArrayListWithExpectedSize(numPairs); + for (int i = 0; i < numPairs; i++) { + fieldNames.add((String) convertExpr(node.getChild(i).getChild(0), scope).getArgument(1)); + fieldValues.add(convertExpr(node.getChild(numPairs + 
i), scope)); + } + } + return OperatorNode.create(ExpressionOperator.MAP, fieldNames, fieldValues); + } + + /* + * Converts node list + * + * a_name, b_name, c_name, a_value_1, b_value_1, c_value_1, a_value_2, b_value_2, c_value2, a_value_3, b_value_3, c_value_3 + * + * into corresponding constant sequence: + * + * [ { a_name : a_value_1, b_name : b_value_1, c_name : c_value_1 }, ... ] + * + */ + private OperatorNode<SequenceOperator> readBatchValues(Field_names_specContext nameDefs, List<Field_values_group_specContext> valueGroups, Scope scope) { + List<Field_defContext> nameContexts = nameDefs.field_def(); + List<String> fieldNames = Lists.newArrayList(); + for (Field_defContext nameContext:nameContexts) { + fieldNames.add((String) convertExpr(nameContext.getChild(0), scope).getArgument(1)); + } + List<OperatorNode> records = Lists.newArrayList(); + for (Field_values_group_specContext valueGorup:valueGroups) { + List<ExpressionContext> expressionList = valueGorup.expression(); + List<OperatorNode<ExpressionOperator>> fieldValues = Lists.newArrayListWithExpectedSize(expressionList.size()); + for (ExpressionContext expressionContext:expressionList) { + fieldValues.add(convertExpr(expressionContext, scope)); + } + records.add(OperatorNode.create(ExpressionOperator.MAP, fieldNames, fieldValues)); + } + // Return constant sequence of records with the given name/values + return OperatorNode.create(SequenceOperator.EVALUATE, OperatorNode.create(ExpressionOperator.ARRAY, records)); + } + + /* + * Scans the given node for READ_FIELD expressions. 
+ * + * TODO: Search recursively and consider additional operators + * + * @param in the node to scan + * @return list of READ_FIELD expressions + */ + private List<OperatorNode<ExpressionOperator>> getReadFieldExpressions(OperatorNode<ExpressionOperator> in) { + List<OperatorNode<ExpressionOperator>> readFieldList = Lists.newArrayList(); + switch (in.getOperator()) { + case READ_FIELD: + readFieldList.add(in); + break; + case CALL: + List<OperatorNode<ExpressionOperator>> callArgs = in.getArgument(1); + for (OperatorNode<ExpressionOperator> callArg : callArgs) { + if (callArg.getOperator() == ExpressionOperator.READ_FIELD) { + readFieldList.add(callArg); + } + } + break; + } + return readFieldList; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ProjectOperator.java b/container-search/src/main/java/com/yahoo/search/yql/ProjectOperator.java new file mode 100644 index 00000000000..16ecc4c4077 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ProjectOperator.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; + +/** + * Represents a projection command which affects the output record. + */ +enum ProjectOperator implements Operator { + + FIELD(ExpressionOperator.class, String.class), // FIELD expr name + RECORD(ExpressionOperator.class, String.class), // RECORD expr name + MERGE_RECORD(String.class); // MERGE_RECORD name (alias of record to merge) + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof ProjectOperator; + } + }; + + private ProjectOperator(Object... 
types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ProjectionBuilder.java b/container-search/src/main/java/com/yahoo/search/yql/ProjectionBuilder.java new file mode 100644 index 00000000000..109d1cd654b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/ProjectionBuilder.java @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import java.util.Map; +import java.util.Set; + +class ProjectionBuilder { + + private Map<String, OperatorNode<ExpressionOperator>> fields = Maps.newLinkedHashMap(); + private Set<String> aliasNames = Sets.newHashSet(); + + public void addField(String name, OperatorNode<ExpressionOperator> expr) { + String aliasName = name; + if (name == null) { + name = assignName(expr); + } + if (fields.containsKey(name)) { + throw new ProgramCompileException(expr.getLocation(), "Field alias '%s' already defined", name); + } + fields.put(name, expr); + if (aliasName != null) { + // Store use + aliasNames.add(aliasName); + } + } + + public boolean isAlias(String name) { + return aliasNames.contains(name); + } + + private String assignName(OperatorNode<ExpressionOperator> expr) { + String baseName = "expr"; + switch (expr.getOperator()) { + case PROPREF: + baseName = (String) expr.getArgument(1); + break; + case READ_RECORD: + baseName = (String) expr.getArgument(0); + break; + case READ_FIELD: + baseName = (String) expr.getArgument(1); + break; + case VARREF: + baseName = (String) expr.getArgument(0); + break; + // fall through, leaving baseName alone + } + int c = 0; + String candidate = baseName; + while 
(fields.containsKey(candidate)) { + candidate = baseName + (++c); + } + return candidate; + } + + public OperatorNode<SequenceOperator> make(OperatorNode<SequenceOperator> target) { + ImmutableList.Builder<OperatorNode<ProjectOperator>> lst = ImmutableList.builder(); + for (Map.Entry<String, OperatorNode<ExpressionOperator>> e : fields.entrySet()) { + if (e.getKey().startsWith("*")) { + lst.add(OperatorNode.create(ProjectOperator.MERGE_RECORD, e.getValue().getArgument(0))); + } else if (e.getValue().getOperator() == ExpressionOperator.READ_RECORD) { + lst.add(OperatorNode.create(ProjectOperator.RECORD, e.getValue(), e.getKey())); + } else { + lst.add(OperatorNode.create(ProjectOperator.FIELD, e.getValue(), e.getKey())); + } + } + return OperatorNode.create(SequenceOperator.PROJECT, target, lst.build()); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/SequenceOperator.java b/container-search/src/main/java/com/yahoo/search/yql/SequenceOperator.java new file mode 100644 index 00000000000..65d1e039e10 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/SequenceOperator.java @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; +import com.google.inject.TypeLiteral; + +import java.util.List; + +/** + * Logical sequence operators represent a logical description of a "source" (query against data stores + pipes), representing + * a source_expression in the grammar. 
+ */ +enum SequenceOperator implements Operator { + + SCAN(TypeCheckers.LIST_OF_STRING, TypeCheckers.EXPRS), // scan a named data source (with optional arguments) + /** + * INSERT(target-sequence, input-records) + */ + INSERT(SequenceOperator.class, SequenceOperator.class), + UPDATE(SequenceOperator.class, ExpressionOperator.MAP, ExpressionOperator.class), + UPDATE_ALL(SequenceOperator.class, ExpressionOperator.MAP), + DELETE(SequenceOperator.class, ExpressionOperator.class), + DELETE_ALL(SequenceOperator.class), + EMPTY(), // emits a single, empty row + // evaluate the given expression and use the result as a sequence + EVALUATE(ExpressionOperator.class), + NEXT(String.class), + + PROJECT(SequenceOperator.class, new TypeLiteral<List<OperatorNode<ProjectOperator>>>() { + }), // transform a sequence into a new schema + FILTER(SequenceOperator.class, ExpressionOperator.class), // filter a sequence by an expression + SORT(SequenceOperator.class, new TypeLiteral<List<OperatorNode<SortOperator>>>() { + }), // sort a sequence + PIPE(SequenceOperator.class, TypeCheckers.LIST_OF_STRING, TypeCheckers.EXPRS), // pipe from one source through a named transformation + LIMIT(SequenceOperator.class, ExpressionOperator.class), + OFFSET(SequenceOperator.class, ExpressionOperator.class), + SLICE(SequenceOperator.class, ExpressionOperator.class, ExpressionOperator.class), + MERGE(TypeCheckers.SEQUENCES), + JOIN(SequenceOperator.class, SequenceOperator.class, ExpressionOperator.class), // combine two (or more, in the case of MERGE) sequences to produce a new sequence + LEFT_JOIN(SequenceOperator.class, SequenceOperator.class, ExpressionOperator.class), + + FALLBACK(SequenceOperator.class, SequenceOperator.class), + + TIMEOUT(SequenceOperator.class, ExpressionOperator.class), + PAGE(SequenceOperator.class, ExpressionOperator.class), + ALL(), + MULTISOURCE(TypeCheckers.LIST_OF_LIST_OF_STRING); + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? 
extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof SequenceOperator; + } + }; + + private SequenceOperator(Object... types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/SortOperator.java b/container-search/src/main/java/com/yahoo/search/yql/SortOperator.java new file mode 100644 index 00000000000..db03f787524 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/SortOperator.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; + +/** + * Represents a sort argument. ORDER BY foo; → (ASC foo) + */ +enum SortOperator implements Operator { + + ASC(ExpressionOperator.class), + DESC(ExpressionOperator.class); + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof SortOperator; + } + }; + + private SortOperator(Object... types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/StatementOperator.java b/container-search/src/main/java/com/yahoo/search/yql/StatementOperator.java new file mode 100644 index 00000000000..f25212e1098 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/StatementOperator.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; +import com.google.inject.TypeLiteral; + +import java.util.List; + +/** + * Represents program statements. + */ +enum StatementOperator implements Operator { + + PROGRAM(new TypeLiteral<List<OperatorNode<StatementOperator>>>() { + }), + ARGUMENT(String.class, TypeOperator.class, ExpressionOperator.class), + DEFINE_VIEW(String.class, SequenceOperator.class), + EXECUTE(SequenceOperator.class, String.class), + OUTPUT(String.class), + COUNT(String.class); + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof StatementOperator; + } + }; + + private StatementOperator(Object... types) { + checker = TypeCheckers.make(this, types); + } + + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java b/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java new file mode 100644 index 00000000000..76d81429ab3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/StringUnescaper.java @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Converts between quoted YQL string tokens and the raw string values they denote.
 *
 * <p>{@link #unquote(String)} strips the surrounding quotes from a token and resolves
 * backslash escapes; {@link #escape(String)} wraps a raw value in single quotes and
 * escapes embedded single quotes.</p>
 */
class StringUnescaper {

    /** Returns true if {@code point} is inside {@code v} and the character there is an octal digit. */
    private static boolean lookaheadOctal(String v, int point) {
        return point < v.length() && "01234567".indexOf(v.charAt(point)) != -1;
    }

    /**
     * Returns true if the token is wrapped in a matching pair of single or double quotes.
     * At least two characters are required, so a lone quote character is not mistaken for
     * an (empty) quoted string.
     */
    private static boolean isQuoted(String token) {
        if (token == null || token.length() < 2) {
            return false;
        }
        return token.startsWith("'") && token.endsWith("'")
                || token.startsWith("\"") && token.endsWith("\"");
    }

    /**
     * Maps the character following a backslash to the character it stands for, covering the
     * single-character escapes only.
     *
     * @return the replacement character, or -1 if {@code code} is not a single-character escape
     */
    private static int simpleEscape(char code) {
        switch (code) {
            case 'b':
                return '\b';
            case 't':
                return '\t';
            case 'n':
                return '\n';
            case 'f':
                return '\f';
            case 'r':
                return '\r';
            case '"':
            case '\'':
            case '\\':
            case '/':
                return code;
            default:
                return -1;
        }
    }

    /**
     * Removes the surrounding quotes from a string token and resolves its escape sequences.
     *
     * <p>Supported escapes are the single-character ones (b, t, n, f, r, both quote
     * characters, backslash and slash), a backslash followed by 'u' and four hex digits,
     * and octal escapes of one to three digits.</p>
     *
     * @param token the token text, possibly {@code null}
     * @return the unescaped value; {@code token} itself (unchanged) when it is {@code null}
     *         or not wrapped in a matching pair of quotes
     * @throws IllegalArgumentException if the token contains a malformed escape sequence,
     *         including a dangling backslash or a truncated or non-hex unicode escape
     */
    public static String unquote(String token) {
        if (!isQuoted(token)) {
            return token;
        }
        // remove quotes from around string and unescape it
        String value = token.substring(1, token.length() - 1);
        // first quickly check to see if \ is present -- if not then there's no escaping and we're done
        int idx = value.indexOf('\\');
        if (idx == -1) {
            return value;
        }
        // the output is never longer than the input, since every escape shrinks to one character
        StringBuilder result = new StringBuilder(value.length());
        int start = 0;
        while (idx != -1) {
            result.append(value, start, idx);
            start = idx + 1;
            if (start >= value.length()) {
                // a backslash with nothing after it
                throw new IllegalArgumentException("Unknown escape sequence in token: " + token);
            }
            char code = value.charAt(start);
            int simple = simpleEscape(code);
            if (simple != -1) {
                result.append((char) simple);
                ++start;
            } else if (code == 'u') {
                // exactly four hex digits follow the 'u'
                ++start;
                if (start + 4 > value.length()) {
                    throw new IllegalArgumentException("Unknown escape sequence in token: " + token);
                }
                result.append((char) Integer.parseInt(value.substring(start, start + 4), 16));
                start += 4;
            } else if (lookaheadOctal(value, start)) {
                // octal escape: greedily take up to three consecutive octal digits
                int end = start + 1;
                while (end < start + 3 && lookaheadOctal(value, end)) {
                    ++end;
                }
                result.append((char) Integer.parseInt(value.substring(start, end), 8));
                start = end;
            } else {
                // the lexer should be ensuring there are no malformed escapes here, so we'll just blow up
                throw new IllegalArgumentException("Unknown escape sequence in token: " + token);
            }
            idx = value.indexOf('\\', start);
        }
        result.append(value, start, value.length());
        return result.toString();
    }

    /**
     * Wraps a raw value in single quotes, escaping every embedded single quote with a backslash.
     *
     * <p>Only the single quote is escaped; other characters, including backslashes, are
     * copied through verbatim.</p>
     *
     * @param value the raw value, must not be {@code null}
     * @return the single-quoted form of {@code value}
     */
    public static String escape(String value) {
        StringBuilder result = new StringBuilder(value.length() + 5);
        result.append('\'');
        int start = 0;
        // Each iteration must look for the NEXT single quote; searching for a backslash here
        // would leave later quotes unescaped and corrupt embedded backslashes.
        for (int idx = value.indexOf('\''); idx != -1; idx = value.indexOf('\'', start)) {
            result.append(value, start, idx).append("\\'");
            start = idx + 1;
        }
        result.append(value, start, value.length());
        result.append('\'');
        return result.toString();
    }

}
+package com.yahoo.search.yql; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.inject.TypeLiteral; + +import java.lang.reflect.ParameterizedType; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +final class TypeCheckers { + + public static final TypeLiteral<List<String>> LIST_OF_STRING = new TypeLiteral<List<String>>() { + }; + public static final TypeLiteral<List<List<String>>> LIST_OF_LIST_OF_STRING = new TypeLiteral<List<List<String>>>() { + }; + public static final TypeLiteral<List<OperatorNode<SequenceOperator>>> SEQUENCES = new TypeLiteral<List<OperatorNode<SequenceOperator>>>() { + }; + public static final TypeLiteral<List<OperatorNode<ExpressionOperator>>> EXPRS = new TypeLiteral<List<OperatorNode<ExpressionOperator>>>() { + }; + public static final TypeLiteral<List<List<OperatorNode<ExpressionOperator>>>> LIST_OF_EXPRS = new TypeLiteral<List<List<OperatorNode<ExpressionOperator>>>>() { + }; + public static final ImmutableSet<Class<?>> LITERAL_TYPES = ImmutableSet.<Class<?>>builder() + .add(String.class) + .add(Integer.class) + .add(Double.class) + .add(Boolean.class) + .add(Float.class) + .add(Byte.class) + .add(Long.class) + .add(List.class) + .add(Map.class) + .build(); + + private TypeCheckers() { + } + + public static ArgumentsTypeChecker make(Operator target, Object... 
types) { + // Class<?> extends Operator -> NodeTypeChecker + if (types == null) { + types = new Object[0]; + } + List<OperatorTypeChecker> checkers = Lists.newArrayListWithCapacity(types.length); + for (int i = 0; i < types.length; ++i) { + checkers.add(createChecker(target, i, types[i])); + } + return new ArgumentsTypeChecker(target, checkers); + } + + // this is festooned with instance checkes before all the casting + @SuppressWarnings("unchecked") + private static OperatorTypeChecker createChecker(Operator parent, int idx, Object value) { + if (value instanceof TypeLiteral) { + TypeLiteral<?> lit = (TypeLiteral<?>) value; + Class<?> raw = lit.getRawType(); + if (List.class.isAssignableFrom(raw)) { + Preconditions.checkArgument(lit.getType() instanceof ParameterizedType, "TypeLiteral without a ParameterizedType for List"); + ParameterizedType type = (ParameterizedType) lit.getType(); + TypeLiteral<?> arg = TypeLiteral.get(type.getActualTypeArguments()[0]); + if (OperatorNode.class.isAssignableFrom(arg.getRawType())) { + Preconditions.checkArgument(arg.getType() instanceof ParameterizedType, "Type spec must be List<OperatorNode<?>>"); + Class<? extends Operator> optype = (Class<? extends Operator>) TypeLiteral.get(((ParameterizedType) arg.getType()).getActualTypeArguments()[0]).getRawType(); + return new OperatorNodeListTypeChecker(parent, idx, optype, ImmutableSet.<Operator>of()); + } else { + return new JavaListTypeChecker(parent, idx, arg.getRawType()); + } + } + throw new IllegalArgumentException("don't know how to handle TypeLiteral " + value); + } + if (value instanceof Class) { + Class<?> clazz = (Class<?>) value; + if (Operator.class.isAssignableFrom(clazz)) { + return new NodeTypeChecker(parent, idx, (Class<? extends Operator>) clazz, ImmutableSet.<Operator>of()); + } else { + return new JavaTypeChecker(parent, idx, clazz); + } + } else if (value instanceof Operator) { + Operator operator = (Operator) value; + Class<? 
extends Operator> clazz = operator.getClass(); + Set<? extends Operator> allowed; + if (Enum.class.isInstance(value)) { + Class<? extends Enum> enumClazz = (Class<? extends Enum>) clazz; + allowed = (Set<? extends Operator>) EnumSet.of(enumClazz.cast(value)); + } else { + allowed = ImmutableSet.of(operator); + } + return new NodeTypeChecker(parent, idx, clazz, allowed); + } else if (value instanceof EnumSet) { + EnumSet<?> v = (EnumSet<?>) value; + Enum elt = Iterables.get(v, 0); + if (elt instanceof Operator) { + Class<? extends Operator> opclass = (Class<? extends Operator>) elt.getClass(); + Set<? extends Operator> allowed = (Set<? extends Operator>) v; + return new NodeTypeChecker(parent, idx, opclass, allowed); + } + } else if (value instanceof Set) { + // Set<Class<?>> + return new JavaUnionTypeChecker(parent, idx, (Set<Class<?>>) value); + } + throw new IllegalArgumentException("I don't know how to create a checker from " + value); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/TypeOperator.java b/container-search/src/main/java/com/yahoo/search/yql/TypeOperator.java new file mode 100644 index 00000000000..01b1f88cc5e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/TypeOperator.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.google.common.base.Predicate; + +enum TypeOperator implements Operator { + + BYTE, + INT16, + INT32, + INT64, + STRING, + DOUBLE, + TIMESTAMP, + BOOLEAN, + ARRAY(TypeOperator.class), + MAP(TypeOperator.class); + + private final ArgumentsTypeChecker checker; + + public static Predicate<OperatorNode<? extends Operator>> IS = new Predicate<OperatorNode<? extends Operator>>() { + @Override + public boolean apply(OperatorNode<? extends Operator> input) { + return input.getOperator() instanceof TypeOperator; + } + }; + + TypeOperator(Object... 
types) { + checker = TypeCheckers.make(this, types); + } + + @Override + public void checkArguments(Object... args) { + checker.check(args); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaGroupingStep.java b/container-search/src/main/java/com/yahoo/search/yql/VespaGroupingStep.java new file mode 100644 index 00000000000..520728dc231 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaGroupingStep.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.grouping.request.GroupingOperation; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a> + */ +public class VespaGroupingStep { + + private final GroupingOperation operation; + private final List<Continuation> continuations = new ArrayList<>(); + + public VespaGroupingStep(GroupingOperation operation) { + this.operation = operation; + } + + public GroupingOperation getOperation() { + return operation; + } + + public List<Continuation> continuations() { + return continuations; + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java new file mode 100644 index 00000000000..397225a087c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java @@ -0,0 +1,1381 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import static com.yahoo.search.yql.YqlParser.ACCENT_DROP; +import static com.yahoo.search.yql.YqlParser.ALTERNATIVES; +import static com.yahoo.search.yql.YqlParser.AND_SEGMENTING; +import static com.yahoo.search.yql.YqlParser.BOUNDS; +import static com.yahoo.search.yql.YqlParser.BOUNDS_LEFT_OPEN; +import static com.yahoo.search.yql.YqlParser.BOUNDS_OPEN; +import static com.yahoo.search.yql.YqlParser.BOUNDS_RIGHT_OPEN; +import static com.yahoo.search.yql.YqlParser.CONNECTION_ID; +import static com.yahoo.search.yql.YqlParser.CONNECTION_WEIGHT; +import static com.yahoo.search.yql.YqlParser.CONNECTIVITY; +import static com.yahoo.search.yql.YqlParser.DISTANCE; +import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT; +import static com.yahoo.search.yql.YqlParser.EQUIV; +import static com.yahoo.search.yql.YqlParser.FILTER; +import static com.yahoo.search.yql.YqlParser.HIT_LIMIT; +import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS; +import static com.yahoo.search.yql.YqlParser.LABEL; +import static com.yahoo.search.yql.YqlParser.NEAR; +import static com.yahoo.search.yql.YqlParser.NORMALIZE_CASE; +import static com.yahoo.search.yql.YqlParser.ONEAR; +import static com.yahoo.search.yql.YqlParser.ORIGIN; +import static com.yahoo.search.yql.YqlParser.ORIGIN_LENGTH; +import static com.yahoo.search.yql.YqlParser.ORIGIN_OFFSET; +import static com.yahoo.search.yql.YqlParser.ORIGIN_ORIGINAL; +import static com.yahoo.search.yql.YqlParser.PHRASE; +import static com.yahoo.search.yql.YqlParser.PREFIX; +import static com.yahoo.search.yql.YqlParser.RANGE; +import static com.yahoo.search.yql.YqlParser.RANK; +import static com.yahoo.search.yql.YqlParser.RANKED; +import static com.yahoo.search.yql.YqlParser.SCORE_THRESHOLD; +import static com.yahoo.search.yql.YqlParser.SIGNIFICANCE; +import static com.yahoo.search.yql.YqlParser.STEM; +import static com.yahoo.search.yql.YqlParser.SUBSTRING; +import static com.yahoo.search.yql.YqlParser.SUFFIX; 
+import static com.yahoo.search.yql.YqlParser.TARGET_NUM_HITS; +import static com.yahoo.search.yql.YqlParser.THRESHOLD_BOOST_FACTOR; +import static com.yahoo.search.yql.YqlParser.UNIQUE_ID; +import static com.yahoo.search.yql.YqlParser.USE_POSITION_DATA; +import static com.yahoo.search.yql.YqlParser.WAND; +import static com.yahoo.search.yql.YqlParser.WEAK_AND; +import static com.yahoo.search.yql.YqlParser.WEIGHT; +import static com.yahoo.search.yql.YqlParser.WEIGHTED_SET; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Deque; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Map.Entry; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.AndSegmentItem; +import com.yahoo.prelude.query.DotProductItem; +import com.yahoo.prelude.query.EquivItem; +import com.yahoo.prelude.query.IndexedItem; +import com.yahoo.prelude.query.IntItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.MarkerWordItem; +import com.yahoo.prelude.query.NearItem; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.ONearItem; +import com.yahoo.prelude.query.OrItem; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.PhraseSegmentItem; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.prelude.query.PrefixItem; +import com.yahoo.prelude.query.RangeItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.RegExpItem; +import com.yahoo.prelude.query.SegmentingRule; +import com.yahoo.prelude.query.Substring; +import com.yahoo.prelude.query.SubstringItem; +import com.yahoo.prelude.query.SuffixItem; +import com.yahoo.prelude.query.TaggableItem; +import 
com.yahoo.prelude.query.ToolBox; +import com.yahoo.prelude.query.ToolBox.QueryVisitor; +import com.yahoo.prelude.query.WandItem; +import com.yahoo.prelude.query.WeakAndItem; +import com.yahoo.prelude.query.WeightedSetItem; +import com.yahoo.prelude.query.WordAlternativesItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.grouping.GroupingRequest; + +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * Serialize Vespa query trees to YQL+ strings. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class VespaSerializer { + // TODO refactor, too much copy/paste + + private static class AndSegmentSerializer extends Serializer { + private static void serializeWords(StringBuilder destination, + AndSegmentItem segment) { + for (int i = 0; i < segment.getItemCount(); ++i) { + if (i > 0) { + destination.append(", "); + } + Item current = segment.getItem(i); + if (current instanceof WordItem) { + destination.append('"'); + escape(((WordItem) current).getIndexedString(), destination) + .append('"'); + } else { + throw new IllegalArgumentException( + "Serializing of " + + current.getClass().getSimpleName() + + " in segment AND expressions not implemented, please report this as a bug."); + } + } + } + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + return serialize(destination, item, true); + } + + static boolean serialize(StringBuilder destination, Item item, + boolean includeField) { + AndSegmentItem phrase = (AndSegmentItem) item; + Substring origin = phrase.getOrigin(); + String image; + int offset; + int length; + + if (origin == null) { + image = phrase.getRawWord(); + offset = 0; + length = image.length(); + } else { + image = origin.getSuperstring(); + offset = origin.start; + length = origin.end - origin.start; + } + + if 
(includeField) { + destination.append(normalizeIndexName(phrase.getIndexName())) + .append(" contains "); + } + destination.append("([{"); + serializeOrigin(destination, image, offset, length); + destination.append(", \"").append(AND_SEGMENTING) + .append("\": true"); + destination.append("}]"); + destination.append(PHRASE).append('('); + serializeWords(destination, phrase); + destination.append("))"); + return false; + } + } + + private static class AndSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + } + + @Override + String separator(Deque<SerializerWrapper> state) { + return " AND "; + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + destination.append("("); + return true; + } + } + + private static class DotProductSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + serializeWeightedSetContents(destination, DOT_PRODUCT, + (WeightedSetItem) item); + return false; + } + + } + + private static class EquivSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + EquivItem e = (EquivItem) item; + String annotations = leafAnnotations(e); + destination.append(getIndexName(e.getItem(0))).append(" contains "); + if (annotations.length() > 0) { + destination.append("([{").append(annotations).append("}]"); + } + destination.append(EQUIV).append('('); + int initLen = destination.length(); + for (Iterator<Item> i = e.getItemIterator(); i.hasNext();) { + Item x = i.next(); + if (destination.length() > initLen) { + destination.append(", "); + } + if (x instanceof PhraseItem) { + PhraseSerializer.serialize(destination, x, false); + } else { + destination.append('"'); + escape(((IndexedItem) x).getIndexedString(), destination); + 
destination.append('"'); + } + } + if (annotations.length() > 0) { + destination.append(')'); + } + destination.append(')'); + return false; + } + + } + + private static class NearSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + NearItem n = (NearItem) item; + String annotations = nearAnnotations(n); + + destination.append(getIndexName(n.getItem(0))).append(" contains "); + if (annotations.length() > 0) { + destination.append('(').append(annotations); + } + destination.append(NEAR).append('('); + int initLen = destination.length(); + for (ListIterator<Item> i = n.getItemIterator(); i.hasNext();) { + WordItem close = (WordItem) i.next(); + if (destination.length() > initLen) { + destination.append(", "); + } + destination.append('"'); + escape(close.getIndexedString(), destination).append('"'); + } + destination.append(')'); + if (annotations.length() > 0) { + destination.append(')'); + } + return false; + } + + static String nearAnnotations(NearItem n) { + if (n.getDistance() != NearItem.defaultDistance) { + return "[{\"" + DISTANCE + "\": " + n.getDistance() + "}]"; + } else { + return ""; + } + } + + } + + private static class NotSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + } + + @Override + String separator(Deque<SerializerWrapper> state) { + if (state.peekFirst().subItems == 1) { + return ") AND !("; + } else { + return " OR "; + } + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + destination.append("("); + return true; + } + } + + private static class NullSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + throw new NullItemException( + "NullItem encountered in query tree." 
+ + " This is usually a symptom of an invalid query or an error" + + " in a query transformer."); + } + } + + private static class NumberSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + IntItem intItem = (IntItem) item; + if (intItem.getFromLimit().number() + .equals(intItem.getToLimit().number())) { + destination.append(normalizeIndexName(intItem.getIndexName())) + .append(" = "); + annotatedNumberImage(intItem, intItem.getFromLimit().number() + .toString(), destination); + } else if (intItem.getFromLimit().isInfinite()) { + destination.append(normalizeIndexName(intItem.getIndexName())); + destination.append(intItem.getToLimit().isInclusive() ? " <= " + : " < "); + annotatedNumberImage(intItem, intItem.getToLimit().number() + .toString(), destination); + } else if (intItem.getToLimit().isInfinite()) { + destination.append(normalizeIndexName(intItem.getIndexName())); + destination + .append(intItem.getFromLimit().isInclusive() ? 
" >= " + : " > "); + annotatedNumberImage(intItem, intItem.getFromLimit().number() + .toString(), destination); + } else { + serializeAsRange(destination, intItem); + } + return false; + } + + private void serializeAsRange(StringBuilder destination, IntItem intItem) { + String annotations = leafAnnotations(intItem); + boolean leftOpen = !intItem.getFromLimit().isInclusive(); + boolean rightOpen = !intItem.getToLimit().isInclusive(); + String boundsAnnotation = ""; + int initLen; + + if (leftOpen && rightOpen) { + boundsAnnotation = "\"" + BOUNDS + "\": " + "\"" + BOUNDS_OPEN + + "\""; + } else if (leftOpen) { + boundsAnnotation = "\"" + BOUNDS + "\": " + "\"" + + BOUNDS_LEFT_OPEN + "\""; + } else if (rightOpen) { + boundsAnnotation = "\"" + BOUNDS + "\": " + "\"" + + BOUNDS_RIGHT_OPEN + "\""; + } + if (annotations.length() > 0 || boundsAnnotation.length() > 0) { + destination.append("[{"); + } + initLen = destination.length(); + if (annotations.length() > 0) { + + destination.append(annotations); + } + comma(destination, initLen); + if (boundsAnnotation.length() > 0) { + destination.append(boundsAnnotation); + } + if (initLen != annotations.length()) { + destination.append("}]"); + } + destination.append(RANGE).append('(') + .append(normalizeIndexName(intItem.getIndexName())) + .append(", ").append(intItem.getFromLimit().number()) + .append(", ").append(intItem.getToLimit().number()) + .append(")"); + } + + private void annotatedNumberImage(IntItem item, String rawNumber, + StringBuilder image) { + String annotations = leafAnnotations(item); + + if (annotations.length() > 0) { + image.append("([{").append(annotations).append("}]"); + } + if ('-' == rawNumber.charAt(0)) { + image.append('('); + } + image.append(rawNumber); + appendLongIfNecessary(rawNumber, image); + if ('-' == rawNumber.charAt(0)) { + image.append(')'); + } + if (annotations.length() > 0) { + image.append(')'); + } + } + + private void appendLongIfNecessary(String rawNumber, StringBuilder image) { 
+ // floating point + if (rawNumber.indexOf('.') >= 0) { + return; + } + try { + long l = Long.parseLong(rawNumber); + if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) { + image.append('L'); + } + } catch (NumberFormatException e) { + // somebody has managed to init an IntItem containing noise, + // just give up + return; + } + } + } + + private static class RegExpSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + RegExpItem regexp = (RegExpItem) item; + + String annotations = leafAnnotations(regexp); + destination.append(normalizeIndexName(regexp.getIndexName())).append( + " matches "); + annotatedTerm(destination, regexp, annotations); + return false; + } + } + + private static class ONearSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + NearItem n = (NearItem) item; + String annotations = NearSerializer.nearAnnotations(n); + + destination.append(getIndexName(n.getItem(0))).append(" contains "); + if (annotations.length() > 0) { + destination.append('(').append(annotations); + } + destination.append(ONEAR).append('('); + int initLen = destination.length(); + for (ListIterator<Item> i = n.getItemIterator(); i.hasNext();) { + WordItem close = (WordItem) i.next(); + if (destination.length() > initLen) { + destination.append(", "); + } + destination.append('"'); + escape(close.getIndexedString(), destination).append('"'); + } + destination.append(')'); + if (annotations.length() > 0) { + destination.append(')'); + } + return false; + } + + } + + private static class OrSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + } + + @Override + String separator(Deque<SerializerWrapper> state) { + return " OR "; + } + + @Override + boolean 
serialize(StringBuilder destination, Item item) { + destination.append("("); + return true; + } + } + + private static class PhraseSegmentSerializer extends Serializer { + + private static void serializeWords(StringBuilder destination, + PhraseSegmentItem segment) { + for (int i = 0; i < segment.getItemCount(); ++i) { + if (i > 0) { + destination.append(", "); + } + Item current = segment.getItem(i); + if (current instanceof WordItem) { + destination.append('"'); + escape(((WordItem) current).getIndexedString(), destination) + .append('"'); + } else { + throw new IllegalArgumentException( + "Serializing of " + + current.getClass().getSimpleName() + + " in phrases not implemented, please report this as a bug."); + } + } + } + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + return serialize(destination, item, true); + } + + static boolean serialize(StringBuilder destination, Item item, + boolean includeField) { + PhraseSegmentItem phrase = (PhraseSegmentItem) item; + Substring origin = phrase.getOrigin(); + String image; + int offset; + int length; + + if (includeField) { + destination.append(normalizeIndexName(phrase.getIndexName())) + .append(" contains "); + } + if (origin == null) { + image = phrase.getRawWord(); + offset = 0; + length = image.length(); + } else { + image = origin.getSuperstring(); + offset = origin.start; + length = origin.end - origin.start; + } + + destination.append("([{"); + serializeOrigin(destination, image, offset, length); + String annotations = leafAnnotations(phrase); + if (annotations.length() > 0) { + destination.append(", ").append(annotations); + } + if (phrase.getSegmentingRule() == SegmentingRule.BOOLEAN_AND) { + destination.append(", ").append('"').append(AND_SEGMENTING) + .append("\": true"); + } + destination.append("}]"); + destination.append(PHRASE).append('('); + serializeWords(destination, phrase); + destination.append("))"); 
+ return false; + } + } + + private static class PhraseSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + return serialize(destination, item, true); + } + + static boolean serialize(StringBuilder destination, Item item, + boolean includeField) { + + PhraseItem phrase = (PhraseItem) item; + String annotations = leafAnnotations(phrase); + + if (includeField) { + destination.append(normalizeIndexName(phrase.getIndexName())) + .append(" contains "); + + } + if (annotations.length() > 0) { + destination.append("([{").append(annotations).append("}]"); + } + + destination.append(PHRASE).append('('); + for (int i = 0; i < phrase.getItemCount(); ++i) { + if (i > 0) { + destination.append(", "); + } + Item current = phrase.getItem(i); + if (current instanceof WordItem) { + WordSerializer.serializeWordWithoutIndex(destination, + current); + } else if (current instanceof PhraseSegmentItem) { + PhraseSegmentSerializer.serialize(destination, current, + false); + } else if (current instanceof WordAlternativesItem) { + WordAlternativesSerializer.serialize(destination, (WordAlternativesItem) current, false); + } else { + throw new IllegalArgumentException( + "Serializing of " + + current.getClass().getSimpleName() + + " in phrases not implemented, please report this as a bug."); + } + } + destination.append(')'); + if (annotations.length() > 0) { + destination.append(')'); + } + return false; + } + + } + + private static class PredicateQuerySerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + PredicateQueryItem pItem = (PredicateQueryItem) item; + destination.append("predicate(").append(pItem.getIndexName()) + .append(','); + appendFeatures(destination, pItem.getFeatures()); + destination.append(','); + appendFeatures(destination, 
pItem.getRangeFeatures()); + destination.append(')'); + return false; + } + + private void appendFeatures(StringBuilder destination, + Collection<? extends PredicateQueryItem.EntryBase> features) { + if (features.isEmpty()) { + destination.append('0'); // Workaround for empty maps. + return; + } + destination.append('{'); + boolean first = true; + for (PredicateQueryItem.EntryBase entry : features) { + if (!first) { + destination.append(','); + } + if (entry.getSubQueryBitmap() != PredicateQueryItem.ALL_SUB_QUERIES) { + destination.append("\"0x").append( + Long.toHexString(entry.getSubQueryBitmap())); + destination.append("\":{"); + appendKeyValue(destination, entry); + destination.append('}'); + } else { + appendKeyValue(destination, entry); + } + first = false; + } + destination.append('}'); + } + + private void appendKeyValue(StringBuilder destination, + PredicateQueryItem.EntryBase entry) { + destination.append('"'); + escape(entry.getKey(), destination); + destination.append("\":"); + if (entry instanceof PredicateQueryItem.Entry) { + destination.append('"'); + escape(((PredicateQueryItem.Entry) entry).getValue(), + destination); + destination.append('"'); + } else { + destination.append(((PredicateQueryItem.RangeEntry) entry) + .getValue()); + destination.append('L'); + } + } + + } + + private static class RangeSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + RangeItem range = (RangeItem) item; + String annotations = leafAnnotations(range); + if (annotations.length() > 0) { + destination.append("[{").append(annotations).append("}]"); + } + destination.append(RANGE).append('(') + .append(normalizeIndexName(range.getIndexName())) + .append(", "); + appendNumberImage(destination, range.getFrom()); // TODO: Serialize + // inclusive/exclusive + destination.append(", "); + appendNumberImage(destination, range.getTo()); + 
destination.append(')'); + return false; + } + + private void appendNumberImage(StringBuilder destination, Number number) { + destination.append(number.toString()); + if (number instanceof Long) { + destination.append('L'); + } + } + } + + private static class RankSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + } + + @Override + String separator(Deque<SerializerWrapper> state) { + return ", "; + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + destination.append(RANK).append('('); + return true; + + } + + } + + private static class WordAlternativesSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + return serialize(destination, (WordAlternativesItem) item, true); + } + + static boolean serialize(StringBuilder destination, WordAlternativesItem alternatives, boolean includeField) { + String annotations = leafAnnotations(alternatives); + Substring origin = alternatives.getOrigin(); + boolean isFromQuery = alternatives.isFromQuery(); + boolean needsAnnotations = annotations.length() > 0 || origin != null || !isFromQuery; + + if (includeField) { + destination.append(normalizeIndexName(alternatives.getIndexName())).append(" contains "); + } + + if (needsAnnotations) { + destination.append("([{"); + int initLen = destination.length(); + + if (origin != null) { + String image = origin.getSuperstring(); + int offset = origin.start; + int length = origin.end - origin.start; + serializeOrigin(destination, image, offset, length); + } + if (!isFromQuery) { + comma(destination, initLen); + destination.append('"').append(IMPLICIT_TRANSFORMS).append("\": false"); + } + if (annotations.length() > 0) { + comma(destination, initLen); + destination.append(annotations); + } + + destination.append("}]"); + } + + 
destination.append(ALTERNATIVES).append("({"); + int initLen = destination.length(); + List<WordAlternativesItem.Alternative> sortedAlternatives = new ArrayList<>(alternatives.getAlternatives()); + // ensure most precise forms first + Collections.sort(sortedAlternatives, (x, y) -> Double.compare(y.exactness, x.exactness)); + for (WordAlternativesItem.Alternative alternative : sortedAlternatives) { + comma(destination, initLen); + destination.append('"'); + escape(alternative.word, destination); + destination.append("\": ").append(Double.toString(alternative.exactness)); + } + destination.append("})"); + if (needsAnnotations) { + destination.append(')'); + } + return false; + } + } + + private static abstract class Serializer { + abstract void onExit(StringBuilder destination, Item item); + + String separator(Deque<SerializerWrapper> state) { + throw new UnsupportedOperationException( + "Having several items for this query operator serializer, " + + this.getClass().getSimpleName() + + ", not yet implemented."); + } + + abstract boolean serialize(StringBuilder destination, Item item); + } + + private static final class SerializerWrapper { + int subItems; + final Serializer type; + final Item item; + + SerializerWrapper(Serializer type, Item item) { + subItems = 0; + this.type = type; + this.item = item; + } + + } + + private static final class TokenComparator implements + Comparator<Entry<Object, Integer>> { + + @SuppressWarnings({ "rawtypes", "unchecked" }) + @Override + public int compare(Entry<Object, Integer> o1, Entry<Object, Integer> o2) { + Comparable c1 = (Comparable) o1.getKey(); + Comparable c2 = (Comparable) o2.getKey(); + return c1.compareTo(c2); + } + } + + private static class VespaVisitor extends QueryVisitor { + + final StringBuilder destination; + final Deque<SerializerWrapper> state = new ArrayDeque<>(); + + VespaVisitor(StringBuilder destination) { + this.destination = destination; + } + + @Override + public void onExit() { + SerializerWrapper w = 
state.removeFirst(); + w.type.onExit(destination, w.item); + w = state.peekFirst(); + if (w != null) { + w.subItems += 1; + } + } + + @Override + public boolean visit(Item item) { + Serializer doIt = dispatch.get(item.getClass()); + + if (doIt == null) { + throw new IllegalArgumentException(item.getClass() + + " not supported for YQL+ marshalling."); + } + + if (state.peekFirst() != null && state.peekFirst().subItems > 0) { + destination.append(state.peekFirst().type.separator(state)); + } + state.addFirst(new SerializerWrapper(doIt, item)); + return doIt.serialize(destination, item); + + } + } + + private static class WandSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + serializeWeightedSetContents(destination, WAND, + (WeightedSetItem) item, + specificAnnotations((WandItem) item)); + return false; + } + + private String specificAnnotations(WandItem w) { + StringBuilder annotations = new StringBuilder(); + int targetNumHits = w.getTargetNumHits(); + double scoreThreshold = w.getScoreThreshold(); + double thresholdBoostFactor = w.getThresholdBoostFactor(); + if (targetNumHits != 10) { + annotations.append('"').append(TARGET_NUM_HITS).append("\": ") + .append(targetNumHits); + } + if (scoreThreshold != 0) { + comma(annotations, 0); + annotations.append('"').append(SCORE_THRESHOLD).append("\": ") + .append(scoreThreshold); + } + if (thresholdBoostFactor != 1) { + comma(annotations, 0); + annotations.append('"').append(THRESHOLD_BOOST_FACTOR) + .append("\": ").append(thresholdBoostFactor); + } + return annotations.toString(); + } + + } + + private static class WeakAndSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + destination.append(')'); + if (needsAnnotationBlock((WeakAndItem) item)) { + destination.append(')'); + } + } + + @Override + String separator(Deque<SerializerWrapper> state) { + 
return ", "; + } + + private boolean needsAnnotationBlock(WeakAndItem item) { + return nonDefaultScoreThreshold(item) || nonDefaultTargetNumHits(item); + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + WeakAndItem w = (WeakAndItem) item; + if (needsAnnotationBlock(w)) { + destination.append("([{"); + } + int lengthBeforeAnnotations = destination.length(); + if (nonDefaultTargetNumHits(w)) { + destination.append('"').append(TARGET_NUM_HITS).append("\": ").append(w.getN()); + } + if (nonDefaultScoreThreshold(w)) { + comma(destination, lengthBeforeAnnotations); + destination.append('"').append(SCORE_THRESHOLD).append("\": ").append(w.getScoreThreshold()); + } + if (needsAnnotationBlock(w)) { + destination.append("}]"); + } + destination.append(WEAK_AND).append('('); + return true; + } + + private boolean nonDefaultScoreThreshold(WeakAndItem w) { + return w.getScoreThreshold() > 0; + } + + private boolean nonDefaultTargetNumHits(WeakAndItem w) { + return w.getN() != WeakAndItem.defaultN; + } + } + + private static class WeightedSetSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + serializeWeightedSetContents(destination, WEIGHTED_SET, + (WeightedSetItem) item); + return false; + } + + } + + private static class WordSerializer extends Serializer { + + @Override + void onExit(StringBuilder destination, Item item) { + } + + @Override + boolean serialize(StringBuilder destination, Item item) { + WordItem w = (WordItem) item; + StringBuilder wordAnnotations = getAllAnnotations(w); + + destination.append(normalizeIndexName(w.getIndexName())).append( + " contains "); + VespaSerializer.annotatedTerm(destination, w, wordAnnotations.toString()); + return false; + } + + static void serializeWordWithoutIndex(StringBuilder destination, + Item item) { + WordItem w = (WordItem) item; + StringBuilder wordAnnotations = 
getAllAnnotations(w); + + VespaSerializer.annotatedTerm(destination, w, wordAnnotations.toString()); + } + + private static StringBuilder getAllAnnotations(WordItem w) { + StringBuilder wordAnnotations = new StringBuilder( + WordSerializer.wordAnnotations(w)); + String leafAnnotations = leafAnnotations(w); + + if (leafAnnotations.length() > 0) { + comma(wordAnnotations, 0); + wordAnnotations.append(leafAnnotations(w)); + } + return wordAnnotations; + } + + private static String wordAnnotations(WordItem item) { + Substring origin = item.getOrigin(); + boolean usePositionData = item.usePositionData(); + boolean stemmed = item.isStemmed(); + boolean lowercased = item.isLowercased(); + boolean accentDrop = item.isNormalizable(); + SegmentingRule andSegmenting = item.getSegmentingRule(); + boolean isFromQuery = item.isFromQuery(); + StringBuilder annotation = new StringBuilder(); + boolean prefix = item instanceof PrefixItem; + boolean suffix = item instanceof SuffixItem; + boolean substring = item instanceof SubstringItem; + int initLen = annotation.length(); + String image; + int offset; + int length; + + if (origin == null) { + image = item.getRawWord(); + offset = 0; + length = image.length(); + } else { + image = origin.getSuperstring(); + offset = origin.start; + length = origin.end - origin.start; + } + + if (!image.substring(offset, offset + length).equals( + item.getIndexedString())) { + VespaSerializer.serializeOrigin(annotation, image, offset, + length); + } + if (usePositionData != true) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(USE_POSITION_DATA) + .append("\": false"); + } + if (stemmed == true) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(STEM).append("\": false"); + } + if (lowercased == true) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(NORMALIZE_CASE) + .append("\": false"); + } + if (accentDrop == false) { + VespaSerializer.comma(annotation, 
initLen); + annotation.append('"').append(ACCENT_DROP).append("\": false"); + } + if (andSegmenting == SegmentingRule.BOOLEAN_AND) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(AND_SEGMENTING) + .append("\": true"); + } + if (!isFromQuery) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(IMPLICIT_TRANSFORMS) + .append("\": false"); + } + if (prefix) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(PREFIX).append("\": true"); + } + if (suffix) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(SUFFIX).append("\": true"); + } + if (substring) { + VespaSerializer.comma(annotation, initLen); + annotation.append('"').append(SUBSTRING).append("\": true"); + } + return annotation.toString(); + } + + } + + private static final char[] DIGITS = new char[] { '0', '1', '2', '3', '4', + '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + + private static final Map<Class<?>, Serializer> dispatch; + + private static final Comparator<? 
super Entry<Object, Integer>> tokenComparator = new TokenComparator(); + + static { + Map<Class<?>, Serializer> dispatchBuilder = new HashMap<>(); + dispatchBuilder.put(AndItem.class, new AndSerializer()); + dispatchBuilder.put(AndSegmentItem.class, new AndSegmentSerializer()); + dispatchBuilder.put(DotProductItem.class, new DotProductSerializer()); + dispatchBuilder.put(EquivItem.class, new EquivSerializer()); + dispatchBuilder.put(IntItem.class, new NumberSerializer()); + dispatchBuilder.put(MarkerWordItem.class, new WordSerializer()); // gotcha + dispatchBuilder.put(NearItem.class, new NearSerializer()); + dispatchBuilder.put(NotItem.class, new NotSerializer()); + dispatchBuilder.put(NullItem.class, new NullSerializer()); + dispatchBuilder.put(ONearItem.class, new ONearSerializer()); + dispatchBuilder.put(OrItem.class, new OrSerializer()); + dispatchBuilder.put(PhraseItem.class, new PhraseSerializer()); + dispatchBuilder.put(PhraseSegmentItem.class, new PhraseSegmentSerializer()); + dispatchBuilder.put(PredicateQueryItem.class, + new PredicateQuerySerializer()); + dispatchBuilder.put(PrefixItem.class, new WordSerializer()); // gotcha + dispatchBuilder.put(WordAlternativesItem.class, new WordAlternativesSerializer()); + dispatchBuilder.put(RangeItem.class, new RangeSerializer()); + dispatchBuilder.put(RankItem.class, new RankSerializer()); + dispatchBuilder.put(SubstringItem.class, new WordSerializer()); // gotcha + dispatchBuilder.put(SuffixItem.class, new WordSerializer()); // gotcha + dispatchBuilder.put(WandItem.class, new WandSerializer()); + dispatchBuilder.put(WeakAndItem.class, new WeakAndSerializer()); + dispatchBuilder.put(WeightedSetItem.class, new WeightedSetSerializer()); + dispatchBuilder.put(WordItem.class, new WordSerializer()); + dispatchBuilder.put(RegExpItem.class, new RegExpSerializer()); + dispatch = ImmutableMap.copyOf(dispatchBuilder); + } + + /** + * Do YQL+ escaping, which is basically the same as for JSON, of the + * incoming string to 
the "quoted" buffer. The buffer returned is the same + * as the one given in the "quoted" parameter. + * + * @param in a string to escape + * @param escaped the target buffer for escaped data + * @return the same buffer as given in the "quoted" parameter + */ + private static StringBuilder escape(String in, StringBuilder escaped) { + for (char c : in.toCharArray()) { + switch (c) { + case ('\b'): + escaped.append("\\b"); + break; + case ('\t'): + escaped.append("\\t"); + break; + case ('\n'): + escaped.append("\\n"); + break; + case ('\f'): + escaped.append("\\f"); + break; + case ('\r'): + escaped.append("\\r"); + break; + case ('"'): + escaped.append("\\\""); + break; + case ('\''): + escaped.append("\\'"); + break; + case ('\\'): + escaped.append("\\\\"); + break; + case ('/'): + escaped.append("\\/"); + break; + default: + if (c < 32 || c >= 127) { + escaped.append("\\u").append(fourDigitHexString(c)); + } else { + escaped.append(c); + } + } + } + return escaped; + } + + private static char[] fourDigitHexString(char c) { + char[] hex = new char[4]; + int in = ((c) & 0xFFFF); + for (int i = 3; i >= 0; --i) { + hex[i] = DIGITS[in & 0xF]; + in >>>= 4; + } + return hex; + } + + static String getIndexName(Item item) { + if (!(item instanceof IndexedItem)) + throw new IllegalArgumentException("Expected IndexedItem, got " + item.getClass()); + return normalizeIndexName(((IndexedItem) item).getIndexName()); + } + + public static String serialize(Query query) { + StringBuilder out = new StringBuilder(); + serialize(query.getModel().getQueryTree().getRoot(), out); + for (GroupingRequest request : GroupingRequest.getRequests(query)) { + out.append(" | "); + serialize(request, out); + } + return out.toString(); + } + + private static void serialize(GroupingRequest request, StringBuilder out) { + Iterator<Continuation> it = request.continuations().iterator(); + if (it.hasNext()) { + out.append("[{ 'continuations':["); + while (it.hasNext()) { + 
out.append('\'').append(it.next()).append('\''); + if (it.hasNext()) { + out.append(", "); + } + } + out.append("] }]"); + } + out.append(request.getRootOperation()); + } + + private static void serialize(Item item, StringBuilder out) { + VespaVisitor visitor = new VespaVisitor(out); + ToolBox.visit(visitor, item); + } + + static String serialize(Item item) { + StringBuilder out = new StringBuilder(); + serialize(item, out); + return out.toString(); + } + + private static void serializeWeightedSetContents(StringBuilder destination, + String opName, WeightedSetItem weightedSet) { + serializeWeightedSetContents(destination, opName, weightedSet, ""); + } + + private static void serializeWeightedSetContents( + StringBuilder destination, + String opName, WeightedSetItem weightedSet, + String optionalAnnotations) { + addAnnotations(destination, weightedSet, optionalAnnotations); + destination.append(opName).append('(') + .append(normalizeIndexName(weightedSet.getIndexName())) + .append(", {"); + int initLen = destination.length(); + List<Entry<Object, Integer>> tokens = new ArrayList<>( + weightedSet.getNumTokens()); + for (Iterator<Entry<Object, Integer>> i = weightedSet.getTokens(); i + .hasNext();) { + tokens.add(i.next()); + } + Collections.sort(tokens, tokenComparator); + for (Entry<Object, Integer> entry : tokens) { + comma(destination, initLen); + destination.append('"'); + escape(entry.getKey().toString(), destination); + destination.append("\": ").append(entry.getValue().toString()); + } + destination.append("})"); + } + + private static void addAnnotations( + StringBuilder destination, + WeightedSetItem weightedSet, String optionalAnnotations) { + int preAnnotationValueLen; + int incomingLen = destination.length(); + String annotations = leafAnnotations(weightedSet); + + if (optionalAnnotations.length() > 0 || annotations.length() > 0) { + destination.append("[{"); + } + preAnnotationValueLen = destination.length(); + if (annotations.length() > 0) { + 
destination.append(annotations); + } + if (optionalAnnotations.length() > 0) { + comma(destination, preAnnotationValueLen); + destination.append(optionalAnnotations); + } + if (destination.length() > incomingLen) { + destination.append("}]"); + } + } + + private static void comma(StringBuilder annotation, int initLen) { + if (annotation.length() > initLen) { + annotation.append(", "); + } + } + + private static String leafAnnotations(TaggableItem item) { + // TODO there is no usable API for the general annotations map in the + // Item instances + StringBuilder annotation = new StringBuilder(); + int initLen = annotation.length(); + { + int uniqueId = item.getUniqueID(); + double connectivity = item.getConnectivity(); + TaggableItem connectedTo = (TaggableItem) item.getConnectedItem(); + double significance = item.getSignificance(); + if (connectedTo != null && connectedTo.getUniqueID() != 0) { + annotation.append('"').append(CONNECTIVITY).append("\": {\"") + .append(CONNECTION_ID).append("\": ") + .append(connectedTo.getUniqueID()).append(", \"") + .append(CONNECTION_WEIGHT).append("\": ") + .append(connectivity).append("}"); + } + if (item.hasExplicitSignificance()) { + comma(annotation, initLen); + annotation.append('"').append(SIGNIFICANCE).append("\": ") + .append(significance); + } + if (uniqueId != 0) { + comma(annotation, initLen); + annotation.append('"').append(UNIQUE_ID).append("\": ") + .append(uniqueId); + } + } + { + Item leaf = (Item) item; + boolean filter = leaf.isFilter(); + boolean isRanked = leaf.isRanked(); + String label = leaf.getLabel(); + int weight = leaf.getWeight(); + + if (filter == true) { + comma(annotation, initLen); + annotation.append("\"").append(FILTER).append("\": true"); + } + if (isRanked == false) { + comma(annotation, initLen); + annotation.append("\"").append(RANKED).append("\": false"); + } + if (label != null) { + comma(annotation, initLen); + annotation.append("\"").append(LABEL).append("\": \""); + escape(label, 
annotation); + annotation.append("\""); + } + if (weight != 100) { + comma(annotation, initLen); + annotation.append('"').append(WEIGHT).append("\": ") + .append(weight); + } + } + if (item instanceof IntItem) { + int hitLimit = ((IntItem) item).getHitLimit(); + if (hitLimit != 0) { + comma(annotation, initLen); + annotation.append('"').append(HIT_LIMIT).append("\": ") + .append(hitLimit); + } + } + return annotation.toString(); + } + + private static void serializeOrigin(StringBuilder destination, + String image, int offset, int length) { + destination.append('"').append(ORIGIN).append("\": {\"") + .append(ORIGIN_ORIGINAL).append("\": \""); + escape(image, destination); + destination.append("\", \"").append(ORIGIN_OFFSET).append("\": ") + .append(offset).append(", \"").append(ORIGIN_LENGTH) + .append("\": ").append(length).append("}"); + } + + private static String normalizeIndexName(@NonNull String indexName) { + if (indexName.length() == 0) { + return "default"; + } else { + return indexName; + } + } + + private static void annotatedTerm(StringBuilder destination, IndexedItem w, String annotations) { + if (annotations.length() > 0) { + destination.append("([{").append(annotations).append("}]"); + } + destination.append('"'); + escape(w.getIndexedString(), destination).append('"'); + if (annotations.length() > 0) { + destination.append(')'); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java new file mode 100644 index 00000000000..a7cc06c95f7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -0,0 +1,1894 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.search.yql; + +import java.math.BigInteger; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; + +import com.google.common.annotations.Beta; +import com.google.common.base.Preconditions; +import com.yahoo.collections.LazyMap; +import com.yahoo.collections.LazySet; +import com.yahoo.collections.Tuple2; +import com.yahoo.component.Version; +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Normalizer; +import com.yahoo.language.process.Segmenter; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.AndSegmentItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.DotProductItem; +import com.yahoo.prelude.query.EquivItem; +import com.yahoo.prelude.query.IntItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.Limit; +import com.yahoo.prelude.query.NearItem; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.ONearItem; +import com.yahoo.prelude.query.OrItem; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.PhraseSegmentItem; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.prelude.query.PrefixItem; +import com.yahoo.prelude.query.RangeItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.RegExpItem; +import com.yahoo.prelude.query.SegmentItem; +import com.yahoo.prelude.query.SegmentingRule; +import com.yahoo.prelude.query.Substring; +import com.yahoo.prelude.query.SubstringItem; +import com.yahoo.prelude.query.SuffixItem; +import com.yahoo.prelude.query.TaggableItem; +import com.yahoo.prelude.query.ToolBox; +import 
com.yahoo.prelude.query.ToolBox.QueryVisitor; +import com.yahoo.prelude.query.WandItem; +import com.yahoo.prelude.query.WeakAndItem; +import com.yahoo.prelude.query.WeightedSetItem; +import com.yahoo.prelude.query.WordAlternativesItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.grouping.Continuation; +import com.yahoo.search.grouping.request.GroupingOperation; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.query.Sorting.AttributeSorter; +import com.yahoo.search.query.Sorting.FieldOrder; +import com.yahoo.search.query.Sorting.LowerCaseSorter; +import com.yahoo.search.query.Sorting.Order; +import com.yahoo.search.query.Sorting.RawSorter; +import com.yahoo.search.query.Sorting.UcaSorter; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.Parser; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.query.parser.ParserFactory; + +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * The YQL query language. + * + * <p> + * This class <em>must</em> be kept in lockstep with {@link VespaSerializer}. + * Adding anything here will usually require a corresponding addition in + * VespaSerializer. 
+ * </p> + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:stiankri@yahoo-inc.com">Stian Kristoffersen</a> + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +@Beta +public class YqlParser implements Parser { + + private static final String DESCENDING_HITS_ORDER = "descending"; + private static final String ASCENDING_HITS_ORDER = "ascending"; + + private enum SegmentWhen { + NEVER, POSSIBLY, ALWAYS; + } + + private static final Integer DEFAULT_HITS = 10; + private static final Integer DEFAULT_OFFSET = 0; + private static final Integer DEFAULT_TARGET_NUM_HITS = 10; + private static final String ACCENT_DROP_DESCRIPTION = "setting for whether to remove accents if field implies it"; + private static final String ANNOTATIONS = "annotations"; + private static final String FILTER_DESCRIPTION = "term filter setting"; + private static final String IMPLICIT_TRANSFORMS_DESCRIPTION = "setting for whether built-in query transformers should touch the term"; + private static final String NFKC = "nfkc"; + private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it"; + private static final String ORIGIN_DESCRIPTION = "string origin for a term"; + private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking"; + private static final String SEGMENTER_BACKEND = "backend"; + private static final String SEGMENTER = "segmenter"; + private static final String SEGMENTER_VERSION = "version"; + private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it"; + private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item"; + private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty"; + private static final String USER_INPUT_DEFAULT_INDEX = "defaultIndex"; + private static final String USER_INPUT_GRAMMAR = "grammar"; + 
private static final String USER_INPUT_LANGUAGE = "language"; + private static final String USER_INPUT_RAW = "raw"; + private static final String USER_INPUT_SEGMENT = "segment"; + private static final String USER_INPUT = "userInput"; + private static final String USER_QUERY = "userQuery"; + private static final String NON_EMPTY = "nonEmpty"; + + public static final String SORTING_FUNCTION = "function"; + public static final String SORTING_LOCALE = "locale"; + public static final String SORTING_STRENGTH = "strength"; + + static final String ACCENT_DROP = "accentDrop"; + static final String ALTERNATIVES = "alternatives"; + static final String AND_SEGMENTING = "andSegmenting"; + static final String BOUNDS = "bounds"; + static final String BOUNDS_LEFT_OPEN = "leftOpen"; + static final String BOUNDS_OPEN = "open"; + static final String BOUNDS_RIGHT_OPEN = "rightOpen"; + static final String CONNECTION_ID = "id"; + static final String CONNECTION_WEIGHT = "weight"; + static final String CONNECTIVITY = "connectivity"; + static final String DISTANCE = "distance"; + static final String DOT_PRODUCT = "dotProduct"; + static final String EQUIV = "equiv"; + static final String FILTER = "filter"; + static final String HIT_LIMIT = "hitLimit"; + static final String IMPLICIT_TRANSFORMS = "implicitTransforms"; + static final String LABEL = "label"; + static final String NEAR = "near"; + static final String NORMALIZE_CASE = "normalizeCase"; + static final String ONEAR = "onear"; + static final String ORIGIN_LENGTH = "length"; + static final String ORIGIN_OFFSET = "offset"; + static final String ORIGIN = "origin"; + static final String ORIGIN_ORIGINAL = "original"; + static final String PHRASE = "phrase"; + static final String PREDICATE = "predicate"; + static final String PREFIX = "prefix"; + static final String RANGE = "range"; + static final String RANKED = "ranked"; + static final String RANK = "rank"; + static final String SCORE_THRESHOLD = "scoreThreshold"; + static final String 
SIGNIFICANCE = "significance"; + static final String STEM = "stem"; + static final String SUBSTRING = "substring"; + static final String SUFFIX = "suffix"; + static final String TARGET_NUM_HITS = "targetNumHits"; + static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; + static final String UNIQUE_ID = "id"; + static final String USE_POSITION_DATA = "usePositionData"; + static final String WAND = "wand"; + static final String WEAK_AND = "weakAnd"; + static final String WEIGHTED_SET = "weightedSet"; + static final String WEIGHT = "weight"; + + private final IndexFacts indexFacts; + private final List<ConnectedItem> connectedItems = new ArrayList<>(); + private final List<VespaGroupingStep> groupingSteps = new ArrayList<>(); + private final Map<Integer, TaggableItem> identifiedItems = LazyMap.newHashMap(); + private final Normalizer normalizer; + private final Segmenter segmenter; + private final Set<String> yqlSources = LazySet.newHashSet(); + private final Set<String> yqlSummaryFields = LazySet.newHashSet(); + private final String localSegmenterBackend; + private final Version localSegmenterVersion; + private Integer hits; + private Integer offset; + private Integer timeout; + private Query userQuery; + private Parsable currentlyParsing; + private IndexFacts.Session indexFactsSession; + private Set<String> docTypes; + private Sorting sorting; + private String segmenterBackend; + private Version segmenterVersion; + private boolean queryParser = true; + private boolean resegment = false; + private final Deque<OperatorNode<?>> annotationStack = new ArrayDeque<>(); + private final ParserEnvironment environment; + + private static final QueryVisitor noEmptyTerms = new QueryVisitor() { + + @Override + public boolean visit(Item item) { + if (item instanceof NullItem) { + throw new IllegalArgumentException("Got NullItem inside nonEmpty()."); + } else if (item instanceof WordItem) { + if (((WordItem) item).getIndexedString().isEmpty()) { + throw new 
IllegalArgumentException("Searching for empty string inside nonEmpty()"); + } + } else if (item instanceof CompositeItem) { + if (((CompositeItem) item).getItemCount() == 0) { + throw new IllegalArgumentException("Empty composite operator (" + item.getName() + ") inside nonEmpty()"); + } + } + return true; + } + + @Override + public void onExit() { + // NOP + } + }; + + public YqlParser(ParserEnvironment environment) { + indexFacts = environment.getIndexFacts(); + normalizer = environment.getLinguistics().getNormalizer(); + segmenter = environment.getLinguistics().getSegmenter(); + this.environment = environment; + + Tuple2<String, Version> version = environment.getLinguistics().getVersion(Linguistics.Component.SEGMENTER); + localSegmenterBackend = version.first; + localSegmenterVersion = version.second; + } + + @NonNull + @Override + public QueryTree parse(Parsable query) { + indexFactsSession = indexFacts.newSession(query.getSources(), query.getRestrict()); + connectedItems.clear(); + groupingSteps.clear(); + identifiedItems.clear(); + yqlSources.clear(); + yqlSummaryFields.clear(); + annotationStack.clear(); + hits = null; + offset = null; + timeout = null; + // userQuery set prior to calling this + currentlyParsing = query; + docTypes = null; + sorting = null; + segmenterBackend = null; + segmenterVersion = null; + // queryParser set prior to calling this + resegment = false; + return buildTree(fetchFilterPart()); + } + + private void joinDocTypesFromUserQueryAndYql() { + List<String> allSourceNames = new ArrayList<>(currentlyParsing.getSources().size() + yqlSources.size()); + if ( ! 
yqlSources.isEmpty()) { + allSourceNames.addAll(currentlyParsing.getSources()); + allSourceNames.addAll(yqlSources); + } else { + // no sources == all sources in Vespa + } + indexFactsSession = indexFacts.newSession(allSourceNames, currentlyParsing.getRestrict()); + docTypes = new HashSet<>(indexFactsSession.documentTypes()); + } + + @NonNull + private QueryTree buildTree(OperatorNode<?> filterPart) { + Preconditions.checkArgument(filterPart.getArguments().length == 2, + "Expected 2 arguments to filter, got %s.", + filterPart.getArguments().length); + populateYqlSources(filterPart.<OperatorNode<?>> getArgument(0)); + final OperatorNode<ExpressionOperator> filterExpression = filterPart + .getArgument(1); + populateLinguisticsAnnotations(filterExpression); + Item root = convertExpression(filterExpression); + connectItems(); + userQuery = null; + return new QueryTree(root); + } + + private void populateLinguisticsAnnotations( + OperatorNode<ExpressionOperator> filterExpression) { + Map<?, ?> segmenter = getAnnotation(filterExpression, SEGMENTER, + Map.class, null, "segmenter engine and version"); + if (segmenter == null) { + segmenterVersion = null; + segmenterBackend = null; + resegment = false; + } else { + segmenterBackend = getMapValue(SEGMENTER, segmenter, + SEGMENTER_BACKEND, String.class); + try { + segmenterVersion = new Version(getMapValue(SEGMENTER, + segmenter, SEGMENTER_VERSION, String.class)); + } catch (RuntimeException e) { + segmenterVersion = null; + } + if (localSegmenterBackend.equals(segmenterBackend) + && localSegmenterVersion.equals(segmenterVersion)) { + resegment = false; + } else { + resegment = true; + } + } + } + + private void populateYqlSources(OperatorNode<?> filterArgs) { + yqlSources.clear(); + if (filterArgs.getOperator() == SequenceOperator.SCAN) { + for (String source : filterArgs.<List<String>> getArgument(0)) { + yqlSources.add(source); + } + } else if (filterArgs.getOperator() == SequenceOperator.ALL) { + // yqlSources has already 
been cleared + } else if (filterArgs.getOperator() == SequenceOperator.MULTISOURCE) { + for (List<String> source : filterArgs.<List<List<String>>> getArgument(0)) { + yqlSources.add(source.get(0)); + } + } else { + throw newUnexpectedArgumentException(filterArgs.getOperator(), + SequenceOperator.SCAN, SequenceOperator.ALL, + SequenceOperator.MULTISOURCE); + } + joinDocTypesFromUserQueryAndYql(); + } + + private void populateYqlSummaryFields( + List<OperatorNode<ProjectOperator>> fields) { + yqlSummaryFields.clear(); + for (OperatorNode<ProjectOperator> field : fields) { + assertHasOperator(field, ProjectOperator.FIELD); + yqlSummaryFields.add(field.getArgument(1, String.class)); + } + } + + private void connectItems() { + for (ConnectedItem entry : connectedItems) { + TaggableItem to = identifiedItems.get(entry.toId); + Preconditions + .checkNotNull(to, + "Item '%s' was specified to connect to item with ID %s, which does not " + + "exist in the query.", entry.fromItem, + entry.toId); + entry.fromItem.setConnectivity((Item) to, entry.weight); + } + } + + @NonNull + private Item convertExpression(OperatorNode<ExpressionOperator> ast) { + try { + annotationStack.addFirst(ast); + switch (ast.getOperator()) { + case AND: + return buildAnd(ast); + case OR: + return buildOr(ast); + case EQ: + return buildEquals(ast); + case LT: + return buildLessThan(ast); + case GT: + return buildGreaterThan(ast); + case LTEQ: + return buildLessThanOrEquals(ast); + case GTEQ: + return buildGreaterThanOrEquals(ast); + case CONTAINS: + return buildTermSearch(ast); + case MATCHES: + return buildRegExpSearch(ast); + case CALL: + return buildFunctionCall(ast); + default: + throw newUnexpectedArgumentException(ast.getOperator(), + ExpressionOperator.AND, ExpressionOperator.CALL, + ExpressionOperator.CONTAINS, ExpressionOperator.EQ, + ExpressionOperator.GT, ExpressionOperator.GTEQ, + ExpressionOperator.LT, ExpressionOperator.LTEQ, + ExpressionOperator.OR); + } + } finally { + 
annotationStack.removeFirst(); + } + } + + @NonNull + private Item buildFunctionCall(OperatorNode<ExpressionOperator> ast) { + List<String> names = ast.getArgument(0); + Preconditions.checkArgument(names.size() == 1, + "Expected 1 name, got %s.", names.size()); + switch (names.get(0)) { + case USER_QUERY: + return fetchUserQuery(); + case RANGE: + return buildRange(ast); + case WAND: + return buildWand(ast); + case WEIGHTED_SET: + return buildWeightedSet(ast); + case DOT_PRODUCT: + return buildDotProduct(ast); + case PREDICATE: + return buildPredicate(ast); + case RANK: + return buildRank(ast); + case WEAK_AND: + return buildWeakAnd(ast); + case USER_INPUT: + return buildUserInput(ast); + case NON_EMPTY: + return ensureNonEmpty(ast); + default: + throw newUnexpectedArgumentException(names.get(0), DOT_PRODUCT, + RANGE, RANK, USER_QUERY, WAND, WEAK_AND, WEIGHTED_SET, + PREDICATE, USER_INPUT, NON_EMPTY); + } + } + + private Item ensureNonEmpty(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 1, + "Expected 1 arguments, got %s.", args.size()); + Item item = convertExpression(args.get(0)); + ToolBox.visit(noEmptyTerms, item); + return item; + } + + @NonNull + private Item buildWeightedSet(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 2, + "Expected 2 arguments, got %s.", args.size()); + + return fillWeightedSet(ast, args.get(1), new WeightedSetItem( + getIndex(args.get(0)))); + } + + @NonNull + private Item buildDotProduct(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 2, + "Expected 2 arguments, got %s.", args.size()); + + return fillWeightedSet(ast, args.get(1), new DotProductItem( + getIndex(args.get(0)))); + } + + @NonNull + private Item 
buildPredicate(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 3, + "Expected 3 arguments, got %s.", args.size()); + + final PredicateQueryItem item = new PredicateQueryItem(); + item.setIndexName(getIndex(args.get(0))); + + addFeatures(args.get(1), + (key, value, subqueryBitmap) -> item.addFeature(key, (String) value, subqueryBitmap), PredicateQueryItem.ALL_SUB_QUERIES); + addFeatures(args.get(2), (key, value, subqueryBitmap) -> { + if (value instanceof Long) { + item.addRangeFeature(key, (Long) value, subqueryBitmap); + } else { + item.addRangeFeature(key, (Integer) value, subqueryBitmap); + } + }, PredicateQueryItem.ALL_SUB_QUERIES); + return leafStyleSettings(ast, item); + } + + interface AddFeature { + public void addFeature(String key, Object value, long subqueryBitmap); + } + + private void addFeatures(OperatorNode<ExpressionOperator> map, + AddFeature item, long subqueryBitmap) { + if (map.getOperator() != ExpressionOperator.MAP) { + return; + } + assertHasOperator(map, ExpressionOperator.MAP); + List<String> keys = map.getArgument(0); + List<OperatorNode<ExpressionOperator>> values = map.getArgument(1); + for (int i = 0; i < keys.size(); ++i) { + String key = keys.get(i); + OperatorNode<ExpressionOperator> value = values.get(i); + if (value.getOperator() == ExpressionOperator.ARRAY) { + List<OperatorNode<ExpressionOperator>> multiValues = value + .getArgument(0); + for (OperatorNode<ExpressionOperator> multiValue : multiValues) { + assertHasOperator(multiValue, ExpressionOperator.LITERAL); + item.addFeature(key, multiValue.getArgument(0), subqueryBitmap); + } + } else if (value.getOperator() == ExpressionOperator.LITERAL) { + item.addFeature(key, value.getArgument(0), subqueryBitmap); + } else { + assertHasOperator(value, ExpressionOperator.MAP); // Subquery syntax + Preconditions.checkArgument(key.indexOf("0x") == 0 || key.indexOf("[") == 0); + if 
(key.indexOf("0x") == 0) { + String subqueryString = key.substring(2); + if (subqueryString.length() > 16) { + throw new NumberFormatException( + "Too long subquery string: " + key); + } + long currentSubqueryBitmap = new BigInteger(subqueryString, 16).longValue(); + addFeatures(value, item, currentSubqueryBitmap); + } else { + StringTokenizer bits = new StringTokenizer(key.substring(1, key.length() - 1), ","); + long currentSubqueryBitmap = 0; + while (bits.hasMoreTokens()) { + int bit = Integer.parseInt(bits.nextToken().trim()); + currentSubqueryBitmap |= 1L << bit; + } + addFeatures(value, item, currentSubqueryBitmap); + } + } + } + } + + @NonNull + private Item buildWand(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size()); + + WandItem out = new WandItem(getIndex(args.get(0)), getAnnotation(ast, + TARGET_NUM_HITS, Integer.class, DEFAULT_TARGET_NUM_HITS, + "desired number of hits to accumulate in wand")); + Double scoreThreshold = getAnnotation(ast, SCORE_THRESHOLD, + Double.class, null, "min score for hit inclusion"); + if (scoreThreshold != null) { + out.setScoreThreshold(scoreThreshold); + } + Double thresholdBoostFactor = getAnnotation(ast, + THRESHOLD_BOOST_FACTOR, Double.class, null, + "boost factor used to boost threshold before comparing against upper bound score"); + if (thresholdBoostFactor != null) { + out.setThresholdBoostFactor(thresholdBoostFactor); + } + return fillWeightedSet(ast, args.get(1), out); + } + + @NonNull + private WeightedSetItem fillWeightedSet(OperatorNode<ExpressionOperator> ast, + OperatorNode<ExpressionOperator> arg, + @NonNull WeightedSetItem out) { + addItems(arg, out); + return leafStyleSettings(ast, out); + } + + @NonNull + private Item instantiatePhraseItem(String field, OperatorNode<ExpressionOperator> ast) { + assertHasFunctionName(ast, PHRASE); + + if (getAnnotation(ast, 
ORIGIN, Map.class, null, ORIGIN_DESCRIPTION, false) != null) { + return instantiatePhraseSegmentItem(field, ast, false); + } + + PhraseItem phrase = new PhraseItem(); + phrase.setIndexName(field); + for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>> getArgument(1)) { + if (word.getOperator() == ExpressionOperator.CALL) { + List<String> names = word.getArgument(0); + switch (names.get(0)) { + case PHRASE: + if (getAnnotation(word, ORIGIN, Map.class, null, ORIGIN_DESCRIPTION, false) == null) { + phrase.addItem(instantiatePhraseItem(field, word)); + } else { + phrase.addItem(instantiatePhraseSegmentItem(field, word, true)); + } + break; + case ALTERNATIVES: + phrase.addItem(instantiateWordAlternativesItem(field, word)); + break; + default: + throw new IllegalArgumentException("Expected phrase or word alternatives, got " + names.get(0)); + } + } else { + phrase.addItem(instantiateWordItem(field, word, phrase.getClass())); + } + } + return leafStyleSettings(ast, phrase); + } + + @NonNull + private Item instantiatePhraseSegmentItem(String field, OperatorNode<ExpressionOperator> ast, boolean forcePhrase) { + Substring origin = getOrigin(ast); + Boolean stem = getAnnotation(ast, STEM, Boolean.class, Boolean.TRUE, STEM_DESCRIPTION); + Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, Boolean.class, Boolean.FALSE, + "setting for whether to force using AND for segments on and off"); + SegmentItem phrase; + List<String> words = null; + + if (forcePhrase || !andSegmenting) { + phrase = new PhraseSegmentItem(origin.getValue(), origin.getValue(), true, !stem, origin); + } else { + phrase = new AndSegmentItem(origin.getValue(), true, !stem); + } + phrase.setIndexName(field); + + if (resegment + && getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) { + words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage()); + } + + if (words != null && words.size() > 0) { + for 
(String word : words) { + phrase.addItem(new WordItem(word, field, true)); + } + } else { + for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>> getArgument(1)) { + phrase.addItem(instantiateWordItem(field, word, phrase.getClass(), SegmentWhen.NEVER)); + } + } + if (phrase instanceof TaggableItem) { + leafStyleSettings(ast, (TaggableItem) phrase); + } + phrase.lock(); + return phrase; + } + + @NonNull + private Item instantiateNearItem(String field, OperatorNode<ExpressionOperator> ast) { + assertHasFunctionName(ast, NEAR); + + NearItem near = new NearItem(); + near.setIndexName(field); + for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>> getArgument(1)) { + near.addItem(instantiateWordItem(field, word, near.getClass())); + } + Integer distance = getAnnotation(ast, DISTANCE, Integer.class, null, "term distance for NEAR operator"); + if (distance != null) { + near.setDistance(distance); + } + return near; + } + + @NonNull + private Item instantiateONearItem(String field, OperatorNode<ExpressionOperator> ast) { + assertHasFunctionName(ast, ONEAR); + + NearItem onear = new ONearItem(); + onear.setIndexName(field); + for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>> getArgument(1)) { + onear.addItem(instantiateWordItem(field, word, onear.getClass())); + } + Integer distance = getAnnotation(ast, DISTANCE, Integer.class, null, "term distance for ONEAR operator"); + if (distance != null) { + onear.setDistance(distance); + } + return onear; + } + + @NonNull + private Item fetchUserQuery() { + Preconditions.checkState(!queryParser, + "Tried inserting user query into itself."); + Preconditions.checkState(userQuery != null, + "User query must be set before trying to build complete query " + + "tree including user query."); + return userQuery.getModel().getQueryTree().getRoot(); + } + + @NonNull + private Item buildUserInput(OperatorNode<ExpressionOperator> ast) 
{ + + String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class, + Query.Type.ALL.toString(), "grammar for handling user input"); + String defaultIndex = getAnnotation(ast, USER_INPUT_DEFAULT_INDEX, + String.class, "default", "default index for user input terms"); + Boolean allowEmpty = getAnnotation(ast, USER_INPUT_ALLOW_EMPTY, Boolean.class, + Boolean.FALSE, "flag for allowing NullItem to be returned"); + String wordData; + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + + // TODO add support for default arguments if property results in nothing + wordData = getStringContents(args.get(0)); + if (allowEmpty.booleanValue() && (wordData == null || wordData.isEmpty())) { + return new NullItem(); + } + String languageTag = getAnnotation(ast, USER_INPUT_LANGUAGE, + String.class, "en", + "language setting for segmenting user input parameter"); + Language language = Language.fromLanguageTag(languageTag); + Item item; + if (USER_INPUT_RAW.equals(grammar)) { + item = instantiateWordItem(defaultIndex, wordData, ast, null, SegmentWhen.NEVER, + language); + } else if (USER_INPUT_SEGMENT.equals(grammar)) { + item = instantiateWordItem(defaultIndex, wordData, ast, null, + SegmentWhen.ALWAYS, language); + } else { + item = parseUserInput(grammar, defaultIndex, wordData, language, allowEmpty.booleanValue()); + propagateUserInputAnnotations(ast, item); + } + return item; + } + + private String getStringContents( + OperatorNode<ExpressionOperator> propertySniffer) { + String wordData; + + switch (propertySniffer.getOperator()) { + case LITERAL: + wordData = propertySniffer.getArgument(0, String.class); + break; + case VARREF: + Preconditions + .checkState(userQuery != null, + "properties must be available when trying to fetch user input"); + wordData = userQuery.properties().getString( + propertySniffer.getArgument(0, String.class)); + break; + default: + throw newUnexpectedArgumentException(propertySniffer.getOperator(), + ExpressionOperator.LITERAL, 
ExpressionOperator.VARREF); + } + return wordData; + } + + private class AnnotationPropagator extends QueryVisitor { + private final Boolean isRanked; + private final Boolean filter; + private final Boolean stem; + private final Boolean normalizeCase; + private final Boolean accentDrop; + private final Boolean usePositionData; + + public AnnotationPropagator(OperatorNode<ExpressionOperator> ast) { + isRanked = getAnnotation(ast, RANKED, Boolean.class, null, + RANKED_DESCRIPTION); + filter = getAnnotation(ast, FILTER, Boolean.class, null, + FILTER_DESCRIPTION); + stem = getAnnotation(ast, STEM, Boolean.class, null, + STEM_DESCRIPTION); + normalizeCase = getAnnotation(ast, NORMALIZE_CASE, Boolean.class, + Boolean.TRUE, NORMALIZE_CASE_DESCRIPTION); + accentDrop = getAnnotation(ast, ACCENT_DROP, Boolean.class, null, + ACCENT_DROP_DESCRIPTION); + usePositionData = getAnnotation(ast, USE_POSITION_DATA, + Boolean.class, null, USE_POSITION_DATA_DESCRIPTION); + } + + @Override + public boolean visit(Item item) { + if (item instanceof WordItem) { + WordItem w = (WordItem) item; + if (usePositionData != null) { + w.setPositionData(usePositionData); + } + if (stem != null) { + w.setStemmed(!stem); + } + if (normalizeCase != null) { + w.setLowercased(!normalizeCase); + } + if (accentDrop != null) { + w.setNormalizable(accentDrop); + } + } + if (item instanceof TaggableItem) { + if (isRanked != null) { + item.setRanked(isRanked); + } + if (filter != null) { + item.setFilter(filter); + } + } + return true; + } + + @Override + public void onExit() { + // intentionally left blank + } + } + + private void propagateUserInputAnnotations( + OperatorNode<ExpressionOperator> ast, Item item) { + ToolBox.visit(new AnnotationPropagator(ast), item); + + } + + @NonNull + private Item parseUserInput(String grammar, String defaultIndex, String wordData, + Language language, boolean allowNullItem) { + Item item; + Query.Type parseAs = Query.Type.getType(grammar); + Parser parser = 
ParserFactory.newInstance(parseAs, environment); + // perhaps not use already resolved doctypes, but respect source and + // restrict + item = parser.parse( + new Parsable().setQuery(wordData).addSources(docTypes) + .setLanguage(language) + .setDefaultIndexName(defaultIndex)).getRoot(); + // the null check should be unnecessary, but is there to avoid having to + // suppress null warnings + if (!allowNullItem && (item == null || item instanceof NullItem)) { + throw new IllegalArgumentException("Parsing \"" + wordData + + "\" only resulted in NullItem."); + } + return item; + } + + @NonNull + private OperatorNode<?> fetchFilterPart() { + ProgramParser parser = new ProgramParser(); + OperatorNode<?> ast; + try { + ast = parser.parse("query", currentlyParsing.getQuery()); + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + assertHasOperator(ast, StatementOperator.PROGRAM); + Preconditions.checkArgument(ast.getArguments().length == 1, + "Expected only a single argument to the root node, got %s.", + ast.getArguments().length); + // TODO: should we check size of first argument as well? 
+ ast = ast.<List<OperatorNode<?>>> getArgument(0).get(0); + assertHasOperator(ast, StatementOperator.EXECUTE); + + ast = ast.getArgument(0); + ast = fetchTimeout(ast); + ast = fetchPipe(ast); + ast = fetchSummaryFields(ast); + ast = fetchOffsetAndHits(ast); + ast = fetchSorting(ast); + assertHasOperator(ast, SequenceOperator.FILTER); + return ast; + } + + @SuppressWarnings("unchecked") + private OperatorNode<?> fetchPipe(OperatorNode<?> toScan) { + OperatorNode<?> ast = toScan; + while (ast.getOperator() == SequenceOperator.PIPE) { + OperatorNode<ExpressionOperator> groupingAst = ast + .<List<OperatorNode<ExpressionOperator>>> getArgument(2) + .get(0); + GroupingOperation groupingOperation = GroupingOperation + .fromString(groupingAst.<String> getArgument(0)); + VespaGroupingStep groupingStep = new VespaGroupingStep( + groupingOperation); + List<String> continuations = getAnnotation(groupingAst, + "continuations", List.class, Collections.emptyList(), + "grouping continuations"); + for (String continuation : continuations) { + groupingStep.continuations().add( + Continuation.fromString(continuation)); + } + groupingSteps.add(groupingStep); + ast = ast.getArgument(0); + } + Collections.reverse(groupingSteps); + return ast; + } + + @NonNull + private OperatorNode<?> fetchSorting(OperatorNode<?> ast) { + if (ast.getOperator() != SequenceOperator.SORT) { + return ast; + } + List<FieldOrder> sortingInit = new ArrayList<>(); + List<OperatorNode<?>> sortArguments = ast.getArgument(1); + for (OperatorNode<?> op : sortArguments) { + final OperatorNode<ExpressionOperator> fieldNode = op + .<OperatorNode<ExpressionOperator>> getArgument(0); + String field = fetchFieldRead(fieldNode); + String locale = getAnnotation(fieldNode, SORTING_LOCALE, + String.class, null, "locale used by sorting function"); + String function = getAnnotation(fieldNode, SORTING_FUNCTION, + String.class, null, + "sorting function for the specified attribute"); + String strength = getAnnotation(fieldNode, 
SORTING_STRENGTH, + String.class, null, "strength for sorting function"); + AttributeSorter sorter; + if (function == null) { + sorter = new AttributeSorter(field); + } else if (Sorting.LOWERCASE.equals(function)) { + sorter = new LowerCaseSorter(field); + } else if (Sorting.RAW.equals(function)) { + sorter = new RawSorter(field); + } else if (Sorting.UCA.equals(function)) { + if (locale != null) { + UcaSorter.Strength ucaStrength = UcaSorter.Strength.UNDEFINED; + if (strength != null) { + if (Sorting.STRENGTH_PRIMARY.equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.PRIMARY; + } else if (Sorting.STRENGTH_SECONDARY + .equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.SECONDARY; + } else if (Sorting.STRENGTH_TERTIARY + .equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.TERTIARY; + } else if (Sorting.STRENGTH_QUATERNARY + .equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.QUATERNARY; + } else if (Sorting.STRENGTH_IDENTICAL + .equalsIgnoreCase(strength)) { + ucaStrength = UcaSorter.Strength.IDENTICAL; + } else { + throw newUnexpectedArgumentException(function, + Sorting.STRENGTH_PRIMARY, + Sorting.STRENGTH_SECONDARY, + Sorting.STRENGTH_TERTIARY, + Sorting.STRENGTH_QUATERNARY, + Sorting.STRENGTH_IDENTICAL); + } + sorter = new UcaSorter(field, locale, ucaStrength); + } else { + sorter = new UcaSorter(field, locale, ucaStrength); + } + } else { + sorter = new UcaSorter(field); + } + } else { + throw newUnexpectedArgumentException(function, "lowercase", + "raw", "uca"); + } + switch ((SortOperator) op.getOperator()) { + case ASC: + sortingInit.add(new FieldOrder(sorter, Order.ASCENDING)); + break; + case DESC: + sortingInit.add(new FieldOrder(sorter, Order.DESCENDING)); + break; + default: + throw newUnexpectedArgumentException(op.getOperator(), + SortOperator.ASC, SortOperator.DESC); + } + } + sorting = new Sorting(sortingInit); + return ast.getArgument(0); + } + + @NonNull + private OperatorNode<?> 
fetchOffsetAndHits(OperatorNode<?> ast) { + if (ast.getOperator() == SequenceOperator.OFFSET) { + offset = ast.<OperatorNode<?>> getArgument(1) + .<Integer> getArgument(0); + hits = DEFAULT_HITS; + return ast.getArgument(0); + } + if (ast.getOperator() == SequenceOperator.SLICE) { + offset = ast.<OperatorNode<?>> getArgument(1) + .<Integer> getArgument(0); + hits = ast.<OperatorNode<?>> getArgument(2) + .<Integer> getArgument(0) - offset; + return ast.getArgument(0); + } + if (ast.getOperator() == SequenceOperator.LIMIT) { + hits = ast.<OperatorNode<?>> getArgument(1) + .<Integer> getArgument(0); + offset = DEFAULT_OFFSET; + return ast.getArgument(0); + } + return ast; + } + + @NonNull + private OperatorNode<?> fetchSummaryFields(OperatorNode<?> ast) { + if (ast.getOperator() != SequenceOperator.PROJECT) { + return ast; + } + Preconditions.checkArgument(ast.getArguments().length == 2, + "Expected 2 arguments to PROJECT, got %s.", + ast.getArguments().length); + populateYqlSummaryFields(ast + .<List<OperatorNode<ProjectOperator>>> getArgument(1)); + return ast.getArgument(0); + } + + private OperatorNode<?> fetchTimeout(OperatorNode<?> ast) { + if (ast.getOperator() != SequenceOperator.TIMEOUT) { + return ast; + } + timeout = ast.<OperatorNode<?>> getArgument(1).<Integer> getArgument(0); + return ast.getArgument(0); + } + + @NonNull + private static String fetchFieldRead(OperatorNode<ExpressionOperator> ast) { + assertHasOperator(ast, ExpressionOperator.READ_FIELD); + return ast.getArgument(1); + } + + @NonNull + private IntItem buildGreaterThanOrEquals( + OperatorNode<ExpressionOperator> ast) { + IntItem number; + if (isIndexOnLeftHandSide(ast)) { + number = new IntItem("[" + fetchConditionWord(ast) + ";]", + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = new IntItem("[;" + fetchConditionWord(ast) + "]", + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(0, 
OperatorNode.class), + number); + } + return number; + } + + @NonNull + private IntItem buildLessThanOrEquals(OperatorNode<ExpressionOperator> ast) { + IntItem number; + if (isIndexOnLeftHandSide(ast)) { + number = new IntItem("[;" + fetchConditionWord(ast) + "]", + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = new IntItem("[" + fetchConditionWord(ast) + ";]", + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), + number); + } + return number; + } + + @NonNull + private IntItem buildGreaterThan(OperatorNode<ExpressionOperator> ast) { + IntItem number; + if (isIndexOnLeftHandSide(ast)) { + number = new IntItem(">" + fetchConditionWord(ast), + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = new IntItem("<" + fetchConditionWord(ast), + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), + number); + } + return number; + } + + @NonNull + private IntItem buildLessThan(OperatorNode<ExpressionOperator> ast) { + IntItem number; + if (isIndexOnLeftHandSide(ast)) { + number = new IntItem("<" + fetchConditionWord(ast), + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = new IntItem(">" + fetchConditionWord(ast), + fetchConditionIndex(ast)); + number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), + number); + } + return number; + } + + @NonNull + private IntItem buildEquals(OperatorNode<ExpressionOperator> ast) { + IntItem number = new IntItem(fetchConditionWord(ast), + fetchConditionIndex(ast)); + if (isIndexOnLeftHandSide(ast)) { + number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), + number); + } else { + number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), + number); + } + return number; + } + + @NonNull + 
private String fetchConditionIndex(OperatorNode<ExpressionOperator> ast) { + OperatorNode<ExpressionOperator> lhs = ast.getArgument(0); + OperatorNode<ExpressionOperator> rhs = ast.getArgument(1); + if (lhs.getOperator() == ExpressionOperator.LITERAL + || lhs.getOperator() == ExpressionOperator.NEGATE) { + assertHasOperator(rhs, ExpressionOperator.READ_FIELD); + return getIndex(rhs); + } + if (rhs.getOperator() == ExpressionOperator.LITERAL + || rhs.getOperator() == ExpressionOperator.NEGATE) { + assertHasOperator(lhs, ExpressionOperator.READ_FIELD); + return getIndex(lhs); + } + throw new IllegalArgumentException( + "Expected LITERAL and READ_FIELD, got " + lhs.getOperator() + + " and " + rhs.getOperator() + "."); + } + + private static String getNumberAsString(OperatorNode<ExpressionOperator> ast) { + String negative = ""; + OperatorNode<ExpressionOperator> currentAst = ast; + if (currentAst.getOperator() == ExpressionOperator.NEGATE) { + negative = "-"; + currentAst = currentAst.getArgument(0); + } + assertHasOperator(currentAst, ExpressionOperator.LITERAL); + return negative + currentAst.getArgument(0).toString(); + } + + @NonNull + private static String fetchConditionWord( + OperatorNode<ExpressionOperator> ast) { + OperatorNode<ExpressionOperator> lhs = ast.getArgument(0); + OperatorNode<ExpressionOperator> rhs = ast.getArgument(1); + if (lhs.getOperator() == ExpressionOperator.LITERAL + || lhs.getOperator() == ExpressionOperator.NEGATE) { + assertHasOperator(rhs, ExpressionOperator.READ_FIELD); + return getNumberAsString(lhs); + } + if (rhs.getOperator() == ExpressionOperator.LITERAL + || rhs.getOperator() == ExpressionOperator.NEGATE) { + assertHasOperator(lhs, ExpressionOperator.READ_FIELD); + return getNumberAsString(rhs); + } + throw new IllegalArgumentException( + "Expected LITERAL/NEGATE and READ_FIELD, got " + + lhs.getOperator() + " and " + rhs.getOperator() + "."); + } + + private static boolean isIndexOnLeftHandSide( + 
OperatorNode<ExpressionOperator> ast) { + return ast.getArgument(0, OperatorNode.class).getOperator() == ExpressionOperator.READ_FIELD; + } + + @NonNull + private CompositeItem buildAnd(OperatorNode<ExpressionOperator> ast) { + AndItem andItem = new AndItem(); + NotItem notItem = new NotItem(); + convertVarArgsAnd(ast, 0, andItem, notItem); + Preconditions + .checkArgument(andItem.getItemCount() > 0, + "Vespa does not support AND with no logically positive branches."); + if (notItem.getItemCount() == 0) { + return andItem; + } + if (andItem.getItemCount() == 1) { + notItem.setPositiveItem(andItem.getItem(0)); + } else { + notItem.setPositiveItem(andItem); + } + return notItem; + } + + @NonNull + private CompositeItem buildOr(OperatorNode<ExpressionOperator> spec) { + return convertVarArgs(spec, 0, new OrItem()); + } + + @NonNull + private CompositeItem buildWeakAnd(OperatorNode<ExpressionOperator> spec) { + WeakAndItem weakAnd = new WeakAndItem(); + Integer targetNumHits = getAnnotation(spec, TARGET_NUM_HITS, + Integer.class, null, "desired minimum hits to produce"); + if (targetNumHits != null) { + weakAnd.setN(targetNumHits); + } + Integer scoreThreshold = getAnnotation(spec, SCORE_THRESHOLD, + Integer.class, null, "min dot product score for hit inclusion"); + if (scoreThreshold != null) { + weakAnd.setScoreThreshold(scoreThreshold); + } + return convertVarArgs(spec, 1, weakAnd); + } + + @NonNull + private CompositeItem buildRank(OperatorNode<ExpressionOperator> spec) { + return convertVarArgs(spec, 1, new RankItem()); + } + + @NonNull + private CompositeItem convertVarArgs(OperatorNode<ExpressionOperator> ast, + int argIdx, @NonNull + CompositeItem out) { + Iterable<OperatorNode<ExpressionOperator>> args = ast + .getArgument(argIdx); + for (OperatorNode<ExpressionOperator> arg : args) { + assertHasOperator(arg, ExpressionOperator.class); + out.addItem(convertExpression(arg)); + } + return out; + } + + private void 
convertVarArgsAnd(OperatorNode<ExpressionOperator> ast, + int argIdx, AndItem outAnd, NotItem outNot) { + Iterable<OperatorNode<ExpressionOperator>> args = ast + .getArgument(argIdx); + for (OperatorNode<ExpressionOperator> arg : args) { + assertHasOperator(arg, ExpressionOperator.class); + if (arg.getOperator() == ExpressionOperator.NOT) { + OperatorNode<ExpressionOperator> exp = arg.getArgument(0); + assertHasOperator(exp, ExpressionOperator.class); + outNot.addNegativeItem(convertExpression(exp)); + } else { + outAnd.addItem(convertExpression(arg)); + } + } + } + + @NonNull + private Item buildTermSearch(OperatorNode<ExpressionOperator> ast) { + assertHasOperator(ast, ExpressionOperator.CONTAINS); + return instantiateLeafItem( + getIndex(ast.<OperatorNode<ExpressionOperator>> getArgument(0)), + ast.<OperatorNode<ExpressionOperator>> getArgument(1)); + } + + @NonNull + private Item buildRegExpSearch(OperatorNode<ExpressionOperator> ast) { + assertHasOperator(ast, ExpressionOperator.MATCHES); + String field = getIndex(ast.<OperatorNode<ExpressionOperator>> getArgument(0)); + OperatorNode<ExpressionOperator> ast1 = ast.<OperatorNode<ExpressionOperator>> getArgument(1); + String wordData = getStringContents(ast1); + RegExpItem regExp = new RegExpItem(field, true, wordData); + return leafStyleSettings(ast1, regExp); + } + + + @NonNull + private Item buildRange(OperatorNode<ExpressionOperator> spec) { + assertHasOperator(spec, ExpressionOperator.CALL); + assertHasFunctionName(spec, RANGE); + + IntItem range = instantiateRangeItem( + spec.<List<OperatorNode<ExpressionOperator>>> getArgument(1), + spec); + return leafStyleSettings(spec, range); + } + + private static Number negate(Number x) { + if (x.getClass() == Integer.class) { + int x1 = x.intValue(); + return Integer.valueOf(-x1); + } else if (x.getClass() == Long.class) { + long x1 = x.longValue(); + return Long.valueOf(-x1); + } else if (x.getClass() == Float.class) { + float x1 = x.floatValue(); + return 
Float.valueOf(-x1); + } else if (x.getClass() == Double.class) { + double x1 = x.doubleValue(); + return Double.valueOf(-x1); + } else { + throw newUnexpectedArgumentException(x.getClass(), Integer.class, + Long.class, Float.class, Double.class); + } + } + + @NonNull + private IntItem instantiateRangeItem( + List<OperatorNode<ExpressionOperator>> args, + OperatorNode<ExpressionOperator> spec) { + Preconditions.checkArgument(args.size() == 3, + "Expected 3 arguments, got %s.", args.size()); + + Number lowerArg = getBound(args.get(1)); + Number upperArg = getBound(args.get(2)); + String bounds = getAnnotation(spec, BOUNDS, String.class, null, + "whether bounds should be open or closed"); + // TODO: add support for implicit transforms + if (bounds == null) { + return new RangeItem(lowerArg, upperArg, getIndex(args.get(0))); + } else { + Limit from; + Limit to; + if (BOUNDS_OPEN.equals(bounds)) { + from = new Limit(lowerArg, false); + to = new Limit(upperArg, false); + } else if (BOUNDS_LEFT_OPEN.equals(bounds)) { + from = new Limit(lowerArg, false); + to = new Limit(upperArg, true); + } else if (BOUNDS_RIGHT_OPEN.equals(bounds)) { + from = new Limit(lowerArg, true); + to = new Limit(upperArg, false); + } else { + throw newUnexpectedArgumentException(bounds, BOUNDS_OPEN, + BOUNDS_LEFT_OPEN, BOUNDS_RIGHT_OPEN); + } + return new IntItem(from, to, getIndex(args.get(0))); + } + } + + private Number getBound(OperatorNode<ExpressionOperator> bound) { + Number boundValue; + OperatorNode<ExpressionOperator> currentBound = bound; + boolean negate = false; + if (currentBound.getOperator() == ExpressionOperator.NEGATE) { + currentBound = currentBound.getArgument(0); + negate = true; + } + assertHasOperator(currentBound, ExpressionOperator.LITERAL); + boundValue = currentBound.getArgument(0, Number.class); + if (negate) { + boundValue = negate(boundValue); + } + return boundValue; + } + + @NonNull + private Item instantiateLeafItem(String field, + OperatorNode<ExpressionOperator> 
ast) { + switch (ast.getOperator()) { + case LITERAL: + case VARREF: + return instantiateWordItem(field, ast, null); + case CALL: + return instantiateCompositeLeaf(field, ast); + default: + throw newUnexpectedArgumentException(ast.getOperator().name(), + ExpressionOperator.CALL, ExpressionOperator.LITERAL); + } + } + + @NonNull + private Item instantiateCompositeLeaf(String field, + OperatorNode<ExpressionOperator> ast) { + List<String> names = ast.getArgument(0); + Preconditions.checkArgument(names.size() == 1, + "Expected 1 name, got %s.", names.size()); + switch (names.get(0)) { + case PHRASE: + return instantiatePhraseItem(field, ast); + case NEAR: + return instantiateNearItem(field, ast); + case ONEAR: + return instantiateONearItem(field, ast); + case EQUIV: + return instantiateEquivItem(field, ast); + case ALTERNATIVES: + return instantiateWordAlternativesItem(field, ast); + default: + throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, + ONEAR, PHRASE); + } + } + + private Item instantiateWordAlternativesItem(String field, OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() >= 1, "Expected 1 or more arguments, got %s.", args.size()); + Preconditions.checkArgument(args.get(0).getOperator() == ExpressionOperator.MAP, "Expected MAP, got %s.", args.get(0) + .getOperator()); + + List<WordAlternativesItem.Alternative> terms = new ArrayList<>(); + List<String> keys = args.get(0).getArgument(0); + List<OperatorNode<ExpressionOperator>> values = args.get(0).getArgument(1); + for (int i = 0; i < keys.size(); ++i) { + String term = keys.get(i); + double exactness; + OperatorNode<ExpressionOperator> value = values.get(i); + switch (value.getOperator()) { + case LITERAL: + exactness = value.getArgument(0, Double.class); + break; + default: + throw newUnexpectedArgumentException(value.getOperator(), ExpressionOperator.LITERAL); + } + terms.add(new 
WordAlternativesItem.Alternative(term, exactness)); + } + Substring origin = getOrigin(ast); + final Boolean isFromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, + IMPLICIT_TRANSFORMS_DESCRIPTION); + return leafStyleSettings(ast, new WordAlternativesItem(field, isFromQuery, origin, terms)); + } + + @NonNull + private Item instantiateEquivItem(String field, + OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() >= 2, + "Expected 2 or more arguments, got %s.", args.size()); + + EquivItem equiv = new EquivItem(); + equiv.setIndexName(field); + for (OperatorNode<ExpressionOperator> arg : args) { + switch (arg.getOperator()) { + case LITERAL: + equiv.addItem(instantiateWordItem(field, arg, equiv.getClass())); + break; + case CALL: + assertHasFunctionName(arg, PHRASE); + equiv.addItem(instantiatePhraseItem(field, arg)); + break; + default: + throw newUnexpectedArgumentException(arg.getOperator(), + ExpressionOperator.CALL, ExpressionOperator.LITERAL); + } + } + return leafStyleSettings(ast, equiv); + } + + @NonNull + private Item instantiateWordItem(String field, + OperatorNode<ExpressionOperator> ast, Class<?> parent) { + return instantiateWordItem(field, ast, parent, SegmentWhen.POSSIBLY); + } + + @NonNull + private Item instantiateWordItem(String field, + OperatorNode<ExpressionOperator> ast, Class<?> parent, + SegmentWhen segmentPolicy) { + String wordData = getStringContents(ast); + return instantiateWordItem(field, wordData, ast, parent, + segmentPolicy, null); + } + + @NonNull + private Item instantiateWordItem(String field, + String rawWord, + OperatorNode<ExpressionOperator> ast, Class<?> parent, + SegmentWhen segmentPolicy, Language language) { + String wordData = rawWord; + if (getAnnotation(ast, NFKC, Boolean.class, Boolean.TRUE, + "setting for whether to NFKC normalize input data")) { + wordData = normalizer.normalize(wordData); + } 
+ boolean fromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, + Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION); + boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class, + Boolean.FALSE, + "setting for whether to use prefix match of input data"); + boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class, + Boolean.FALSE, + "setting for whether to use suffix match of input data"); + boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, + Boolean.FALSE, + "setting for whether to use substring match of input data"); + Preconditions.checkArgument((prefixMatch ? 1 : 0) + + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2, + "Only one of prefix, substring and suffix can be set."); + @NonNull + final TaggableItem wordItem; + + if (prefixMatch) { + wordItem = new PrefixItem(wordData, fromQuery); + } else if (suffixMatch) { + wordItem = new SuffixItem(wordData, fromQuery); + } else if (substrMatch) { + wordItem = new SubstringItem(wordData, fromQuery); + } else { + switch (segmentPolicy) { + case NEVER: + wordItem = new WordItem(wordData, fromQuery); + break; + case POSSIBLY: + if (shouldResegmentWord(field, fromQuery)) { + wordItem = resegment(field, ast, wordData, fromQuery, + parent, language); + } else { + wordItem = new WordItem(wordData, fromQuery); + } + break; + case ALWAYS: + wordItem = resegment(field, ast, wordData, fromQuery, parent, + language); + break; + default: + throw new IllegalArgumentException( + "Unexpected segmenting rule: " + segmentPolicy); + } + } + if (wordItem instanceof WordItem) { + prepareWord(field, ast, fromQuery, (WordItem) wordItem); + } + return (Item) leafStyleSettings(ast, wordItem); + } + + @SuppressWarnings({"deprecation"}) + private boolean shouldResegmentWord(String field, boolean fromQuery) { + return resegment && fromQuery && ! 
indexFactsSession.getIndex(field).isAttribute(); + } + + @NonNull + private TaggableItem resegment(String field, + OperatorNode<ExpressionOperator> ast, String wordData, + boolean fromQuery, Class<?> parent, Language language) { + final TaggableItem wordItem; + String toSegment = wordData; + final Substring s = getOrigin(ast); + final Language usedLanguage = language == null ? currentlyParsing.getLanguage() : language; + if (s != null) { + toSegment = s.getValue(); + } + List<String> words = segmenter.segment(toSegment, + usedLanguage); + if (words.size() == 0) { + wordItem = new WordItem(wordData, fromQuery); + } else if (words.size() == 1 || !phraseArgumentSupported(parent)) { + wordItem = new WordItem(words.get(0), fromQuery); + } else { + wordItem = new PhraseSegmentItem(toSegment, fromQuery, false); + ((PhraseSegmentItem) wordItem).setIndexName(field); + for (String w : words) { + WordItem segment = new WordItem(w, fromQuery); + prepareWord(field, ast, fromQuery, segment); + ((PhraseSegmentItem) wordItem).addItem(segment); + } + ((PhraseSegmentItem) wordItem).lock(); + } + return wordItem; + } + + private boolean phraseArgumentSupported(Class<?> parent) { + if (parent == null) { + return true; + } else if (parent == PhraseItem.class) { + // not supported in backend, but the container flattens the + // arguments itself + return true; + } else if (parent == EquivItem.class) { + return true; + } else { + return false; + } + } + + private void prepareWord(String field, + OperatorNode<ExpressionOperator> ast, boolean fromQuery, + WordItem wordItem) { + wordItem.setIndexName(field); + wordStyleSettings(ast, wordItem); + if (shouldResegmentWord(field, fromQuery)) { + // force re-stemming, new case normalization, etc + wordItem.setStemmed(false); + wordItem.setLowercased(false); + wordItem.setNormalizable(true); + } + } + + @NonNull + private <T extends TaggableItem> T leafStyleSettings(OperatorNode<?> ast, + @NonNull + T out) { + { + Map<?, ?> connectivity = 
getAnnotation(ast, CONNECTIVITY, + Map.class, null, "connectivity settings"); + if (connectivity != null) { + connectedItems.add(new ConnectedItem(out, getMapValue( + CONNECTIVITY, connectivity, CONNECTION_ID, + Integer.class), getMapValue(CONNECTIVITY, connectivity, + CONNECTION_WEIGHT, Number.class).doubleValue())); + } + Number significance = getAnnotation(ast, SIGNIFICANCE, + Number.class, null, "term significance"); + if (significance != null) { + out.setSignificance(significance.doubleValue()); + } + Integer uniqueId = getAnnotation(ast, UNIQUE_ID, Integer.class, + null, "term ID", false); + if (uniqueId != null) { + out.setUniqueID(uniqueId); + identifiedItems.put(uniqueId, out); + } + } + { + Item leaf = (Item) out; + Map<?, ?> itemAnnotations = getAnnotation(ast, ANNOTATIONS, + Map.class, Collections.emptyMap(), "item annotation map"); + for (Map.Entry<?, ?> entry : itemAnnotations.entrySet()) { + Preconditions.checkArgument(entry.getKey() instanceof String, + "Expected String annotation key, got %s.", entry + .getKey().getClass()); + Preconditions.checkArgument(entry.getValue() instanceof String, + "Expected String annotation value, got %s.", entry + .getValue().getClass()); + leaf.addAnnotation((String) entry.getKey(), entry.getValue()); + } + Boolean filter = getAnnotation(ast, FILTER, Boolean.class, null, + FILTER_DESCRIPTION); + if (filter != null) { + leaf.setFilter(filter); + } + Boolean isRanked = getAnnotation(ast, RANKED, Boolean.class, null, + RANKED_DESCRIPTION); + if (isRanked != null) { + leaf.setRanked(isRanked); + } + String label = getAnnotation(ast, LABEL, String.class, null, + "item label"); + if (label != null) { + leaf.setLabel(label); + } + Integer weight = getAnnotation(ast, WEIGHT, Integer.class, null, + "term weight for ranking"); + if (weight != null) { + leaf.setWeight(weight); + } + } + if (out instanceof IntItem) { + IntItem number = (IntItem) out; + Integer hitLimit = getCappedRangeSearchParameter(ast); + if (hitLimit != null) 
{ + number.setHitLimit(hitLimit.intValue()); + } + } + + return out; + } + + private Integer getCappedRangeSearchParameter(OperatorNode<?> ast) { + Integer hitLimit = getAnnotation(ast, HIT_LIMIT, Integer.class, null, "hit limit"); + + if (hitLimit != null) { + Boolean ascending = getAnnotation(ast, ASCENDING_HITS_ORDER, Boolean.class, null, + "ascending population ordering for capped range search"); + Boolean descending = getAnnotation(ast, DESCENDING_HITS_ORDER, Boolean.class, null, + "descending population ordering for capped range search"); + Preconditions.checkArgument(ascending == null || descending == null, + "Settings for both ascending and descending ordering set, only one of these expected."); + if (Boolean.TRUE.equals(descending) || Boolean.FALSE.equals(ascending)) { + hitLimit = Integer.valueOf(hitLimit.intValue() * -1); + } + } + return hitLimit; + } + + @Beta + public boolean isQueryParser() { + return queryParser; + } + + @Beta + public void setQueryParser(boolean queryParser) { + this.queryParser = queryParser; + } + + @Beta + public void setUserQuery(@NonNull Query userQuery) { + this.userQuery = userQuery; + } + + @Beta + public Set<String> getYqlSummaryFields() { + return yqlSummaryFields; + } + + @Beta + public List<VespaGroupingStep> getGroupingSteps() { + return groupingSteps; + } + + /** + * Give the offset expected from the latest parsed query if anything is + * explicitly specified. + * + * @return an Integer instance or null + */ + public Integer getOffset() { + return offset; + } + + /** + * Give the number of hits expected from the latest parsed query if anything + * is explicitly specified. + * + * @return an Integer instance or null + */ + public Integer getHits() { + return hits; + } + + /** + * The timeout specified in the YQL+ query last parsed. + * + * @return an Integer instance or null + */ + public Integer getTimeout() { + return timeout; + } + + /** + * The sorting specified in the YQL+ query last parsed. 
+ * + * @return a Sorting instance or null + */ + public Sorting getSorting() { + return sorting; + } + + Set<String> getDocTypes() { + return docTypes; + } + + Set<String> getYqlSources() { + return yqlSources; + } + + private static void assertHasOperator(OperatorNode<?> ast, + Class<? extends Operator> expectedOperatorClass) { + Preconditions.checkArgument( + expectedOperatorClass.isInstance(ast.getOperator()), + "Expected operator class %s, got %s.", + expectedOperatorClass.getName(), ast.getOperator().getClass() + .getName()); + } + + private static void assertHasOperator(OperatorNode<?> ast, + Operator expectedOperator) { + Preconditions.checkArgument(ast.getOperator() == expectedOperator, + "Expected operator %s, got %s.", expectedOperator, + ast.getOperator()); + } + + private static void assertHasFunctionName(OperatorNode<?> ast, + String expectedFunctionName) { + List<String> names = ast.getArgument(0); + Preconditions.checkArgument(expectedFunctionName.equals(names.get(0)), + "Expected function '%s', got '%s'.", expectedFunctionName, + names.get(0)); + } + + private static void addItems(OperatorNode<ExpressionOperator> ast, + WeightedSetItem out) { + switch (ast.getOperator()) { + case MAP: + addStringItems(ast, out); + break; + case ARRAY: + addLongItems(ast, out); + break; + default: + throw newUnexpectedArgumentException(ast.getOperator(), + ExpressionOperator.ARRAY, ExpressionOperator.MAP); + } + } + + private static void addStringItems(OperatorNode<ExpressionOperator> ast, + WeightedSetItem out) { + List<String> keys = ast.getArgument(0); + List<OperatorNode<ExpressionOperator>> values = ast.getArgument(1); + for (int i = 0; i < keys.size(); ++i) { + OperatorNode<ExpressionOperator> tokenWeight = values.get(i); + assertHasOperator(tokenWeight, ExpressionOperator.LITERAL); + out.addToken(keys.get(i), tokenWeight.getArgument(0, Integer.class)); + } + } + + private static void addLongItems(OperatorNode<ExpressionOperator> ast, + WeightedSetItem out) { 
+ List<OperatorNode<ExpressionOperator>> values = ast.getArgument(0); + for (OperatorNode<ExpressionOperator> value : values) { + assertHasOperator(value, ExpressionOperator.ARRAY); + List<OperatorNode<ExpressionOperator>> args = value.getArgument(0); + Preconditions.checkArgument(args.size() == 2, + "Expected item and weight, got %s.", args); + + OperatorNode<ExpressionOperator> tokenValueNode = args.get(0); + assertHasOperator(tokenValueNode, ExpressionOperator.LITERAL); + Number tokenValue = tokenValueNode.getArgument(0, Number.class); + Preconditions.checkArgument(tokenValue instanceof Integer + || tokenValue instanceof Long, + "Expected Integer or Long, got %s.", tokenValue.getClass() + .getName()); + + OperatorNode<ExpressionOperator> tokenWeightNode = args.get(1); + assertHasOperator(tokenWeightNode, ExpressionOperator.LITERAL); + Integer tokenWeight = tokenWeightNode.getArgument(0, Integer.class); + + out.addToken(tokenValue.longValue(), tokenWeight); + } + } + + /** Transfers the word-level annotations resolved through getAnnotation for {@code ast} (origin, position data, stemming, case normalization, accent dropping, AND-segmenting) to {@code out}; a setting whose annotation resolves to null is not touched on {@code out}. */ private void wordStyleSettings(OperatorNode<ExpressionOperator> ast, + WordItem out) { + Substring origin = getOrigin(ast); + if (origin != null) { + out.setOrigin(origin); + } + Boolean usePositionData = getAnnotation(ast, USE_POSITION_DATA, + Boolean.class, null, + USE_POSITION_DATA_DESCRIPTION); + if (usePositionData != null) { + out.setPositionData(usePositionData); + } + Boolean stem = getAnnotation(ast, STEM, Boolean.class, null, + STEM_DESCRIPTION); + if (stem != null) { + out.setStemmed(!stem); /* negated: a true STEM annotation yields setStemmed(false) */ + } + Boolean normalizeCase = getAnnotation(ast, NORMALIZE_CASE, + Boolean.class, null, + NORMALIZE_CASE_DESCRIPTION); + if (normalizeCase != null) { + out.setLowercased(!normalizeCase); /* negated: a true NORMALIZE_CASE annotation yields setLowercased(false) */ + } + Boolean accentDrop = getAnnotation(ast, ACCENT_DROP, Boolean.class, + null, + ACCENT_DROP_DESCRIPTION); + if (accentDrop != null) { + out.setNormalizable(accentDrop); + } + Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, + Boolean.class, null, + "setting for whether to force using AND for segments on 
and off"); + /* true selects BOOLEAN_AND, false selects PHRASE; a null annotation leaves the segmenting rule of out as it was */ if (andSegmenting != null) { + if (andSegmenting) { + out.setSegmentingRule(SegmentingRule.BOOLEAN_AND); + } else { + out.setSegmentingRule(SegmentingRule.PHRASE); + } + } + } + + /** Returns the canonical name, as reported by indexFactsSession, of the field obtained from {@code operatorNode} via fetchFieldRead. A field for which indexFactsSession.isIndex is false fails the checkArgument call (IllegalArgumentException). */ @NonNull + private String getIndex(OperatorNode<ExpressionOperator> operatorNode) { + String index = fetchFieldRead(operatorNode); + Preconditions.checkArgument(indexFactsSession.isIndex(index), "Field '%s' does not exist.", index); + return indexFactsSession.getCanonicName(index); + } + + /** Converts the ORIGIN map annotation of {@code ast} into a Substring constructed from the offset, the sum of offset and length, and the original string; returns null when no such annotation is found. A map that lacks one of the three entries, or holds one of the wrong class, is rejected by getMapValue. */ private Substring getOrigin(OperatorNode<ExpressionOperator> ast) { + Map<?, ?> origin = getAnnotation(ast, ORIGIN, Map.class, null, + ORIGIN_DESCRIPTION); + if (origin == null) { + return null; + } + String original = getMapValue(ORIGIN, origin, ORIGIN_ORIGINAL, + String.class); + int offset = getMapValue(ORIGIN, origin, ORIGIN_OFFSET, Integer.class); + int length = getMapValue(ORIGIN, origin, ORIGIN_LENGTH, Integer.class); + return new Substring(offset, length + offset, original); + } + + /** Fetches {@code key} from {@code map} and returns it cast to {@code expectedValueClass}. A missing (null) entry or an entry of another class fails a checkArgument (IllegalArgumentException); {@code mapName} is used in the error messages only. */ private static <T> T getMapValue(String mapName, Map<?, ?> map, String key, + Class<T> expectedValueClass) { + Object value = map.get(key); + Preconditions.checkArgument(value != null, + "Map annotation '%s' must contain an entry with key '%s'.", + mapName, key); + /* cannot fail after the preceding checkArgument; NOTE(review): presumably kept for static null analysis - confirm */ assert value != null; + Preconditions.checkArgument(expectedValueClass.isInstance(value), + "Expected %s for entry '%s' in map annotation '%s', got %s.", + expectedValueClass.getName(), key, mapName, value.getClass() + .getName()); + return expectedValueClass.cast(value); + } + + /** Convenience overload: delegates to the six-argument getAnnotation with considerParents set to true. */ private <T> T getAnnotation(OperatorNode<?> ast, String key, + Class<T> expectedClass, T defaultValue, String description) { + return getAnnotation(ast, key, expectedClass, defaultValue, + description, true); + } + + /** Reads annotation {@code key} from {@code ast}; when it is absent there and {@code considerParents} is true, the nodes of annotationStack are consulted in iteration order until one supplies a value. Returns {@code defaultValue} if nothing was found, otherwise the value cast to {@code expectedClass}; a value of another class fails a checkArgument whose message includes {@code description}. */ private <T> T getAnnotation(OperatorNode<?> ast, String key, + Class<T> expectedClass, T defaultValue, String description, boolean considerParents) { + Object value = ast.getAnnotation(key); + for (Iterator<OperatorNode<?>> i = annotationStack.iterator(); 
value == null + && considerParents && i.hasNext();) { + value = i.next().getAnnotation(key); + } + if (value == null) { + return defaultValue; + } + Preconditions.checkArgument(expectedClass.isInstance(value), + "Expected %s for annotation '%s' (%s), got %s.", expectedClass + .getName(), key, description, value.getClass() + .getName()); + return expectedClass.cast(value); + } + + /** Creates, without throwing, an IllegalArgumentException with a message of the form: Expected a, b or c, got actual. The expected values are separated by commas, except that the last two are separated by the word or. */ private static IllegalArgumentException newUnexpectedArgumentException( + Object actual, Object... expected) { + StringBuilder out = new StringBuilder("Expected "); + for (int i = 0, len = expected.length; i < len; ++i) { + out.append(expected[i]); + if (i < len - 2) { + out.append(", "); + } else if (i < len - 1) { + out.append(" or "); + } + } + out.append(", got ").append(actual).append("."); + return new IllegalArgumentException(out.toString()); + } + + /** Returns the segmenterBackend field (package-private accessor). */ String getSegmenterBackend() { + return segmenterBackend; + } + + /** Returns the segmenterVersion field (package-private accessor). */ Version getSegmenterVersion() { + return segmenterVersion; + } + + /** Holder for a fromItem, a toId and a weight; all three fields are final and assigned once by the constructor. NOTE(review): looks like it records an item connection that is resolved by id later - confirm against the code that creates it. */ private static final class ConnectedItem { + + final double weight; + final int toId; + final TaggableItem fromItem; + + ConnectedItem(TaggableItem fromItem, int toId, double weight) { + this.weight = weight; + this.toId = toId; + this.fromItem = fromItem; + } + } +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlQuery.java b/container-search/src/main/java/com/yahoo/search/yql/YqlQuery.java new file mode 100644 index 00000000000..27c27b88d24 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlQuery.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.yql; + +/** + * A Yql query. These usually contain variables, which allows the yql query to be parsed once at configuration + * time and turned into fully specified queries at request time without reparsing. 
+ * + * @author bratseth + */ +// TODO: This is just a skeleton +public class YqlQuery { + + private YqlQuery(String yqlQuery) { + // TODO + } + + /** Creates a YQL query from a string */ + public static YqlQuery from(String yqlQueryString) { + return new YqlQuery(yqlQueryString); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/package-info.java b/container-search/src/main/java/com/yahoo/search/yql/package-info.java new file mode 100644 index 00000000000..79cf983e471 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/yql/package-info.java @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * YQL+ integration. + * + * <p>Not a public API.</p> + */ +@ExportPackage +package com.yahoo.search.yql; + +import com.yahoo.osgi.annotation.ExportPackage; + |