// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.yql;

import java.math.BigInteger;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.StringTokenizer;

import com.google.common.annotations.Beta;
import com.google.common.base.Preconditions;
import com.yahoo.collections.LazyMap;
import com.yahoo.collections.LazySet;
import com.yahoo.collections.Tuple2;
import com.yahoo.component.Version;
import com.yahoo.language.Language;
import com.yahoo.language.Linguistics;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.Segmenter;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.query.AndItem;
import com.yahoo.prelude.query.AndSegmentItem;
import com.yahoo.prelude.query.CompositeItem;
import com.yahoo.prelude.query.DotProductItem;
import com.yahoo.prelude.query.EquivItem;
import com.yahoo.prelude.query.ExactStringItem;
import com.yahoo.prelude.query.IntItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.Limit;
import com.yahoo.prelude.query.NearItem;
import com.yahoo.prelude.query.NotItem;
import com.yahoo.prelude.query.NullItem;
import com.yahoo.prelude.query.ONearItem;
import com.yahoo.prelude.query.OrItem;
import com.yahoo.prelude.query.PhraseItem;
import com.yahoo.prelude.query.PhraseSegmentItem;
import com.yahoo.prelude.query.PredicateQueryItem;
import com.yahoo.prelude.query.PrefixItem;
import com.yahoo.prelude.query.RangeItem;
import com.yahoo.prelude.query.RankItem;
import com.yahoo.prelude.query.RegExpItem;
import com.yahoo.prelude.query.SameElementItem;
import com.yahoo.prelude.query.SegmentItem;
import com.yahoo.prelude.query.SegmentingRule;
import com.yahoo.prelude.query.Substring;
import com.yahoo.prelude.query.SubstringItem;
import com.yahoo.prelude.query.SuffixItem;
import com.yahoo.prelude.query.TaggableItem;
import com.yahoo.prelude.query.ToolBox;
import com.yahoo.prelude.query.ToolBox.QueryVisitor;
import com.yahoo.prelude.query.WandItem;
import com.yahoo.prelude.query.WeakAndItem;
import com.yahoo.prelude.query.WeightedSetItem;
import com.yahoo.prelude.query.WordAlternativesItem;
import com.yahoo.prelude.query.WordItem;
import com.yahoo.search.Query;
import com.yahoo.search.grouping.Continuation;
import com.yahoo.search.grouping.request.GroupingOperation;
import com.yahoo.search.query.QueryTree;
import com.yahoo.search.query.Sorting;
import com.yahoo.search.query.Sorting.AttributeSorter;
import com.yahoo.search.query.Sorting.FieldOrder;
import com.yahoo.search.query.Sorting.LowerCaseSorter;
import com.yahoo.search.query.Sorting.Order;
import com.yahoo.search.query.Sorting.RawSorter;
import com.yahoo.search.query.Sorting.UcaSorter;
import com.yahoo.search.query.parser.Parsable;
import com.yahoo.search.query.parser.Parser;
import com.yahoo.search.query.parser.ParserEnvironment;
import com.yahoo.search.query.parser.ParserFactory;
import edu.umd.cs.findbugs.annotations.NonNull;

/**
 * The YQL query language.
 *
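 * A minimal usage sketch (hypothetical query string; assumes an injected
 * {@link ParserEnvironment}):
 * <pre>
 * YqlParser parser = new YqlParser(environment);
 * QueryTree tree = parser.parse(new Parsable()
 *         .setQuery("select * from sources * where title contains \"madonna\";"));
 * </pre>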

 * This class must be kept in lockstep with {@link VespaSerializer}.
 * Adding anything here will usually require a corresponding addition in
 * VespaSerializer.
 *
 * @author Steinar Knutsen
 * @author Stian Kristoffersen
 * @author Simon Thoresen Hult
 */
@Beta
public class YqlParser implements Parser {

    private static final String DESCENDING_HITS_ORDER = "descending";
    private static final String ASCENDING_HITS_ORDER = "ascending";

    private enum SegmentWhen {
        NEVER, POSSIBLY, ALWAYS;
    }

    private static class IndexNameExpander {
        public String expand(String leaf) { return leaf; }
    }

    private static final Integer DEFAULT_HITS = 10;
    private static final Integer DEFAULT_OFFSET = 0;
    private static final Integer DEFAULT_TARGET_NUM_HITS = 10;
    private static final String ACCENT_DROP_DESCRIPTION = "setting for whether to remove accents if field implies it";
    private static final String ANNOTATIONS = "annotations";
    private static final String FILTER_DESCRIPTION = "term filter setting";
    private static final String IMPLICIT_TRANSFORMS_DESCRIPTION = "setting for whether built-in query transformers should touch the term";
    private static final String NFKC = "nfkc";
    private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it";
    private static final String ORIGIN_DESCRIPTION = "string origin for a term";
    private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking";
    private static final String SEGMENTER_BACKEND = "backend";
    private static final String SEGMENTER = "segmenter";
    private static final String SEGMENTER_VERSION = "version";
    private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it";
    private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item";
    private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty";
    private static final String USER_INPUT_DEFAULT_INDEX = "defaultIndex";
    private static final String USER_INPUT_GRAMMAR = "grammar";
    private static final String USER_INPUT_LANGUAGE = "language";
    private static final String USER_INPUT_RAW = "raw";
    private static final String USER_INPUT_SEGMENT = "segment";
    private static final String USER_INPUT = "userInput";
    private static final String USER_QUERY = "userQuery";
    private static final String NON_EMPTY = "nonEmpty";

    public static final String SORTING_FUNCTION = "function";
    public static final String SORTING_LOCALE = "locale";
    public static final String SORTING_STRENGTH = "strength";

    static final String ACCENT_DROP = "accentDrop";
    static final String ALTERNATIVES = "alternatives";
    static final String AND_SEGMENTING = "andSegmenting";
    static final String BOUNDS = "bounds";
    static final String BOUNDS_LEFT_OPEN = "leftOpen";
    static final String BOUNDS_OPEN = "open";
    static final String BOUNDS_RIGHT_OPEN = "rightOpen";
    static final String CONNECTION_ID = "id";
    static final String CONNECTION_WEIGHT = "weight";
    static final String CONNECTIVITY = "connectivity";
    static final String DISTANCE = "distance";
    static final String DOT_PRODUCT = "dotProduct";
    static final String EQUIV = "equiv";
    static final String FILTER = "filter";
    static final String HIT_LIMIT = "hitLimit";
    static final String IMPLICIT_TRANSFORMS = "implicitTransforms";
    static final String LABEL = "label";
    static final String NEAR = "near";
    static final String NORMALIZE_CASE = "normalizeCase";
    static final String ONEAR = "onear";
    static final String ORIGIN_LENGTH = "length";
    static final String ORIGIN_OFFSET = "offset";
    static final String ORIGIN = "origin";
    static final String ORIGIN_ORIGINAL = "original";
    static final String PHRASE = "phrase";
PREDICATE = "predicate"; static final String PREFIX = "prefix"; static final String RANGE = "range"; static final String RANKED = "ranked"; static final String RANK = "rank"; static final String SAME_ELEMENT = "sameElement"; static final String SCORE_THRESHOLD = "scoreThreshold"; static final String SIGNIFICANCE = "significance"; static final String STEM = "stem"; static final String SUBSTRING = "substring"; static final String SUFFIX = "suffix"; static final String TARGET_NUM_HITS = "targetNumHits"; static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; static final String UNIQUE_ID = "id"; static final String USE_POSITION_DATA = "usePositionData"; static final String WAND = "wand"; static final String WEAK_AND = "weakAnd"; static final String WEIGHTED_SET = "weightedSet"; static final String WEIGHT = "weight"; private final IndexFacts indexFacts; private final List connectedItems = new ArrayList<>(); private final List groupingSteps = new ArrayList<>(); private final Map identifiedItems = LazyMap.newHashMap(); private final Normalizer normalizer; private final Segmenter segmenter; private final Detector detector; private final Set yqlSources = LazySet.newHashSet(); private final Set yqlSummaryFields = LazySet.newHashSet(); private final String localSegmenterBackend; private final Version localSegmenterVersion; private Integer hits; private Integer offset; private Integer timeout; private Query userQuery; private Parsable currentlyParsing; private IndexFacts.Session indexFactsSession; private IndexNameExpander indexNameExpander = new IndexNameExpander(); private Set docTypes; private Sorting sorting; private String segmenterBackend; private Version segmenterVersion; private boolean queryParser = true; private boolean resegment = false; private final Deque> annotationStack = new ArrayDeque<>(); private final ParserEnvironment environment; private static final QueryVisitor noEmptyTerms = new QueryVisitor() { @Override public boolean visit(Item item) { if (item instanceof NullItem) { throw new IllegalArgumentException("Got NullItem inside nonEmpty()."); } else if (item instanceof WordItem) { if (((WordItem) item).getIndexedString().isEmpty()) { throw new IllegalArgumentException("Searching for empty string inside nonEmpty()"); } } else if (item instanceof CompositeItem) { if (((CompositeItem) item).getItemCount() == 0) { throw new IllegalArgumentException("Empty composite operator (" + item.getName() + ") inside nonEmpty()"); } } return true; } @Override public void onExit() { // NOP } }; public YqlParser(ParserEnvironment environment) { indexFacts = environment.getIndexFacts(); normalizer = environment.getLinguistics().getNormalizer(); segmenter = environment.getLinguistics().getSegmenter(); detector = environment.getLinguistics().getDetector(); this.environment = environment; Tuple2 version = environment.getLinguistics().getVersion(Linguistics.Component.SEGMENTER); localSegmenterBackend = version.first; localSegmenterVersion = version.second; } @NonNull @Override public QueryTree parse(Parsable query) { indexFactsSession = indexFacts.newSession(query.getSources(), query.getRestrict()); connectedItems.clear(); groupingSteps.clear(); identifiedItems.clear(); yqlSources.clear(); yqlSummaryFields.clear(); annotationStack.clear(); hits = null; offset = null; timeout = null; // userQuery set prior to calling this currentlyParsing = query; docTypes = null; sorting = null; segmenterBackend = null; segmenterVersion = null; // queryParser set prior to calling this resegment = false; 
        return buildTree(parseYqlProgram());
    }

    private void joinDocTypesFromUserQueryAndYql() {
        List<String> allSourceNames = new ArrayList<>(currentlyParsing.getSources().size() + yqlSources.size());
        if ( ! yqlSources.isEmpty()) {
            allSourceNames.addAll(currentlyParsing.getSources());
            allSourceNames.addAll(yqlSources);
        } else {
            // no sources == all sources in Vespa
        }
        indexFactsSession = indexFacts.newSession(allSourceNames, currentlyParsing.getRestrict());
        docTypes = new HashSet<>(indexFactsSession.documentTypes());
    }

    @NonNull
    private QueryTree buildTree(OperatorNode<?> filterPart) {
        Preconditions.checkArgument(filterPart.getArguments().length == 2,
                                    "Expected 2 arguments to filter, got %s.",
                                    filterPart.getArguments().length);
        populateYqlSources(filterPart.<OperatorNode<SequenceOperator>> getArgument(0));
        OperatorNode<ExpressionOperator> filterExpression = filterPart.getArgument(1);
        populateLinguisticsAnnotations(filterExpression);
        Item root = convertExpression(filterExpression);
        connectItems();
        userQuery = null;
        return new QueryTree(root);
    }

    private void populateLinguisticsAnnotations(OperatorNode<ExpressionOperator> filterExpression) {
        Map<?, ?> segmenter = getAnnotation(filterExpression, SEGMENTER, Map.class, null, "segmenter engine and version");
        if (segmenter == null) {
            segmenterVersion = null;
            segmenterBackend = null;
            resegment = false;
        } else {
            segmenterBackend = getMapValue(SEGMENTER, segmenter, SEGMENTER_BACKEND, String.class);
            try {
                segmenterVersion = new Version(getMapValue(SEGMENTER, segmenter, SEGMENTER_VERSION, String.class));
            } catch (RuntimeException e) {
                segmenterVersion = null;
            }
            resegment = ! localSegmenterBackend.equals(segmenterBackend)
                        || ! localSegmenterVersion.equals(segmenterVersion);
        }
    }

    private void populateYqlSources(OperatorNode<SequenceOperator> filterArgs) {
        yqlSources.clear();
        if (filterArgs.getOperator() == SequenceOperator.SCAN) {
            for (String source : filterArgs.<List<String>> getArgument(0)) {
                yqlSources.add(source);
            }
        } else if (filterArgs.getOperator() == SequenceOperator.ALL) {
            // yqlSources has already been cleared
        } else if (filterArgs.getOperator() == SequenceOperator.MULTISOURCE) {
            for (List<String> source : filterArgs.<List<List<String>>> getArgument(0)) {
                yqlSources.add(source.get(0));
            }
        } else {
            throw newUnexpectedArgumentException(filterArgs.getOperator(),
                                                 SequenceOperator.SCAN, SequenceOperator.ALL, SequenceOperator.MULTISOURCE);
        }
        joinDocTypesFromUserQueryAndYql();
    }

    private void populateYqlSummaryFields(List<OperatorNode<ProjectOperator>> fields) {
        yqlSummaryFields.clear();
        for (OperatorNode<ProjectOperator> field : fields) {
            assertHasOperator(field, ProjectOperator.FIELD);
            yqlSummaryFields.add(field.getArgument(1, String.class));
        }
    }

    private void connectItems() {
        for (ConnectedItem entry : connectedItems) {
            TaggableItem to = identifiedItems.get(entry.toId);
            Preconditions.checkNotNull(to,
                                       "Item '%s' was specified to connect to item with ID %s, which does not "
                                       + "exist in the query.",
                                       entry.fromItem, entry.toId);
            entry.fromItem.setConnectivity((Item) to, entry.weight);
        }
    }

    @NonNull
    private Item convertExpression(OperatorNode<ExpressionOperator> ast) {
        try {
            annotationStack.addFirst(ast);
            switch (ast.getOperator()) {
                case AND: return buildAnd(ast);
                case OR: return buildOr(ast);
                case EQ: return buildEquals(ast);
                case LT: return buildLessThan(ast);
                case GT: return buildGreaterThan(ast);
                case LTEQ: return buildLessThanOrEquals(ast);
                case GTEQ: return buildGreaterThanOrEquals(ast);
                case CONTAINS: return buildTermSearch(ast);
                case MATCHES: return buildRegExpSearch(ast);
                case CALL: return buildFunctionCall(ast);
                default:
                    throw newUnexpectedArgumentException(ast.getOperator(),
                            ExpressionOperator.AND, ExpressionOperator.CALL, ExpressionOperator.CONTAINS,
ExpressionOperator.EQ, ExpressionOperator.GT, ExpressionOperator.GTEQ, ExpressionOperator.LT, ExpressionOperator.LTEQ, ExpressionOperator.OR); } } finally { annotationStack.removeFirst(); } } @NonNull private Item buildFunctionCall(OperatorNode ast) { List names = ast.getArgument(0); Preconditions.checkArgument(names.size() == 1, "Expected 1 name, got %s.", names.size()); switch (names.get(0)) { case USER_QUERY: return fetchUserQuery(); case RANGE: return buildRange(ast); case WAND: return buildWand(ast); case WEIGHTED_SET: return buildWeightedSet(ast); case DOT_PRODUCT: return buildDotProduct(ast); case PREDICATE: return buildPredicate(ast); case RANK: return buildRank(ast); case WEAK_AND: return buildWeakAnd(ast); case USER_INPUT: return buildUserInput(ast); case NON_EMPTY: return ensureNonEmpty(ast); default: throw newUnexpectedArgumentException(names.get(0), DOT_PRODUCT, RANGE, RANK, USER_QUERY, WAND, WEAK_AND, WEIGHTED_SET, PREDICATE, USER_INPUT, NON_EMPTY); } } private Item ensureNonEmpty(OperatorNode ast) { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() == 1, "Expected 1 arguments, got %s.", args.size()); Item item = convertExpression(args.get(0)); ToolBox.visit(noEmptyTerms, item); return item; } @NonNull private Item buildWeightedSet(OperatorNode ast) { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size()); return fillWeightedSet(ast, args.get(1), new WeightedSetItem(getIndex(args.get(0)))); } @NonNull private Item buildDotProduct(OperatorNode ast) { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size()); return fillWeightedSet(ast, args.get(1), new DotProductItem(getIndex(args.get(0)))); } @NonNull private Item buildPredicate(OperatorNode ast) { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() == 3, "Expected 3 arguments, got %s.", args.size()); PredicateQueryItem item = new PredicateQueryItem(); item.setIndexName(getIndex(args.get(0))); addFeatures(args.get(1), (key, value, subqueryBitmap) -> item.addFeature(key, (String) value, subqueryBitmap), PredicateQueryItem.ALL_SUB_QUERIES); addFeatures(args.get(2), (key, value, subqueryBitmap) -> { if (value instanceof Long) { item.addRangeFeature(key, (Long) value, subqueryBitmap); } else { item.addRangeFeature(key, (Integer) value, subqueryBitmap); } }, PredicateQueryItem.ALL_SUB_QUERIES); return leafStyleSettings(ast, item); } interface AddFeature { void addFeature(String key, Object value, long subqueryBitmap); } private void addFeatures(OperatorNode map, AddFeature item, long subqueryBitmap) { if (map.getOperator() != ExpressionOperator.MAP) return; assertHasOperator(map, ExpressionOperator.MAP); List keys = map.getArgument(0); List> values = map.getArgument(1); for (int i = 0; i < keys.size(); ++i) { String key = keys.get(i); OperatorNode value = values.get(i); if (value.getOperator() == ExpressionOperator.ARRAY) { List> multiValues = value.getArgument(0); for (OperatorNode multiValue : multiValues) { assertHasOperator(multiValue, ExpressionOperator.LITERAL); item.addFeature(key, multiValue.getArgument(0), subqueryBitmap); } } else if (value.getOperator() == ExpressionOperator.LITERAL) { item.addFeature(key, value.getArgument(0), subqueryBitmap); } else { assertHasOperator(value, ExpressionOperator.MAP); // Subquery syntax Preconditions.checkArgument(key.indexOf("0x") == 0 || key.indexOf("[") == 0); if (key.indexOf("0x") == 0) { String 
subqueryString = key.substring(2); if (subqueryString.length() > 16) throw new NumberFormatException("Too long subquery string: " + key); long currentSubqueryBitmap = new BigInteger(subqueryString, 16).longValue(); addFeatures(value, item, currentSubqueryBitmap); } else { StringTokenizer bits = new StringTokenizer(key.substring(1, key.length() - 1), ","); long currentSubqueryBitmap = 0; while (bits.hasMoreTokens()) { int bit = Integer.parseInt(bits.nextToken().trim()); currentSubqueryBitmap |= 1L << bit; } addFeatures(value, item, currentSubqueryBitmap); } } } } @NonNull private Item buildWand(OperatorNode ast) { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size()); WandItem out = new WandItem(getIndex(args.get(0)), getAnnotation(ast, TARGET_NUM_HITS, Integer.class, DEFAULT_TARGET_NUM_HITS, "desired number of hits to accumulate in wand")); Double scoreThreshold = getAnnotation(ast, SCORE_THRESHOLD, Double.class, null, "min score for hit inclusion"); if (scoreThreshold != null) { out.setScoreThreshold(scoreThreshold); } Double thresholdBoostFactor = getAnnotation(ast, THRESHOLD_BOOST_FACTOR, Double.class, null, "boost factor used to boost threshold before comparing against upper bound score"); if (thresholdBoostFactor != null) { out.setThresholdBoostFactor(thresholdBoostFactor); } return fillWeightedSet(ast, args.get(1), out); } @NonNull private WeightedSetItem fillWeightedSet(OperatorNode ast, OperatorNode arg, @NonNull WeightedSetItem out) { addItems(arg, out); return leafStyleSettings(ast, out); } private static class PrefixExpander extends IndexNameExpander { private final String prefix; public PrefixExpander(String prefix) { this.prefix = prefix + "."; } @Override public String expand(String leaf) { return prefix + leaf; } } @NonNull private Item instantiateSameElementItem(String field, OperatorNode ast) { assertHasFunctionName(ast, SAME_ELEMENT); SameElementItem sameElement = new SameElementItem(field); // All terms below sameElement are relative to this. 
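        // Swap in a PrefixExpander so that child terms of sameElement() resolve against
        // "<field>.<name>", then restore the previous expander once all terms are converted.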
IndexNameExpander prev = swapIndexCreator(new PrefixExpander(field)); for (OperatorNode term : ast.>> getArgument(1)) { sameElement.addItem(convertExpression(term)); } swapIndexCreator(prev); return sameElement; } @NonNull private Item instantiatePhraseItem(String field, OperatorNode ast) { assertHasFunctionName(ast, PHRASE); if (getAnnotation(ast, ORIGIN, Map.class, null, ORIGIN_DESCRIPTION, false) != null) { return instantiatePhraseSegmentItem(field, ast, false); } PhraseItem phrase = new PhraseItem(); phrase.setIndexName(field); for (OperatorNode word : ast.>> getArgument(1)) { if (word.getOperator() == ExpressionOperator.CALL) { List names = word.getArgument(0); switch (names.get(0)) { case PHRASE: if (getAnnotation(word, ORIGIN, Map.class, null, ORIGIN_DESCRIPTION, false) == null) { phrase.addItem(instantiatePhraseItem(field, word)); } else { phrase.addItem(instantiatePhraseSegmentItem(field, word, true)); } break; case ALTERNATIVES: phrase.addItem(instantiateWordAlternativesItem(field, word)); break; default: throw new IllegalArgumentException("Expected phrase or word alternatives, got " + names.get(0)); } } else { phrase.addItem(instantiateWordItem(field, word, phrase.getClass())); } } return leafStyleSettings(ast, phrase); } @NonNull private Item instantiatePhraseSegmentItem(String field, OperatorNode ast, boolean forcePhrase) { Substring origin = getOrigin(ast); Boolean stem = getAnnotation(ast, STEM, Boolean.class, Boolean.TRUE, STEM_DESCRIPTION); Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, Boolean.class, Boolean.FALSE, "setting for whether to force using AND for segments on and off"); SegmentItem phrase; List words = null; if (forcePhrase || !andSegmenting) { phrase = new PhraseSegmentItem(origin.getValue(), origin.getValue(), true, !stem, origin); } else { phrase = new AndSegmentItem(origin.getValue(), true, !stem); } phrase.setIndexName(field); if (resegment && getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) { words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage()); } if (words != null && words.size() > 0) { for (String word : words) { phrase.addItem(new WordItem(word, field, true)); } } else { for (OperatorNode word : ast.>> getArgument(1)) { phrase.addItem(instantiateWordItem(field, word, phrase.getClass(), SegmentWhen.NEVER)); } } if (phrase instanceof TaggableItem) { leafStyleSettings(ast, (TaggableItem) phrase); } phrase.lock(); return phrase; } @NonNull private Item instantiateNearItem(String field, OperatorNode ast) { assertHasFunctionName(ast, NEAR); NearItem near = new NearItem(); near.setIndexName(field); for (OperatorNode word : ast.>> getArgument(1)) { near.addItem(instantiateWordItem(field, word, near.getClass())); } Integer distance = getAnnotation(ast, DISTANCE, Integer.class, null, "term distance for NEAR operator"); if (distance != null) { near.setDistance(distance); } return near; } @NonNull private Item instantiateONearItem(String field, OperatorNode ast) { assertHasFunctionName(ast, ONEAR); NearItem onear = new ONearItem(); onear.setIndexName(field); for (OperatorNode word : ast.>> getArgument(1)) { onear.addItem(instantiateWordItem(field, word, onear.getClass())); } Integer distance = getAnnotation(ast, DISTANCE, Integer.class, null, "term distance for ONEAR operator"); if (distance != null) { onear.setDistance(distance); } return onear; } @NonNull private Item fetchUserQuery() { Preconditions.checkState(!queryParser, "Tried inserting user query into itself."); 
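        // userQuery() splices the root of the already parsed user query into this tree, so it
        // is only valid when this parser instance is not itself parsing that user query.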
Preconditions.checkState(userQuery != null, "User query must be set before trying to build complete query " + "tree including user query."); return userQuery.getModel().getQueryTree().getRoot(); } @NonNull private Item buildUserInput(OperatorNode ast) { // TODO add support for default arguments if property results in nothing List> args = ast.getArgument(1); String wordData = getStringContents(args.get(0)); Boolean allowEmpty = getAnnotation(ast, USER_INPUT_ALLOW_EMPTY, Boolean.class, Boolean.FALSE, "flag for allowing NullItem to be returned"); if (allowEmpty && (wordData == null || wordData.isEmpty())) return new NullItem(); String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class, Query.Type.ALL.toString(), "grammar for handling user input"); String defaultIndex = getAnnotation(ast, USER_INPUT_DEFAULT_INDEX, String.class, "default", "default index for user input terms"); Language language = decideParsingLanguage(ast, wordData); Item item; if (USER_INPUT_RAW.equals(grammar)) { item = instantiateWordItem(defaultIndex, wordData, ast, null, SegmentWhen.NEVER, true, language); } else if (USER_INPUT_SEGMENT.equals(grammar)) { item = instantiateWordItem(defaultIndex, wordData, ast, null, SegmentWhen.ALWAYS, false, language); } else { item = parseUserInput(grammar, defaultIndex, wordData, language, allowEmpty); propagateUserInputAnnotations(ast, item); } return item; } private Language decideParsingLanguage(OperatorNode ast, String wordData) { String languageTag = getAnnotation(ast, USER_INPUT_LANGUAGE, String.class, null, "language setting for segmenting query section"); Language language = Language.fromLanguageTag(languageTag); if (language != Language.UNKNOWN) return language; Optional explicitLanguage = currentlyParsing.getExplicitLanguage(); if (explicitLanguage.isPresent()) return explicitLanguage.get(); language = detector.detect(wordData, null).getLanguage(); if (language != Language.UNKNOWN) return language; return Language.ENGLISH; } private String getStringContents(OperatorNode propertySniffer) { switch (propertySniffer.getOperator()) { case LITERAL: return propertySniffer.getArgument(0, String.class); case VARREF: Preconditions.checkState(userQuery != null, "properties must be available when trying to fetch user input"); return userQuery.properties().getString(propertySniffer.getArgument(0, String.class)); default: throw newUnexpectedArgumentException(propertySniffer.getOperator(), ExpressionOperator.LITERAL, ExpressionOperator.VARREF); } } private void propagateUserInputAnnotations(OperatorNode ast, Item item) { ToolBox.visit(new AnnotationPropagator(ast), item); } @NonNull private Item parseUserInput(String grammar, String defaultIndex, String wordData, Language language, boolean allowNullItem) { Query.Type parseAs = Query.Type.getType(grammar); Parser parser = ParserFactory.newInstance(parseAs, environment); // perhaps not use already resolved doctypes, but respect source and restrict Item item = parser.parse(new Parsable().setQuery(wordData) .addSources(docTypes) .setLanguage(language) .setDefaultIndexName(defaultIndex)).getRoot(); // the null check should be unnecessary, but is there to avoid having to suppress null warnings if ( !allowNullItem && (item == null || item instanceof NullItem)) throw new IllegalArgumentException("Parsing '" + wordData + "' only resulted in NullItem."); if (language != Language.ENGLISH) // mark the language used, unless it's the default item.setLanguage(language); return item; } @NonNull private OperatorNode parseYqlProgram() { 
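        // The program parser wraps the statement in PROGRAM/EXECUTE nodes; unwrap those, then
        // peel off timeout, pipe (grouping), projection, offset/hits and sorting operators
        // until only the FILTER node holding the where-clause remains.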
OperatorNode ast; try { ast = new ProgramParser().parse("query", currentlyParsing.getQuery()); } catch (Exception e) { throw new IllegalArgumentException(e); } assertHasOperator(ast, StatementOperator.PROGRAM); Preconditions.checkArgument(ast.getArguments().length == 1, "Expected only a single argument to the root node, got %s.", ast.getArguments().length); // TODO: should we check size of first argument as well? ast = ast.>> getArgument(0).get(0); assertHasOperator(ast, StatementOperator.EXECUTE); ast = ast.getArgument(0); ast = fetchTimeout(ast); ast = fetchPipe(ast); ast = fetchSummaryFields(ast); ast = fetchOffsetAndHits(ast); ast = fetchSorting(ast); assertHasOperator(ast, SequenceOperator.FILTER); return ast; } @SuppressWarnings("unchecked") private OperatorNode fetchPipe(OperatorNode toScan) { OperatorNode ast = toScan; while (ast.getOperator() == SequenceOperator.PIPE) { OperatorNode groupingAst = ast.>> getArgument(2).get(0); GroupingOperation groupingOperation = GroupingOperation.fromString(groupingAst. getArgument(0)); VespaGroupingStep groupingStep = new VespaGroupingStep(groupingOperation); List continuations = getAnnotation(groupingAst, "continuations", List.class, Collections.emptyList(), "grouping continuations"); for (String continuation : continuations) { groupingStep.continuations().add(Continuation.fromString(continuation)); } groupingSteps.add(groupingStep); ast = ast.getArgument(0); } Collections.reverse(groupingSteps); return ast; } @NonNull private OperatorNode fetchSorting(OperatorNode ast) { if (ast.getOperator() != SequenceOperator.SORT) return ast; List sortingInit = new ArrayList<>(); List> sortArguments = ast.getArgument(1); for (OperatorNode op : sortArguments) { OperatorNode fieldNode = op.> getArgument(0); String field = fetchFieldRead(fieldNode); String locale = getAnnotation(fieldNode, SORTING_LOCALE, String.class, null, "locale used by sorting function"); String function = getAnnotation(fieldNode, SORTING_FUNCTION, String.class, null, "sorting function for the specified attribute"); String strength = getAnnotation(fieldNode, SORTING_STRENGTH, String.class, null, "strength for sorting function"); AttributeSorter sorter; if (function == null) { sorter = new AttributeSorter(field); } else if (Sorting.LOWERCASE.equals(function)) { sorter = new LowerCaseSorter(field); } else if (Sorting.RAW.equals(function)) { sorter = new RawSorter(field); } else if (Sorting.UCA.equals(function)) { if (locale != null) { UcaSorter.Strength ucaStrength = UcaSorter.Strength.UNDEFINED; if (strength != null) { if (Sorting.STRENGTH_PRIMARY.equalsIgnoreCase(strength)) { ucaStrength = UcaSorter.Strength.PRIMARY; } else if (Sorting.STRENGTH_SECONDARY .equalsIgnoreCase(strength)) { ucaStrength = UcaSorter.Strength.SECONDARY; } else if (Sorting.STRENGTH_TERTIARY .equalsIgnoreCase(strength)) { ucaStrength = UcaSorter.Strength.TERTIARY; } else if (Sorting.STRENGTH_QUATERNARY .equalsIgnoreCase(strength)) { ucaStrength = UcaSorter.Strength.QUATERNARY; } else if (Sorting.STRENGTH_IDENTICAL .equalsIgnoreCase(strength)) { ucaStrength = UcaSorter.Strength.IDENTICAL; } else { throw newUnexpectedArgumentException(function, Sorting.STRENGTH_PRIMARY, Sorting.STRENGTH_SECONDARY, Sorting.STRENGTH_TERTIARY, Sorting.STRENGTH_QUATERNARY, Sorting.STRENGTH_IDENTICAL); } sorter = new UcaSorter(field, locale, ucaStrength); } else { sorter = new UcaSorter(field, locale, ucaStrength); } } else { sorter = new UcaSorter(field); } } else { throw newUnexpectedArgumentException(function, "lowercase", "raw", 
"uca"); } switch ((SortOperator) op.getOperator()) { case ASC: sortingInit.add(new FieldOrder(sorter, Order.ASCENDING)); break; case DESC: sortingInit.add(new FieldOrder(sorter, Order.DESCENDING)); break; default: throw newUnexpectedArgumentException(op.getOperator(), SortOperator.ASC, SortOperator.DESC); } } sorting = new Sorting(sortingInit); return ast.getArgument(0); } @NonNull private OperatorNode fetchOffsetAndHits(OperatorNode ast) { if (ast.getOperator() == SequenceOperator.OFFSET) { offset = ast.> getArgument(1). getArgument(0); hits = DEFAULT_HITS; return ast.getArgument(0); } if (ast.getOperator() == SequenceOperator.SLICE) { offset = ast.> getArgument(1). getArgument(0); hits = ast.> getArgument(2). getArgument(0) - offset; return ast.getArgument(0); } if (ast.getOperator() == SequenceOperator.LIMIT) { hits = ast.> getArgument(1). getArgument(0); offset = DEFAULT_OFFSET; return ast.getArgument(0); } return ast; } @NonNull private OperatorNode fetchSummaryFields(OperatorNode ast) { if (ast.getOperator() != SequenceOperator.PROJECT) return ast; Preconditions.checkArgument(ast.getArguments().length == 2, "Expected 2 arguments to PROJECT, got %s.", ast.getArguments().length); populateYqlSummaryFields(ast.>> getArgument(1)); return ast.getArgument(0); } private OperatorNode fetchTimeout(OperatorNode ast) { if (ast.getOperator() != SequenceOperator.TIMEOUT) return ast; timeout = ast.> getArgument(1). getArgument(0); return ast.getArgument(0); } @NonNull private static String fetchFieldRead(OperatorNode ast) { switch (ast.getOperator()) { case READ_FIELD: return ast.getArgument(1); case PROPREF: return new StringBuilder(fetchFieldRead(ast.getArgument(0))) .append('.').append(ast.getArgument(1).toString()).toString(); default: throw newUnexpectedArgumentException(ast.getOperator(), ExpressionOperator.READ_FIELD, ExpressionOperator.PROPREF); } } @NonNull private IntItem buildGreaterThanOrEquals(OperatorNode ast) { IntItem number; if (isIndexOnLeftHandSide(ast)) { number = new IntItem("[" + fetchConditionWord(ast) + ";]", fetchConditionIndex(ast)); number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), number); } else { number = new IntItem("[;" + fetchConditionWord(ast) + "]", fetchConditionIndex(ast)); number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), number); } return number; } @NonNull private IntItem buildLessThanOrEquals(OperatorNode ast) { IntItem number; if (isIndexOnLeftHandSide(ast)) { number = new IntItem("[;" + fetchConditionWord(ast) + "]", fetchConditionIndex(ast)); number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), number); } else { number = new IntItem("[" + fetchConditionWord(ast) + ";]", fetchConditionIndex(ast)); number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), number); } return number; } @NonNull private IntItem buildGreaterThan(OperatorNode ast) { IntItem number; if (isIndexOnLeftHandSide(ast)) { number = new IntItem(">" + fetchConditionWord(ast), fetchConditionIndex(ast)); number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), number); } else { number = new IntItem("<" + fetchConditionWord(ast), fetchConditionIndex(ast)); number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), number); } return number; } @NonNull private IntItem buildLessThan(OperatorNode ast) { IntItem number; if (isIndexOnLeftHandSide(ast)) { number = new IntItem("<" + fetchConditionWord(ast), fetchConditionIndex(ast)); number = leafStyleSettings(ast.getArgument(1, OperatorNode.class), number); } else { number = 
new IntItem(">" + fetchConditionWord(ast), fetchConditionIndex(ast)); number = leafStyleSettings(ast.getArgument(0, OperatorNode.class), number); } return number; } @NonNull private IntItem buildEquals(OperatorNode ast) { IntItem number = new IntItem(fetchConditionWord(ast), fetchConditionIndex(ast)); if (isIndexOnLeftHandSide(ast)) { return leafStyleSettings(ast.getArgument(1, OperatorNode.class), number); } else { return leafStyleSettings(ast.getArgument(0, OperatorNode.class), number); } } @NonNull private String fetchConditionIndex(OperatorNode ast) { OperatorNode lhs = ast.getArgument(0); OperatorNode rhs = ast.getArgument(1); if (lhs.getOperator() == ExpressionOperator.LITERAL || lhs.getOperator() == ExpressionOperator.NEGATE) { return getIndex(rhs); } if (rhs.getOperator() == ExpressionOperator.LITERAL || rhs.getOperator() == ExpressionOperator.NEGATE) { return getIndex(lhs); } throw new IllegalArgumentException("Expected LITERAL and READ_FIELD/PROPREF, got " + lhs.getOperator() + " and " + rhs.getOperator() + "."); } private static String getNumberAsString(OperatorNode ast) { String negative = ""; OperatorNode currentAst = ast; if (currentAst.getOperator() == ExpressionOperator.NEGATE) { negative = "-"; currentAst = currentAst.getArgument(0); } assertHasOperator(currentAst, ExpressionOperator.LITERAL); return negative + currentAst.getArgument(0).toString(); } @NonNull private static String fetchConditionWord(OperatorNode ast) { OperatorNode lhs = ast.getArgument(0); OperatorNode rhs = ast.getArgument(1); if (lhs.getOperator() == ExpressionOperator.LITERAL || lhs.getOperator() == ExpressionOperator.NEGATE) { assertFieldName(rhs); return getNumberAsString(lhs); } if (rhs.getOperator() == ExpressionOperator.LITERAL || rhs.getOperator() == ExpressionOperator.NEGATE) { assertFieldName(lhs); return getNumberAsString(rhs); } throw new IllegalArgumentException("Expected LITERAL/NEGATE and READ_FIELD/PROPREF, got " + lhs.getOperator() + " and " + rhs.getOperator() + "."); } private static boolean isIndexOnLeftHandSide(OperatorNode ast) { OperatorNode node = ast.getArgument(0, OperatorNode.class); return node.getOperator() == ExpressionOperator.READ_FIELD || node.getOperator() == ExpressionOperator.PROPREF; } @NonNull private CompositeItem buildAnd(OperatorNode ast) { AndItem andItem = new AndItem(); NotItem notItem = new NotItem(); convertVarArgsAnd(ast, 0, andItem, notItem); Preconditions .checkArgument(andItem.getItemCount() > 0, "Vespa does not support AND with no logically positive branches."); if (notItem.getItemCount() == 0) { return andItem; } if (andItem.getItemCount() == 1) { notItem.setPositiveItem(andItem.getItem(0)); } else { notItem.setPositiveItem(andItem); } return notItem; } @NonNull private CompositeItem buildOr(OperatorNode spec) { return convertVarArgs(spec, 0, new OrItem()); } @NonNull private CompositeItem buildWeakAnd(OperatorNode spec) { WeakAndItem weakAnd = new WeakAndItem(); Integer targetNumHits = getAnnotation(spec, TARGET_NUM_HITS, Integer.class, null, "desired minimum hits to produce"); if (targetNumHits != null) { weakAnd.setN(targetNumHits); } Integer scoreThreshold = getAnnotation(spec, SCORE_THRESHOLD, Integer.class, null, "min dot product score for hit inclusion"); if (scoreThreshold != null) { weakAnd.setScoreThreshold(scoreThreshold); } return convertVarArgs(spec, 1, weakAnd); } @NonNull private CompositeItem buildRank(OperatorNode spec) { return convertVarArgs(spec, 1, new RankItem()); } @NonNull private CompositeItem convertVarArgs(OperatorNode ast, 
int argIdx, @NonNull CompositeItem out) { Iterable> args = ast.getArgument(argIdx); for (OperatorNode arg : args) { assertHasOperator(arg, ExpressionOperator.class); out.addItem(convertExpression(arg)); } return out; } private void convertVarArgsAnd(OperatorNode ast, int argIdx, AndItem outAnd, NotItem outNot) { Iterable> args = ast.getArgument(argIdx); for (OperatorNode arg : args) { assertHasOperator(arg, ExpressionOperator.class); if (arg.getOperator() == ExpressionOperator.NOT) { OperatorNode exp = arg.getArgument(0); assertHasOperator(exp, ExpressionOperator.class); outNot.addNegativeItem(convertExpression(exp)); } else { outAnd.addItem(convertExpression(arg)); } } } @NonNull private Item buildTermSearch(OperatorNode ast) { assertHasOperator(ast, ExpressionOperator.CONTAINS); return instantiateLeafItem(getIndex(ast.> getArgument(0)), ast.> getArgument(1)); } @NonNull private Item buildRegExpSearch(OperatorNode ast) { assertHasOperator(ast, ExpressionOperator.MATCHES); String field = getIndex(ast.> getArgument(0)); OperatorNode ast1 = ast.> getArgument(1); String wordData = getStringContents(ast1); RegExpItem regExp = new RegExpItem(field, true, wordData); return leafStyleSettings(ast1, regExp); } @NonNull private Item buildRange(OperatorNode spec) { assertHasOperator(spec, ExpressionOperator.CALL); assertHasFunctionName(spec, RANGE); IntItem range = instantiateRangeItem(spec.>> getArgument(1), spec); return leafStyleSettings(spec, range); } private static Number negate(Number x) { if (x.getClass() == Integer.class) { int x1 = x.intValue(); return -x1; } else if (x.getClass() == Long.class) { long x1 = x.longValue(); return -x1; } else if (x.getClass() == Float.class) { float x1 = x.floatValue(); return -x1; } else if (x.getClass() == Double.class) { double x1 = x.doubleValue(); return -x1; } else { throw newUnexpectedArgumentException(x.getClass(), Integer.class, Long.class, Float.class, Double.class); } } @NonNull private IntItem instantiateRangeItem( List> args, OperatorNode spec) { Preconditions.checkArgument(args.size() == 3, "Expected 3 arguments, got %s.", args.size()); Number lowerArg = getBound(args.get(1)); Number upperArg = getBound(args.get(2)); String bounds = getAnnotation(spec, BOUNDS, String.class, null, "whether bounds should be open or closed"); // TODO: add support for implicit transforms if (bounds == null) { return new RangeItem(lowerArg, upperArg, getIndex(args.get(0))); } else { Limit from; Limit to; if (BOUNDS_OPEN.equals(bounds)) { from = new Limit(lowerArg, false); to = new Limit(upperArg, false); } else if (BOUNDS_LEFT_OPEN.equals(bounds)) { from = new Limit(lowerArg, false); to = new Limit(upperArg, true); } else if (BOUNDS_RIGHT_OPEN.equals(bounds)) { from = new Limit(lowerArg, true); to = new Limit(upperArg, false); } else { throw newUnexpectedArgumentException(bounds, BOUNDS_OPEN, BOUNDS_LEFT_OPEN, BOUNDS_RIGHT_OPEN); } return new IntItem(from, to, getIndex(args.get(0))); } } private Number getBound(OperatorNode bound) { Number boundValue; OperatorNode currentBound = bound; boolean negate = false; if (currentBound.getOperator() == ExpressionOperator.NEGATE) { currentBound = currentBound.getArgument(0); negate = true; } assertHasOperator(currentBound, ExpressionOperator.LITERAL); boundValue = currentBound.getArgument(0, Number.class); if (negate) { boundValue = negate(boundValue); } return boundValue; } @NonNull private Item instantiateLeafItem(String field, OperatorNode ast) { switch (ast.getOperator()) { case LITERAL: case VARREF: return 
instantiateWordItem(field, ast, null); case CALL: return instantiateCompositeLeaf(field, ast); default: throw newUnexpectedArgumentException(ast.getOperator().name(), ExpressionOperator.CALL, ExpressionOperator.LITERAL); } } @NonNull private Item instantiateCompositeLeaf(String field, OperatorNode ast) { List names = ast.getArgument(0); Preconditions.checkArgument(names.size() == 1, "Expected 1 name, got %s.", names.size()); switch (names.get(0)) { case SAME_ELEMENT: return instantiateSameElementItem(field, ast); case PHRASE: return instantiatePhraseItem(field, ast); case NEAR: return instantiateNearItem(field, ast); case ONEAR: return instantiateONearItem(field, ast); case EQUIV: return instantiateEquivItem(field, ast); case ALTERNATIVES: return instantiateWordAlternativesItem(field, ast); default: throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT); } } private Item instantiateWordAlternativesItem(String field, OperatorNode ast) { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() >= 1, "Expected 1 or more arguments, got %s.", args.size()); Preconditions.checkArgument(args.get(0).getOperator() == ExpressionOperator.MAP, "Expected MAP, got %s.", args.get(0).getOperator()); List terms = new ArrayList<>(); List keys = args.get(0).getArgument(0); List> values = args.get(0).getArgument(1); for (int i = 0; i < keys.size(); ++i) { OperatorNode value = values.get(i); if (value.getOperator() != ExpressionOperator.LITERAL) throw newUnexpectedArgumentException(value.getOperator(), ExpressionOperator.LITERAL); String term = keys.get(i); double exactness = value.getArgument(0, Double.class); terms.add(new WordAlternativesItem.Alternative(term, exactness)); } Substring origin = getOrigin(ast); Boolean isFromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION); return leafStyleSettings(ast, new WordAlternativesItem(field, isFromQuery, origin, terms)); } @NonNull private Item instantiateEquivItem(String field, OperatorNode ast) { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() >= 2, "Expected 2 or more arguments, got %s.", args.size()); EquivItem equiv = new EquivItem(); equiv.setIndexName(field); for (OperatorNode arg : args) { switch (arg.getOperator()) { case LITERAL: equiv.addItem(instantiateWordItem(field, arg, equiv.getClass())); break; case CALL: assertHasFunctionName(arg, PHRASE); equiv.addItem(instantiatePhraseItem(field, arg)); break; default: throw newUnexpectedArgumentException(arg.getOperator(), ExpressionOperator.CALL, ExpressionOperator.LITERAL); } } return leafStyleSettings(ast, equiv); } @NonNull private Item instantiateWordItem(String field, OperatorNode ast, Class parent) { return instantiateWordItem(field, ast, parent, SegmentWhen.POSSIBLY); } @NonNull private Item instantiateWordItem(String field, OperatorNode ast, Class parent, SegmentWhen segmentPolicy) { String wordData = getStringContents(ast); return instantiateWordItem(field, wordData, ast, parent, segmentPolicy, false, decideParsingLanguage(ast, wordData)); } @NonNull private Item instantiateWordItem(String field, String rawWord, OperatorNode ast, Class parent, SegmentWhen segmentPolicy, boolean exactMatch, Language language) { String wordData = rawWord; if (getAnnotation(ast, NFKC, Boolean.class, Boolean.FALSE, "setting for whether to NFKC normalize input data")) { // NOTE: If this is set to FALSE (default), we will still NFKC normalize text data // during tokenization/segmentation, 
as that is always turned on also on the indexing side. wordData = normalizer.normalize(wordData); } boolean fromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION); boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class, Boolean.FALSE, "setting for whether to use prefix match of input data"); boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class, Boolean.FALSE, "setting for whether to use suffix match of input data"); boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, Boolean.FALSE, "setting for whether to use substring match of input data"); Preconditions.checkArgument((prefixMatch ? 1 : 0) + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2, "Only one of prefix, substring and suffix can be set."); @NonNull final TaggableItem wordItem; if (exactMatch) { wordItem = new ExactStringItem(wordData, fromQuery); } else if (prefixMatch) { wordItem = new PrefixItem(wordData, fromQuery); } else if (suffixMatch) { wordItem = new SuffixItem(wordData, fromQuery); } else if (substrMatch) { wordItem = new SubstringItem(wordData, fromQuery); } else { switch (segmentPolicy) { case NEVER: wordItem = new WordItem(wordData, fromQuery); break; case POSSIBLY: if (shouldResegmentWord(field, fromQuery)) { wordItem = resegment(field, ast, wordData, fromQuery, parent, language); } else { wordItem = new WordItem(wordData, fromQuery); } break; case ALWAYS: wordItem = resegment(field, ast, wordData, fromQuery, parent, language); break; default: throw new IllegalArgumentException("Unexpected segmenting rule: " + segmentPolicy); } } if (wordItem instanceof WordItem) { prepareWord(field, ast, fromQuery, (WordItem) wordItem); } if (language != Language.ENGLISH) // mark the language used, unless it's the default ((Item)wordItem).setLanguage(language); return (Item) leafStyleSettings(ast, wordItem); } @SuppressWarnings({"deprecation"}) private boolean shouldResegmentWord(String field, boolean fromQuery) { return resegment && fromQuery && ! indexFactsSession.getIndex(field).isAttribute(); } @NonNull private TaggableItem resegment(String field, OperatorNode ast, String wordData, boolean fromQuery, Class parent, Language language) { String toSegment = wordData; Substring s = getOrigin(ast); Language usedLanguage = language == null ? 
currentlyParsing.getLanguage() : language; if (s != null) { toSegment = s.getValue(); } List words = segmenter.segment(toSegment, usedLanguage); TaggableItem wordItem; if (words.size() == 0) { wordItem = new WordItem(wordData, fromQuery); } else if (words.size() == 1 || !phraseArgumentSupported(parent)) { wordItem = new WordItem(words.get(0), fromQuery); } else { wordItem = new PhraseSegmentItem(toSegment, fromQuery, false); ((PhraseSegmentItem) wordItem).setIndexName(field); for (String w : words) { WordItem segment = new WordItem(w, fromQuery); prepareWord(field, ast, fromQuery, segment); ((PhraseSegmentItem) wordItem).addItem(segment); } ((PhraseSegmentItem) wordItem).lock(); } return wordItem; } private boolean phraseArgumentSupported(Class parent) { if (parent == null) return true; // not supported in backend, but the container flattens the arguments itself: if (parent == PhraseItem.class) return true; return parent == EquivItem.class; } private void prepareWord(String field, OperatorNode ast, boolean fromQuery, WordItem wordItem) { wordItem.setIndexName(field); wordStyleSettings(ast, wordItem); if (shouldResegmentWord(field, fromQuery)) { // force re-stemming, new case normalization, etc wordItem.setStemmed(false); wordItem.setLowercased(false); wordItem.setNormalizable(true); } } @NonNull private T leafStyleSettings(OperatorNode ast, @NonNull T out) { { Map connectivity = getAnnotation(ast, CONNECTIVITY, Map.class, null, "connectivity settings"); if (connectivity != null) { connectedItems.add(new ConnectedItem(out, getMapValue( CONNECTIVITY, connectivity, CONNECTION_ID, Integer.class), getMapValue(CONNECTIVITY, connectivity, CONNECTION_WEIGHT, Number.class).doubleValue())); } Number significance = getAnnotation(ast, SIGNIFICANCE, Number.class, null, "term significance"); if (significance != null) { out.setSignificance(significance.doubleValue()); } Integer uniqueId = getAnnotation(ast, UNIQUE_ID, Integer.class, null, "term ID", false); if (uniqueId != null) { out.setUniqueID(uniqueId); identifiedItems.put(uniqueId, out); } } { Item leaf = (Item) out; Map itemAnnotations = getAnnotation(ast, ANNOTATIONS, Map.class, Collections.emptyMap(), "item annotation map"); for (Map.Entry entry : itemAnnotations.entrySet()) { Preconditions.checkArgument(entry.getKey() instanceof String, "Expected String annotation key, got %s.", entry.getKey().getClass()); Preconditions.checkArgument(entry.getValue() instanceof String, "Expected String annotation value, got %s.", entry.getValue().getClass()); leaf.addAnnotation((String) entry.getKey(), entry.getValue()); } Boolean filter = getAnnotation(ast, FILTER, Boolean.class, null, FILTER_DESCRIPTION); if (filter != null) { leaf.setFilter(filter); } Boolean isRanked = getAnnotation(ast, RANKED, Boolean.class, null, RANKED_DESCRIPTION); if (isRanked != null) { leaf.setRanked(isRanked); } String label = getAnnotation(ast, LABEL, String.class, null, "item label"); if (label != null) { leaf.setLabel(label); } Integer weight = getAnnotation(ast, WEIGHT, Integer.class, null, "term weight for ranking"); if (weight != null) { leaf.setWeight(weight); } } if (out instanceof IntItem) { IntItem number = (IntItem) out; Integer hitLimit = getCappedRangeSearchParameter(ast); if (hitLimit != null) { number.setHitLimit(hitLimit); } } return out; } private Integer getCappedRangeSearchParameter(OperatorNode ast) { Integer hitLimit = getAnnotation(ast, HIT_LIMIT, Integer.class, null, "hit limit"); if (hitLimit != null) { Boolean ascending = getAnnotation(ast, 
ASCENDING_HITS_ORDER, Boolean.class, null, "ascending population ordering for capped range search"); Boolean descending = getAnnotation(ast, DESCENDING_HITS_ORDER, Boolean.class, null, "descending population ordering for capped range search"); Preconditions.checkArgument(ascending == null || descending == null, "Settings for both ascending and descending ordering set, only one of these expected."); if (Boolean.TRUE.equals(descending) || Boolean.FALSE.equals(ascending)) { hitLimit = hitLimit * -1; } } return hitLimit; } @Beta public boolean isQueryParser() { return queryParser; } @Beta public void setQueryParser(boolean queryParser) { this.queryParser = queryParser; } @Beta public void setUserQuery(@NonNull Query userQuery) { this.userQuery = userQuery; } @Beta public Set getYqlSummaryFields() { return yqlSummaryFields; } @Beta public List getGroupingSteps() { return groupingSteps; } /** * Give the offset expected from the latest parsed query if anything is * explicitly specified. * * @return an Integer instance or null */ public Integer getOffset() { return offset; } /** * Give the number of hits expected from the latest parsed query if anything * is explicitly specified. * * @return an Integer instance or null */ public Integer getHits() { return hits; } /** * The timeout specified in the YQL+ query last parsed. * * @return an Integer instance or null */ public Integer getTimeout() { return timeout; } /** * The sorting specified in the YQL+ query last parsed. * * @return a Sorting instance or null */ public Sorting getSorting() { return sorting; } Set getDocTypes() { return docTypes; } Set getYqlSources() { return yqlSources; } private static void assertHasOperator(OperatorNode ast, Class expectedOperatorClass) { Preconditions.checkArgument(expectedOperatorClass.isInstance(ast.getOperator()), "Expected operator class %s, got %s.", expectedOperatorClass.getName(), ast.getOperator().getClass().getName()); } private static void assertHasOperator(OperatorNode ast, Operator expectedOperator) { Preconditions.checkArgument(ast.getOperator() == expectedOperator, "Expected operator %s, got %s.", expectedOperator, ast.getOperator()); } private static void assertHasFunctionName(OperatorNode ast, String expectedFunctionName) { List names = ast.getArgument(0); Preconditions.checkArgument(expectedFunctionName.equals(names.get(0)), "Expected function '%s', got '%s'.", expectedFunctionName, names.get(0)); } private static void assertFieldName(OperatorNode ast) { Preconditions.checkArgument(ast.getOperator() == ExpressionOperator.READ_FIELD || ast.getOperator() == ExpressionOperator.PROPREF, "Expected operator READ_FIELD or PRPPREF, got %s.", ast.getOperator()); } private static void addItems(OperatorNode ast, WeightedSetItem out) { switch (ast.getOperator()) { case MAP: addStringItems(ast, out); break; case ARRAY: addLongItems(ast, out); break; default: throw newUnexpectedArgumentException(ast.getOperator(), ExpressionOperator.ARRAY, ExpressionOperator.MAP); } } private static void addStringItems(OperatorNode ast, WeightedSetItem out) { List keys = ast.getArgument(0); List> values = ast.getArgument(1); for (int i = 0; i < keys.size(); ++i) { OperatorNode tokenWeight = values.get(i); assertHasOperator(tokenWeight, ExpressionOperator.LITERAL); out.addToken(keys.get(i), tokenWeight.getArgument(0, Integer.class)); } } private static void addLongItems(OperatorNode ast, WeightedSetItem out) { List> values = ast.getArgument(0); for (OperatorNode value : values) { assertHasOperator(value, 
ExpressionOperator.ARRAY); List> args = value.getArgument(0); Preconditions.checkArgument(args.size() == 2, "Expected item and weight, got %s.", args); OperatorNode tokenValueNode = args.get(0); assertHasOperator(tokenValueNode, ExpressionOperator.LITERAL); Number tokenValue = tokenValueNode.getArgument(0, Number.class); Preconditions.checkArgument(tokenValue instanceof Integer || tokenValue instanceof Long, "Expected Integer or Long, got %s.", tokenValue.getClass() .getName()); OperatorNode tokenWeightNode = args.get(1); assertHasOperator(tokenWeightNode, ExpressionOperator.LITERAL); Integer tokenWeight = tokenWeightNode.getArgument(0, Integer.class); out.addToken(tokenValue.longValue(), tokenWeight); } } private void wordStyleSettings(OperatorNode ast, WordItem out) { Substring origin = getOrigin(ast); if (origin != null) { out.setOrigin(origin); } Boolean usePositionData = getAnnotation(ast, USE_POSITION_DATA, Boolean.class, null, USE_POSITION_DATA_DESCRIPTION); if (usePositionData != null) { out.setPositionData(usePositionData); } Boolean stem = getAnnotation(ast, STEM, Boolean.class, null, STEM_DESCRIPTION); if (stem != null) { out.setStemmed(!stem); } Boolean normalizeCase = getAnnotation(ast, NORMALIZE_CASE, Boolean.class, null, NORMALIZE_CASE_DESCRIPTION); if (normalizeCase != null) { out.setLowercased(!normalizeCase); } Boolean accentDrop = getAnnotation(ast, ACCENT_DROP, Boolean.class, null, ACCENT_DROP_DESCRIPTION); if (accentDrop != null) { out.setNormalizable(accentDrop); } Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, Boolean.class, null, "setting for whether to force using AND for segments on and off"); if (andSegmenting != null) { if (andSegmenting) { out.setSegmentingRule(SegmentingRule.BOOLEAN_AND); } else { out.setSegmentingRule(SegmentingRule.PHRASE); } } } private IndexNameExpander swapIndexCreator(IndexNameExpander newExpander) { IndexNameExpander old = indexNameExpander; indexNameExpander = newExpander; return old; } @NonNull private String getIndex(OperatorNode operatorNode) { String index = fetchFieldRead(operatorNode); String expanded = indexNameExpander.expand(index); Preconditions.checkArgument(indexFactsSession.isIndex(expanded), "Field '%s' does not exist.", expanded); return indexFactsSession.getCanonicName(index); } private Substring getOrigin(OperatorNode ast) { Map origin = getAnnotation(ast, ORIGIN, Map.class, null, ORIGIN_DESCRIPTION); if (origin == null) { return null; } String original = getMapValue(ORIGIN, origin, ORIGIN_ORIGINAL, String.class); int offset = getMapValue(ORIGIN, origin, ORIGIN_OFFSET, Integer.class); int length = getMapValue(ORIGIN, origin, ORIGIN_LENGTH, Integer.class); return new Substring(offset, length + offset, original); } private static T getMapValue(String mapName, Map map, String key, Class expectedValueClass) { Object value = map.get(key); Preconditions.checkArgument(value != null, "Map annotation '%s' must contain an entry with key '%s'.", mapName, key); Preconditions.checkArgument(expectedValueClass.isInstance(value), "Expected %s for entry '%s' in map annotation '%s', got %s.", expectedValueClass.getName(), key, mapName, value.getClass().getName()); return expectedValueClass.cast(value); } private T getAnnotation(OperatorNode ast, String key, Class expectedClass, T defaultValue, String description) { return getAnnotation(ast, key, expectedClass, defaultValue, description, true); } private T getAnnotation(OperatorNode ast, String key, Class expectedClass, T defaultValue, String description, boolean 
considerParents) { Object value = ast.getAnnotation(key); for (Iterator> i = annotationStack.iterator(); value == null && considerParents && i.hasNext();) { value = i.next().getAnnotation(key); } if (value == null) return defaultValue; Preconditions.checkArgument(expectedClass.isInstance(value), "Expected %s for annotation '%s' (%s), got %s.", expectedClass.getName(), key, description, value.getClass().getName()); return expectedClass.cast(value); } private static IllegalArgumentException newUnexpectedArgumentException(Object actual, Object... expected) { StringBuilder out = new StringBuilder("Expected "); for (int i = 0, len = expected.length; i < len; ++i) { out.append(expected[i]); if (i < len - 2) { out.append(", "); } else if (i < len - 1) { out.append(" or "); } } out.append(", got ").append(actual).append("."); return new IllegalArgumentException(out.toString()); } String getSegmenterBackend() { return segmenterBackend; } Version getSegmenterVersion() { return segmenterVersion; } private static final class ConnectedItem { final double weight; final int toId; final TaggableItem fromItem; ConnectedItem(TaggableItem fromItem, int toId, double weight) { this.weight = weight; this.toId = toId; this.fromItem = fromItem; } } private class AnnotationPropagator extends QueryVisitor { private final Boolean isRanked; private final Boolean filter; private final Boolean stem; private final Boolean normalizeCase; private final Boolean accentDrop; private final Boolean usePositionData; public AnnotationPropagator(OperatorNode ast) { isRanked = getAnnotation(ast, RANKED, Boolean.class, null, RANKED_DESCRIPTION); filter = getAnnotation(ast, FILTER, Boolean.class, null, FILTER_DESCRIPTION); stem = getAnnotation(ast, STEM, Boolean.class, null, STEM_DESCRIPTION); normalizeCase = getAnnotation(ast, NORMALIZE_CASE, Boolean.class, Boolean.TRUE, NORMALIZE_CASE_DESCRIPTION); accentDrop = getAnnotation(ast, ACCENT_DROP, Boolean.class, null, ACCENT_DROP_DESCRIPTION); usePositionData = getAnnotation(ast, USE_POSITION_DATA, Boolean.class, null, USE_POSITION_DATA_DESCRIPTION); } @Override public boolean visit(Item item) { if (item instanceof WordItem) { WordItem w = (WordItem) item; if (usePositionData != null) { w.setPositionData(usePositionData); } if (stem != null) { w.setStemmed(!stem); } if (normalizeCase != null) { w.setLowercased(!normalizeCase); } if (accentDrop != null) { w.setNormalizable(accentDrop); } } if (item instanceof TaggableItem) { if (isRanked != null) { item.setRanked(isRanked); } if (filter != null) { item.setFilter(filter); } } return true; } @Override public void onExit() { // intentionally left blank } } }