// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.query; import com.google.common.base.Preconditions; import com.yahoo.collections.LazyMap; import com.yahoo.language.Language; import com.yahoo.language.process.Normalizer; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.CompositeItem; import com.yahoo.prelude.query.DotProductItem; import com.yahoo.prelude.query.EquivItem; import com.yahoo.prelude.query.ExactStringItem; import com.yahoo.prelude.query.IntItem; import com.yahoo.prelude.query.Item; import com.yahoo.prelude.query.Limit; import com.yahoo.prelude.query.NearItem; import com.yahoo.prelude.query.NotItem; import com.yahoo.prelude.query.ONearItem; import com.yahoo.prelude.query.OrItem; import com.yahoo.prelude.query.PhraseItem; import com.yahoo.prelude.query.PredicateQueryItem; import com.yahoo.prelude.query.PrefixItem; import com.yahoo.prelude.query.QueryException; import com.yahoo.prelude.query.RangeItem; import com.yahoo.prelude.query.RankItem; import com.yahoo.prelude.query.RegExpItem; import com.yahoo.prelude.query.SameElementItem; import com.yahoo.prelude.query.SegmentingRule; import com.yahoo.prelude.query.Substring; import com.yahoo.prelude.query.SubstringItem; import com.yahoo.prelude.query.SuffixItem; import com.yahoo.prelude.query.TaggableItem; import com.yahoo.prelude.query.WandItem; import com.yahoo.prelude.query.WeakAndItem; import com.yahoo.prelude.query.WeightedSetItem; import com.yahoo.prelude.query.WordAlternativesItem; import com.yahoo.prelude.query.WordItem; import com.yahoo.search.grouping.request.GroupingOperation; import com.yahoo.search.query.parser.Parsable; import com.yahoo.search.query.parser.Parser; import com.yahoo.search.query.parser.ParserEnvironment; import com.yahoo.search.yql.VespaGroupingStep; import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Inspector; import com.yahoo.slime.ObjectTraverser; import com.yahoo.vespa.config.SlimeUtils; import edu.umd.cs.findbugs.annotations.NonNull; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import static com.yahoo.slime.Type.ARRAY; import static com.yahoo.slime.Type.DOUBLE; import static com.yahoo.slime.Type.LONG; import static com.yahoo.slime.Type.OBJECT; import static com.yahoo.slime.Type.STRING; /** * The Select query language. * * This class will be parsing the Select parameters, and will be used when the query has the SELECT-type. * * @author henrhoi */ public class SelectParser implements Parser { Parsable query; private final IndexFacts indexFacts; private final Map identifiedItems = LazyMap.newHashMap(); private final List connectedItems = new ArrayList<>(); private final Normalizer normalizer; private IndexFacts.Session indexFactsSession; // YQL parameters and functions private static final String DESCENDING_HITS_ORDER = "descending"; private static final String ASCENDING_HITS_ORDER = "ascending"; private static final Integer DEFAULT_TARGET_NUM_HITS = 10; private static final String ORIGIN_LENGTH = "length"; private static final String ORIGIN_OFFSET = "offset"; private static final String ORIGIN = "origin"; private static final String ORIGIN_ORIGINAL = "original"; private static final String CONNECTION_ID = "id"; private static final String CONNECTION_WEIGHT = "weight"; private static final String CONNECTIVITY = "connectivity"; private static final String ANNOTATIONS = "annotations"; private static final String NFKC = "nfkc"; private static final String USER_INPUT_LANGUAGE = "language"; private static final String ACCENT_DROP = "accentDrop"; private static final String ALTERNATIVES = "alternatives"; private static final String AND_SEGMENTING = "andSegmenting"; private static final String DISTANCE = "distance"; private static final String DOT_PRODUCT = "dotProduct"; private static final String EQUIV = "equiv"; private static final String FILTER = "filter"; private static final String HIT_LIMIT = "hitLimit"; private static final String IMPLICIT_TRANSFORMS = "implicitTransforms"; private static final String LABEL = "label"; private static final String NEAR = "near"; private static final String NORMALIZE_CASE = "normalizeCase"; private static final String ONEAR = "onear"; private static final String PHRASE = "phrase"; private static final String PREDICATE = "predicate"; private static final String PREFIX = "prefix"; private static final String RANKED = "ranked"; private static final String RANK = "rank"; private static final String SAME_ELEMENT = "sameElement"; private static final String SCORE_THRESHOLD = "scoreThreshold"; private static final String SIGNIFICANCE = "significance"; private static final String STEM = "stem"; private static final String SUBSTRING = "substring"; private static final String SUFFIX = "suffix"; private static final String TARGET_NUM_HITS = "targetNumHits"; private static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; private static final String UNIQUE_ID = "id"; private static final String USE_POSITION_DATA = "usePositionData"; private static final String WAND = "wand"; private static final String WEAK_AND = "weakAnd"; private static final String WEIGHTED_SET = "weightedSet"; private static final String WEIGHT = "weight"; private static final String AND = "and"; private static final String AND_NOT = "and_not"; private static final String OR = "or"; private static final String EQ = "equals"; private static final String RANGE = "range"; private static final String CONTAINS = "contains"; private static final String MATCHES = "matches"; private static final String CALL = "call"; private static final List FUNCTION_CALLS = Arrays.asList(WAND, WEIGHTED_SET, DOT_PRODUCT, PREDICATE, RANK, WEAK_AND); public SelectParser(ParserEnvironment environment) { indexFacts = environment.getIndexFacts(); normalizer = environment.getLinguistics().getNormalizer(); } @Override public QueryTree parse(Parsable query) { indexFactsSession = indexFacts.newSession(query.getSources(), query.getRestrict()); connectedItems.clear(); identifiedItems.clear(); this.query = query; return buildTree(); } private QueryTree buildTree() { Inspector inspector = SlimeUtils.jsonToSlime(this.query.getSelect().getWhereString().getBytes()).get(); if (inspector.field("error_message").valid()) { throw new QueryException("Illegal query: " + inspector.field("error_message").asString() + " at: '" + new String(inspector.field("offending_input").asData(), StandardCharsets.UTF_8) + "'"); } Item root = walkJson(inspector); connectItems(); return new QueryTree(root); } private Item walkJson(Inspector inspector){ Item[] item = {null}; inspector.traverse((ObjectTraverser) (key, value) -> { String type = (FUNCTION_CALLS.contains(key)) ? CALL : key; switch (type) { case AND: item[0] = buildAnd(key, value); break; case AND_NOT: item[0] = buildNotAnd(key, value); break; case OR: item[0] = buildOr(key, value); break; case EQ: item[0] = buildEquals(key, value); break; case RANGE: item[0] = buildRange(key, value); break; case CONTAINS: item[0] = buildTermSearch(key, value); break; case MATCHES: item[0] = buildRegExpSearch(key, value); break; case CALL: item[0] = buildFunctionCall(key, value); break; default: throw newUnexpectedArgumentException(key, AND, CALL, CONTAINS, EQ, OR, RANGE, AND_NOT); } }); return item[0]; } public List getGroupingSteps(String grouping){ List groupingSteps = new ArrayList<>(); List groupingOperations = getOperations(grouping); for (String groupingString : groupingOperations){ GroupingOperation groupingOperation = GroupingOperation.fromString(groupingString); VespaGroupingStep groupingStep = new VespaGroupingStep(groupingOperation); groupingSteps.add(groupingStep); } return groupingSteps; } private List getOperations(String grouping) { List operations = new ArrayList<>(); Inspector inspector = SlimeUtils.jsonToSlime(grouping.getBytes()).get(); if (inspector.field("error_message").valid()){ throw new QueryException("Illegal query: "+inspector.field("error_message").asString() + " at: '" + new String(inspector.field("offending_input").asData(), StandardCharsets.UTF_8) + "'"); } inspector.traverse( (ArrayTraverser) (key, value) -> { String groupingString = value.toString(); groupingString = groupingString.replace(" ", "").replace("\"", "").replace("\'", "").replace(":{", "(").replace(":", "(").replace("}", ")").replace(",", ")"); groupingString = groupingString.substring(1, groupingString.length()); operations.add(groupingString); }); return operations; } @NonNull private Item buildFunctionCall(String key, Inspector value) { switch (key) { case WAND: return buildWand(key, value); case WEIGHTED_SET: return buildWeightedSet(key, value); case DOT_PRODUCT: return buildDotProduct(key, value); case PREDICATE: return buildPredicate(key, value); case RANK: return buildRank(key, value); case WEAK_AND: return buildWeakAnd(key, value); default: throw newUnexpectedArgumentException(key, DOT_PRODUCT, RANK, WAND, WEAK_AND, WEIGHTED_SET, PREDICATE); } } private void addItemsFromInspector(CompositeItem item, Inspector inspector){ if (inspector.type() == ARRAY){ inspector.traverse((ArrayTraverser) (index, new_value) -> { item.addItem(walkJson(new_value)); }); } else if (inspector.type() == OBJECT){ if (inspector.field("children").valid()){ inspector.field("children").traverse((ArrayTraverser) (index, new_value) -> { item.addItem(walkJson(new_value)); }); } } } private Inspector getChildren(Inspector inspector){ if (inspector.type() == ARRAY){ return inspector; } else if (inspector.type() == OBJECT){ if (inspector.field("children").valid()){ return inspector.field("children"); } if (inspector.field(1).valid()){ return inspector.field(1); } } return null; } private HashMap getChildrenMap(Inspector inspector){ HashMap children = new HashMap<>(); if (inspector.type() == ARRAY){ inspector.traverse((ArrayTraverser) (index, new_value) -> { children.put(index, new_value); }); } else if (inspector.type() == OBJECT){ if (inspector.field("children").valid()){ inspector.field("children").traverse((ArrayTraverser) (index, new_value) -> { children.put(index, new_value); }); } } return children; } private Inspector getAnnotations(Inspector inspector){ if (inspector.type() == OBJECT && inspector.field("attributes").valid()){ return inspector.field("attributes"); } return null; } private HashMap getAnnotationMapFromAnnotationInspector(Inspector annotation){ HashMap attributes = new HashMap<>(); if (annotation.type() == OBJECT){ annotation.traverse((ObjectTraverser) (index, new_value) -> { attributes.put(index, new_value); }); } return attributes; } private HashMap getAnnotationMap(Inspector inspector){ HashMap attributes = new HashMap<>(); if (inspector.type() == OBJECT && inspector.field("attributes").valid()){ inspector.field("attributes").traverse((ObjectTraverser) (index, new_value) -> { attributes.put(index, new_value); }); } return attributes; } private T getAnnotation(String annotationName, HashMap annotations, Class expectedClass, T defaultValue) { return (annotations.get(annotationName) == null) ? defaultValue : expectedClass.cast(annotations.get(annotationName).asString()); } private Boolean getBoolAnnotation(String annotationName, HashMap annotations, Boolean defaultValue) { if (annotations != null){ Inspector annotation = annotations.getOrDefault(annotationName, null); if (annotation != null){ return annotation.asBool(); } } return defaultValue; } private Integer getIntegerAnnotation(String annotationName, HashMap annotations, Integer defaultValue) { if (annotations != null){ Inspector annotation = annotations.getOrDefault(annotationName, null); if (annotation != null){ return (int)annotation.asLong(); } } return defaultValue; } private Double getDoubleAnnotation(String annotationName, HashMap annotations, Double defaultValue) { if (annotations != null){ Inspector annotation = annotations.getOrDefault(annotationName, null); if (annotation != null){ return annotation.asDouble(); } } return defaultValue; } private Inspector getAnnotationAsInspectorOrNull(String annotationName, HashMap annotations) { return annotations.get(annotationName); } @NonNull private CompositeItem buildAnd(String key, Inspector value) { AndItem andItem = new AndItem(); addItemsFromInspector(andItem, value); return andItem; } @NonNull private CompositeItem buildNotAnd(String key, Inspector value) { NotItem notItem = new NotItem(); addItemsFromInspector(notItem, value); return notItem; } @NonNull private CompositeItem buildOr(String key, Inspector value) { OrItem orItem = new OrItem(); addItemsFromInspector(orItem, value); return orItem; } @NonNull private CompositeItem buildWeakAnd(String key, Inspector value) { WeakAndItem weakAnd = new WeakAndItem(); addItemsFromInspector(weakAnd, value); Inspector annotations = getAnnotations(value); if (annotations != null){ annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { if (TARGET_NUM_HITS.equals(annotation_name)){ weakAnd.setN((int)(annotation_value.asDouble())); } if (SCORE_THRESHOLD.equals(annotation_name)){ weakAnd.setScoreThreshold((int)(annotation_value.asDouble())); } }); } return weakAnd; } @NonNull private T leafStyleSettings(Inspector annotations, @NonNull T out) { { if (annotations != null) { Inspector itemConnectivity= getAnnotationAsInspectorOrNull(CONNECTIVITY, getAnnotationMapFromAnnotationInspector(annotations)); if (itemConnectivity != null) { Integer[] id = {null}; Double[] weight = {null}; itemConnectivity.traverse((ObjectTraverser) (key, value) -> { switch (key){ case CONNECTION_ID: id[0] = (int) value.asLong(); break; case CONNECTION_WEIGHT: weight[0] = value.asDouble(); break; } }); connectedItems.add(new ConnectedItem(out, id[0], weight[0])); } annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { if (SIGNIFICANCE.equals(annotation_name)) { if (annotation_value != null) { out.setSignificance(annotation_value.asDouble()); } } if (UNIQUE_ID.equals(annotation_name)) { if (annotation_value != null) { out.setUniqueID((int)annotation_value.asLong()); identifiedItems.put((int)annotation_value.asLong(), out); } } }); } } { Item leaf = (Item) out; if (annotations != null) { Inspector itemAnnotations = getAnnotationAsInspectorOrNull(ANNOTATIONS, getAnnotationMapFromAnnotationInspector(annotations)); if (itemAnnotations != null) { itemAnnotations.traverse((ObjectTraverser) (key, value) -> { leaf.addAnnotation(key, value.asString()); }); } annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { if (FILTER.equals(annotation_name)) { if (annotation_value != null) { leaf.setFilter(annotation_value.asBool()); } } if (RANKED.equals(annotation_name)) { if (annotation_value != null) { leaf.setRanked(annotation_value.asBool()); } } if (LABEL.equals(annotation_name)) { if (annotation_value != null) { leaf.setLabel(annotation_value.asString()); } } if (WEIGHT.equals(annotation_name)) { if (annotation_value != null) { leaf.setWeight((int)annotation_value.asDouble()); } } }); } if (out instanceof IntItem && annotations != null) { IntItem number = (IntItem) out; Integer hitLimit = getCappedRangeSearchParameter(annotations); if (hitLimit != null) { number.setHitLimit(hitLimit); } } } return out; } private Integer getCappedRangeSearchParameter(Inspector annotations) { Integer[] hitLimit = {null}; annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { if (HIT_LIMIT.equals(annotation_name)) { if (annotation_value != null) { hitLimit[0] = (int)(annotation_value.asDouble()); } } }); Boolean[] ascending = {null}; Boolean[] descending = {null}; if (hitLimit[0] != null) { annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { if (ASCENDING_HITS_ORDER.equals(annotation_name)) { ascending[0] = annotation_value.asBool(); } if (DESCENDING_HITS_ORDER.equals(annotation_name)) { descending[0] = annotation_value.asBool(); } }); Preconditions.checkArgument(ascending[0] == null || descending[0] == null, "Settings for both ascending and descending ordering set, only one of these expected."); if (Boolean.TRUE.equals(descending[0]) || Boolean.FALSE.equals(ascending[0])) { hitLimit[0] = hitLimit[0] * -1; } } return hitLimit[0]; } @NonNull private Item buildRange(String key, Inspector value) { HashMap children = getChildrenMap(value); Inspector annotations = getAnnotations(value); boolean[] equals = {false}; String field; Inspector boundInspector; if (children.get(0).type() == STRING){ field = children.get(0).asString(); boundInspector = children.get(1); } else { field = children.get(1).asString(); boundInspector = children.get(0); } Number[] bounds = {null, null}; String[] operators = {null, null}; boundInspector.traverse((ObjectTraverser) (operator, bound) -> { if (bound.type() == STRING) { throw new IllegalArgumentException("Expected operator LITERAL, got READ_FIELD."); } if (operator.equals("=")) { bounds[0] = (bound.type() == DOUBLE) ? Number.class.cast(bound.asDouble()) : Number.class.cast(bound.asLong()); operators[0] = operator; equals[0] = true; } if (operator.equals(">=") || operator.equals(">")){ bounds[0] = (bound.type() == DOUBLE) ? Number.class.cast(bound.asDouble()) : Number.class.cast(bound.asLong()); operators[0] = operator; } else if (operator.equals("<=") || operator.equals("<")){ bounds[1] = (bound.type() == DOUBLE) ? Number.class.cast(bound.asDouble()) : Number.class.cast(bound.asLong()); operators[1] = operator; } }); IntItem range = null; if (equals[0]){ range = new IntItem(bounds[0].toString(), field); } else if (operators[0]==null || operators[1]==null){ Integer index = (operators[0] == null) ? 1 : 0; switch (operators[index]){ case ">=": range = buildGreaterThanOrEquals(field, bounds[index].toString()); break; case ">": range = buildGreaterThan(field, bounds[index].toString()); break; case "<": range = buildLessThan(field, bounds[index].toString()); break; case "<=": range = buildLessThanOrEquals(field, bounds[index].toString()); break; } } else { range = instantiateRangeItem(bounds[0], bounds[1], field, operators[0].equals(">"), operators[1].equals("<")); } return leafStyleSettings(annotations, range); } @NonNull private IntItem buildGreaterThanOrEquals(String field, String bound) { return new IntItem("[" + bound + ";]", field); } @NonNull private IntItem buildLessThanOrEquals(String field, String bound) { return new IntItem("[;" + bound + "]", field); } @NonNull private IntItem buildGreaterThan(String field, String bound) { return new IntItem(">" + bound, field); } @NonNull private IntItem buildLessThan(String field, String bound) { return new IntItem("<" + bound, field); } @NonNull private IntItem instantiateRangeItem(Number lowerBound, Number upperBound, String field, boolean bounds_left_open, boolean bounds_right_open) { Preconditions.checkArgument(lowerBound != null && upperBound != null && field != null, "Expected 3 NonNull-arguments"); if (!bounds_left_open && !bounds_right_open) { return new RangeItem(lowerBound, upperBound, field); } else { Limit from; Limit to; if (bounds_left_open && bounds_right_open) { from = new Limit(lowerBound, false); to = new Limit(upperBound, false); } else if (bounds_left_open) { from = new Limit(lowerBound, false); to = new Limit(upperBound, true); } else { from = new Limit(lowerBound, true); to = new Limit(upperBound, false); } return new IntItem(from, to, field); } } @NonNull private Item buildEquals(String key, Inspector value) { return buildRange(key, value); } @NonNull private Item buildWand(String key, Inspector value) { HashMap annotations = getAnnotationMap(value); HashMap children = getChildrenMap(value); Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size()); Integer target_num_hits= getIntegerAnnotation(TARGET_NUM_HITS, annotations, DEFAULT_TARGET_NUM_HITS); WandItem out = new WandItem(children.get(0).asString(), target_num_hits); Double scoreThreshold = getDoubleAnnotation(SCORE_THRESHOLD, annotations, null); if (scoreThreshold != null) { out.setScoreThreshold(scoreThreshold); } Double thresholdBoostFactor = getDoubleAnnotation(THRESHOLD_BOOST_FACTOR, annotations, null); if (thresholdBoostFactor != null) { out.setThresholdBoostFactor(thresholdBoostFactor); } return fillWeightedSet(value, children, out); } @NonNull private WeightedSetItem fillWeightedSet(Inspector value, HashMap children, @NonNull WeightedSetItem out) { addItems(children, out); return leafStyleSettings(getAnnotations(value), out); } private static void addItems(HashMap children, WeightedSetItem out) { switch (children.get(1).type()) { case OBJECT: addStringItems(children, out); break; case ARRAY: addLongItems(children, out); break; default: throw newUnexpectedArgumentException(children.get(1).type(), ARRAY, OBJECT); } } private static void addStringItems(HashMap children, WeightedSetItem out) { //{"a":1, "b":2} children.get(1).traverse((ObjectTraverser) (key, value) -> { if (value.type() == STRING){ throw new IllegalArgumentException("Expected operator LITERAL, got READ_FIELD."); } out.addToken(key, (int)value.asLong()); }); } private static void addLongItems(HashMap children, WeightedSetItem out) { children.get(1).traverse((ArrayTraverser) (index, pair) -> { List pairValues = new ArrayList<>(); pair.traverse((ArrayTraverser) (pairIndex, pairValue) -> { pairValues.add((int)pairValue.asLong()); }); Preconditions.checkArgument(pairValues.size() == 2, "Expected item and weight, got %s.", pairValues); out.addToken(pairValues.get(0).longValue(), pairValues.get(1)); }); } @NonNull private Item buildRegExpSearch(String key, Inspector value) { assertHasOperator(key, MATCHES); HashMap children = getChildrenMap(value); String field = children.get(0).asString(); String wordData = children.get(1).asString(); RegExpItem regExp = new RegExpItem(field, true, wordData); return leafStyleSettings(getAnnotations(value), regExp); } @NonNull private Item buildWeightedSet(String key, Inspector value) { HashMap children = getChildrenMap(value); String field = children.get(0).asString(); Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size()); return fillWeightedSet(value, children, new WeightedSetItem(field)); } @NonNull private Item buildDotProduct(String key, Inspector value) { HashMap children = getChildrenMap(value); String field = children.get(0).asString(); Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size()); return fillWeightedSet(value, children, new DotProductItem(field)); } @NonNull private Item buildPredicate(String key, Inspector value) { HashMap children = getChildrenMap(value); String field = children.get(0).asString(); Inspector args = children.get(1); Preconditions.checkArgument(children.size() == 3, "Expected 3 arguments, got %s.", children.size()); PredicateQueryItem item = new PredicateQueryItem(); item.setIndexName(field); List argumentList = valueListFromInspector(getChildren(value)); // Adding attributes argumentList.get(1).traverse((ObjectTraverser) (attrKey, attrValue) -> { if (attrValue.type() == ARRAY){ List attributes = valueListFromInspector(attrValue); attributes.forEach( (attribute) -> item.addFeature(attrKey, attribute.asString())); } else { item.addFeature(attrKey, attrValue.asString()); } }); // Adding range attributes argumentList.get(2).traverse((ObjectTraverser) (attrKey, attrValue) -> item.addRangeFeature(attrKey, (int)attrValue.asDouble())); return leafStyleSettings(getAnnotations(value), item); } @NonNull private CompositeItem buildRank(String key, Inspector value) { RankItem rankItem = new RankItem(); addItemsFromInspector(rankItem, value); return rankItem; } @NonNull private Item buildTermSearch(String key, Inspector value) { HashMap children = getChildrenMap(value); String field = children.get(0).asString(); return instantiateLeafItem(field, key, value); } private String getInspectorKey(Inspector inspector){ String[] actualKey = {""}; if (inspector.type() == OBJECT){ inspector.traverse((ObjectTraverser) (key, value) -> { actualKey[0] = key; }); } return actualKey[0]; } @NonNull private Item instantiateLeafItem(String field, String key, Inspector value) { List possibleLeafFunction = valueListFromInspector(value); String possibleLeafFunctionName = (possibleLeafFunction.size() > 1) ? getInspectorKey(possibleLeafFunction.get(1)) : ""; if (FUNCTION_CALLS.contains(key)) { return instantiateCompositeLeaf(field, key, value); } else if ( ! possibleLeafFunctionName.equals("")){ return instantiateCompositeLeaf(field, possibleLeafFunctionName, valueListFromInspector(value).get(1).field(possibleLeafFunctionName)); } else { return instantiateWordItem(field, key, value); } } @NonNull private Item instantiateCompositeLeaf(String field, String key, Inspector value) { switch (key) { case SAME_ELEMENT: return instantiateSameElementItem(field, key, value); case PHRASE: return instantiatePhraseItem(field, key, value); case NEAR: return instantiateNearItem(field, key, value); case ONEAR: return instantiateONearItem(field, key, value); case EQUIV: return instantiateEquivItem(field, key, value); case ALTERNATIVES: return instantiateWordAlternativesItem(field, key, value); default: throw newUnexpectedArgumentException(key, EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT); } } @NonNull private Item instantiateWordItem(String field, String key, Inspector value) { var children = getChildrenMap(value); if (children.size() < 2) throw new IllegalArgumentException("Expected at least 2 children of '" + key + "', but got " + children.size()); String wordData = children.get(1).asString(); return instantiateWordItem(field, wordData, key, value, false, decideParsingLanguage(value, wordData)); } @NonNull private Item instantiateWordItem(String field, String rawWord, String key, Inspector value, boolean exactMatch, Language language) { String wordData = rawWord; HashMap annotations = getAnnotationMap(value); if (getBoolAnnotation(NFKC, annotations, Boolean.FALSE)) { // NOTE: If this is set to FALSE (default), we will still NFKC normalize text data // during tokenization/segmentation, as that is always turned on also on the indexing side. wordData = normalizer.normalize(wordData); } boolean fromQuery = getBoolAnnotation(IMPLICIT_TRANSFORMS, annotations, Boolean.TRUE); boolean prefixMatch = getBoolAnnotation(PREFIX, annotations, Boolean.FALSE); boolean suffixMatch = getBoolAnnotation(SUFFIX, annotations, Boolean.FALSE); boolean substrMatch = getBoolAnnotation(SUBSTRING,annotations, Boolean.FALSE); Preconditions.checkArgument((prefixMatch ? 1 : 0) + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2, "Only one of prefix, substring and suffix can be set."); @NonNull final TaggableItem wordItem; if (exactMatch) { wordItem = new ExactStringItem(wordData, fromQuery); } else if (prefixMatch) { wordItem = new PrefixItem(wordData, fromQuery); } else if (suffixMatch) { wordItem = new SuffixItem(wordData, fromQuery); } else if (substrMatch) { wordItem = new SubstringItem(wordData, fromQuery); } else { wordItem = new WordItem(wordData, fromQuery); } if (wordItem instanceof WordItem) { prepareWord(field, value, (WordItem) wordItem); } if (language != Language.ENGLISH) ((Item)wordItem).setLanguage(language); return (Item) leafStyleSettings(getAnnotations(value), wordItem); } private Language decideParsingLanguage(Inspector value, String wordData) { String languageTag = getAnnotation(USER_INPUT_LANGUAGE, getAnnotationMap(value), String.class, null); Language language = Language.fromLanguageTag(languageTag); if (language != Language.UNKNOWN) return language; Optional explicitLanguage = query.getExplicitLanguage(); if (explicitLanguage.isPresent()) return explicitLanguage.get(); return Language.ENGLISH; } private void prepareWord(String field, Inspector value, WordItem wordItem) { wordItem.setIndexName(field); wordStyleSettings(value, wordItem); } private void wordStyleSettings(Inspector value, WordItem out) { HashMap annotations = getAnnotationMap(value); Substring origin = getOrigin(getAnnotations(value)); if (origin != null) { out.setOrigin(origin); } if (annotations != null){ Boolean usePositionData = getBoolAnnotation(USE_POSITION_DATA, annotations, null); if (usePositionData != null) { out.setPositionData(usePositionData); } Boolean stem = getBoolAnnotation(STEM, annotations, null); if (stem != null) { out.setStemmed(!stem); } Boolean normalizeCase = getBoolAnnotation(NORMALIZE_CASE, annotations, null); if (normalizeCase != null) { out.setLowercased(!normalizeCase); } Boolean accentDrop = getBoolAnnotation(ACCENT_DROP, annotations, null); if (accentDrop != null) { out.setNormalizable(accentDrop); } Boolean andSegmenting = getBoolAnnotation(AND_SEGMENTING, annotations, null); if (andSegmenting != null) { if (andSegmenting) { out.setSegmentingRule(SegmentingRule.BOOLEAN_AND); } else { out.setSegmentingRule(SegmentingRule.PHRASE); } } } } private Substring getOrigin(Inspector annotations) { if (annotations != null) { Inspector origin = getAnnotationAsInspectorOrNull(ORIGIN, getAnnotationMapFromAnnotationInspector(annotations)); if (origin == null) { return null; } String[] original = {null}; Integer[] offset = {null}; Integer[] length = {null}; origin.traverse((ObjectTraverser) (key, value) -> { switch (key) { case (ORIGIN_ORIGINAL): original[0] = value.asString(); break; case (ORIGIN_OFFSET): offset[0] = (int) value.asDouble(); break; case (ORIGIN_LENGTH): length[0] = (int) value.asDouble(); break; } }); return new Substring(offset[0], length[0] + offset[0], original[0]); } return null; } @NonNull private Item instantiateSameElementItem(String field, String key, Inspector value) { assertHasOperator(key, SAME_ELEMENT); SameElementItem sameElement = new SameElementItem(field); // All terms below sameElement are relative to this. getChildren(value).traverse((ArrayTraverser) (index, term) -> { sameElement.addItem(walkJson(term)); }); return sameElement; } @NonNull private Item instantiatePhraseItem(String field, String key, Inspector value) { assertHasOperator(key, PHRASE); PhraseItem phrase = new PhraseItem(); phrase.setIndexName(field); HashMap children = getChildrenMap(value); for (Inspector word : children.values()) { if (word.type() == STRING) phrase.addItem(new WordItem(word.asString())); else if (word.type() == OBJECT && word.field(PHRASE).valid()) phrase.addItem(instantiatePhraseItem(field, key, getChildren(word))); } return leafStyleSettings(getAnnotations(value), phrase); } @NonNull private Item instantiateNearItem(String field, String key, Inspector value) { assertHasOperator(key, NEAR); NearItem near = new NearItem(); near.setIndexName(field); HashMap children = getChildrenMap(value); for (Inspector word : children.values()){ near.addItem(new WordItem(word.asString(), field)); } Integer distance = getIntegerAnnotation(DISTANCE, getAnnotationMap(value), null); if (distance != null) { near.setDistance((int)distance); } return near; } @NonNull private Item instantiateONearItem(String field, String key, Inspector value) { assertHasOperator(key, ONEAR); NearItem onear = new ONearItem(); onear.setIndexName(field); HashMap children = getChildrenMap(value); for (Inspector word : children.values()){ onear.addItem(new WordItem(word.asString(), field)); } Integer distance = getIntegerAnnotation(DISTANCE, getAnnotationMap(value), null); if (distance != null) { onear.setDistance(distance); } return onear; } @NonNull private Item instantiateEquivItem(String field, String key, Inspector value) { HashMap children = getChildrenMap(value); Preconditions.checkArgument(children.size() >= 2, "Expected 2 or more arguments, got %s.", children.size()); EquivItem equiv = new EquivItem(); equiv.setIndexName(field); for (Inspector word : children.values()){ if (word.type() == STRING || word.type() == LONG || word.type() == DOUBLE){ equiv.addItem(new WordItem(word.asString(), field)); } if (word.type() == OBJECT){ word.traverse((ObjectTraverser) (key2, value2) -> { assertHasOperator(key2, PHRASE); equiv.addItem(instantiatePhraseItem(field, key2, value2)); }); } } return leafStyleSettings(getAnnotations(value), equiv); } private Item instantiateWordAlternativesItem(String field, String key, Inspector value) { HashMap children = getChildrenMap(value); Preconditions.checkArgument(children.size() >= 1, "Expected 1 or more arguments, got %s.", children.size()); Preconditions.checkArgument(children.get(0).type() == OBJECT, "Expected OBJECT, got %s.", children.get(0).type()); List terms = new ArrayList<>(); children.get(0).traverse((ObjectTraverser) (keys, values) -> { terms.add(new WordAlternativesItem.Alternative(keys, values.asDouble())); }); return leafStyleSettings(getAnnotations(value), new WordAlternativesItem(field, Boolean.TRUE, null, terms)); } // Not in use yet @NonNull private String getIndex(String field) { Preconditions.checkArgument(indexFactsSession.isIndex(field), "Field '%s' does not exist.", field); //return indexFactsSession.getCanonicName(field); return field; } private static void assertHasOperator(String key, String expectedKey) { Preconditions.checkArgument(key.equals(expectedKey), "Expected operator %s, got %s.", expectedKey, key); } private static IllegalArgumentException newUnexpectedArgumentException(Object actual, Object... expected) { StringBuilder out = new StringBuilder("Expected "); for (int i = 0, len = expected.length; i < len; ++i) { out.append(expected[i]); if (i < len - 2) { out.append(", "); } else if (i < len - 1) { out.append(" or "); } } out.append(", got ").append(actual).append("."); return new IllegalArgumentException(out.toString()); } private List valueListFromInspector(Inspector inspector){ List inspectorList = new ArrayList<>(); inspector.traverse((ArrayTraverser) (key, value) -> inspectorList.add(value)); return inspectorList; } private void connectItems() { for (ConnectedItem entry : connectedItems) { TaggableItem to = identifiedItems.get(entry.toId); Preconditions.checkNotNull(to, "Item '%s' was specified to connect to item with ID %s, which does not " + "exist in the query.", entry.fromItem, entry.toId); entry.fromItem.setConnectivity((Item) to, entry.weight); } } private static final class ConnectedItem { final double weight; final int toId; final TaggableItem fromItem; ConnectedItem(TaggableItem fromItem, int toId, double weight) { this.weight = weight; this.toId = toId; this.fromItem = fromItem; } } }