diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/query/SelectParser.java')
-rw-r--r-- | container-search/src/main/java/com/yahoo/search/query/SelectParser.java | 1185 |
1 files changed, 1185 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java new file mode 100644 index 00000000000..13ebacb62ef --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java @@ -0,0 +1,1185 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.query; + + +import com.google.common.base.Preconditions; +import com.yahoo.collections.LazyMap; +import com.yahoo.language.Language; +import com.yahoo.language.process.Normalizer; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.DotProductItem; +import com.yahoo.prelude.query.EquivItem; +import com.yahoo.prelude.query.ExactStringItem; +import com.yahoo.prelude.query.IntItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.Limit; +import com.yahoo.prelude.query.NearItem; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.ONearItem; +import com.yahoo.prelude.query.OrItem; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.prelude.query.PrefixItem; +import com.yahoo.prelude.query.QueryException; +import com.yahoo.prelude.query.RangeItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.RegExpItem; +import com.yahoo.prelude.query.SameElementItem; +import com.yahoo.prelude.query.SegmentingRule; +import com.yahoo.prelude.query.Substring; +import com.yahoo.prelude.query.SubstringItem; +import com.yahoo.prelude.query.SuffixItem; +import com.yahoo.prelude.query.TaggableItem; +import com.yahoo.prelude.query.WandItem; +import com.yahoo.prelude.query.WeakAndItem; +import com.yahoo.prelude.query.WeightedSetItem; +import com.yahoo.prelude.query.WordAlternativesItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.grouping.request.GroupingOperation; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.Parser; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.yql.VespaGroupingStep; +import com.yahoo.slime.ArrayTraverser; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.ObjectTraverser; +import com.yahoo.vespa.config.SlimeUtils; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.yahoo.slime.Type.ARRAY; +import static com.yahoo.slime.Type.DOUBLE; +import static com.yahoo.slime.Type.LONG; +import static com.yahoo.slime.Type.OBJECT; +import static com.yahoo.slime.Type.STRING; + +/** + * The Select query language. + * + * This class will be parsing the Select parameters, and will be used when the query has the SELECT-type. + * + * @author henrhoi + */ + + +public class SelectParser implements Parser { + + Parsable query; + private final IndexFacts indexFacts; + private final Map<Integer, TaggableItem> identifiedItems = LazyMap.newHashMap(); + private final List<ConnectedItem> connectedItems = new ArrayList<>(); + private final Normalizer normalizer; + private final ParserEnvironment environment; + private IndexFacts.Session indexFactsSession; + + + + /** YQL parameters and functions */ + + private static final String DESCENDING_HITS_ORDER = "descending"; + private static final String ASCENDING_HITS_ORDER = "ascending"; + private static final Integer DEFAULT_TARGET_NUM_HITS = 10; + private static final String ORIGIN_LENGTH = "length"; + private static final String ORIGIN_OFFSET = "offset"; + private static final String ORIGIN = "origin"; + private static final String ORIGIN_ORIGINAL = "original"; + private static final String CONNECTION_ID = "id"; + private static final String CONNECTION_WEIGHT = "weight"; + private static final String CONNECTIVITY = "connectivity"; + private static final String ANNOTATIONS = "annotations"; + private static final String NFKC = "nfkc"; + private static final String USER_INPUT_LANGUAGE = "language"; + private static final String ACCENT_DROP = "accentDrop"; + private static final String ALTERNATIVES = "alternatives"; + private static final String AND_SEGMENTING = "andSegmenting"; + private static final String DISTANCE = "distance"; + private static final String DOT_PRODUCT = "dotProduct"; + private static final String EQUIV = "equiv"; + private static final String FILTER = "filter"; + private static final String HIT_LIMIT = "hitLimit"; + private static final String IMPLICIT_TRANSFORMS = "implicitTransforms"; + private static final String LABEL = "label"; + private static final String NEAR = "near"; + private static final String NORMALIZE_CASE = "normalizeCase"; + private static final String ONEAR = "onear"; + private static final String PHRASE = "phrase"; + private static final String PREDICATE = "predicate"; + private static final String PREFIX = "prefix"; + private static final String RANKED = "ranked"; + private static final String RANK = "rank"; + private static final String SAME_ELEMENT = "sameElement"; + private static final String SCORE_THRESHOLD = "scoreThreshold"; + private static final String SIGNIFICANCE = "significance"; + private static final String STEM = "stem"; + private static final String SUBSTRING = "substring"; + private static final String SUFFIX = "suffix"; + private static final String TARGET_NUM_HITS = "targetNumHits"; + private static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; + private static final String UNIQUE_ID = "id"; + private static final String USE_POSITION_DATA = "usePositionData"; + private static final String WAND = "wand"; + private static final String WEAK_AND = "weakAnd"; + private static final String WEIGHTED_SET = "weightedSet"; + private static final String WEIGHT = "weight"; + private static final String AND = "and"; + private static final String AND_NOT = "and_not"; + private static final String OR = "or"; + private static final String EQ = "equals"; + private static final String RANGE = "range"; + private static final String CONTAINS = "contains"; + private static final String MATCHES = "matches"; + private static final String CALL = "call"; + private static final List<String> FUNCTION_CALLS = Arrays.asList(WAND, WEIGHTED_SET, DOT_PRODUCT, PREDICATE, RANK, WEAK_AND); + + /**************************************/ + + + + public SelectParser(ParserEnvironment environment) { + indexFacts = environment.getIndexFacts(); + normalizer = environment.getLinguistics().getNormalizer(); + + this.environment = environment; + } + + + @Override + public QueryTree parse(Parsable query) { + indexFactsSession = indexFacts.newSession(query.getSources(), query.getRestrict()); + connectedItems.clear(); + identifiedItems.clear(); + this.query = query; + + return buildTree(); + } + + + + private QueryTree buildTree() { + Inspector inspector = SlimeUtils.jsonToSlime(this.query.getSelect().getWhereString().getBytes()).get(); + if (inspector.field("error_message").valid()){ + throw new QueryException("Illegal query: "+inspector.field("error_message").asString() + ", at: "+ new String(inspector.field("offending_input").asData(), StandardCharsets.UTF_8)); + } + + Item root = walkJson(inspector); + connectItems(); + QueryTree newTree = new QueryTree(root); + + return newTree; + } + + + private Item walkJson(Inspector inspector){ + final Item[] item = {null}; + inspector.traverse((ObjectTraverser) (key, value) -> { + String type = (FUNCTION_CALLS.contains(key)) ? CALL : key; + + switch (type) { + + case AND: + item[0] = buildAnd(key, value); + break; + case AND_NOT: + item[0] = buildNotAnd(key, value); + break; + case OR: + item[0] = buildOr(key, value); + break; + case EQ: + item[0] = buildEquals(key, value); + break; + case RANGE: + item[0] = buildRange(key, value); + break; + case CONTAINS: + item[0] = buildTermSearch(key, value); + break; + case MATCHES: + item[0] = buildRegExpSearch(key, value); + break; + case CALL: + item[0] = buildFunctionCall(key, value); + break; + default: + throw newUnexpectedArgumentException(key, AND, CALL, CONTAINS, EQ, OR, RANGE, AND_NOT); + } + }); + return item[0]; + } + + + public List<VespaGroupingStep> getGroupingSteps(String grouping){ + List<VespaGroupingStep> groupingSteps = new ArrayList<>(); + List<String> groupingOperations = getOperations(grouping); + for (String groupingString : groupingOperations){ + GroupingOperation groupingOperation = GroupingOperation.fromString(groupingString); + VespaGroupingStep groupingStep = new VespaGroupingStep(groupingOperation); + groupingSteps.add(groupingStep); + } + return groupingSteps; + } + + private List<String> getOperations(String grouping) { + List<String> operations = new ArrayList<>(); + Inspector inspector = SlimeUtils.jsonToSlime(grouping.getBytes()).get(); + if (inspector.field("error_message").valid()){ + throw new QueryException("Illegal query: "+inspector.field("error_message").asString() + ", at: "+ new String(inspector.field("offending_input").asData(), StandardCharsets.UTF_8)); + } + + inspector.traverse( (ArrayTraverser) (key, value) -> { + String groupingString = value.toString(); + groupingString = groupingString.replace(" ", "").replace("\"", "").replace("\'", "").replace(":{", "(").replace(":", "(").replace("}", ")").replace(",", ")"); + groupingString = groupingString.substring(1, groupingString.length()); + operations.add(groupingString); + }); + + return operations; + + } + + + @NonNull + private Item buildFunctionCall(String key, Inspector value) { + switch (key) { + case WAND: + return buildWand(key, value); + case WEIGHTED_SET: + return buildWeightedSet(key, value); + case DOT_PRODUCT: + return buildDotProduct(key, value); + case PREDICATE: + return buildPredicate(key, value); + case RANK: + return buildRank(key, value); + case WEAK_AND: + return buildWeakAnd(key, value); + default: + throw newUnexpectedArgumentException(key, DOT_PRODUCT, RANK, WAND, WEAK_AND, WEIGHTED_SET, PREDICATE); + } + } + + + private void addItemsFromInspector(CompositeItem item, Inspector inspector){ + if (inspector.type() == ARRAY){ + inspector.traverse((ArrayTraverser) (index, new_value) -> { + item.addItem(walkJson(new_value)); + }); + + } else if (inspector.type() == OBJECT){ + if (inspector.field("children").valid()){ + inspector.field("children").traverse((ArrayTraverser) (index, new_value) -> { + item.addItem(walkJson(new_value)); + }); + } + + } + } + + + private Inspector getChildren(Inspector inspector){ + if (inspector.type() == ARRAY){ + return inspector; + + } else if (inspector.type() == OBJECT){ + if (inspector.field("children").valid()){ + return inspector.field("children"); + } + if (inspector.field(1).valid()){ + return inspector.field(1); + } + } + return null; + } + + + private HashMap<Integer, Inspector> getChildrenMap(Inspector inspector){ + HashMap<Integer, Inspector> children = new HashMap<>(); + if (inspector.type() == ARRAY){ + inspector.traverse((ArrayTraverser) (index, new_value) -> { + children.put(index, new_value); + }); + + } else if (inspector.type() == OBJECT){ + if (inspector.field("children").valid()){ + inspector.field("children").traverse((ArrayTraverser) (index, new_value) -> { + children.put(index, new_value); + }); + } + } + return children; + } + + + private Inspector getAnnotations(Inspector inspector){ + if (inspector.type() == OBJECT && inspector.field("attributes").valid()){ + return inspector.field("attributes"); + } + return null; + } + + + private HashMap<String, Inspector> getAnnotationMapFromAnnotationInspector(Inspector annotation){ + HashMap<String, Inspector> attributes = new HashMap<>(); + if (annotation.type() == OBJECT){ + annotation.traverse((ObjectTraverser) (index, new_value) -> { + attributes.put(index, new_value); + }); + } + return attributes; + } + + + private HashMap<String, Inspector> getAnnotationMap(Inspector inspector){ + HashMap<String, Inspector> attributes = new HashMap<>(); + if (inspector.type() == OBJECT && inspector.field("attributes").valid()){ + inspector.field("attributes").traverse((ObjectTraverser) (index, new_value) -> { + attributes.put(index, new_value); + }); + } + return attributes; + } + + + private <T> T getAnnotation(String annotationName, HashMap<String, Inspector> annotations, Class<T> expectedClass, T defaultValue) { + return (annotations.get(annotationName) == null) ? defaultValue : expectedClass.cast(annotations.get(annotationName).asString()); + } + + + private Boolean getBoolAnnotation(String annotationName, HashMap<String, Inspector> annotations, Boolean defaultValue) { + if (annotations != null){ + Inspector annotation = annotations.getOrDefault(annotationName, null); + if (annotation != null){ + return annotation.asBool(); + } + } + return defaultValue; + } + + + private Integer getIntegerAnnotation(String annotationName, HashMap<String, Inspector> annotations, Integer defaultValue) { + if (annotations != null){ + Inspector annotation = annotations.getOrDefault(annotationName, null); + if (annotation != null){ + return (int)annotation.asLong(); + } + } + return defaultValue; + } + + + private Double getDoubleAnnotation(String annotationName, HashMap<String, Inspector> annotations, Double defaultValue) { + if (annotations != null){ + Inspector annotation = annotations.getOrDefault(annotationName, null); + if (annotation != null){ + return annotation.asDouble(); + } + } + return defaultValue; + } + + + private Inspector getAnnotationAsInspectorOrNull(String annotationName, HashMap<String, Inspector> annotations) { + return annotations.get(annotationName); + } + + + @NonNull + private CompositeItem buildAnd(String key, Inspector value) { + AndItem andItem = new AndItem(); + addItemsFromInspector(andItem, value); + + return andItem; + } + + + @NonNull + private CompositeItem buildNotAnd(String key, Inspector value) { + NotItem notItem = new NotItem(); + addItemsFromInspector(notItem, value); + + return notItem; + } + + + @NonNull + private CompositeItem buildOr(String key, Inspector value) { + OrItem orItem = new OrItem(); + addItemsFromInspector(orItem, value); + return orItem; + } + + + @NonNull + private CompositeItem buildWeakAnd(String key, Inspector value) { + WeakAndItem weakAnd = new WeakAndItem(); + addItemsFromInspector(weakAnd, value); + Inspector annotations = getAnnotations(value); + + if (annotations != null){ + annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { + if (TARGET_NUM_HITS.equals(annotation_name)){ + weakAnd.setN((int)(annotation_value.asDouble())); + } + if (SCORE_THRESHOLD.equals(annotation_name)){ + weakAnd.setScoreThreshold((int)(annotation_value.asDouble())); + } + }); + } + + return weakAnd; + } + + + @NonNull + private <T extends TaggableItem> T leafStyleSettings(Inspector annotations, @NonNull T out) { + { + if (annotations != null) { + Inspector itemConnectivity= getAnnotationAsInspectorOrNull(CONNECTIVITY, getAnnotationMapFromAnnotationInspector(annotations)); + if (itemConnectivity != null) { + Integer[] id = {null}; + Double[] weight = {null}; + itemConnectivity.traverse((ObjectTraverser) (key, value) -> { + switch (key){ + case CONNECTION_ID: + id[0] = (int) value.asLong(); + break; + case CONNECTION_WEIGHT: + weight[0] = value.asDouble(); + break; + } + }); + connectedItems.add(new ConnectedItem(out, id[0], weight[0])); + } + + annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { + + if (SIGNIFICANCE.equals(annotation_name)) { + if (annotation_value != null) { + out.setSignificance(annotation_value.asDouble()); + } + } + if (UNIQUE_ID.equals(annotation_name)) { + if (annotation_value != null) { + out.setUniqueID((int)annotation_value.asLong()); + identifiedItems.put((int)annotation_value.asLong(), out); + } + } + }); + } + } + { + Item leaf = (Item) out; + if (annotations != null) { + Inspector itemAnnotations = getAnnotationAsInspectorOrNull(ANNOTATIONS, getAnnotationMapFromAnnotationInspector(annotations)); + if (itemAnnotations != null) { + itemAnnotations.traverse((ObjectTraverser) (key, value) -> { + leaf.addAnnotation(key, value.asString()); + }); + } + + annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { + if (FILTER.equals(annotation_name)) { + if (annotation_value != null) { + leaf.setFilter(annotation_value.asBool()); + } + } + if (RANKED.equals(annotation_name)) { + if (annotation_value != null) { + leaf.setRanked(annotation_value.asBool()); + } + } + if (LABEL.equals(annotation_name)) { + if (annotation_value != null) { + leaf.setLabel(annotation_value.asString()); + } + } + if (WEIGHT.equals(annotation_name)) { + if (annotation_value != null) { + leaf.setWeight((int)annotation_value.asDouble()); + } + } + }); + } + if (out instanceof IntItem && annotations != null) { + IntItem number = (IntItem) out; + Integer hitLimit = getCappedRangeSearchParameter(annotations); + if (hitLimit != null) { + number.setHitLimit(hitLimit); + } + + } + } + + return out; + } + + + private Integer getCappedRangeSearchParameter(Inspector annotations) { + final Integer[] hitLimit = {null}; + annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { + if (HIT_LIMIT.equals(annotation_name)) { + if (annotation_value != null) { + hitLimit[0] = (int)(annotation_value.asDouble()); + } + } + }); + final Boolean[] ascending = {null}; + final Boolean[] descending = {null}; + + if (hitLimit[0] != null) { + annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { + if (ASCENDING_HITS_ORDER.equals(annotation_name)) { + ascending[0] = annotation_value.asBool(); + } + if (DESCENDING_HITS_ORDER.equals(annotation_name)) { + descending[0] = annotation_value.asBool(); + } + + }); + Preconditions.checkArgument(ascending[0] == null || descending[0] == null, + "Settings for both ascending and descending ordering set, only one of these expected."); + + if (Boolean.TRUE.equals(descending[0]) || Boolean.FALSE.equals(ascending[0])) { + hitLimit[0] = hitLimit[0] * -1; + } + } + return hitLimit[0]; + } + + + @NonNull + private Item buildRange(String key, Inspector value) { + HashMap<Integer, Inspector> children = getChildrenMap(value); + Inspector annotations = getAnnotations(value); + + final boolean[] equals = {false}; + + String field; + Inspector boundInspector; + if (children.get(0).type() == STRING){ + field = children.get(0).asString(); + boundInspector = children.get(1); + } else { + field = children.get(1).asString(); + boundInspector = children.get(0); + } + + final Number[] bounds = {null, null}; + final String[] operators = {null, null}; + boundInspector.traverse((ObjectTraverser) (operator, bound) -> { + if (bound.type() == STRING) { + throw new IllegalArgumentException("Expected operator LITERAL, got READ_FIELD."); + } + if (operator.equals("=")) { + bounds[0] = (bound.type() == DOUBLE) ? Number.class.cast(bound.asDouble()) : Number.class.cast(bound.asLong()); + operators[0] = operator; + equals[0] = true; + } + if (operator.equals(">=") || operator.equals(">")){ + bounds[0] = (bound.type() == DOUBLE) ? Number.class.cast(bound.asDouble()) : Number.class.cast(bound.asLong()); + operators[0] = operator; + } else if (operator.equals("<=") || operator.equals("<")){ + bounds[1] = (bound.type() == DOUBLE) ? Number.class.cast(bound.asDouble()) : Number.class.cast(bound.asLong()); + operators[1] = operator; + } + + }); + IntItem range = null; + if (equals[0]){ + range = new IntItem(bounds[0].toString(), field); + } else if (operators[0]==null || operators[1]==null){ + Integer index = (operators[0] == null) ? 1 : 0; + switch (operators[index]){ + case ">=": + range = buildGreaterThanOrEquals(field, bounds[index].toString()); + break; + case ">": + range = buildGreaterThan(field, bounds[index].toString()); + break; + case "<": + range = buildLessThan(field, bounds[index].toString()); + break; + case "<=": + range = buildLessThanOrEquals(field, bounds[index].toString()); + break; + } + } + else { + range = instantiateRangeItem(bounds[0], bounds[1], field, operators[0].equals(">"), operators[1].equals("<")); + } + + return leafStyleSettings(annotations, range); + } + + @NonNull + private IntItem buildGreaterThanOrEquals(String field, String bound) { + return new IntItem("[" + bound + ";]", field); + + } + + + @NonNull + private IntItem buildLessThanOrEquals(String field, String bound) { + return new IntItem("[;" + bound + "]", field); + } + + + @NonNull + private IntItem buildGreaterThan(String field, String bound) { + return new IntItem(">" + bound, field); + + } + + + @NonNull + private IntItem buildLessThan(String field, String bound) { + return new IntItem("<" + bound, field); + } + + + @NonNull + private IntItem instantiateRangeItem(Number lowerBound, Number upperBound, String field, boolean bounds_left_open, boolean bounds_right_open) { + Preconditions.checkArgument(lowerBound != null && upperBound != null && field != null, + "Expected 3 NonNull-arguments"); + + if (!bounds_left_open && !bounds_right_open) { + return new RangeItem(lowerBound, upperBound, field); + } else { + Limit from; + Limit to; + if (bounds_left_open && bounds_right_open) { + from = new Limit(lowerBound, false); + to = new Limit(upperBound, false); + } else if (bounds_left_open) { + from = new Limit(lowerBound, false); + to = new Limit(upperBound, true); + } else { + from = new Limit(lowerBound, true); + to = new Limit(upperBound, false); + } + return new IntItem(from, to, field); + } + } + + + @NonNull + private Item buildEquals(String key, Inspector value) { + return buildRange(key, value); + } + + + @NonNull + private Item buildWand(String key, Inspector value) { + HashMap<String, Inspector> annotations = getAnnotationMap(value); + HashMap<Integer, Inspector> children = getChildrenMap(value); + + Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size()); + Integer target_num_hits= getIntegerAnnotation(TARGET_NUM_HITS, annotations, DEFAULT_TARGET_NUM_HITS); + + WandItem out = new WandItem(children.get(0).asString(), target_num_hits); + + Double scoreThreshold = getDoubleAnnotation(SCORE_THRESHOLD, annotations, null); + + if (scoreThreshold != null) { + out.setScoreThreshold(scoreThreshold); + } + + Double thresholdBoostFactor = getDoubleAnnotation(THRESHOLD_BOOST_FACTOR, annotations, null); + if (thresholdBoostFactor != null) { + out.setThresholdBoostFactor(thresholdBoostFactor); + } + return fillWeightedSet(value, children, out); + } + + + @NonNull + private WeightedSetItem fillWeightedSet(Inspector value, HashMap<Integer, Inspector> children, @NonNull WeightedSetItem out) { + addItems(children, out); + + return leafStyleSettings(getAnnotations(value), out); + } + + + private static void addItems(HashMap<Integer, Inspector> children, WeightedSetItem out) { + switch (children.get(1).type()) { + case OBJECT: + addStringItems(children, out); + break; + case ARRAY: + addLongItems(children, out); + break; + default: + throw newUnexpectedArgumentException(children.get(1).type(), ARRAY, OBJECT); + } + } + + + private static void addStringItems(HashMap<Integer, Inspector> children, WeightedSetItem out) { + //{"a":1, "b":2} + children.get(1).traverse((ObjectTraverser) (key, value) -> { + if (value.type() == STRING){ + throw new IllegalArgumentException("Expected operator LITERAL, got READ_FIELD."); + } + out.addToken(key, (int)value.asLong()); + }); + } + + + private static void addLongItems(HashMap<Integer, Inspector> children, WeightedSetItem out) { + //[[11,1], [37,2]] + children.get(1).traverse((ArrayTraverser) (index, pair) -> { + List<Integer> pairValues = new ArrayList<>(); + pair.traverse((ArrayTraverser) (pairIndex, pairValue) -> { + pairValues.add((int)pairValue.asLong()); + }); + Preconditions.checkArgument(pairValues.size() == 2, + "Expected item and weight, got %s.", pairValues); + out.addToken(pairValues.get(0).longValue(), pairValues.get(1)); + }); + } + + + @NonNull + private Item buildRegExpSearch(String key, Inspector value) { + assertHasOperator(key, MATCHES); + HashMap<Integer, Inspector> children = getChildrenMap(value); + String field = children.get(0).asString(); + String wordData = children.get(1).asString(); + RegExpItem regExp = new RegExpItem(field, true, wordData); + return leafStyleSettings(getAnnotations(value), regExp); + } + + + @NonNull + private Item buildWeightedSet(String key, Inspector value) { + HashMap<Integer, Inspector> children = getChildrenMap(value); + String field = children.get(0).asString(); + Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size()); + return fillWeightedSet(value, children, new WeightedSetItem(field)); + } + + + @NonNull + private Item buildDotProduct(String key, Inspector value) { + HashMap<Integer, Inspector> children = getChildrenMap(value); + String field = children.get(0).asString(); + Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size()); + return fillWeightedSet(value, children, new DotProductItem(field)); + } + + + @NonNull + private Item buildPredicate(String key, Inspector value) { + HashMap<Integer, Inspector> children = getChildrenMap(value); + String field = children.get(0).asString(); + Inspector args = children.get(1); + + Preconditions.checkArgument(children.size() == 3, "Expected 3 arguments, got %s.", children.size()); + + PredicateQueryItem item = new PredicateQueryItem(); + item.setIndexName(field); + + List<Inspector> argumentList = valueListFromInspector(getChildren(value)); + + // Adding attributes + argumentList.get(1).traverse((ObjectTraverser) (attrKey, attrValue) -> { + if (attrValue.type() == ARRAY){ + List<Inspector> attributes = valueListFromInspector(attrValue); + attributes.forEach( (attribute) -> item.addFeature(attrKey, attribute.asString())); + } else { + item.addFeature(attrKey, attrValue.asString()); + } + }); + + // Adding range attributes + argumentList.get(2).traverse((ObjectTraverser) (attrKey, attrValue) -> item.addRangeFeature(attrKey, (int)attrValue.asDouble())); + + return leafStyleSettings(getAnnotations(value), item); + } + + + @NonNull + private CompositeItem buildRank(String key, Inspector value) { + RankItem rankItem = new RankItem(); + addItemsFromInspector(rankItem, value); + return rankItem; + } + + + @NonNull + private Item buildTermSearch(String key, Inspector value) { + HashMap<Integer, Inspector> children = getChildrenMap(value); + String field = children.get(0).asString(); + + return instantiateLeafItem(field, key, value); + } + + + private String getInspectorKey(Inspector inspector){ + String[] actualKey = {""}; + if (inspector.type() == OBJECT){ + inspector.traverse((ObjectTraverser) (key, value) -> { + actualKey[0] = key; + + }); + } + return actualKey[0]; + } + + + @NonNull + private Item instantiateLeafItem(String field, String key, Inspector value) { + List<Inspector> possibleLeafFunction = valueListFromInspector(value); + String possibleLeafFunctionName = (possibleLeafFunction.size() > 1) ? getInspectorKey(possibleLeafFunction.get(1)) : ""; + if (FUNCTION_CALLS.contains(key)) { + return instantiateCompositeLeaf(field, key, value); + } else if(!possibleLeafFunctionName.equals("")){ + return instantiateCompositeLeaf(field, possibleLeafFunctionName, valueListFromInspector(value).get(1).field(possibleLeafFunctionName)); + } else { + return instantiateWordItem(field, key, value); + } + } + + + @NonNull + private Item instantiateCompositeLeaf(String field, String key, Inspector value) { + switch (key) { + case SAME_ELEMENT: + return instantiateSameElementItem(field, key, value); + case PHRASE: + return instantiatePhraseItem(field, key, value); + case NEAR: + return instantiateNearItem(field, key, value); + case ONEAR: + return instantiateONearItem(field, key, value); + case EQUIV: + return instantiateEquivItem(field, key, value); + case ALTERNATIVES: + return instantiateWordAlternativesItem(field, key, value); + default: + throw newUnexpectedArgumentException(key, EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT); + } + } + + + @NonNull + private Item instantiateWordItem(String field, String key, Inspector value) { + String wordData = getChildrenMap(value).get(1).asString(); + return instantiateWordItem(field, wordData, key, value, false, decideParsingLanguage(value, wordData)); + } + + + @NonNull + private Item instantiateWordItem(String field, String rawWord, String key, Inspector value, boolean exactMatch, Language language) { + String wordData = rawWord; + HashMap<String, Inspector> annotations = getAnnotationMap(value); + + if (getBoolAnnotation(NFKC, annotations, Boolean.FALSE)) { + // NOTE: If this is set to FALSE (default), we will still NFKC normalize text data + // during tokenization/segmentation, as that is always turned on also on the indexing side. + wordData = normalizer.normalize(wordData); + } + boolean fromQuery = getBoolAnnotation(IMPLICIT_TRANSFORMS, annotations, Boolean.TRUE); + boolean prefixMatch = getBoolAnnotation(PREFIX, annotations, Boolean.FALSE); + boolean suffixMatch = getBoolAnnotation(SUFFIX, annotations, Boolean.FALSE); + boolean substrMatch = getBoolAnnotation(SUBSTRING,annotations, Boolean.FALSE); + + Preconditions.checkArgument((prefixMatch ? 1 : 0) + + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2, + "Only one of prefix, substring and suffix can be set."); + @NonNull + final TaggableItem wordItem; + + if (exactMatch) { + wordItem = new ExactStringItem(wordData, fromQuery); + } else if (prefixMatch) { + wordItem = new PrefixItem(wordData, fromQuery); + } else if (suffixMatch) { + wordItem = new SuffixItem(wordData, fromQuery); + } else if (substrMatch) { + wordItem = new SubstringItem(wordData, fromQuery); + } else { + wordItem = new WordItem(wordData, fromQuery); + } + + if (wordItem instanceof WordItem) { + prepareWord(field, value, (WordItem) wordItem); + } + if (language != Language.ENGLISH) + ((Item)wordItem).setLanguage(language); + + return (Item) leafStyleSettings(getAnnotations(value), wordItem); + } + + + private Language decideParsingLanguage(Inspector value, String wordData) { + String languageTag = getAnnotation(USER_INPUT_LANGUAGE, getAnnotationMap(value), String.class, null); + + Language language = Language.fromLanguageTag(languageTag); + if (language != Language.UNKNOWN) return language; + + Optional<Language> explicitLanguage = query.getExplicitLanguage(); + if (explicitLanguage.isPresent()) return explicitLanguage.get(); + + return Language.ENGLISH; + } + + + private void prepareWord(String field, Inspector value, WordItem wordItem) { + wordItem.setIndexName(field); + wordStyleSettings(value, wordItem); + } + + + private void wordStyleSettings(Inspector value, WordItem out) { + HashMap<String, Inspector> annotations = getAnnotationMap(value); + + Substring origin = getOrigin(getAnnotations(value)); + if (origin != null) { + out.setOrigin(origin); + } + if (annotations != null){ + Boolean usePositionData = Boolean.getBoolean(getAnnotation(USE_POSITION_DATA, annotations, String.class, null)); + if (usePositionData != null) { + out.setPositionData(usePositionData); + } + Boolean stem = getBoolAnnotation(STEM, annotations, null); + if (stem != null) { + out.setStemmed(!stem); + } + + Boolean normalizeCase = getBoolAnnotation(NORMALIZE_CASE, annotations, null); + if (normalizeCase != null) { + out.setLowercased(!normalizeCase); + } + Boolean accentDrop = getBoolAnnotation(ACCENT_DROP, annotations, null); + if (accentDrop != null) { + out.setNormalizable(accentDrop); + } + Boolean andSegmenting = getBoolAnnotation(AND_SEGMENTING, annotations, null); + if (andSegmenting != null) { + if (andSegmenting) { + out.setSegmentingRule(SegmentingRule.BOOLEAN_AND); + } else { + out.setSegmentingRule(SegmentingRule.PHRASE); + } + } + } + } + + + private Substring getOrigin(Inspector annotations) { + if (annotations != null) { + Inspector origin = getAnnotationAsInspectorOrNull(ORIGIN, getAnnotationMapFromAnnotationInspector(annotations)); + if (origin == null) { + return null; + } + final String[] original = {null}; + final Integer[] offset = {null}; + final Integer[] length = {null}; + + origin.traverse((ObjectTraverser) (key, value) -> { + switch (key) { + case (ORIGIN_ORIGINAL): + original[0] = value.asString(); + break; + case (ORIGIN_OFFSET): + offset[0] = (int) value.asDouble(); + break; + case (ORIGIN_LENGTH): + length[0] = (int) value.asDouble(); + break; + } + + + }); + return new Substring(offset[0], length[0] + offset[0], original[0]); + } + return null; + } + + + @NonNull + private Item instantiateSameElementItem(String field, String key, Inspector value) { + assertHasOperator(key, SAME_ELEMENT); + + SameElementItem sameElement = new SameElementItem(field); + // All terms below sameElement are relative to this. + getChildren(value).traverse((ArrayTraverser) (index, term) -> { + sameElement.addItem(walkJson(term)); + }); + + return sameElement; + } + + + @NonNull + private Item instantiatePhraseItem(String field, String key, Inspector value) { + assertHasOperator(key, PHRASE); + HashMap<String, Inspector> annotations = getAnnotationMap(value); + + PhraseItem phrase = new PhraseItem(); + phrase.setIndexName(field); + HashMap<Integer, Inspector> children = getChildrenMap(value); + + for (Inspector word : children.values()) + if (word.type() == STRING) phrase.addItem(new WordItem(word.asString())); + else if (word.type() == OBJECT && word.field(PHRASE).valid()) { + phrase.addItem(instantiatePhraseItem(field, key, getChildren(word))); + } + return leafStyleSettings(getAnnotations(value), phrase); + } + + + @NonNull + private Item instantiateNearItem(String field, String key, Inspector value) { + assertHasOperator(key, NEAR); + + NearItem near = new NearItem(); + near.setIndexName(field); + + HashMap<Integer, Inspector> children = getChildrenMap(value); + + for (Inspector word : children.values()){ + near.addItem(new WordItem(word.asString(), field)); + } + + Integer distance = getIntegerAnnotation(DISTANCE, getAnnotationMap(value), null); + + if (distance != null) { + near.setDistance((int)distance); + } + return near; + } + + + @NonNull + private Item instantiateONearItem(String field, String key, Inspector value) { + assertHasOperator(key, ONEAR); + + NearItem onear = new ONearItem(); + onear.setIndexName(field); + HashMap<Integer, Inspector> children = getChildrenMap(value); + + for (Inspector word : children.values()){ + onear.addItem(new WordItem(word.asString(), field)); + } + + Integer distance = getIntegerAnnotation(DISTANCE, getAnnotationMap(value), null); + if (distance != null) { + onear.setDistance(distance); + } + return onear; + } + + + @NonNull + private Item instantiateEquivItem(String field, String key, Inspector value) { + + HashMap<Integer, Inspector> children = getChildrenMap(value); + Preconditions.checkArgument(children.size() >= 2, "Expected 2 or more arguments, got %s.", children.size()); + + EquivItem equiv = new EquivItem(); + equiv.setIndexName(field); + + for (Inspector word : children.values()){ + if (word.type() == STRING || word.type() == LONG || word.type() == DOUBLE){ + equiv.addItem(new WordItem(word.asString(), field)); + } + if (word.type() == OBJECT){ + word.traverse((ObjectTraverser) (key2, value2) -> { + assertHasOperator(key2, PHRASE); + equiv.addItem(instantiatePhraseItem(field, key2, value2)); + }); + } + } + + return leafStyleSettings(getAnnotations(value), equiv); + } + + + private Item instantiateWordAlternativesItem(String field, String key, Inspector value) { + HashMap<Integer, Inspector> children = getChildrenMap(value); + Preconditions.checkArgument(children.size() >= 1, "Expected 1 or more arguments, got %s.", children.size()); + Preconditions.checkArgument(children.get(0).type() == OBJECT, "Expected OBJECT, got %s.", children.get(0).type()); + + List<WordAlternativesItem.Alternative> terms = new ArrayList<>(); + + children.get(0).traverse((ObjectTraverser) (keys, values) -> { + terms.add(new WordAlternativesItem.Alternative(keys, values.asDouble())); + }); + return leafStyleSettings(getAnnotations(value), new WordAlternativesItem(field, Boolean.TRUE, null, terms)); + } + + + // Not in use yet + @NonNull + private String getIndex(String field) { + Preconditions.checkArgument(indexFactsSession.isIndex(field), "Field '%s' does not exist.", field); + //return indexFactsSession.getCanonicName(field); + return field; + } + + + private static void assertHasOperator(String key, String expectedKey) { + Preconditions.checkArgument(key.equals(expectedKey), "Expected operator %s, got %s.", expectedKey, key); + } + + + private static IllegalArgumentException newUnexpectedArgumentException(Object actual, Object... expected) { + StringBuilder out = new StringBuilder("Expected "); + for (int i = 0, len = expected.length; i < len; ++i) { + out.append(expected[i]); + if (i < len - 2) { + out.append(", "); + } else if (i < len - 1) { + out.append(" or "); + } + } + out.append(", got ").append(actual).append("."); + return new IllegalArgumentException(out.toString()); + } + + + private List<Inspector> valueListFromInspector(Inspector inspector){ + List<Inspector> inspectorList = new ArrayList<>(); + inspector.traverse((ArrayTraverser) (key, value) -> inspectorList.add(value)); + return inspectorList; + } + + + private void connectItems() { + for (ConnectedItem entry : connectedItems) { + TaggableItem to = identifiedItems.get(entry.toId); + Preconditions.checkNotNull(to, + "Item '%s' was specified to connect to item with ID %s, which does not " + + "exist in the query.", entry.fromItem, + entry.toId); + entry.fromItem.setConnectivity((Item) to, entry.weight); + } + } + + + private static final class ConnectedItem { + + final double weight; + final int toId; + final TaggableItem fromItem; + + ConnectedItem(TaggableItem fromItem, int toId, double weight) { + this.weight = weight; + this.toId = toId; + this.fromItem = fromItem; + } + } + + +} |