* This class must be kept in lockstep with {@link VespaSerializer}.
* Adding anything here will usually require a corresponding addition in
* VespaSerializer.
*
*
* @author Steinar Knutsen
* @author Stian Kristoffersen
* @author Simon Thoresen Hult
*/
public class YqlParser implements Parser {
// Sort-order values accepted in the YQL ORDER BY annotation.
public static final String DESCENDING_HITS_ORDER = "descending";
public static final String ASCENDING_HITS_ORDER = "ascending";
// Controls whether a term should be passed through the segmenter.
private enum SegmentWhen {
NEVER, POSSIBLY, ALWAYS;
}
// Hook for rewriting leaf index names; the default is the identity mapping.
// Subclasses (e.g. for sameElement) prepend a field prefix.
private static class IndexNameExpander {
public String expand(String leaf) { return leaf; }
}
// Defaults applied when the YQL program does not specify limit/offset.
private static final Integer DEFAULT_HITS = 10;
private static final Integer DEFAULT_OFFSET = 0;
public static final Integer DEFAULT_TARGET_NUM_HITS = 10;
// Human-readable descriptions used in annotation error messages.
private static final String ACCENT_DROP_DESCRIPTION = "setting for whether to remove accents if field implies it";
public static final String ANNOTATIONS = "annotations";
private static final String FILTER_DESCRIPTION = "term filter setting";
private static final String IMPLICIT_TRANSFORMS_DESCRIPTION = "setting for whether built-in query transformers should touch the term";
public static final String NFKC = "nfkc";
private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it";
private static final String ORIGIN_DESCRIPTION = "string origin for a term";
private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking";
private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it";
private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item";
// Annotation keys for the userInput()/userQuery() pseudo-functions.
private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty";
private static final String USER_INPUT_DEFAULT_INDEX = "defaultIndex";
private static final String USER_INPUT_GRAMMAR = "grammar";
public static final String USER_INPUT_LANGUAGE = "language";
private static final String USER_INPUT_RAW = "raw";
private static final String USER_INPUT_SEGMENT = "segment";
private static final String USER_INPUT = "userInput";
private static final String USER_QUERY = "userQuery";
private static final String NON_EMPTY = "nonEmpty";
public static final String START_ANCHOR = "startAnchor";
public static final String END_ANCHOR = "endAnchor";
// Sorting annotation keys.
public static final String SORTING_FUNCTION = "function";
public static final String SORTING_LOCALE = "locale";
public static final String SORTING_STRENGTH = "strength";
// Names of YQL functions, operators and term annotations recognized by this parser.
// This list must be kept in lockstep with VespaSerializer (see class javadoc).
public static final String ACCENT_DROP = "accentDrop";
public static final String ALTERNATIVES = "alternatives";
public static final String AND_SEGMENTING = "andSegmenting";
public static final String APPROXIMATE = "approximate";
public static final String BOUNDS = "bounds";
public static final String BOUNDS_LEFT_OPEN = "leftOpen";
public static final String BOUNDS_OPEN = "open";
public static final String BOUNDS_RIGHT_OPEN = "rightOpen";
public static final String CONNECTION_ID = "id";
public static final String CONNECTION_WEIGHT = "weight";
public static final String CONNECTIVITY = "connectivity";
public static final String DISTANCE = "distance";
public static final String DOT_PRODUCT = "dotProduct";
public static final String EQUIV = "equiv";
public static final String FILTER = "filter";
public static final String GEO_LOCATION = "geoLocation";
public static final String HIT_LIMIT = "hitLimit";
public static final String DISTANCE_THRESHOLD = "distanceThreshold";
public static final String HNSW_EXPLORE_ADDITIONAL_HITS = "hnsw.exploreAdditionalHits";
public static final String IMPLICIT_TRANSFORMS = "implicitTransforms";
public static final String LABEL = "label";
public static final String NEAR = "near";
public static final String NEAREST_NEIGHBOR = "nearestNeighbor";
public static final String NORMALIZE_CASE = "normalizeCase";
public static final String ONEAR = "onear";
public static final String ORIGIN_LENGTH = "length";
public static final String ORIGIN_OFFSET = "offset";
public static final String ORIGIN = "origin";
public static final String ORIGIN_ORIGINAL = "original";
public static final String PHRASE = "phrase";
public static final String PREDICATE = "predicate";
public static final String PREFIX = "prefix";
public static final String RANGE = "range";
public static final String RANKED = "ranked";
public static final String RANK = "rank";
public static final String SAME_ELEMENT = "sameElement";
public static final String SCORE_THRESHOLD = "scoreThreshold";
public static final String SIGNIFICANCE = "significance";
public static final String STEM = "stem";
public static final String SUBSTRING = "substring";
public static final String SUFFIX = "suffix";
public static final String TARGET_HITS = "targetHits";
public static final String TARGET_NUM_HITS = "targetNumHits";
public static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor";
public static final String UNIQUE_ID = "id";
public static final String USE_POSITION_DATA = "usePositionData";
public static final String WAND = "wand";
public static final String WEAK_AND = "weakAnd";
public static final String WEIGHTED_SET = "weightedSet";
public static final String WEIGHT = "weight";
public static final String URI = "uri";
// Collaborators resolved from the ParserEnvironment in the constructor.
private final IndexFacts indexFacts;
// Per-parse state; cleared/reset at the start of each parse() call.
private final List connectedItems = new ArrayList<>();
private final List groupingSteps = new ArrayList<>();
private final Map identifiedItems = LazyMap.newHashMap();
private final Normalizer normalizer;
private final Segmenter segmenter;
private final Detector detector;
private final Set yqlSources = LazySet.newHashSet();
private final Set yqlSummaryFields = LazySet.newHashSet();
private Integer hits;
private Integer offset;
private Integer timeout;
// Set by the caller before parse() when a user query is to be inlined via userQuery().
private Query userQuery;
private Parsable currentlyParsing;
private IndexFacts.Session indexFactsSession;
private IndexNameExpander indexNameExpander = new IndexNameExpander();
private Set docTypes;
private Sorting sorting;
// Set by the caller before parse(); guards against inserting a user query into itself.
private boolean queryParser = true;
// Stack of AST nodes currently being converted, used for annotation lookup.
private final Deque> annotationStack = new ArrayDeque<>();
private final ParserEnvironment environment;
// Visitor used by nonEmpty() to reject subtrees containing null items,
// empty word terms, or composites with no children. Throws on the first violation.
private static final QueryVisitor noEmptyTerms = new QueryVisitor() {
@Override
public boolean visit(Item item) {
if (item instanceof NullItem) {
throw new IllegalArgumentException("Got NullItem inside nonEmpty().");
} else if (item instanceof WordItem) {
if (((WordItem) item).getIndexedString().isEmpty()) {
throw new IllegalArgumentException("Searching for empty string inside nonEmpty()");
}
} else if (item instanceof CompositeItem) {
if (((CompositeItem) item).getItemCount() == 0) {
throw new IllegalArgumentException("Empty composite operator (" + item.getName() + ") inside nonEmpty()");
}
}
// Keep descending into children.
return true;
}
@Override
public void onExit() {
// NOP
}
};
public YqlParser(ParserEnvironment environment) {
indexFacts = environment.getIndexFacts();
normalizer = environment.getLinguistics().getNormalizer();
segmenter = environment.getLinguistics().getSegmenter();
detector = environment.getLinguistics().getDetector();
this.environment = environment;
}
@Override
public QueryTree parse(Parsable query) {
// Reset all per-parse state so this parser instance can be reused.
indexFactsSession = indexFacts.newSession(query.getSources(), query.getRestrict());
connectedItems.clear();
groupingSteps.clear();
identifiedItems.clear();
yqlSources.clear();
yqlSummaryFields.clear();
annotationStack.clear();
hits = null;
offset = null;
timeout = null;
// userQuery set prior to calling this
currentlyParsing = query;
docTypes = null;
sorting = null;
// queryParser set prior to calling this
return buildTree(parseYqlProgram());
}
/**
 * Merges the sources named in the YQL program with those of the query being
 * parsed, then rebuilds the index-facts session and the resolved document types.
 * An empty source list means "all sources in Vespa".
 */
private void joinDocTypesFromUserQueryAndYql() {
    List allSourceNames = new ArrayList<>(currentlyParsing.getSources().size() + yqlSources.size());
    // Only enumerate sources when YQL named some; otherwise leave the list
    // empty, which newSession() interprets as every source in the system.
    if (!yqlSources.isEmpty()) {
        allSourceNames.addAll(currentlyParsing.getSources());
        allSourceNames.addAll(yqlSources);
    }
    indexFactsSession = indexFacts.newSession(allSourceNames, currentlyParsing.getRestrict());
    docTypes = new HashSet<>(indexFactsSession.documentTypes());
}
// Converts the FILTER node of the parsed YQL program into a Vespa query tree:
// argument 0 names the sources, argument 1 is the filter expression proper.
private QueryTree buildTree(OperatorNode> filterPart) {
Preconditions.checkArgument(filterPart.getArguments().length == 2,
"Expected 2 arguments to filter, got %s.",
filterPart.getArguments().length);
populateYqlSources(filterPart.> getArgument(0));
OperatorNode filterExpression = filterPart.getArgument(1);
Item root = convertExpression(filterExpression);
// Wire up connectivity() references now that all IDs are registered.
connectItems();
// Release the user query reference; it has been inlined if requested.
userQuery = null;
return new QueryTree(root);
}
// Extracts the source names from the FROM part of the YQL program.
// SCAN = explicit single-source list, ALL = "sources *", MULTISOURCE = list of
// qualified names where only the first component is the source name.
private void populateYqlSources(OperatorNode> filterArgs) {
yqlSources.clear();
if (filterArgs.getOperator() == SequenceOperator.SCAN) {
for (String source : filterArgs.> getArgument(0)) {
yqlSources.add(source);
}
} else if (filterArgs.getOperator() == SequenceOperator.ALL) {
// yqlSources has already been cleared
} else if (filterArgs.getOperator() == SequenceOperator.MULTISOURCE) {
for (List source : filterArgs.>> getArgument(0)) {
yqlSources.add(source.get(0));
}
} else {
throw newUnexpectedArgumentException(filterArgs.getOperator(),
SequenceOperator.SCAN, SequenceOperator.ALL,
SequenceOperator.MULTISOURCE);
}
joinDocTypesFromUserQueryAndYql();
}
// Records the summary fields selected in the YQL projection; each entry must
// be a plain FIELD node whose second argument is the field name.
private void populateYqlSummaryFields(List> fields) {
yqlSummaryFields.clear();
for (OperatorNode field : fields) {
assertHasOperator(field, ProjectOperator.FIELD);
yqlSummaryFields.add(field.getArgument(1, String.class));
}
}
/**
 * Resolves the connectivity() references collected while converting terms:
 * each recorded (fromItem, toId, weight) triple is wired to the item that was
 * registered under that ID during parsing.
 *
 * @throws IllegalArgumentException if a referenced ID does not exist in the query
 */
private void connectItems() {
    for (ConnectedItem connection : connectedItems) {
        TaggableItem target = identifiedItems.get(connection.toId);
        if (target == null)
            throw new IllegalArgumentException("Item '" + connection.fromItem +
                    "' was specified to connect to item with ID " + connection.toId +
                    ", which does not exist in the query.");
        connection.fromItem.setConnectivity((Item) target, connection.weight);
    }
}
// Recursively converts a YQL expression AST node into a query tree Item.
// The node is pushed on annotationStack for the duration of the conversion so
// annotation lookups can see enclosing scopes.
private Item convertExpression(OperatorNode ast) {
try {
annotationStack.addFirst(ast);
switch (ast.getOperator()) {
case AND:
return buildAnd(ast);
case OR:
return buildOr(ast);
case EQ:
return buildEquals(ast);
case LT:
return buildLessThan(ast);
case GT:
return buildGreaterThan(ast);
case LTEQ:
return buildLessThanOrEquals(ast);
case GTEQ:
return buildGreaterThanOrEquals(ast);
case CONTAINS:
return buildTermSearch(ast);
case MATCHES:
return buildRegExpSearch(ast);
case CALL:
return buildFunctionCall(ast);
default:
throw newUnexpectedArgumentException(ast.getOperator(),
ExpressionOperator.AND, ExpressionOperator.CALL,
ExpressionOperator.CONTAINS, ExpressionOperator.EQ,
ExpressionOperator.GT, ExpressionOperator.GTEQ,
ExpressionOperator.LT, ExpressionOperator.LTEQ,
ExpressionOperator.OR);
}
} finally {
annotationStack.removeFirst();
}
}
// Dispatches a CALL node to the builder for the named YQL function
// (range, wand, userInput, ...). Exactly one function name is expected.
private Item buildFunctionCall(OperatorNode ast) {
List names = ast.getArgument(0);
Preconditions.checkArgument(names.size() == 1, "Expected 1 name, got %s.", names.size());
switch (names.get(0)) {
case USER_QUERY:
return fetchUserQuery();
case RANGE:
return buildRange(ast);
case WAND:
return buildWand(ast);
case WEIGHTED_SET:
return buildWeightedSet(ast);
case DOT_PRODUCT:
return buildDotProduct(ast);
case GEO_LOCATION:
return buildGeoLocation(ast);
case NEAREST_NEIGHBOR:
return buildNearestNeighbor(ast);
case PREDICATE:
return buildPredicate(ast);
case RANK:
return buildRank(ast);
case WEAK_AND:
return buildWeakAnd(ast);
case USER_INPUT:
return buildUserInput(ast);
case NON_EMPTY:
return ensureNonEmpty(ast);
default:
throw newUnexpectedArgumentException(names.get(0), DOT_PRODUCT, NEAREST_NEIGHBOR,
RANGE, RANK, USER_QUERY, WAND, WEAK_AND, WEIGHTED_SET,
PREDICATE, USER_INPUT, NON_EMPTY);
}
}
/**
 * Implements the nonEmpty() pseudo-function: converts its single argument and
 * verifies, via the noEmptyTerms visitor, that the resulting subtree contains
 * no null items, empty word terms or empty composites.
 *
 * @param ast the CALL node for nonEmpty
 * @return the converted argument item, unchanged
 * @throws IllegalArgumentException if the subtree contains an empty term
 */
private Item ensureNonEmpty(OperatorNode ast) {
List> args = ast.getArgument(1);
// Fixed message grammar: "1 argument", not "1 arguments".
Preconditions.checkArgument(args.size() == 1, "Expected 1 argument, got %s.", args.size());
Item item = convertExpression(args.get(0));
ToolBox.visit(noEmptyTerms, item);
return item;
}
// Builds a weightedSet(field, {token: weight, ...}) item: argument 0 is the
// field, argument 1 the token/weight map.
private Item buildWeightedSet(OperatorNode ast) {
List> args = ast.getArgument(1);
Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size());
return fillWeightedSet(ast, args.get(1), new WeightedSetItem(getIndex(args.get(0))));
}
// Builds a dotProduct(field, {token: weight, ...}) item; same argument shape
// as weightedSet but ranks by the dot product of matched weights.
private Item buildDotProduct(OperatorNode ast) {
List> args = ast.getArgument(1);
Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size());
return fillWeightedSet(ast, args.get(1), new DotProductItem(getIndex(args.get(0))));
}
private Item buildGeoLocation(OperatorNode ast) {
List> args = ast.getArgument(1);
Preconditions.checkArgument(args.size() == 4, "Expected 4 arguments, got %s.", args.size());
String field = fetchFieldRead(args.get(0));
var coord_1 = ParsedDegree.fromString(fetchFieldRead(args.get(1)), true, false);
var coord_2 = ParsedDegree.fromString(fetchFieldRead(args.get(2)), false, true);
double radius = DistanceParser.parse(fetchFieldRead(args.get(3)));
var loc = new Location();
if (coord_1.isLatitude && coord_2.isLongitude) {
loc.setGeoCircle(coord_1.degrees, coord_2.degrees, radius);
} else if (coord_2.isLatitude && coord_1.isLongitude) {
loc.setGeoCircle(coord_2.degrees, coord_1.degrees, radius);
} else {
throw new IllegalArgumentException("Invalid geoLocation coordinates '"+coord_1+"' and '"+coord_2+"'");
}
var item = new GeoLocationItem(loc, field);
String label = getAnnotation(ast, LABEL, String.class, null, "item label");
if (label != null) {
item.setLabel(label);
}
return item;
}
// Builds a nearestNeighbor(field, queryProperty) item and applies its optional
// annotations: targetHits (with the deprecated targetNumHits as fallback),
// distanceThreshold, hnsw.exploreAdditionalHits, approximate and label.
private Item buildNearestNeighbor(OperatorNode ast) {
List> args = ast.getArgument(1);
Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size());
String field = fetchFieldRead(args.get(0));
String property = fetchFieldRead(args.get(1));
NearestNeighborItem item = new NearestNeighborItem(field, property);
Integer targetNumHits = getAnnotation(ast, TARGET_HITS,
Integer.class, null, "desired minimum hits to produce");
if (targetNumHits == null) {
// Legacy annotation name, checked only when the new one is absent.
targetNumHits = getAnnotation(ast, TARGET_NUM_HITS,
Integer.class, null, "desired minimum hits to produce");
}
if (targetNumHits != null) {
item.setTargetNumHits(targetNumHits);
}
Double distanceThreshold = getAnnotation(ast, DISTANCE_THRESHOLD,
Double.class, null, "maximum distance allowed from query point");
if (distanceThreshold != null) {
item.setDistanceThreshold(distanceThreshold);
}
Integer hnswExploreAdditionalHits = getAnnotation(ast, HNSW_EXPLORE_ADDITIONAL_HITS,
Integer.class, null, "number of extra hits to explore for HNSW algorithm");
if (hnswExploreAdditionalHits != null) {
item.setHnswExploreAdditionalHits(hnswExploreAdditionalHits);
}
// Approximate search defaults to true; exact search must be requested explicitly.
Boolean allowApproximate = getAnnotation(ast, APPROXIMATE,
Boolean.class, Boolean.TRUE, "allow approximate nearest neighbor search");
item.setAllowApproximate(allowApproximate);
String label = getAnnotation(ast, LABEL, String.class, null, "item label");
if (label != null) {
item.setLabel(label);
}
return item;
}
// Builds a predicate(field, attributeMap, rangeMap) item: argument 1 supplies
// plain (string) features, argument 2 supplies range features, which may be
// Long or Integer valued.
private Item buildPredicate(OperatorNode ast) {
List> args = ast.getArgument(1);
Preconditions.checkArgument(args.size() == 3, "Expected 3 arguments, got %s.", args.size());
PredicateQueryItem item = new PredicateQueryItem();
item.setIndexName(getIndex(args.get(0)));
addFeatures(args.get(1),
(key, value, subqueryBitmap) -> item.addFeature(key, (String) value, subqueryBitmap), PredicateQueryItem.ALL_SUB_QUERIES);
addFeatures(args.get(2), (key, value, subqueryBitmap) -> {
if (value instanceof Long) {
item.addRangeFeature(key, (Long) value, subqueryBitmap);
} else {
item.addRangeFeature(key, (Integer) value, subqueryBitmap);
}
}, PredicateQueryItem.ALL_SUB_QUERIES);
return leafStyleSettings(ast, item);
}
// Callback used by addFeatures() to deliver one predicate feature at a time.
interface AddFeature {
void addFeature(String key, Object value, long subqueryBitmap);
}
/**
 * Recursively walks a MAP node and feeds each key/value pair to the given
 * AddFeature callback. Values may be literals, arrays of literals, or nested
 * maps using the subquery syntax, where the key encodes a subquery bitmap
 * either as hex ("0x...") or as an explicit bit list ("[1,3,...]").
 *
 * @param map the MAP node to read features from; non-MAP nodes are ignored
 * @param item receives each (key, value, subqueryBitmap) feature
 * @param subqueryBitmap the bitmap in effect for features at this level
 */
private void addFeatures(OperatorNode map, AddFeature item, long subqueryBitmap) {
// Tolerate absent/other nodes: nothing to add. (The previous assertHasOperator
// on the same condition right after this return was unreachable and is removed.)
if (map.getOperator() != ExpressionOperator.MAP) return;
List keys = map.getArgument(0);
List> values = map.getArgument(1);
for (int i = 0; i < keys.size(); ++i) {
String key = keys.get(i);
OperatorNode value = values.get(i);
if (value.getOperator() == ExpressionOperator.ARRAY) {
List> multiValues = value.getArgument(0);
for (OperatorNode multiValue : multiValues) {
assertHasOperator(multiValue, ExpressionOperator.LITERAL);
item.addFeature(key, multiValue.getArgument(0), subqueryBitmap);
}
} else if (value.getOperator() == ExpressionOperator.LITERAL) {
item.addFeature(key, value.getArgument(0), subqueryBitmap);
} else {
assertHasOperator(value, ExpressionOperator.MAP); // Subquery syntax
Preconditions.checkArgument(key.startsWith("0x") || key.startsWith("["));
if (key.startsWith("0x")) {
// Hex bitmap: at most 16 hex digits fit in a long.
String subqueryString = key.substring(2);
if (subqueryString.length() > 16)
throw new NumberFormatException("Too long subquery string: " + key);
long currentSubqueryBitmap = new BigInteger(subqueryString, 16).longValue();
addFeatures(value, item, currentSubqueryBitmap);
} else {
// Bit-list bitmap: "[1,3]" sets bits 1 and 3.
StringTokenizer bits = new StringTokenizer(key.substring(1, key.length() - 1), ",");
long currentSubqueryBitmap = 0;
while (bits.hasMoreTokens()) {
int bit = Integer.parseInt(bits.nextToken().trim());
currentSubqueryBitmap |= 1L << bit;
}
addFeatures(value, item, currentSubqueryBitmap);
}
}
}
}
// Builds a wand(field, {token: weight, ...}) item. targetHits takes precedence
// over the legacy targetNumHits annotation; only the legacy path applies the
// DEFAULT_TARGET_NUM_HITS fallback, so the effective default is preserved.
private Item buildWand(OperatorNode ast) {
List> args = ast.getArgument(1);
Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size());
Integer targetNumHits = getAnnotation(ast, TARGET_HITS,
Integer.class, null, "desired number of hits to accumulate in wand");
if (targetNumHits == null) {
targetNumHits = getAnnotation(ast, TARGET_NUM_HITS,
Integer.class, DEFAULT_TARGET_NUM_HITS, "desired number of hits to accumulate in wand");
}
WandItem out = new WandItem(getIndex(args.get(0)), targetNumHits);
Double scoreThreshold = getAnnotation(ast, SCORE_THRESHOLD, Double.class, null,
"min score for hit inclusion");
if (scoreThreshold != null) {
out.setScoreThreshold(scoreThreshold);
}
Double thresholdBoostFactor = getAnnotation(ast,
THRESHOLD_BOOST_FACTOR, Double.class, null,
"boost factor used to boost threshold before comparing against upper bound score");
if (thresholdBoostFactor != null) {
out.setThresholdBoostFactor(thresholdBoostFactor);
}
return fillWeightedSet(ast, args.get(1), out);
}
// Populates a weighted-set style item from a token/weight argument node and
// applies the common leaf-item annotations before returning it.
private WeightedSetItem fillWeightedSet(OperatorNode ast,
OperatorNode arg,
WeightedSetItem out) {
addItems(arg, out);
return leafStyleSettings(ast, out);
}
/** Expands leaf index names to "prefix.leaf", used for terms inside sameElement(). */
private static class PrefixExpander extends IndexNameExpander {

    private final String fieldPrefix; // stored without the trailing dot

    public PrefixExpander(String prefix) {
        this.fieldPrefix = prefix;
    }

    @Override
    public String expand(String leaf) {
        return fieldPrefix + "." + leaf;
    }

}
// Builds a sameElement(...) item. While converting the children, the index
// name expander is temporarily swapped for a PrefixExpander so all nested
// terms resolve relative to the struct field, then restored afterwards.
private Item instantiateSameElementItem(String field, OperatorNode ast) {
assertHasFunctionName(ast, SAME_ELEMENT);
SameElementItem sameElement = new SameElementItem(field);
// All terms below sameElement are relative to this.
IndexNameExpander prev = swapIndexCreator(new PrefixExpander(field));
for (OperatorNode term : ast.>> getArgument(1)) {
sameElement.addItem(convertExpression(term));
}
swapIndexCreator(prev);
return sameElement;
}
// Builds a phrase(...) item. A phrase carrying an "origin" annotation is a
// segment phrase (produced by segmentation) and is delegated to
// instantiatePhraseSegmentItem; otherwise an explicit PhraseItem is built
// whose children may be words, nested phrases or word alternatives.
private Item instantiatePhraseItem(String field, OperatorNode ast) {
assertHasFunctionName(ast, PHRASE);
if (getAnnotation(ast, ORIGIN, Map.class, null, ORIGIN_DESCRIPTION, false) != null) {
return instantiatePhraseSegmentItem(field, ast, false);
}
PhraseItem phrase = new PhraseItem();
phrase.setIndexName(field);
phrase.setExplicit(true);
for (OperatorNode word : ast.>> getArgument(1)) {
if (word.getOperator() == ExpressionOperator.CALL) {
List names = word.getArgument(0);
switch (names.get(0)) {
case PHRASE:
if (getAnnotation(word, ORIGIN, Map.class, null, ORIGIN_DESCRIPTION, false) == null) {
phrase.addItem(instantiatePhraseItem(field, word));
} else {
// Nested phrase with origin: force a phrase segment inside this phrase.
phrase.addItem(instantiatePhraseSegmentItem(field, word, true));
}
break;
case ALTERNATIVES:
phrase.addItem(instantiateWordAlternativesItem(field, word));
break;
default:
throw new IllegalArgumentException("Expected phrase or word alternatives, got " + names.get(0));
}
} else {
phrase.addItem(instantiateWordItem(field, word, phrase.getClass()));
}
}
return leafStyleSettings(ast, phrase);
}
// Builds a segment item (PhraseSegmentItem or AndSegmentItem) from a phrase
// node carrying an "origin" annotation. If implicit transforms are enabled the
// origin string is re-segmented with the current language; otherwise the words
// from the AST are used verbatim. The result is locked against modification.
private Item instantiatePhraseSegmentItem(String field, OperatorNode ast, boolean forcePhrase) {
Substring origin = getOrigin(ast);
Boolean stem = getAnnotation(ast, STEM, Boolean.class, Boolean.TRUE, STEM_DESCRIPTION);
Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, Boolean.class, Boolean.FALSE,
"setting for whether to force using AND for segments on and off");
SegmentItem phrase;
List words = null;
if (forcePhrase || !andSegmenting) {
phrase = new PhraseSegmentItem(origin.getValue(), origin.getValue(), true, !stem, origin);
} else {
phrase = new AndSegmentItem(origin.getValue(), true, !stem);
}
phrase.setIndexName(field);
if (getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) {
words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage());
}
if (words != null && words.size() > 0) {
// Use the freshly segmented words.
for (String word : words) {
phrase.addItem(new WordItem(word, field, true));
}
} else {
// Fall back to the words given in the AST, without further segmentation.
for (OperatorNode word : ast.>> getArgument(1)) {
phrase.addItem(instantiateWordItem(field, word, phrase.getClass(), SegmentWhen.NEVER));
}
}
if (phrase instanceof TaggableItem) {
leafStyleSettings(ast, (TaggableItem) phrase);
}
phrase.lock();
return phrase;
}
// Builds a near(...) item (unordered proximity) from its word arguments,
// applying the optional "distance" annotation.
private Item instantiateNearItem(String field, OperatorNode ast) {
assertHasFunctionName(ast, NEAR);
NearItem near = new NearItem();
near.setIndexName(field);
for (OperatorNode word : ast.>> getArgument(1)) {
near.addItem(instantiateWordItem(field, word, near.getClass()));
}
Integer distance = getAnnotation(ast, DISTANCE, Integer.class, null, "term distance for NEAR operator");
if (distance != null) {
near.setDistance(distance);
}
return near;
}
// Builds an onear(...) item (ordered proximity); mirrors instantiateNearItem
// but uses ONearItem so term order is significant.
private Item instantiateONearItem(String field, OperatorNode ast) {
assertHasFunctionName(ast, ONEAR);
NearItem onear = new ONearItem();
onear.setIndexName(field);
for (OperatorNode word : ast.>> getArgument(1)) {
onear.addItem(instantiateWordItem(field, word, onear.getClass()));
}
Integer distance = getAnnotation(ast, DISTANCE, Integer.class, null, "term distance for ONEAR operator");
if (distance != null) {
onear.setDistance(distance);
}
return onear;
}
/**
 * Returns the root of the user query's tree for inlining via userQuery().
 *
 * @throws IllegalStateException if invoked while acting as the query parser
 *         itself, or before a user query has been set
 */
private Item fetchUserQuery() {
    if (queryParser)
        throw new IllegalStateException("Tried inserting user query into itself.");
    if (userQuery == null)
        throw new IllegalStateException("User query must be set before trying to build complete query "
                                        + "tree including user query.");
    return userQuery.getModel().getQueryTree().getRoot();
}
// Builds the item(s) for userInput(...): fetches the input string (literal or
// query property), then parses it according to the "grammar" annotation —
// "raw" and "segment" produce a single word item, anything else is parsed as
// a full query of that grammar type with the user-input annotations propagated.
private Item buildUserInput(OperatorNode ast) {
// TODO add support for default arguments if property results in nothing
List> args = ast.getArgument(1);
String wordData = getStringContents(args.get(0));
Boolean allowEmpty = getAnnotation(ast, USER_INPUT_ALLOW_EMPTY, Boolean.class,
Boolean.FALSE, "flag for allowing NullItem to be returned");
if (allowEmpty && (wordData == null || wordData.isEmpty())) return new NullItem();
String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class,
Query.Type.ALL.toString(), "grammar for handling user input");
String defaultIndex = getAnnotation(ast, USER_INPUT_DEFAULT_INDEX,
String.class, "default", "default index for user input terms");
Language language = decideParsingLanguage(ast, wordData);
Item item;
if (USER_INPUT_RAW.equals(grammar)) {
item = instantiateWordItem(defaultIndex, wordData, ast, null, SegmentWhen.NEVER, true, language);
} else if (USER_INPUT_SEGMENT.equals(grammar)) {
item = instantiateWordItem(defaultIndex, wordData, ast, null, SegmentWhen.ALWAYS, false, language);
} else {
item = parseUserInput(grammar, defaultIndex, wordData, language, allowEmpty);
propagateUserInputAnnotations(ast, item);
}
return item;
}
// Chooses the language used to parse user input, in priority order:
// 1) the "language" annotation, 2) the explicit language of the query being
// parsed, 3) detection on the input text, 4) English as the final fallback.
private Language decideParsingLanguage(OperatorNode ast, String wordData) {
String languageTag = getAnnotation(ast, USER_INPUT_LANGUAGE, String.class, null,
"language setting for segmenting query section");
Language language = Language.fromLanguageTag(languageTag);
if (language != Language.UNKNOWN) return language;
Optional explicitLanguage = currentlyParsing.getExplicitLanguage();
if (explicitLanguage.isPresent()) return explicitLanguage.get();
language = detector.detect(wordData, null).getLanguage();
if (language != Language.UNKNOWN) return language;
return Language.ENGLISH;
}
// Resolves a string argument node: a LITERAL yields its value directly, while
// a VARREF is looked up in the user query's properties (which must be set).
private String getStringContents(OperatorNode operator) {
switch (operator.getOperator()) {
case LITERAL:
return operator.getArgument(0, String.class);
case VARREF:
Preconditions.checkState(userQuery != null,
"properties must be available when trying to fetch user input");
return userQuery.properties().getString(operator.getArgument(0, String.class));
default:
throw newUnexpectedArgumentException(operator.getOperator(),
ExpressionOperator.LITERAL, ExpressionOperator.VARREF);
}
}
// Pushes userInput() annotations from the AST node down onto every item in
// the parsed subtree via an AnnotationPropagator visitor.
private void propagateUserInputAnnotations(OperatorNode ast, Item item) {
ToolBox.visit(new AnnotationPropagator(ast), item);
}
// Parses user input with a sub-parser of the requested grammar type and
// returns the resulting root item.
// Raises IllegalArgumentException if the result is a NullItem and allowNullItem
// is false; marks the item with the parsing language unless it is English.
private Item parseUserInput(String grammar, String defaultIndex, String wordData,
Language language, boolean allowNullItem) {
Query.Type parseAs = Query.Type.getType(grammar);
Parser parser = ParserFactory.newInstance(parseAs, environment);
// perhaps not use already resolved doctypes, but respect source and restrict
Item item = parser.parse(new Parsable().setQuery(wordData)
.addSources(docTypes)
.setLanguage(language)
.setDefaultIndexName(defaultIndex)).getRoot();
// the null check should be unnecessary, but is there to avoid having to suppress null warnings
if ( ! allowNullItem && (item == null || item instanceof NullItem))
throw new IllegalArgumentException("Parsing '" + wordData + "' only resulted in NullItem.");
if (language != Language.ENGLISH) // mark the language used, unless it's the default
item.setLanguage(language);
return item;
}
// Parses the YQL program text into an AST, then peels off the wrapping
// PROGRAM/EXECUTE nodes and the optional pipeline stages (grouping pipe,
// timeout, summary fields, offset/hits, sorting) until the FILTER node —
// the query expression itself — remains.
private OperatorNode> parseYqlProgram() {
OperatorNode> ast;
try {
ast = new ProgramParser().parse("query", currentlyParsing.getQuery());
} catch (Exception e) {
throw new IllegalInputException(e);
}
assertHasOperator(ast, StatementOperator.PROGRAM);
Preconditions.checkArgument(ast.getArguments().length == 1,
"Expected only a single argument to the root node, got %s.",
ast.getArguments().length);
// TODO: should we check size of first argument as well?
ast = ast.>> getArgument(0).get(0);
assertHasOperator(ast, StatementOperator.EXECUTE);
ast = ast.getArgument(0);
ast = fetchPipe(ast);
ast = fetchTimeout(ast);
ast = fetchSummaryFields(ast);
ast = fetchOffsetAndHits(ast);
ast = fetchSorting(ast);
assertHasOperator(ast, SequenceOperator.FILTER);
return ast;
}
@SuppressWarnings("unchecked")
private OperatorNode> fetchPipe(OperatorNode> toScan) {
OperatorNode> ast = toScan;
while (ast.getOperator() == SequenceOperator.PIPE) {
OperatorNode groupingAst = ast.>> getArgument(2).get(0);
GroupingOperation groupingOperation = GroupingOperation.fromString(groupingAst. getArgument(0));
VespaGroupingStep groupingStep = new VespaGroupingStep(groupingOperation);
List