summaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2018-10-26 11:03:28 +0000
committerJon Bratseth <bratseth@oath.com>2018-10-26 11:03:28 +0000
commit365972496256236dfd3599d547a167af0d2d4de2 (patch)
treea9851548e37b765fac222f35e30008e6cc0c82af /container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
parent57ba28e794416ae7cc6e14660249459dac7df937 (diff)
Revert "Merge pull request #7409 from vespa-engine/revert-7380-bratseth/segment-yql-by-default"
This reverts commit 1cb09174cb579936eae49f7db632113795b0666a, reversing changes made to eedcfc980db65da04eaca866693ef215f337f325.
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/yql/YqlParser.java')
-rw-r--r--container-search/src/main/java/com/yahoo/search/yql/YqlParser.java123
1 files changed, 35 insertions, 88 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
index e0e9042e1a3..e4ae759eec7 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
@@ -18,10 +18,8 @@ import com.google.common.annotations.Beta;
import com.google.common.base.Preconditions;
import com.yahoo.collections.LazyMap;
import com.yahoo.collections.LazySet;
-import com.yahoo.collections.Tuple2;
import com.yahoo.component.Version;
import com.yahoo.language.Language;
-import com.yahoo.language.Linguistics;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.Segmenter;
@@ -117,9 +115,6 @@ public class YqlParser implements Parser {
private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it";
private static final String ORIGIN_DESCRIPTION = "string origin for a term";
private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking";
- private static final String SEGMENTER_BACKEND = "backend";
- private static final String SEGMENTER = "segmenter";
- private static final String SEGMENTER_VERSION = "version";
private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it";
private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item";
private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty";
@@ -190,8 +185,6 @@ public class YqlParser implements Parser {
private final Detector detector;
private final Set<String> yqlSources = LazySet.newHashSet();
private final Set<String> yqlSummaryFields = LazySet.newHashSet();
- private final String localSegmenterBackend;
- private final Version localSegmenterVersion;
private Integer hits;
private Integer offset;
private Integer timeout;
@@ -201,10 +194,7 @@ public class YqlParser implements Parser {
private IndexNameExpander indexNameExpander = new IndexNameExpander();
private Set<String> docTypes;
private Sorting sorting;
- private String segmenterBackend;
- private Version segmenterVersion;
private boolean queryParser = true;
- private boolean resegment = false;
private final Deque<OperatorNode<?>> annotationStack = new ArrayDeque<>();
private final ParserEnvironment environment;
@@ -238,10 +228,6 @@ public class YqlParser implements Parser {
segmenter = environment.getLinguistics().getSegmenter();
detector = environment.getLinguistics().getDetector();
this.environment = environment;
-
- Tuple2<String, Version> version = environment.getLinguistics().getVersion(Linguistics.Component.SEGMENTER);
- localSegmenterBackend = version.first;
- localSegmenterVersion = version.second;
}
@NonNull
@@ -261,10 +247,7 @@ public class YqlParser implements Parser {
currentlyParsing = query;
docTypes = null;
sorting = null;
- segmenterBackend = null;
- segmenterVersion = null;
// queryParser set prior to calling this
- resegment = false;
return buildTree(parseYqlProgram());
}
@@ -287,32 +270,12 @@ public class YqlParser implements Parser {
filterPart.getArguments().length);
populateYqlSources(filterPart.<OperatorNode<?>> getArgument(0));
OperatorNode<ExpressionOperator> filterExpression = filterPart.getArgument(1);
- populateLinguisticsAnnotations(filterExpression);
Item root = convertExpression(filterExpression);
connectItems();
userQuery = null;
return new QueryTree(root);
}
- private void populateLinguisticsAnnotations(OperatorNode<ExpressionOperator> filterExpression) {
- Map<?, ?> segmenter = getAnnotation(filterExpression, SEGMENTER,
- Map.class, null, "segmenter engine and version");
- if (segmenter == null) {
- segmenterVersion = null;
- segmenterBackend = null;
- resegment = false;
- } else {
- segmenterBackend = getMapValue(SEGMENTER, segmenter, SEGMENTER_BACKEND, String.class);
- try {
- segmenterVersion = new Version(getMapValue(SEGMENTER, segmenter, SEGMENTER_VERSION, String.class));
- } catch (RuntimeException e) {
- segmenterVersion = null;
- }
- resegment = ! localSegmenterBackend.equals(segmenterBackend) ||
- ! localSegmenterVersion.equals(segmenterVersion);
- }
- }
-
private void populateYqlSources(OperatorNode<?> filterArgs) {
yqlSources.clear();
if (filterArgs.getOperator() == SequenceOperator.SCAN) {
@@ -614,8 +577,7 @@ public class YqlParser implements Parser {
}
phrase.setIndexName(field);
- if (resegment
- && getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) {
+ if (getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) {
words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage());
}
@@ -719,16 +681,16 @@ public class YqlParser implements Parser {
return Language.ENGLISH;
}
- private String getStringContents(OperatorNode<ExpressionOperator> propertySniffer) {
- switch (propertySniffer.getOperator()) {
+ private String getStringContents(OperatorNode<ExpressionOperator> operator) {
+ switch (operator.getOperator()) {
case LITERAL:
- return propertySniffer.getArgument(0, String.class);
+ return operator.getArgument(0, String.class);
case VARREF:
Preconditions.checkState(userQuery != null,
"properties must be available when trying to fetch user input");
- return userQuery.properties().getString(propertySniffer.getArgument(0, String.class));
+ return userQuery.properties().getString(operator.getArgument(0, String.class));
default:
- throw newUnexpectedArgumentException(propertySniffer.getOperator(),
+ throw newUnexpectedArgumentException(operator.getOperator(),
ExpressionOperator.LITERAL, ExpressionOperator.VARREF);
}
}
@@ -1310,22 +1272,20 @@ public class YqlParser implements Parser {
wordData = normalizer.normalize(wordData);
}
boolean fromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS,
- Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION);
- boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class,
- Boolean.FALSE,
- "setting for whether to use prefix match of input data");
- boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class,
- Boolean.FALSE,
- "setting for whether to use suffix match of input data");
- boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class,
- Boolean.FALSE,
- "setting for whether to use substring match of input data");
- Preconditions.checkArgument((prefixMatch ? 1 : 0)
- + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2,
- "Only one of prefix, substring and suffix can be set.");
- @NonNull
- final TaggableItem wordItem;
+ Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION);
+ boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class, Boolean.FALSE,
+ "setting for whether to use prefix match of input data");
+ boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class, Boolean.FALSE,
+ "setting for whether to use suffix match of input data");
+ boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, Boolean.FALSE,
+ "setting for whether to use substring match of input data");
+ String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class,
+ Query.Type.ALL.toString(), "grammar for handling word input");
+ Preconditions.checkArgument((prefixMatch ? 1 : 0) +
+ (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2,
+ "Only one of prefix, substring and suffix can be set.");
+ TaggableItem wordItem;
if (exactMatch) {
wordItem = new ExactStringItem(wordData, fromQuery);
} else if (prefixMatch) {
@@ -1340,21 +1300,21 @@ public class YqlParser implements Parser {
wordItem = new WordItem(wordData, fromQuery);
break;
case POSSIBLY:
- if (shouldResegmentWord(field, fromQuery)) {
- wordItem = resegment(field, ast, wordData, fromQuery, parent, language);
+ if (shouldSegment(field, fromQuery) && ! grammar.equals(USER_INPUT_RAW)) {
+ wordItem = segment(field, ast, wordData, fromQuery, parent, language);
} else {
wordItem = new WordItem(wordData, fromQuery);
}
break;
case ALWAYS:
- wordItem = resegment(field, ast, wordData, fromQuery, parent, language);
+ wordItem = segment(field, ast, wordData, fromQuery, parent, language);
break;
default:
throw new IllegalArgumentException("Unexpected segmenting rule: " + segmentPolicy);
}
}
if (wordItem instanceof WordItem) {
- prepareWord(field, ast, fromQuery, (WordItem) wordItem);
+ prepareWord(field, ast, (WordItem) wordItem);
}
if (language != Language.ENGLISH) // mark the language used, unless it's the default
((Item)wordItem).setLanguage(language);
@@ -1362,13 +1322,13 @@ public class YqlParser implements Parser {
}
@SuppressWarnings({"deprecation"})
- private boolean shouldResegmentWord(String field, boolean fromQuery) {
- return resegment && fromQuery && ! indexFactsSession.getIndex(field).isAttribute();
+ private boolean shouldSegment(String field, boolean fromQuery) {
+ return fromQuery && ! indexFactsSession.getIndex(field).isAttribute();
}
@NonNull
- private TaggableItem resegment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
- boolean fromQuery, Class<?> parent, Language language) {
+ private TaggableItem segment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
+ boolean fromQuery, Class<?> parent, Language language) {
String toSegment = wordData;
Substring s = getOrigin(ast);
Language usedLanguage = language == null ? currentlyParsing.getLanguage() : language;
@@ -1387,7 +1347,7 @@ public class YqlParser implements Parser {
((PhraseSegmentItem) wordItem).setIndexName(field);
for (String w : words) {
WordItem segment = new WordItem(w, fromQuery);
- prepareWord(field, ast, fromQuery, segment);
+ prepareWord(field, ast, segment);
((PhraseSegmentItem) wordItem).addItem(segment);
}
((PhraseSegmentItem) wordItem).lock();
@@ -1404,16 +1364,9 @@ public class YqlParser implements Parser {
return parent == EquivItem.class;
}
- private void prepareWord(String field, OperatorNode<ExpressionOperator> ast, boolean fromQuery,
- WordItem wordItem) {
+ private void prepareWord(String field, OperatorNode<ExpressionOperator> ast, WordItem wordItem) {
wordItem.setIndexName(field);
wordStyleSettings(ast, wordItem);
- if (shouldResegmentWord(field, fromQuery)) {
- // force re-stemming, new case normalization, etc
- wordItem.setStemmed(false);
- wordItem.setLowercased(false);
- wordItem.setNormalizable(true);
- }
}
@NonNull
@@ -1421,10 +1374,12 @@ public class YqlParser implements Parser {
{
Map<?, ?> connectivity = getAnnotation(ast, CONNECTIVITY, Map.class, null, "connectivity settings");
if (connectivity != null) {
- connectedItems.add(new ConnectedItem(out, getMapValue(
- CONNECTIVITY, connectivity, CONNECTION_ID,
- Integer.class), getMapValue(CONNECTIVITY, connectivity,
- CONNECTION_WEIGHT, Number.class).doubleValue()));
+ connectedItems.add(new ConnectedItem(out,
+ getMapValue(CONNECTIVITY, connectivity, CONNECTION_ID,
+ Integer.class), getMapValue(CONNECTIVITY,
+ connectivity,
+ CONNECTION_WEIGHT,
+ Number.class).doubleValue()));
}
Number significance = getAnnotation(ast, SIGNIFICANCE, Number.class, null, "term significance");
if (significance != null) {
@@ -1713,14 +1668,6 @@ public class YqlParser implements Parser {
return new IllegalArgumentException(out.toString());
}
- String getSegmenterBackend() {
- return segmenterBackend;
- }
-
- Version getSegmenterVersion() {
- return segmenterVersion;
- }
-
private static final class ConnectedItem {
final double weight;