aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
diff options
context:
space:
mode:
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/yql/YqlParser.java')
-rw-r--r--container-search/src/main/java/com/yahoo/search/yql/YqlParser.java86
1 files changed, 18 insertions, 68 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
index ee288de4e88..e4ae759eec7 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
@@ -18,10 +18,8 @@ import com.google.common.annotations.Beta;
import com.google.common.base.Preconditions;
import com.yahoo.collections.LazyMap;
import com.yahoo.collections.LazySet;
-import com.yahoo.collections.Tuple2;
import com.yahoo.component.Version;
import com.yahoo.language.Language;
-import com.yahoo.language.Linguistics;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.Segmenter;
@@ -117,9 +115,6 @@ public class YqlParser implements Parser {
private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it";
private static final String ORIGIN_DESCRIPTION = "string origin for a term";
private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking";
- private static final String SEGMENTER_BACKEND = "backend";
- private static final String SEGMENTER = "segmenter";
- private static final String SEGMENTER_VERSION = "version";
private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it";
private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item";
private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty";
@@ -190,8 +185,6 @@ public class YqlParser implements Parser {
private final Detector detector;
private final Set<String> yqlSources = LazySet.newHashSet();
private final Set<String> yqlSummaryFields = LazySet.newHashSet();
- private final String localSegmenterBackend;
- private final Version localSegmenterVersion;
private Integer hits;
private Integer offset;
private Integer timeout;
@@ -201,10 +194,7 @@ public class YqlParser implements Parser {
private IndexNameExpander indexNameExpander = new IndexNameExpander();
private Set<String> docTypes;
private Sorting sorting;
- private String segmenterBackend;
- private Version segmenterVersion;
private boolean queryParser = true;
- private boolean resegment = false;
private final Deque<OperatorNode<?>> annotationStack = new ArrayDeque<>();
private final ParserEnvironment environment;
@@ -238,10 +228,6 @@ public class YqlParser implements Parser {
segmenter = environment.getLinguistics().getSegmenter();
detector = environment.getLinguistics().getDetector();
this.environment = environment;
-
- Tuple2<String, Version> version = environment.getLinguistics().getVersion(Linguistics.Component.SEGMENTER);
- localSegmenterBackend = version.first;
- localSegmenterVersion = version.second;
}
@NonNull
@@ -261,10 +247,7 @@ public class YqlParser implements Parser {
currentlyParsing = query;
docTypes = null;
sorting = null;
- segmenterBackend = null;
- segmenterVersion = null;
// queryParser set prior to calling this
- resegment = false;
return buildTree(parseYqlProgram());
}
@@ -287,32 +270,12 @@ public class YqlParser implements Parser {
filterPart.getArguments().length);
populateYqlSources(filterPart.<OperatorNode<?>> getArgument(0));
OperatorNode<ExpressionOperator> filterExpression = filterPart.getArgument(1);
- populateLinguisticsAnnotations(filterExpression);
Item root = convertExpression(filterExpression);
connectItems();
userQuery = null;
return new QueryTree(root);
}
- private void populateLinguisticsAnnotations(OperatorNode<ExpressionOperator> filterExpression) {
- Map<?, ?> segmenter = getAnnotation(filterExpression, SEGMENTER,
- Map.class, null, "segmenter engine and version");
- if (segmenter == null) {
- segmenterVersion = null;
- segmenterBackend = null;
- resegment = false;
- } else {
- segmenterBackend = getMapValue(SEGMENTER, segmenter, SEGMENTER_BACKEND, String.class);
- try {
- segmenterVersion = new Version(getMapValue(SEGMENTER, segmenter, SEGMENTER_VERSION, String.class));
- } catch (RuntimeException e) {
- segmenterVersion = null;
- }
- resegment = ! localSegmenterBackend.equals(segmenterBackend) ||
- ! localSegmenterVersion.equals(segmenterVersion);
- }
- }
-
private void populateYqlSources(OperatorNode<?> filterArgs) {
yqlSources.clear();
if (filterArgs.getOperator() == SequenceOperator.SCAN) {
@@ -614,8 +577,7 @@ public class YqlParser implements Parser {
}
phrase.setIndexName(field);
- if (resegment
- && getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) {
+ if (getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) {
words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage());
}
@@ -719,16 +681,16 @@ public class YqlParser implements Parser {
return Language.ENGLISH;
}
- private String getStringContents(OperatorNode<ExpressionOperator> propertySniffer) {
- switch (propertySniffer.getOperator()) {
+ private String getStringContents(OperatorNode<ExpressionOperator> operator) {
+ switch (operator.getOperator()) {
case LITERAL:
- return propertySniffer.getArgument(0, String.class);
+ return operator.getArgument(0, String.class);
case VARREF:
Preconditions.checkState(userQuery != null,
"properties must be available when trying to fetch user input");
- return userQuery.properties().getString(propertySniffer.getArgument(0, String.class));
+ return userQuery.properties().getString(operator.getArgument(0, String.class));
default:
- throw newUnexpectedArgumentException(propertySniffer.getOperator(),
+ throw newUnexpectedArgumentException(operator.getOperator(),
ExpressionOperator.LITERAL, ExpressionOperator.VARREF);
}
}
@@ -1317,6 +1279,8 @@ public class YqlParser implements Parser {
"setting for whether to use suffix match of input data");
boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, Boolean.FALSE,
"setting for whether to use substring match of input data");
+ String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class,
+ Query.Type.ALL.toString(), "grammar for handling word input");
Preconditions.checkArgument((prefixMatch ? 1 : 0) +
(substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2,
"Only one of prefix, substring and suffix can be set.");
@@ -1336,21 +1300,21 @@ public class YqlParser implements Parser {
wordItem = new WordItem(wordData, fromQuery);
break;
case POSSIBLY:
- if (shouldResegmentWord(field, fromQuery)) {
- wordItem = resegment(field, ast, wordData, fromQuery, parent, language);
+ if (shouldSegment(field, fromQuery) && ! grammar.equals(USER_INPUT_RAW)) {
+ wordItem = segment(field, ast, wordData, fromQuery, parent, language);
} else {
wordItem = new WordItem(wordData, fromQuery);
}
break;
case ALWAYS:
- wordItem = resegment(field, ast, wordData, fromQuery, parent, language);
+ wordItem = segment(field, ast, wordData, fromQuery, parent, language);
break;
default:
throw new IllegalArgumentException("Unexpected segmenting rule: " + segmentPolicy);
}
}
if (wordItem instanceof WordItem) {
- prepareWord(field, ast, fromQuery, (WordItem) wordItem);
+ prepareWord(field, ast, (WordItem) wordItem);
}
if (language != Language.ENGLISH) // mark the language used, unless it's the default
((Item)wordItem).setLanguage(language);
@@ -1358,13 +1322,13 @@ public class YqlParser implements Parser {
}
@SuppressWarnings({"deprecation"})
- private boolean shouldResegmentWord(String field, boolean fromQuery) {
- return resegment && fromQuery && ! indexFactsSession.getIndex(field).isAttribute();
+ private boolean shouldSegment(String field, boolean fromQuery) {
+ return fromQuery && ! indexFactsSession.getIndex(field).isAttribute();
}
@NonNull
- private TaggableItem resegment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
- boolean fromQuery, Class<?> parent, Language language) {
+ private TaggableItem segment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
+ boolean fromQuery, Class<?> parent, Language language) {
String toSegment = wordData;
Substring s = getOrigin(ast);
Language usedLanguage = language == null ? currentlyParsing.getLanguage() : language;
@@ -1383,7 +1347,7 @@ public class YqlParser implements Parser {
((PhraseSegmentItem) wordItem).setIndexName(field);
for (String w : words) {
WordItem segment = new WordItem(w, fromQuery);
- prepareWord(field, ast, fromQuery, segment);
+ prepareWord(field, ast, segment);
((PhraseSegmentItem) wordItem).addItem(segment);
}
((PhraseSegmentItem) wordItem).lock();
@@ -1400,15 +1364,9 @@ public class YqlParser implements Parser {
return parent == EquivItem.class;
}
- private void prepareWord(String field, OperatorNode<ExpressionOperator> ast, boolean fromQuery, WordItem wordItem) {
+ private void prepareWord(String field, OperatorNode<ExpressionOperator> ast, WordItem wordItem) {
wordItem.setIndexName(field);
wordStyleSettings(ast, wordItem);
- if (shouldResegmentWord(field, fromQuery)) {
- // force re-stemming, new case normalization, etc
- wordItem.setStemmed(false);
- wordItem.setLowercased(false);
- wordItem.setNormalizable(true);
- }
}
@NonNull
@@ -1710,14 +1668,6 @@ public class YqlParser implements Parser {
return new IllegalArgumentException(out.toString());
}
- String getSegmenterBackend() {
- return segmenterBackend;
- }
-
- Version getSegmenterVersion() {
- return segmenterVersion;
- }
-
private static final class ConnectedItem {
final double weight;