From 365972496256236dfd3599d547a167af0d2d4de2 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Fri, 26 Oct 2018 11:03:28 +0000 Subject: Revert "Merge pull request #7409 from vespa-engine/revert-7380-bratseth/segment-yql-by-default" This reverts commit 1cb09174cb579936eae49f7db632113795b0666a, reversing changes made to eedcfc980db65da04eaca866693ef215f337f325. --- .../main/java/com/yahoo/search/yql/YqlParser.java | 123 ++++++--------------- 1 file changed, 35 insertions(+), 88 deletions(-) (limited to 'container-search/src/main/java/com/yahoo/search') diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index e0e9042e1a3..e4ae759eec7 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -18,10 +18,8 @@ import com.google.common.annotations.Beta; import com.google.common.base.Preconditions; import com.yahoo.collections.LazyMap; import com.yahoo.collections.LazySet; -import com.yahoo.collections.Tuple2; import com.yahoo.component.Version; import com.yahoo.language.Language; -import com.yahoo.language.Linguistics; import com.yahoo.language.detect.Detector; import com.yahoo.language.process.Normalizer; import com.yahoo.language.process.Segmenter; @@ -117,9 +115,6 @@ public class YqlParser implements Parser { private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it"; private static final String ORIGIN_DESCRIPTION = "string origin for a term"; private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking"; - private static final String SEGMENTER_BACKEND = "backend"; - private static final String SEGMENTER = "segmenter"; - private static final String SEGMENTER_VERSION = "version"; private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it"; private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item"; private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty"; @@ -190,8 +185,6 @@ public class YqlParser implements Parser { private final Detector detector; private final Set yqlSources = LazySet.newHashSet(); private final Set yqlSummaryFields = LazySet.newHashSet(); - private final String localSegmenterBackend; - private final Version localSegmenterVersion; private Integer hits; private Integer offset; private Integer timeout; @@ -201,10 +194,7 @@ public class YqlParser implements Parser { private IndexNameExpander indexNameExpander = new IndexNameExpander(); private Set docTypes; private Sorting sorting; - private String segmenterBackend; - private Version segmenterVersion; private boolean queryParser = true; - private boolean resegment = false; private final Deque> annotationStack = new ArrayDeque<>(); private final ParserEnvironment environment; @@ -238,10 +228,6 @@ public class YqlParser implements Parser { segmenter = environment.getLinguistics().getSegmenter(); detector = environment.getLinguistics().getDetector(); this.environment = environment; - - Tuple2 version = environment.getLinguistics().getVersion(Linguistics.Component.SEGMENTER); - localSegmenterBackend = version.first; - localSegmenterVersion = version.second; } @NonNull @@ -261,10 +247,7 @@ public class YqlParser implements Parser { currentlyParsing = query; docTypes = null; sorting = null; - segmenterBackend = null; - segmenterVersion = null; // queryParser set prior to calling this - resegment = false; return buildTree(parseYqlProgram()); } @@ -287,32 +270,12 @@ public class YqlParser implements Parser { filterPart.getArguments().length); populateYqlSources(filterPart.> getArgument(0)); OperatorNode filterExpression = filterPart.getArgument(1); - populateLinguisticsAnnotations(filterExpression); Item root = convertExpression(filterExpression); connectItems(); userQuery = null; return new QueryTree(root); } - private void populateLinguisticsAnnotations(OperatorNode filterExpression) { - Map segmenter = getAnnotation(filterExpression, SEGMENTER, - Map.class, null, "segmenter engine and version"); - if (segmenter == null) { - segmenterVersion = null; - segmenterBackend = null; - resegment = false; - } else { - segmenterBackend = getMapValue(SEGMENTER, segmenter, SEGMENTER_BACKEND, String.class); - try { - segmenterVersion = new Version(getMapValue(SEGMENTER, segmenter, SEGMENTER_VERSION, String.class)); - } catch (RuntimeException e) { - segmenterVersion = null; - } - resegment = ! localSegmenterBackend.equals(segmenterBackend) || - ! localSegmenterVersion.equals(segmenterVersion); - } - } - private void populateYqlSources(OperatorNode filterArgs) { yqlSources.clear(); if (filterArgs.getOperator() == SequenceOperator.SCAN) { @@ -614,8 +577,7 @@ public class YqlParser implements Parser { } phrase.setIndexName(field); - if (resegment - && getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) { + if (getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) { words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage()); } @@ -719,16 +681,16 @@ public class YqlParser implements Parser { return Language.ENGLISH; } - private String getStringContents(OperatorNode propertySniffer) { - switch (propertySniffer.getOperator()) { + private String getStringContents(OperatorNode operator) { + switch (operator.getOperator()) { case LITERAL: - return propertySniffer.getArgument(0, String.class); + return operator.getArgument(0, String.class); case VARREF: Preconditions.checkState(userQuery != null, "properties must be available when trying to fetch user input"); - return userQuery.properties().getString(propertySniffer.getArgument(0, String.class)); + return userQuery.properties().getString(operator.getArgument(0, String.class)); default: - throw newUnexpectedArgumentException(propertySniffer.getOperator(), + throw newUnexpectedArgumentException(operator.getOperator(), ExpressionOperator.LITERAL, ExpressionOperator.VARREF); } } @@ -1310,22 +1272,20 @@ public class YqlParser implements Parser { wordData = normalizer.normalize(wordData); } boolean fromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, - Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION); - boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class, - Boolean.FALSE, - "setting for whether to use prefix match of input data"); - boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class, - Boolean.FALSE, - "setting for whether to use suffix match of input data"); - boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, - Boolean.FALSE, - "setting for whether to use substring match of input data"); - Preconditions.checkArgument((prefixMatch ? 1 : 0) - + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2, - "Only one of prefix, substring and suffix can be set."); - @NonNull - final TaggableItem wordItem; + Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION); + boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class, Boolean.FALSE, + "setting for whether to use prefix match of input data"); + boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class, Boolean.FALSE, + "setting for whether to use suffix match of input data"); + boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, Boolean.FALSE, + "setting for whether to use substring match of input data"); + String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class, + Query.Type.ALL.toString(), "grammar for handling word input"); + Preconditions.checkArgument((prefixMatch ? 1 : 0) + + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2, + "Only one of prefix, substring and suffix can be set."); + TaggableItem wordItem; if (exactMatch) { wordItem = new ExactStringItem(wordData, fromQuery); } else if (prefixMatch) { @@ -1340,21 +1300,21 @@ public class YqlParser implements Parser { wordItem = new WordItem(wordData, fromQuery); break; case POSSIBLY: - if (shouldResegmentWord(field, fromQuery)) { - wordItem = resegment(field, ast, wordData, fromQuery, parent, language); + if (shouldSegment(field, fromQuery) && ! grammar.equals(USER_INPUT_RAW)) { + wordItem = segment(field, ast, wordData, fromQuery, parent, language); } else { wordItem = new WordItem(wordData, fromQuery); } break; case ALWAYS: - wordItem = resegment(field, ast, wordData, fromQuery, parent, language); + wordItem = segment(field, ast, wordData, fromQuery, parent, language); break; default: throw new IllegalArgumentException("Unexpected segmenting rule: " + segmentPolicy); } } if (wordItem instanceof WordItem) { - prepareWord(field, ast, fromQuery, (WordItem) wordItem); + prepareWord(field, ast, (WordItem) wordItem); } if (language != Language.ENGLISH) // mark the language used, unless it's the default ((Item)wordItem).setLanguage(language); @@ -1362,13 +1322,13 @@ public class YqlParser implements Parser { } @SuppressWarnings({"deprecation"}) - private boolean shouldResegmentWord(String field, boolean fromQuery) { - return resegment && fromQuery && ! indexFactsSession.getIndex(field).isAttribute(); + private boolean shouldSegment(String field, boolean fromQuery) { + return fromQuery && ! indexFactsSession.getIndex(field).isAttribute(); } @NonNull - private TaggableItem resegment(String field, OperatorNode ast, String wordData, - boolean fromQuery, Class parent, Language language) { + private TaggableItem segment(String field, OperatorNode ast, String wordData, + boolean fromQuery, Class parent, Language language) { String toSegment = wordData; Substring s = getOrigin(ast); Language usedLanguage = language == null ? currentlyParsing.getLanguage() : language; @@ -1387,7 +1347,7 @@ public class YqlParser implements Parser { ((PhraseSegmentItem) wordItem).setIndexName(field); for (String w : words) { WordItem segment = new WordItem(w, fromQuery); - prepareWord(field, ast, fromQuery, segment); + prepareWord(field, ast, segment); ((PhraseSegmentItem) wordItem).addItem(segment); } ((PhraseSegmentItem) wordItem).lock(); @@ -1404,16 +1364,9 @@ public class YqlParser implements Parser { return parent == EquivItem.class; } - private void prepareWord(String field, OperatorNode ast, boolean fromQuery, - WordItem wordItem) { + private void prepareWord(String field, OperatorNode ast, WordItem wordItem) { wordItem.setIndexName(field); wordStyleSettings(ast, wordItem); - if (shouldResegmentWord(field, fromQuery)) { - // force re-stemming, new case normalization, etc - wordItem.setStemmed(false); - wordItem.setLowercased(false); - wordItem.setNormalizable(true); - } } @NonNull @@ -1421,10 +1374,12 @@ public class YqlParser implements Parser { { Map connectivity = getAnnotation(ast, CONNECTIVITY, Map.class, null, "connectivity settings"); if (connectivity != null) { - connectedItems.add(new ConnectedItem(out, getMapValue( - CONNECTIVITY, connectivity, CONNECTION_ID, - Integer.class), getMapValue(CONNECTIVITY, connectivity, - CONNECTION_WEIGHT, Number.class).doubleValue())); + connectedItems.add(new ConnectedItem(out, + getMapValue(CONNECTIVITY, connectivity, CONNECTION_ID, + Integer.class), getMapValue(CONNECTIVITY, + connectivity, + CONNECTION_WEIGHT, + Number.class).doubleValue())); } Number significance = getAnnotation(ast, SIGNIFICANCE, Number.class, null, "term significance"); if (significance != null) { @@ -1713,14 +1668,6 @@ public class YqlParser implements Parser { return new IllegalArgumentException(out.toString()); } - String getSegmenterBackend() { - return segmenterBackend; - } - - Version getSegmenterVersion() { - return segmenterVersion; - } - private static final class ConnectedItem { final double weight; -- cgit v1.2.3