aboutsummaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2018-10-30 21:37:15 +0100
committerGitHub <noreply@github.com>2018-10-30 21:37:15 +0100
commita5df4f75ac420eaa648b71b4a7a1da203be8d053 (patch)
tree46fd731aa85347ada83032ff6d2fe79a5eec1b3a /container-search
parentef0b462ee638974706820a422f5fa2692ebb62f4 (diff)
parent2473a58d220178dca8294072ee62e3d6118f546b (diff)
Merge pull request #7484 from vespa-engine/bratseth/segment-yql-by-default-take-2
Bratseth/segment yql by default take 2
Diffstat (limited to 'container-search')
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java23
-rw-r--r--container-search/src/main/java/com/yahoo/search/yql/YqlParser.java140
-rw-r--r--container-search/src/test/java/com/yahoo/search/yql/ResegmentingTestCase.java147
-rw-r--r--container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java94
4 files changed, 95 insertions, 309 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java
index 4e9d3d11cc5..03e85fa3260 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java
@@ -6,13 +6,13 @@ package com.yahoo.prelude.query;
* An interface used for anything which may be addressed using an external,
* unique ID in the query tree in the backend.
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
public interface TaggableItem {
- public int getUniqueID();
- public void setUniqueID(int id);
- public boolean hasUniqueID();
+ int getUniqueID();
+ void setUniqueID(int id);
+ boolean hasUniqueID();
/**
* Set the connectivity to another term in the same query tree.
@@ -30,9 +30,9 @@ public interface TaggableItem {
* @param connectivity a value between 0 (none) and 1 (maximal), defining the connectivity between this and the
* argument item. The default connectivity is 0.1.
*/
- public void setConnectivity(Item item, double connectivity);
- public Item getConnectedItem();
- public double getConnectivity();
+ void setConnectivity(Item item, double connectivity);
+ Item getConnectedItem();
+ double getConnectivity();
/**
@@ -41,8 +41,9 @@ public interface TaggableItem {
* This influences ranking features which take term significance into account and overrides the default
* partial corpus based term significance computation happening in the backend.
*/
- public void setSignificance(double significance);
- public boolean hasExplicitSignificance();
- public void setExplicitSignificance(boolean significance);
- public double getSignificance();
+ void setSignificance(double significance);
+ boolean hasExplicitSignificance();
+ void setExplicitSignificance(boolean significance);
+ double getSignificance();
+
}
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
index e0e9042e1a3..3d9f6c48447 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
@@ -18,10 +18,8 @@ import com.google.common.annotations.Beta;
import com.google.common.base.Preconditions;
import com.yahoo.collections.LazyMap;
import com.yahoo.collections.LazySet;
-import com.yahoo.collections.Tuple2;
import com.yahoo.component.Version;
import com.yahoo.language.Language;
-import com.yahoo.language.Linguistics;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.Segmenter;
@@ -117,9 +115,6 @@ public class YqlParser implements Parser {
private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it";
private static final String ORIGIN_DESCRIPTION = "string origin for a term";
private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking";
- private static final String SEGMENTER_BACKEND = "backend";
- private static final String SEGMENTER = "segmenter";
- private static final String SEGMENTER_VERSION = "version";
private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it";
private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item";
private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty";
@@ -190,8 +185,6 @@ public class YqlParser implements Parser {
private final Detector detector;
private final Set<String> yqlSources = LazySet.newHashSet();
private final Set<String> yqlSummaryFields = LazySet.newHashSet();
- private final String localSegmenterBackend;
- private final Version localSegmenterVersion;
private Integer hits;
private Integer offset;
private Integer timeout;
@@ -201,10 +194,7 @@ public class YqlParser implements Parser {
private IndexNameExpander indexNameExpander = new IndexNameExpander();
private Set<String> docTypes;
private Sorting sorting;
- private String segmenterBackend;
- private Version segmenterVersion;
private boolean queryParser = true;
- private boolean resegment = false;
private final Deque<OperatorNode<?>> annotationStack = new ArrayDeque<>();
private final ParserEnvironment environment;
@@ -238,10 +228,6 @@ public class YqlParser implements Parser {
segmenter = environment.getLinguistics().getSegmenter();
detector = environment.getLinguistics().getDetector();
this.environment = environment;
-
- Tuple2<String, Version> version = environment.getLinguistics().getVersion(Linguistics.Component.SEGMENTER);
- localSegmenterBackend = version.first;
- localSegmenterVersion = version.second;
}
@NonNull
@@ -261,10 +247,7 @@ public class YqlParser implements Parser {
currentlyParsing = query;
docTypes = null;
sorting = null;
- segmenterBackend = null;
- segmenterVersion = null;
// queryParser set prior to calling this
- resegment = false;
return buildTree(parseYqlProgram());
}
@@ -287,32 +270,12 @@ public class YqlParser implements Parser {
filterPart.getArguments().length);
populateYqlSources(filterPart.<OperatorNode<?>> getArgument(0));
OperatorNode<ExpressionOperator> filterExpression = filterPart.getArgument(1);
- populateLinguisticsAnnotations(filterExpression);
Item root = convertExpression(filterExpression);
connectItems();
userQuery = null;
return new QueryTree(root);
}
- private void populateLinguisticsAnnotations(OperatorNode<ExpressionOperator> filterExpression) {
- Map<?, ?> segmenter = getAnnotation(filterExpression, SEGMENTER,
- Map.class, null, "segmenter engine and version");
- if (segmenter == null) {
- segmenterVersion = null;
- segmenterBackend = null;
- resegment = false;
- } else {
- segmenterBackend = getMapValue(SEGMENTER, segmenter, SEGMENTER_BACKEND, String.class);
- try {
- segmenterVersion = new Version(getMapValue(SEGMENTER, segmenter, SEGMENTER_VERSION, String.class));
- } catch (RuntimeException e) {
- segmenterVersion = null;
- }
- resegment = ! localSegmenterBackend.equals(segmenterBackend) ||
- ! localSegmenterVersion.equals(segmenterVersion);
- }
- }
-
private void populateYqlSources(OperatorNode<?> filterArgs) {
yqlSources.clear();
if (filterArgs.getOperator() == SequenceOperator.SCAN) {
@@ -614,8 +577,7 @@ public class YqlParser implements Parser {
}
phrase.setIndexName(field);
- if (resegment
- && getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) {
+ if (getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) {
words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage());
}
@@ -719,16 +681,16 @@ public class YqlParser implements Parser {
return Language.ENGLISH;
}
- private String getStringContents(OperatorNode<ExpressionOperator> propertySniffer) {
- switch (propertySniffer.getOperator()) {
+ private String getStringContents(OperatorNode<ExpressionOperator> operator) {
+ switch (operator.getOperator()) {
case LITERAL:
- return propertySniffer.getArgument(0, String.class);
+ return operator.getArgument(0, String.class);
case VARREF:
Preconditions.checkState(userQuery != null,
"properties must be available when trying to fetch user input");
- return userQuery.properties().getString(propertySniffer.getArgument(0, String.class));
+ return userQuery.properties().getString(operator.getArgument(0, String.class));
default:
- throw newUnexpectedArgumentException(propertySniffer.getOperator(),
+ throw newUnexpectedArgumentException(operator.getOperator(),
ExpressionOperator.LITERAL, ExpressionOperator.VARREF);
}
}
@@ -1292,15 +1254,23 @@ public class YqlParser implements Parser {
OperatorNode<ExpressionOperator> ast, Class<?> parent,
SegmentWhen segmentPolicy) {
String wordData = getStringContents(ast);
- return instantiateWordItem(field, wordData, ast, parent, segmentPolicy, false, decideParsingLanguage(ast, wordData));
+ return instantiateWordItem(field, wordData, ast, parent, segmentPolicy, null, decideParsingLanguage(ast, wordData));
}
+ /**
+ * Converts the payload of a contains statement into an Item
+ *
+ * @param exactMatch true to always create an ExactStringItem, false to never do so, and null to
+ * make the choice based on the field settings
+ */
+ // TODO: Clean up such that there is one way to look up an Index instance
+ // which always expands first, but not using getIndex, which performs checks that don't always work
@NonNull
private Item instantiateWordItem(String field,
String rawWord,
OperatorNode<ExpressionOperator> ast, Class<?> parent,
SegmentWhen segmentPolicy,
- boolean exactMatch,
+ Boolean exactMatch,
Language language) {
String wordData = rawWord;
if (getAnnotation(ast, NFKC, Boolean.class, Boolean.FALSE,
@@ -1310,23 +1280,22 @@ public class YqlParser implements Parser {
wordData = normalizer.normalize(wordData);
}
boolean fromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS,
- Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION);
- boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class,
- Boolean.FALSE,
- "setting for whether to use prefix match of input data");
- boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class,
- Boolean.FALSE,
- "setting for whether to use suffix match of input data");
- boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class,
- Boolean.FALSE,
- "setting for whether to use substring match of input data");
- Preconditions.checkArgument((prefixMatch ? 1 : 0)
- + (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2,
- "Only one of prefix, substring and suffix can be set.");
- @NonNull
- final TaggableItem wordItem;
-
- if (exactMatch) {
+ Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION);
+ boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class, Boolean.FALSE,
+ "setting for whether to use prefix match of input data");
+ boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class, Boolean.FALSE,
+ "setting for whether to use suffix match of input data");
+ boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, Boolean.FALSE,
+ "setting for whether to use substring match of input data");
+ boolean exact = exactMatch != null ? exactMatch : indexFactsSession.getIndex(indexNameExpander.expand(field)).isExact();
+ String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class,
+ Query.Type.ALL.toString(), "grammar for handling word input");
+ Preconditions.checkArgument((prefixMatch ? 1 : 0) +
+ (substrMatch ? 1 : 0) + (suffixMatch ? 1 : 0) < 2,
+ "Only one of prefix, substring and suffix can be set.");
+
+ TaggableItem wordItem;
+ if (exact) {
wordItem = new ExactStringItem(wordData, fromQuery);
} else if (prefixMatch) {
wordItem = new PrefixItem(wordData, fromQuery);
@@ -1340,21 +1309,21 @@ public class YqlParser implements Parser {
wordItem = new WordItem(wordData, fromQuery);
break;
case POSSIBLY:
- if (shouldResegmentWord(field, fromQuery)) {
- wordItem = resegment(field, ast, wordData, fromQuery, parent, language);
+ if (shouldSegment(field, ast, fromQuery) && ! grammar.equals(USER_INPUT_RAW)) {
+ wordItem = segment(field, ast, wordData, fromQuery, parent, language);
} else {
wordItem = new WordItem(wordData, fromQuery);
}
break;
case ALWAYS:
- wordItem = resegment(field, ast, wordData, fromQuery, parent, language);
+ wordItem = segment(field, ast, wordData, fromQuery, parent, language);
break;
default:
throw new IllegalArgumentException("Unexpected segmenting rule: " + segmentPolicy);
}
}
if (wordItem instanceof WordItem) {
- prepareWord(field, ast, fromQuery, (WordItem) wordItem);
+ prepareWord(field, ast, (WordItem) wordItem);
}
if (language != Language.ENGLISH) // mark the language used, unless it's the default
((Item)wordItem).setLanguage(language);
@@ -1362,13 +1331,13 @@ public class YqlParser implements Parser {
}
@SuppressWarnings({"deprecation"})
- private boolean shouldResegmentWord(String field, boolean fromQuery) {
- return resegment && fromQuery && ! indexFactsSession.getIndex(field).isAttribute();
+ private boolean shouldSegment(String field, OperatorNode<ExpressionOperator> ast, boolean fromQuery) {
+ return fromQuery && ! indexFactsSession.getIndex(indexNameExpander.expand(field)).isAttribute();
}
@NonNull
- private TaggableItem resegment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
- boolean fromQuery, Class<?> parent, Language language) {
+ private TaggableItem segment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
+ boolean fromQuery, Class<?> parent, Language language) {
String toSegment = wordData;
Substring s = getOrigin(ast);
Language usedLanguage = language == null ? currentlyParsing.getLanguage() : language;
@@ -1387,7 +1356,7 @@ public class YqlParser implements Parser {
((PhraseSegmentItem) wordItem).setIndexName(field);
for (String w : words) {
WordItem segment = new WordItem(w, fromQuery);
- prepareWord(field, ast, fromQuery, segment);
+ prepareWord(field, ast, segment);
((PhraseSegmentItem) wordItem).addItem(segment);
}
((PhraseSegmentItem) wordItem).lock();
@@ -1404,16 +1373,9 @@ public class YqlParser implements Parser {
return parent == EquivItem.class;
}
- private void prepareWord(String field, OperatorNode<ExpressionOperator> ast, boolean fromQuery,
- WordItem wordItem) {
+ private void prepareWord(String field, OperatorNode<ExpressionOperator> ast, WordItem wordItem) {
wordItem.setIndexName(field);
wordStyleSettings(ast, wordItem);
- if (shouldResegmentWord(field, fromQuery)) {
- // force re-stemming, new case normalization, etc
- wordItem.setStemmed(false);
- wordItem.setLowercased(false);
- wordItem.setNormalizable(true);
- }
}
@NonNull
@@ -1421,10 +1383,12 @@ public class YqlParser implements Parser {
{
Map<?, ?> connectivity = getAnnotation(ast, CONNECTIVITY, Map.class, null, "connectivity settings");
if (connectivity != null) {
- connectedItems.add(new ConnectedItem(out, getMapValue(
- CONNECTIVITY, connectivity, CONNECTION_ID,
- Integer.class), getMapValue(CONNECTIVITY, connectivity,
- CONNECTION_WEIGHT, Number.class).doubleValue()));
+ connectedItems.add(new ConnectedItem(out,
+ getMapValue(CONNECTIVITY, connectivity, CONNECTION_ID,
+ Integer.class), getMapValue(CONNECTIVITY,
+ connectivity,
+ CONNECTION_WEIGHT,
+ Number.class).doubleValue()));
}
Number significance = getAnnotation(ast, SIGNIFICANCE, Number.class, null, "term significance");
if (significance != null) {
@@ -1713,14 +1677,6 @@ public class YqlParser implements Parser {
return new IllegalArgumentException(out.toString());
}
- String getSegmenterBackend() {
- return segmenterBackend;
- }
-
- Version getSegmenterVersion() {
- return segmenterVersion;
- }
-
private static final class ConnectedItem {
final double weight;
diff --git a/container-search/src/test/java/com/yahoo/search/yql/ResegmentingTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/ResegmentingTestCase.java
deleted file mode 100644
index 1bed8ff2233..00000000000
--- a/container-search/src/test/java/com/yahoo/search/yql/ResegmentingTestCase.java
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.search.yql;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.yahoo.search.query.parser.Parsable;
-import com.yahoo.search.query.parser.ParserEnvironment;
-
-/**
- * Check rules for resegmenting words in YQL+ when segmenter is deemed
- * incompatible. The class under testing is {@link YqlParser}.
- *
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
- */
-public class ResegmentingTestCase {
- private YqlParser parser;
-
- @Before
- public void setUp() throws Exception {
- ParserEnvironment env = new ParserEnvironment();
- parser = new YqlParser(env);
- }
-
- @After
- public void tearDown() throws Exception {
- parser = null;
- }
-
- @Test
- public final void testWord() {
- assertEquals(
- "title:'a b'",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}] (title contains \"a b\");"))
- .toString());
- }
-
- @Test
- public final void testPhraseSegment() {
- assertEquals(
- "title:'c d'",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where"
- + " [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}]"
- + " (title contains ([{\"origin\": {\"offset\": 0, \"length\":3, \"original\": \"c d\"}}]"
- + " phrase(\"a\", \"b\")));"))
- .toString());
- }
-
- @Test
- public final void testPhraseInEquiv() {
- assertEquals(
- "EQUIV title:a title:'c d'",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where"
- + " [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}]"
- + " (title contains"
- + " equiv(\"a\","
- + " ([{\"origin\": {\"offset\": 0, \"length\":3, \"original\": \"c d\"}}]\"b\")"
- + ")"
- + ");"))
- .toString());
- }
-
- @Test
- public final void testPhraseSegmentToAndSegment() {
- assertEquals(
- "SAND title:c title:d",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where"
- + " [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}]"
- + " (title contains ([{\"origin\": {\"offset\": 0, \"length\":3, \"original\": \"c d\"}, \"andSegmenting\": true}]"
- + " phrase(\"a\", \"b\")));"))
- .toString());
- }
-
- @Test
- public final void testPhraseSegmentInPhrase() {
- assertEquals(
- "title:\"a 'c d'\"",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}]"
- + " (title contains phrase(\"a\","
- + " ([{\"origin\": {\"offset\": 0, \"length\":3, \"original\": \"c d\"}}]"
- + " phrase(\"e\", \"f\"))));"))
- .toString());
- }
-
- @Test
- public final void testWordNoImplicitTransforms() {
- assertEquals(
- "title:a b",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}] (title contains ([{\"implicitTransforms\": false}]\"a b\"));"))
- .toString());
- }
-
- @Test
- public final void testPhraseSegmentNoImplicitTransforms() {
- assertEquals(
- "title:'a b'",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where"
- + " [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}]"
- + " (title contains ([{\"origin\": {\"offset\": 0, \"length\":3, \"original\": \"c d\"}, \"implicitTransforms\": false}]"
- + " phrase(\"a\", \"b\")));"))
- .toString());
- }
-
- @Test
- public final void testPhraseSegmentToAndSegmentNoImplicitTransforms() {
- assertEquals(
- "SAND title:a title:b",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where"
- + " [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}]"
- + " (title contains ([{\"origin\": {\"offset\": 0, \"length\":3, \"original\": \"c d\"}, \"andSegmenting\": true, \"implicitTransforms\": false}]"
- + " phrase(\"a\", \"b\")));"))
- .toString());
- }
-
- @Test
- public final void testPhraseSegmentInPhraseNoImplicitTransforms() {
- assertEquals(
- "title:\"a 'e f'\"",
- parser.parse(
- new Parsable()
- .setQuery("select * from sources * where [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": \"nonexistant\"}}]"
- + " (title contains phrase(\"a\","
- + " ([{\"origin\": {\"offset\": 0, \"length\":3, \"original\": \"c d\"}, \"implicitTransforms\": false}]"
- + " phrase(\"e\", \"f\"))));"))
- .toString());
- }
-
-}
diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
index e2325e52f63..127820bb7ae 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
@@ -11,6 +11,7 @@ import com.yahoo.prelude.query.IndexedItem;
import com.yahoo.prelude.query.ExactStringItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.PhraseItem;
+import com.yahoo.prelude.query.PhraseSegmentItem;
import com.yahoo.prelude.query.PrefixItem;
import com.yahoo.prelude.query.QueryCanonicalizer;
import com.yahoo.prelude.query.RegExpItem;
@@ -55,15 +56,15 @@ import static org.junit.Assert.fail;
/**
* Specification for the conversion of YQL+ expressions to Vespa search queries.
*
- * @author steinar
- * @author stiankri
+ * @author Steinar Knutsen
+ * @author Stian Kristoffersen
*/
public class YqlParserTestCase {
private final YqlParser parser = new YqlParser(new ParserEnvironment());
@Test
- public void requireThatDefaultsAreSane() {
+ public void testParserDefaults() {
assertTrue(parser.isQueryParser());
assertNull(parser.getDocTypes());
}
@@ -76,7 +77,7 @@ public class YqlParserTestCase {
}
@Test
- public void requireThatGroupingStepCanBeParsed() {
+ public void testGroupingStep() {
assertParse("select foo from bar where baz contains 'cox';",
"baz:cox");
assertEquals("[]",
@@ -98,7 +99,7 @@ public class YqlParserTestCase {
}
@Test
- public void requireThatGroupingContinuationCanBeParsed() {
+ public void testGroupingContinuation() {
assertParse("select foo from bar where baz contains 'cox' " +
"| [{ 'continuations': ['BCBCBCBEBG', 'BCBKCBACBKCCK'] }]all(group(a) each(output(count())));",
"baz:cox");
@@ -320,12 +321,14 @@ public class YqlParserTestCase {
@Test
public void testRaw() {
+ // Default: Not raw, for comparison
Item root = parse("select foo from bar where baz contains (\"yoni jo dima\");").getRoot();
- assertTrue(root instanceof WordItem);
- assertFalse(root instanceof ExactStringItem);
- assertEquals("yoni jo dima", ((WordItem)root).getWord());
+ assertEquals("baz:'yoni jo dima'", root.toString());
+ assertFalse(root instanceof WordItem);
+ assertTrue(root instanceof PhraseSegmentItem);
root = parse("select foo from bar where baz contains ([{\"grammar\":\"raw\"}]\"yoni jo dima\");").getRoot();
+ assertEquals("baz:yoni jo dima", root.toString());
assertTrue(root instanceof WordItem);
assertFalse(root instanceof ExactStringItem);
assertEquals("yoni jo dima", ((WordItem)root).getWord());
@@ -735,44 +738,17 @@ public class YqlParserTestCase {
@Test
public void testSegmenting() {
- assertParse("select * from bar where ([{\"segmenter\": {\"version\": \"58.67.49\", \"backend\": " +
- "\"yell\"}}] title contains \"madonna\");",
- "title:madonna");
- assertEquals("yell", parser.getSegmenterBackend());
- assertEquals(new Version("58.67.49"), parser.getSegmenterVersion());
-
- assertParse("select * from bar where ([{\"segmenter\": {\"version\": \"8.7.3\", \"backend\": " +
- "\"yell\"}}]([{\"targetNumHits\": 9999438}] weakAnd(format contains \"online\", title contains " +
- "\"madonna\")));",
- "WAND(9999438) format:online title:madonna");
- assertEquals("yell", parser.getSegmenterBackend());
- assertEquals(new Version("8.7.3"), parser.getSegmenterVersion());
-
- assertParse("select * from bar where [{\"segmenter\": {\"version\": \"18.47.39\", \"backend\": " +
- "\"yell\"}}] ([{\"targetNumHits\": 99909438}] weakAnd(format contains \"online\", title contains " +
- "\"madonna\"));",
- "WAND(99909438) format:online title:madonna");
- assertEquals("yell", parser.getSegmenterBackend());
- assertEquals(new Version("18.47.39"), parser.getSegmenterVersion());
-
- assertParse("select * from bar where [{\"targetNumHits\": 99909438}] weakAnd(format contains " +
- "\"online\", title contains \"madonna\");",
- "WAND(99909438) format:online title:madonna");
- assertNull(parser.getSegmenterBackend());
- assertNull(parser.getSegmenterVersion());
-
- assertParse("select * from bar where [{\"segmenter\": {\"version\": \"58.67.49\", \"backend\": " +
- "\"yell\"}}](title contains \"madonna\") order by shoesize;",
- "title:madonna");
- assertEquals("yell", parser.getSegmenterBackend());
- assertEquals(new Version("58.67.49"), parser.getSegmenterVersion());
+ assertParse("select * from bar where title contains 'foo.bar';",
+ "title:'foo bar'");
+
+ assertParse("select * from bar where title contains 'foo&123';",
+ "title:'foo 123'");
}
@Test
public void testNegativeHitLimit() {
- assertParse(
- "select * from sources * where [{\"hitLimit\": -38}]range(foo, 0, 1);",
- "foo:[0;1;-38]");
+ assertParse("select * from sources * where [{\"hitLimit\": -38}]range(foo, 0, 1);",
+ "foo:[0;1;-38]");
}
@Test
@@ -830,26 +806,26 @@ public class YqlParserTestCase {
@Test
public void testMoreInheritedAnnotations() {
- final String yqlQuery = "select * from sources * where "
- + "([{\"ranked\": false}](foo contains \"a\" "
- + "and ([{\"ranked\": true}](bar contains \"b\" "
- + "or ([{\"ranked\": false}](foo contains \"c\" "
- + "and foo contains ([{\"ranked\": true}]\"d\")))))));";
+ String yqlQuery = "select * from sources * where " +
+ "([{\"ranked\": false}](foo contains \"a\" " +
+ "and ([{\"ranked\": true}](bar contains \"b\" " +
+ "or ([{\"ranked\": false}](foo contains \"c\" " +
+ "and foo contains ([{\"ranked\": true}]\"d\")))))));";
QueryTree x = parse(yqlQuery);
List<IndexedItem> terms = QueryTree.getPositiveTerms(x);
assertEquals(4, terms.size());
for (IndexedItem term : terms) {
switch (term.getIndexedString()) {
- case "a":
- case "c":
- assertFalse(((Item) term).isRanked());
- break;
- case "b":
- case "d":
- assertTrue(((Item) term).isRanked());
- break;
- default:
- fail();
+ case "a":
+ case "c":
+ assertFalse(((Item) term).isRanked());
+ break;
+ case "b":
+ case "d":
+ assertTrue(((Item) term).isRanked());
+ break;
+ default:
+ fail();
}
}
}
@@ -921,8 +897,8 @@ public class YqlParserTestCase {
private void checkWordAlternativesContent(WordAlternativesItem alternatives) {
boolean seenTree = false;
boolean seenForest = false;
- final String forest = "trees";
- final String tree = "tree";
+ String forest = "trees";
+ String tree = "tree";
assertEquals(2, alternatives.getAlternatives().size());
for (WordAlternativesItem.Alternative alternative : alternatives.getAlternatives()) {
if (tree.equals(alternative.word)) {