summaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2020-06-24 11:42:26 +0200
committerJon Bratseth <bratseth@gmail.com>2020-06-24 11:42:26 +0200
commit547cee55234eb52cc8332381ee6dff219c5cd1f9 (patch)
tree06249d92218694c2e9a918277d8f53d4ca7291a4 /container-search
parentc2b57fddcbc9beb3b866d579c6cd68a68590651e (diff)
Segment to phrase not and when inside an expicit phrase
Diffstat (limited to 'container-search')
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java15
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java6
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java5
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java21
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java30
-rw-r--r--container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java4
-rw-r--r--container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java2
7 files changed, 46 insertions, 37 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java
index 542f1393852..9b34fd7d62b 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java
@@ -19,16 +19,13 @@ public class PhraseSegmentItem extends IndexedSegmentItem {
/** Whether this was explicitly written as a phrase using quotes by the user */
private boolean explicit = false;
- /**
- * Creates a phrase containing the same words and state (as pertinent) as
- * the given SegmentAndItem.
- */
- public PhraseSegmentItem(AndSegmentItem segAnd) {
- super(segAnd.getRawWord(), segAnd.stringValue(), segAnd.isFromQuery(), segAnd.isStemmed(), segAnd.getOrigin());
- if (segAnd.getItemCount() > 0) {
- WordItem w = (WordItem) segAnd.getItem(0);
+ /** Creates a phrase containing the same words and state (as pertinent) as the given SegmentAndItem. */
+ public PhraseSegmentItem(AndSegmentItem andSegment) {
+ super(andSegment.getRawWord(), andSegment.stringValue(), andSegment.isFromQuery(), andSegment.isStemmed(), andSegment.getOrigin());
+ if (andSegment.getItemCount() > 0) {
+ WordItem w = (WordItem) andSegment.getItem(0);
setIndexName(w.getIndexName());
- for (Iterator<Item> i = segAnd.getItemIterator(); i.hasNext();) {
+ for (Iterator<Item> i = andSegment.getItemIterator(); i.hasNext();) {
WordItem word = (WordItem) i.next();
addWordItem(word);
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
index cd8579be7f0..902be7e15dd 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
@@ -326,6 +326,7 @@ public abstract class AbstractParser implements CustomParser {
*
* @param indexName the index name which preceeded this token, or null if none
* @param token the token to segment
+ * @param quoted whether this segment is within quoted text
* @return the resulting item
*/
// TODO: The segmenting stuff is a mess now, this will fix it:
@@ -341,7 +342,7 @@ public abstract class AbstractParser implements CustomParser {
// This can be solved by making the segment method language independent by
// always producing a query item containing the token text and resolve it to a WordItem or
// SegmentItem after parsing and language detection.
- protected Item segment(String indexName, Token token) {
+ protected Item segment(String indexName, Token token, boolean quoted) {
String normalizedToken = normalize(token.toString());
if (token.isSpecial()) {
@@ -361,12 +362,13 @@ public abstract class AbstractParser implements CustomParser {
if (segments.size() == 0) {
return null;
}
+
if (segments.size() == 1) {
return new WordItem(segments.get(0), "", true, token.substring);
}
CompositeItem composite;
- if (indexFacts.getIndex(indexName).getPhraseSegmenting()) {
+ if (indexFacts.getIndex(indexName).getPhraseSegmenting() || quoted) {
composite = new PhraseSegmentItem(token.toString(), normalizedToken, true, false, token.substring);
}
else {
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java
index 6d4401aca04..12f63276269 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java
@@ -23,8 +23,7 @@ public class PhraseParser extends AbstractParser {
/**
* Ignores everything but words and numbers
*
- * @return a phrase item if several words/numbers was found,
- * a word item if only one was found
+ * @return a phrase item if several words/numbers was found, a word item if only one was found
*/
private Item forcedPhrase() {
Item firstWord = null;
@@ -38,7 +37,7 @@ public class PhraseParser extends AbstractParser {
}
// Note, this depends on segment never creating AndItems when quoted
// (the second argument) is true.
- Item newWord = segment(null, token);
+ Item newWord = segment(null, token, true);
if (firstWord == null) { // First pass
firstWord = newWord;
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java
index 9ba6c1a8101..76ea7fb11a8 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java
@@ -406,13 +406,18 @@ abstract class StructuredParser extends AbstractParser {
}
}
- /** Words for phrases also permits numerals as words */
- private Item phraseWord(String indexName, boolean insidePhrase) {
+ /**
+ * Words for phrases also permits numerals as words
+ *
+ * @param quoted whether we are consuming text within quoted
+ * @param insidePhrase whether we are consuming additional items for an existing phrase
+ */
+ private Item phraseWord(String indexName, boolean quoted, boolean insidePhrase) {
int position = tokens.getPosition();
Item item = null;
try {
- item = word(indexName);
+ item = word(indexName, quoted);
if (item == null && tokens.currentIs(NUMBER)) {
Token t = tokens.next();
@@ -434,10 +439,12 @@ abstract class StructuredParser extends AbstractParser {
/**
* Returns a WordItem if this is a non CJK query,
- * a WordItem or PhraseSegmentItem if this is a CJK query,
+ * a WordItem or SegmentItem if this is a CJK query,
* null if the current item is not a word
+ *
+ * @param quoted whether this token is inside quotes
*/
- private Item word(String indexName) {
+ private Item word(String indexName, boolean quoted) {
int position = tokens.getPosition();
Item item = null;
@@ -452,7 +459,7 @@ abstract class StructuredParser extends AbstractParser {
if (submodes.url) {
item = new WordItem(word, true);
} else {
- item = segment(indexName, word);
+ item = segment(indexName, word, quoted);
}
if (submodes.url || submodes.site) {
@@ -539,7 +546,7 @@ abstract class StructuredParser extends AbstractParser {
quoted = !quoted;
}
- Item word = phraseWord(indexName, (firstWord != null) || (composite != null));
+ Item word = phraseWord(indexName, quoted, (firstWord != null) || (composite != null));
if (word == null) {
if (tokens.skipMultiple(QUOTE)) {
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java
index ae8c289a5b0..785477d6df7 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java
@@ -56,26 +56,31 @@ public class CJKSearcher extends Searcher {
AndItem replacement = new AndItem();
for (ListIterator<Item> i = ((CompositeItem) root).getItemIterator(); i.hasNext();) {
Item item = i.next();
- if (item instanceof WordItem) replacement.addItem(item);
- else if (item instanceof PhraseSegmentItem) {
+ if (item instanceof WordItem)
+ replacement.addItem(item);
+ else if (item instanceof PhraseSegmentItem)
replacement.addItem(new AndSegmentItem((PhraseSegmentItem) item));
- }
- else replacement.addItem(item); // should never run, but hey... just convert and hope it's OK :)
+ else
+ replacement.addItem(item); // should never get here
}
return replacement;
- } else if (root instanceof PhraseSegmentItem) {
+ }
+ else if (root instanceof PhraseSegmentItem) {
PhraseSegmentItem asSegment = (PhraseSegmentItem) root;
- if (asSegment.isExplicit() || hasOverlappingTokens(asSegment)) return root;
- else return new AndSegmentItem(asSegment);
- } else if (root instanceof SegmentItem) {
+ if (asSegment.isExplicit() || hasOverlappingTokens(asSegment))
+ return root;
+ else
+ return new AndSegmentItem(asSegment);
+ }
+ else if (root instanceof SegmentItem) {
return root; // avoid descending into AndSegmentItems and similar
- } else if (root instanceof CompositeItem) {
+ }
+ else if (root instanceof CompositeItem) {
for (ListIterator<Item> i = ((CompositeItem) root).getItemIterator(); i.hasNext();) {
Item item = i.next();
Item transformedItem = transform(item);
- if (item != transformedItem) {
+ if (item != transformedItem)
i.set(transformedItem);
- }
}
return root;
}
@@ -96,8 +101,7 @@ public class CJKSearcher extends Searcher {
* We have overlapping tokens (see
* com.yahoo.prelude.querytransform.test.CJKSearcherTestCase
* .testCjkQueryWithOverlappingTokens and ParseTestCase for an explanation)
- * if the sum of length of tokens is greater than the lenght of the original
- * word
+ * if the sum of length of tokens is greater than the length of the original word
*/
private boolean hasOverlappingTokens(PhraseSegmentItem segments) {
int segmentsLength=0;
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
index c1db7d73561..80b2f845e84 100644
--- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
@@ -1679,8 +1679,8 @@ public class ParseTestCase {
// "first" "second" and "third" are segments in the test language
Item item = tester.parseQuery("name:\"firstsecondthird\"", null, Language.CHINESE_SIMPLIFIED, Query.Type.ANY, TestLinguistics.INSTANCE);
- assertTrue(item instanceof AndSegmentItem);
- AndSegmentItem segment = (AndSegmentItem) item;
+ assertTrue(item instanceof PhraseSegmentItem);
+ PhraseSegmentItem segment = (PhraseSegmentItem) item;
assertEquals(3, segment.getItemCount());
assertEquals("name:first", segment.getItem(0).toString());
diff --git a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
index c831ee29631..3a67245e912 100644
--- a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
@@ -1037,7 +1037,7 @@ public class QueryTestCase {
IndexModel indexModel = new IndexModel(test);
query.getModel().setExecution(new Execution(Execution.Context.createContextStub(new IndexFacts(indexModel))));
- assertEquals("myfield:\"it s fine\"", query.getModel().getQueryTree().toString());
+ assertEquals("myfield:\"'it s' fine\"", query.getModel().getQueryTree().toString());
}
@Test