aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@vespa.ai> 2024-03-09 21:22:26 +0100
committer: Jon Bratseth <bratseth@vespa.ai> 2024-03-09 21:22:26 +0100
commit: 6fa0791c5bac03554f01fc5a8652741cb33921b5 (patch)
tree: b7ad4541f98678d3e945789b13a11d5419506898
parent: 28a6770eb45e0a5571b1f9a4716d95f34c49cb60 (diff)
Stem prefix items [bratseth/stem-prefixes]
If we are searching a stemmed index, it's probably better to stem terms also when we are searching for prefixes.
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/document/Matching.java | 1
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/NoPrefixForIndexes.java | 3
-rw-r--r-- container-search/abi-spec.json | 25
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/ExactStringItem.java | 8
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java | 10
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java | 12
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/SubstringItem.java | 8
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/SuffixItem.java | 8
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/WordItem.java | 8
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java | 15
10 files changed, 84 insertions, 14 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
index 9d68553fa80..9f05045d090 100644
--- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java
+++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
@@ -31,6 +31,7 @@ public class Matching implements Cloneable, Serializable {
/** Maximum number of characters to consider when searching in this field. Used for limiting resources, especially in streaming search. */
private Integer maxLength;
+
/** Maximum number of occurrences for each term */
private Integer maxTermOccurrences;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/NoPrefixForIndexes.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/NoPrefixForIndexes.java
index 15d293e4abc..0aa0dc85ab8 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/NoPrefixForIndexes.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/NoPrefixForIndexes.java
@@ -23,8 +23,7 @@ public class NoPrefixForIndexes implements Validator {
@Override
public void validate(Context context) {
for (SearchCluster cluster : context.model().getSearchClusters()) {
- if (cluster instanceof IndexedSearchCluster) {
- IndexedSearchCluster sc = (IndexedSearchCluster) cluster;
+ if (cluster instanceof IndexedSearchCluster sc) {
for (DocumentDatabase docDb : sc.getDocumentDbs()) {
DerivedConfiguration sdConfig = docDb.getDerivedConfiguration();
Schema schema = sdConfig.getSchema();
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 73376ac4b25..79cc578c6cd 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -524,9 +524,12 @@
"methods" : [
"public void <init>(java.lang.String)",
"public void <init>(java.lang.String, boolean)",
+ "public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
+ "public com.yahoo.prelude.query.ExactStringItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
- "public java.lang.String stringValue()"
+ "public java.lang.String stringValue()",
+ "public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
@@ -914,6 +917,7 @@
"public"
],
"methods" : [
+ "public com.yahoo.prelude.query.MarkerWordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public boolean isStartAnchor()",
"public boolean isEndAnchor()",
"protected java.lang.String getEncodedWord()",
@@ -923,7 +927,8 @@
"public static com.yahoo.prelude.query.MarkerWordItem createStartOfHost(java.lang.String)",
"public static com.yahoo.prelude.query.MarkerWordItem createStartOfHost()",
"public static com.yahoo.prelude.query.MarkerWordItem createEndOfHost(java.lang.String)",
- "public static com.yahoo.prelude.query.MarkerWordItem createEndOfHost()"
+ "public static com.yahoo.prelude.query.MarkerWordItem createEndOfHost()",
+ "public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
@@ -1296,9 +1301,12 @@
"public void <init>(java.lang.String)",
"public void <init>(java.lang.String, boolean)",
"public void <init>(java.lang.String, java.lang.String)",
+ "public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
+ "public com.yahoo.prelude.query.PrefixItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
- "public java.lang.String stringValue()"
+ "public java.lang.String stringValue()",
+ "public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
@@ -1622,9 +1630,12 @@
"methods" : [
"public void <init>(java.lang.String)",
"public void <init>(java.lang.String, boolean)",
+ "public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
+ "public com.yahoo.prelude.query.SubstringItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
- "public java.lang.String stringValue()"
+ "public java.lang.String stringValue()",
+ "public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
@@ -1637,9 +1648,12 @@
"methods" : [
"public void <init>(java.lang.String)",
"public void <init>(java.lang.String, boolean)",
+ "public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
+ "public com.yahoo.prelude.query.SuffixItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
- "public java.lang.String stringValue()"
+ "public java.lang.String stringValue()",
+ "public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
@@ -1962,6 +1976,7 @@
"public void <init>(com.yahoo.prelude.query.parser.Token, boolean)",
"public void <init>(java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
+ "public com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
"public void setWord(java.lang.String)",
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ExactStringItem.java b/container-search/src/main/java/com/yahoo/prelude/query/ExactStringItem.java
index cb0752e5408..36e24fa81db 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/ExactStringItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/ExactStringItem.java
@@ -17,6 +17,14 @@ public class ExactStringItem extends WordItem {
super(substring, isFromQuery);
}
+    /**
+     * Creates an exact string item from a complete set of word item data.
+     * All arguments are passed unchanged to the corresponding WordItem constructor.
+     */
+    public ExactStringItem(String word, String indexName, boolean isFromQuery, Substring origin) {
+        super(word, indexName, isFromQuery, origin);
+    }
+
+    /**
+     * Returns a new ExactStringItem initialized with the given data.
+     * Overrides the WordItem implementation so that the returned item keeps this item type
+     * instead of becoming a plain WordItem.
+     */
+    @Override
+    public ExactStringItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
+        return new ExactStringItem(word, indexName, isFromQuery, origin);
+    }
+
+
@Override
public ItemType getItemType() {
return ItemType.EXACT;
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java
index 40ea1e37c47..48309cdd8fa 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java
@@ -25,6 +25,16 @@ public class MarkerWordItem extends WordItem {
this.markerWord = markerWord;
}
+    /**
+     * Creates a marker word item with explicit query origin data.
+     * isFromQuery and origin are handed on to WordItem rather than being dropped,
+     * so that items created through newInstance carry the values they were given.
+     */
+    private MarkerWordItem(String publicSymbol, String markerWord, String indexName, boolean isFromQuery, Substring origin) {
+        super(publicSymbol, indexName, isFromQuery, origin);
+        this.markerWord = markerWord;
+    }
+
+    /**
+     * Returns a new MarkerWordItem initialized with the given data,
+     * with the marker word copied from this instance.
+     */
+    @Override
+    public MarkerWordItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
+        return new MarkerWordItem(word, markerWord, indexName, isFromQuery, origin);
+    }
+
public boolean isStartAnchor() { return getWord().equals(startAnchor); }
public boolean isEndAnchor() { return getWord().equals(endAnchor); }
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java
index 5904d805a39..9fc087e70b4 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java
@@ -17,7 +17,17 @@ public class PrefixItem extends WordItem {
super(prefix, isFromQuery);
}
- public PrefixItem(String prefix, String indexName) { super(prefix, indexName); }
+    /** Creates a prefix item for the given prefix in the given index. */
+    public PrefixItem(String prefix, String indexName) {
+        super(prefix, indexName);
+    }
+
+    /**
+     * Creates a prefix item from a complete set of word item data.
+     * All arguments are passed unchanged to the corresponding WordItem constructor.
+     */
+    public PrefixItem(String prefix, String indexName, boolean isFromQuery, Substring origin) {
+        super(prefix, indexName, isFromQuery, origin);
+    }
+
+    /**
+     * Returns a new PrefixItem initialized with the given data.
+     * Overrides the WordItem implementation so that the returned item keeps this item type
+     * instead of becoming a plain WordItem.
+     */
+    @Override
+    public PrefixItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
+        return new PrefixItem(word, indexName, isFromQuery, origin);
+    }
@Override
public ItemType getItemType() {
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SubstringItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SubstringItem.java
index 7a05235b199..df9de84b04d 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/SubstringItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/SubstringItem.java
@@ -16,6 +16,14 @@ public class SubstringItem extends WordItem {
super(substring, isFromQuery);
}
+    /**
+     * Creates a substring item from a complete set of word item data.
+     * All arguments are passed unchanged to the corresponding WordItem constructor.
+     */
+    public SubstringItem(String substring, String indexName, boolean isFromQuery, Substring origin) {
+        super(substring, indexName, isFromQuery, origin);
+    }
+
+    /**
+     * Returns a new SubstringItem initialized with the given data.
+     * Overrides the WordItem implementation so that the returned item keeps this item type
+     * instead of becoming a plain WordItem.
+     */
+    @Override
+    public SubstringItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
+        return new SubstringItem(word, indexName, isFromQuery, origin);
+    }
+
+
@Override
public ItemType getItemType() {
return ItemType.SUBSTRING;
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SuffixItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SuffixItem.java
index 700564853fd..e364330a377 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/SuffixItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/SuffixItem.java
@@ -16,6 +16,14 @@ public class SuffixItem extends WordItem {
super(suffix, isFromQuery);
}
+    /**
+     * Creates a suffix item from a complete set of word item data.
+     * All arguments are passed unchanged to the corresponding WordItem constructor.
+     */
+    public SuffixItem(String suffix, String indexName, boolean isFromQuery, Substring origin) {
+        super(suffix, indexName, isFromQuery, origin);
+    }
+
+    /**
+     * Returns a new SuffixItem initialized with the given data.
+     * Overrides the WordItem implementation so that the returned item keeps this item type
+     * instead of becoming a plain WordItem.
+     */
+    @Override
+    public SuffixItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
+        return new SuffixItem(word, indexName, isFromQuery, origin);
+    }
+
+
@Override
public ItemType getItemType() {
return ItemType.SUFFIX;
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java
index 4f8b02a8d13..9cfa33fa07d 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java
@@ -62,6 +62,14 @@ public class WordItem extends TermItem {
setWord(word);
}
+ /**
+ * Returns a new instance of this kind of WordItem, initialized with the given data and any other
+ * fields belonging to the item subclass copied from this instance.
+ */
+ public WordItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
+ return new WordItem(word, indexName, isFromQuery, origin);
+ }
+
public ItemType getItemType() {
return ItemType.WORD;
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
index e8350831381..e40f161ede2 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
@@ -163,7 +163,7 @@ public class StemmingSearcher extends Searcher {
}
private Item checkBlock(BlockItem b, StemContext context) {
- if (b instanceof PrefixItem || !b.isWords()) return (Item) b;
+ if (!b.isWords()) return (Item) b;
if (b.isFromQuery() && !b.isStemmed()) {
Index index = context.indexFacts.getIndex(b.getIndexName());
@@ -190,10 +190,8 @@ public class StemmingSearcher extends Searcher {
// The rewriting logic is here
private Item stem(BlockItem current, StemContext context, Index index) {
- Item blockAsItem = (Item)current;
- CompositeItem composite;
List<StemList> segments = linguistics.getStemmer().stem(current.stringValue(), index.getStemMode(), context.language);
- if (segments.isEmpty()) return blockAsItem;
+ if (segments.isEmpty()) return (Item)current;
String indexName = current.getIndexName();
Substring substring = getOffsets(current);
@@ -203,6 +201,7 @@ public class StemmingSearcher extends Searcher {
return (Item)w;
}
+ CompositeItem composite;
if (context.isCJK)
composite = chooseCompositeForCJK(current, ((Item) current).getParent(), indexName);
else
@@ -219,7 +218,7 @@ public class StemmingSearcher extends Searcher {
if (composite instanceof AndSegmentItem) {
andSegmentConnectivity(current, context.reverseConnectivity, composite);
}
- copyAttributes(blockAsItem, composite);
+ copyAttributes((Item)current, composite);
composite.lock();
if (composite instanceof PhraseSegmentItem replacement) {
@@ -320,7 +319,11 @@ public class StemmingSearcher extends Searcher {
private WordItem singleStemSegment(Item blockAsItem, String stem, String indexName,
Substring substring) {
- WordItem replacement = new WordItem(stem, indexName, true, substring);
+ WordItem replacement;
+ if (blockAsItem instanceof WordItem) // preserve the WordItem subclass type
+ replacement = ((WordItem)blockAsItem).newInstance(stem, indexName, true, substring);
+ else
+ replacement = new WordItem(stem, indexName, true, substring);
replacement.setStemmed(true);
copyAttributes(blockAsItem, replacement);
return replacement;