summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@yahoo-inc.com> 2016-10-04 16:25:59 +0200
committer: Jon Bratseth <bratseth@yahoo-inc.com> 2016-10-04 16:25:59 +0200
commit: 196ae42ae1d65da115ca9f6f88da934326eb5fbf (patch)
tree: 5273eeb4776782ffc946e2ce178498bbf081533e
parent: 067f59323ef06f2a8d81033b1a8a264069546cbe (diff)
Support 'best' stem mode
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/Search.java | 1
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java | 2
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java | 25
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java | 26
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/StemMode.java | 2
5 files changed, 29 insertions, 27 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/Search.java b/config-model/src/main/java/com/yahoo/searchdefinition/Search.java
index 2ab634801c2..9032f913d0b 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/Search.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/Search.java
@@ -51,6 +51,7 @@ public class Search implements Serializable {
private boolean documentsOnly = false;
// The stemming setting of this search definition. Default is SHORTEST.
+ // TODO: Change to Stemming.BEST on Vespa 7
private Stemming stemming = Stemming.SHORTEST;
// Documents contained in this definition.
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java
index e98ee662b3a..0d8d21400aa 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java
@@ -420,7 +420,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
if (active != null) {
return active;
}
- // assume default
+ // assume default: TODO: Change to Stemming.BEST on Vespa 7
return Stemming.SHORTEST;
}
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java
index f471201f55e..5b145051de5 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java
@@ -17,13 +17,17 @@ public enum Stemming {
/** No stemming */
NONE("none"),
- /** Stem as much as possible */
+ /** @deprecated incorrectly don't stem at all */
+ @Deprecated
ALL("all"),
/** select shortest possible stem */
SHORTEST("shortest"),
- /** index (and query?) multiple stems */
+ /** select the "best" stem alternative */
+ BEST("best"),
+
+ /** index multiple stems */
MULTIPLE("multiple");
private static Logger log=Logger.getLogger(Stemming.class.getName());
@@ -36,6 +40,7 @@ public enum Stemming {
*
* @throws IllegalArgumentException if there is no stemming type with the given name
*/
+ @SuppressWarnings("deprecation")
public static Stemming get(String stemmingName) {
try {
Stemming stemming = Stemming.valueOf(stemmingName.toUpperCase());
@@ -49,7 +54,7 @@ public enum Stemming {
}
}
- private Stemming(String name) {
+ Stemming(String name) {
this.name = name;
}
@@ -59,14 +64,16 @@ public enum Stemming {
return "stemming " + name;
}
+ @SuppressWarnings("deprecation")
public StemMode toStemMode() {
- if (this == Stemming.SHORTEST) {
- return StemMode.SHORTEST;
- }
- if (this == Stemming.MULTIPLE) {
- return StemMode.ALL;
+ switch(this) {
+ case SHORTEST: return StemMode.SHORTEST;
+ case MULTIPLE: return StemMode.ALL;
+ case BEST : return StemMode.BEST;
+ case NONE: return StemMode.NONE;
+ case ALL: return StemMode.SHORTEST; // Intentional; preserve historic behavior
+ default: throw new IllegalStateException("Inconvertible stem mode " + this);
}
- return StemMode.NONE;
}
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
index c02824420d5..ca8214f35d6 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
@@ -108,11 +108,8 @@ public class StemmingSearcher extends Searcher {
return reverseConnectivity;
}
- private Item scan(Item item,
- boolean isCJK,
- Language l,
- IndexFacts.Session indexFacts,
- Map<Item, TaggableItem> reverseConnectivity) {
+ private Item scan(Item item, boolean isCJK, Language l, IndexFacts.Session indexFacts,
+ Map<Item, TaggableItem> reverseConnectivity) {
if (item == null) {
return null;
} else if (item instanceof BlockItem) {
@@ -153,9 +150,8 @@ public class StemmingSearcher extends Searcher {
if (i instanceof TermItem) {
return ((TermItem) i).getOrigin(); // this should always be the case
} else {
- getLogger().log(LogLevel.WARNING,
- "Weird, BlockItem '" + b + "' was a composite containing " + i.getClass().getName()
- + ", expected TermItem.");
+ getLogger().log(LogLevel.WARNING, "Weird, BlockItem '" + b + "' was a composite containing " +
+ i.getClass().getName() + ", expected TermItem.");
}
}
return null;
@@ -217,8 +213,8 @@ public class StemmingSearcher extends Searcher {
setConnectivity(current, reverseConnectivity, replacement);
}
- private void andSegmentConnectivity(BlockItem current,
- Map<Item, TaggableItem> reverseConnectivity, CompositeItem composite) {
+ private void andSegmentConnectivity(BlockItem current, Map<Item, TaggableItem> reverseConnectivity,
+ CompositeItem composite) {
// if the original has connectivity to something, add to last word
Connectivity connectivity = getConnectivity(current);
if (connectivity != null) {
@@ -269,8 +265,7 @@ public class StemmingSearcher extends Searcher {
private TaggableItem singleWordSegment(BlockItem current,
StemList segment,
Index index,
- Substring substring)
- {
+ Substring substring) {
String indexName = current.getIndexName();
if (index.getLiteralBoost() || index.getStemMode() == StemMode.ALL) {
// Yes, this will create a new WordAlternativesItem even if stemmed
@@ -301,8 +296,7 @@ public class StemmingSearcher extends Searcher {
}
private WordItem singleStemSegment(Item blockAsItem, String stem, String indexName,
- Substring substring)
- {
+ Substring substring) {
WordItem replacement = new WordItem(stem, indexName, true, substring);
replacement.setStemmed(true);
copyAttributes(blockAsItem, replacement);
@@ -311,8 +305,7 @@ public class StemmingSearcher extends Searcher {
private void setConnectivity(BlockItem current,
Map<Item, TaggableItem> reverseConnectivity,
- Item replacement)
- {
+ Item replacement) {
if (reverseConnectivity != null && !reverseConnectivity.isEmpty()) {
// This Map<Item, TaggableItem>.get(BlockItem) is technically wrong, but the Item API ensures its correctness
TaggableItem connectedTo = reverseConnectivity.get(current);
@@ -425,4 +418,5 @@ public class StemmingSearcher extends Searcher {
}
}
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/process/StemMode.java b/linguistics/src/main/java/com/yahoo/language/process/StemMode.java
index 269b08dcdf7..ebacb307a85 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/StemMode.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/StemMode.java
@@ -6,7 +6,7 @@ package com.yahoo.language.process;
* Stemming implementation may support a smaller number of modes by mapping a mode to a more
* inclusive alternative.
*
- * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias Mølster Lidal</a>
+ * @author Mathias Mølster Lidal
*/
public enum StemMode {