diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-10-04 16:25:59 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-10-04 16:25:59 +0200 |
commit | 196ae42ae1d65da115ca9f6f88da934326eb5fbf (patch) | |
tree | 5273eeb4776782ffc946e2ce178498bbf081533e | |
parent | 067f59323ef06f2a8d81033b1a8a264069546cbe (diff) |
Support 'best' stem mode
5 files changed, 29 insertions, 27 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/Search.java b/config-model/src/main/java/com/yahoo/searchdefinition/Search.java index 2ab634801c2..9032f913d0b 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/Search.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/Search.java @@ -51,6 +51,7 @@ public class Search implements Serializable { private boolean documentsOnly = false; // The stemming setting of this search definition. Default is SHORTEST. + // TODO: Change to Stemming.BEST on Vespa 7 private Stemming stemming = Stemming.SHORTEST; // Documents contained in this definition. diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java index e98ee662b3a..0d8d21400aa 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java @@ -420,7 +420,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { if (active != null) { return active; } - // assume default + // assume default: TODO: Change to Stemming.BEST on Vespa 7 return Stemming.SHORTEST; } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java index f471201f55e..5b145051de5 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Stemming.java @@ -17,13 +17,17 @@ public enum Stemming { /** No stemming */ NONE("none"), - /** Stem as much as possible */ + /** @deprecated incorrectly don't stem at all */ + @Deprecated ALL("all"), /** select shortest possible stem */ SHORTEST("shortest"), - /** index (and query?) multiple stems */ + /** select the "best" stem alternative */ + BEST("best"), + + /** index multiple stems */ MULTIPLE("multiple"); private static Logger log=Logger.getLogger(Stemming.class.getName()); @@ -36,6 +40,7 @@ public enum Stemming { * * @throws IllegalArgumentException if there is no stemming type with the given name */ + @SuppressWarnings("deprecation") public static Stemming get(String stemmingName) { try { Stemming stemming = Stemming.valueOf(stemmingName.toUpperCase()); @@ -49,7 +54,7 @@ public enum Stemming { } } - private Stemming(String name) { + Stemming(String name) { this.name = name; } @@ -59,14 +64,16 @@ public enum Stemming { return "stemming " + name; } + @SuppressWarnings("deprecation") public StemMode toStemMode() { - if (this == Stemming.SHORTEST) { - return StemMode.SHORTEST; - } - if (this == Stemming.MULTIPLE) { - return StemMode.ALL; + switch(this) { + case SHORTEST: return StemMode.SHORTEST; + case MULTIPLE: return StemMode.ALL; + case BEST : return StemMode.BEST; + case NONE: return StemMode.NONE; + case ALL: return StemMode.SHORTEST; // Intentional; preserve historic behavior + default: throw new IllegalStateException("Inconvertible stem mode " + this); } - return StemMode.NONE; } } diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java index c02824420d5..ca8214f35d6 100644 --- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java @@ -108,11 +108,8 @@ public class StemmingSearcher extends Searcher { return reverseConnectivity; } - private Item scan(Item item, - boolean isCJK, - Language l, - IndexFacts.Session indexFacts, - Map<Item, TaggableItem> reverseConnectivity) { + private Item scan(Item item, boolean isCJK, Language l, IndexFacts.Session indexFacts, + Map<Item, TaggableItem> reverseConnectivity) { if (item == null) { return null; } else if (item instanceof BlockItem) { @@ -153,9 +150,8 @@ public class StemmingSearcher extends Searcher { if (i instanceof TermItem) { return ((TermItem) i).getOrigin(); // this should always be the case } else { - getLogger().log(LogLevel.WARNING, - "Weird, BlockItem '" + b + "' was a composite containing " + i.getClass().getName() - + ", expected TermItem."); + getLogger().log(LogLevel.WARNING, "Weird, BlockItem '" + b + "' was a composite containing " + + i.getClass().getName() + ", expected TermItem."); } } return null; @@ -217,8 +213,8 @@ public class StemmingSearcher extends Searcher { setConnectivity(current, reverseConnectivity, replacement); } - private void andSegmentConnectivity(BlockItem current, - Map<Item, TaggableItem> reverseConnectivity, CompositeItem composite) { + private void andSegmentConnectivity(BlockItem current, Map<Item, TaggableItem> reverseConnectivity, + CompositeItem composite) { // if the original has connectivity to something, add to last word Connectivity connectivity = getConnectivity(current); if (connectivity != null) { @@ -269,8 +265,7 @@ public class StemmingSearcher extends Searcher { private TaggableItem singleWordSegment(BlockItem current, StemList segment, Index index, - Substring substring) - { + Substring substring) { String indexName = current.getIndexName(); if (index.getLiteralBoost() || index.getStemMode() == StemMode.ALL) { // Yes, this will create a new WordAlternativesItem even if stemmed @@ -301,8 +296,7 @@ public class StemmingSearcher extends Searcher { } private WordItem singleStemSegment(Item blockAsItem, String stem, String indexName, - Substring substring) - { + Substring substring) { WordItem replacement = new WordItem(stem, indexName, true, substring); replacement.setStemmed(true); copyAttributes(blockAsItem, replacement); @@ -311,8 +305,7 @@ public class StemmingSearcher extends Searcher { private void setConnectivity(BlockItem current, Map<Item, TaggableItem> reverseConnectivity, - Item replacement) - { + Item replacement) { if (reverseConnectivity != null && !reverseConnectivity.isEmpty()) { // This Map<Item, TaggableItem>.get(BlockItem) is technically wrong, but the Item API ensures its correctness TaggableItem connectedTo = reverseConnectivity.get(current); @@ -425,4 +418,5 @@ public class StemmingSearcher extends Searcher { } } + } diff --git a/linguistics/src/main/java/com/yahoo/language/process/StemMode.java b/linguistics/src/main/java/com/yahoo/language/process/StemMode.java index 269b08dcdf7..ebacb307a85 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/StemMode.java +++ b/linguistics/src/main/java/com/yahoo/language/process/StemMode.java @@ -6,7 +6,7 @@ package com.yahoo.language.process; * Stemming implementation may support a smaller number of modes by mapping a mode to a more * inclusive alternative. * - * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias Mølster Lidal</a> + * @author Mathias Mølster Lidal */ public enum StemMode { |