diff options
author | Jon Bratseth <bratseth@verizonmedia.com> | 2020-01-30 10:17:24 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@verizonmedia.com> | 2020-01-30 10:17:24 +0100 |
commit | 104aee7540e799efc4fc90c1658b0c209bfcf6ce (patch) | |
tree | 2d03f656d8737f6fc0a1a7436e7052367a4f9761 /container-search/src/main/java/com/yahoo/prelude/querytransform | |
parent | 2533470181c45a877fdc884f1c6742e0934aa6bb (diff) |
Respect the chosen composite also if not CJK
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/querytransform')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java | 20 | ||||
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java | 54 |
2 files changed, 29 insertions, 45 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java
index fdd6ad47a98..ce13045b518 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java
@@ -111,25 +111,17 @@ public class NormalizingSearcher extends Searcher {
     }
 
     private void normalizeAlternatives(Language language, Session indexFacts, WordAlternativesItem block) {
-        if (!block.isNormalizable()) {
-            return;
-        }
-        {
-            Index index = indexFacts.getIndex(block.getIndexName());
-            if (index.isAttribute()) {
-                return;
-            }
-            if (!index.getNormalize()) {
-                return;
-            }
-        }
+        if ( ! block.isNormalizable()) return;
+
+        Index index = indexFacts.getIndex(block.getIndexName());
+        if (index.isAttribute()) return;
+        if ( ! index.getNormalize()) return;
 
         List<Alternative> terms = block.getAlternatives();
         for (Alternative term : terms) {
             String accentDropped = linguistics.getTransformer().accentDrop(term.word, language);
-            if (!term.word.equals(accentDropped) && accentDropped.length() > 0) {
+            if ( ! term.word.equals(accentDropped) && accentDropped.length() > 0)
                 block.addTerm(accentDropped, term.exactness * .7d);
-            }
         }
     }
 
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
index 655fbf6acc3..9a9044def2d 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
@@ -188,13 +188,10 @@ public class StemmingSearcher extends Searcher {
             return (Item) w;
         }
 
-        if (context.isCJK) {
-            composite = chooseCompositeForCJK(current,
-                                              ((Item) current).getParent(),
-                                              indexName);
-        } else {
-            composite = phraseSegment(current, indexName);
-        }
+        if (context.isCJK)
+            composite = chooseCompositeForCJK(current, ((Item) current).getParent(), indexName);
+        else
+            composite = chooseComposite(current, ((Item) current).getParent(), indexName);
 
         for (StemList segment : segments) {
             TaggableItem w = singleWordSegment(current, segment, index, substring, context.insidePhrase);
@@ -331,39 +328,34 @@ public class StemmingSearcher extends Searcher {
         }
     }
 
+    private CompositeItem chooseComposite(BlockItem current, CompositeItem parent, String indexName) {
+        if (parent instanceof PhraseItem || current instanceof PhraseSegmentItem)
+            return createPhraseSegment(current, indexName);
+        else
+            return createAndSegment(current);
+
+    }
+
     private CompositeItem chooseCompositeForCJK(BlockItem current, CompositeItem parent, String indexName) {
-        CompositeItem composite;
-        if (current.getSegmentingRule() == SegmentingRule.LANGUAGE_DEFAULT) {
-            if (parent instanceof PhraseItem || current instanceof PhraseSegmentItem) {
-                composite = phraseSegment(current, indexName);
-            } else
-                composite = createAndSegment(current);
-        } else {
-            switch (current.getSegmentingRule()) {
-            case PHRASE:
-                composite = phraseSegment(current, indexName);
-                break;
-            case BOOLEAN_AND:
-                composite = createAndSegment(current);
-                break;
+        if (current.getSegmentingRule() == SegmentingRule.LANGUAGE_DEFAULT)
+            return chooseComposite(current, parent, indexName);
+
+        switch (current.getSegmentingRule()) { // TODO: Why for CJK only? The segmentingRule says nothing about being for CJK only
+            case PHRASE: return createPhraseSegment(current, indexName);
+            case BOOLEAN_AND: return createAndSegment(current);
             default:
-                throw new IllegalArgumentException(
-                        "Unknown segmenting rule: "
-                                + current.getSegmentingRule()
-                                + ". This is a bug in Vespa, as the implementation has gotten out of sync."
-                                + " Please create a ticket as soon as possible.");
-            }
+                throw new IllegalArgumentException("Unknown segmenting rule: " + current.getSegmentingRule() +
+                                                   ". This is a bug in Vespa, as the implementation has gotten out of sync." +
+                                                   " Please create a ticket as soon as possible.");
         }
-        return composite;
     }
 
     private AndSegmentItem createAndSegment(BlockItem current) {
         return new AndSegmentItem(current.stringValue(), true, true);
     }
 
-    private CompositeItem phraseSegment(BlockItem current, String indexName) {
-        CompositeItem composite;
-        composite = new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true);
+    private CompositeItem createPhraseSegment(BlockItem current, String indexName) {
+        CompositeItem composite = new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true);
         composite.setIndexName(indexName);
         return composite;
     }