diff options
Diffstat (limited to 'container-search')
5 files changed, 36 insertions, 58 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/Index.java b/container-search/src/main/java/com/yahoo/prelude/Index.java index 65d5879b004..365ee299ca4 100644 --- a/container-search/src/main/java/com/yahoo/prelude/Index.java +++ b/container-search/src/main/java/com/yahoo/prelude/Index.java @@ -26,6 +26,7 @@ import java.util.Set; public class Index { public static class Attribute { + private boolean tokenizedContent = false; public final String name; @@ -207,20 +208,12 @@ public class Index { } } - /** - * Whether terms in this field are lower cased when indexing. - * - * @param lowercase true if terms are lowercased - */ + /** Sets whether terms in this field are lowercased when indexing. */ public void setLowercase(boolean lowercase) { this.lowercase = lowercase; } - /** - * Whether terms in this field are lower cased when indexing. - * - * @return true if terms are lowercased - */ + /** Returns whether terms in this field are lowercased when indexing. */ public boolean isLowercase() { return lowercase; } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java b/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java index 13673144a0a..d0ffcd2d0e0 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java @@ -3,10 +3,9 @@ package com.yahoo.prelude.query; /** - * An interface used for anything which represents a single block - * of query input. + * An interface used for anything which represents a single block of query input. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public interface BlockItem extends HasIndexItem { @@ -39,4 +38,5 @@ public interface BlockItem extends HasIndexItem { * is necessary to change operator? */ SegmentingRule getSegmentingRule(); + } diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java index fdd6ad47a98..ce13045b518 100644 --- a/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java @@ -111,25 +111,17 @@ public class NormalizingSearcher extends Searcher { } private void normalizeAlternatives(Language language, Session indexFacts, WordAlternativesItem block) { - if (!block.isNormalizable()) { - return; - } - { - Index index = indexFacts.getIndex(block.getIndexName()); - if (index.isAttribute()) { - return; - } - if (!index.getNormalize()) { - return; - } - } + if ( ! block.isNormalizable()) return; + + Index index = indexFacts.getIndex(block.getIndexName()); + if (index.isAttribute()) return; + if ( ! index.getNormalize()) return; List<Alternative> terms = block.getAlternatives(); for (Alternative term : terms) { String accentDropped = linguistics.getTransformer().accentDrop(term.word, language); - if (!term.word.equals(accentDropped) && accentDropped.length() > 0) { + if ( ! term.word.equals(accentDropped) && accentDropped.length() > 0) block.addTerm(accentDropped, term.exactness * .7d); - } } } diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java index 655fbf6acc3..9a9044def2d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java @@ -188,13 +188,10 @@ public class StemmingSearcher extends Searcher { return (Item) w; } - if (context.isCJK) { - composite = chooseCompositeForCJK(current, - ((Item) current).getParent(), - indexName); - } else { - composite = phraseSegment(current, indexName); - } + if (context.isCJK) + composite = chooseCompositeForCJK(current, ((Item) current).getParent(), indexName); + else + composite = chooseComposite(current, ((Item) current).getParent(), indexName); for (StemList segment : segments) { TaggableItem w = singleWordSegment(current, segment, index, substring, context.insidePhrase); @@ -331,39 +328,34 @@ public class StemmingSearcher extends Searcher { } } + private CompositeItem chooseComposite(BlockItem current, CompositeItem parent, String indexName) { + if (parent instanceof PhraseItem || current instanceof PhraseSegmentItem) + return createPhraseSegment(current, indexName); + else + return createAndSegment(current); + + } + private CompositeItem chooseCompositeForCJK(BlockItem current, CompositeItem parent, String indexName) { - CompositeItem composite; - if (current.getSegmentingRule() == SegmentingRule.LANGUAGE_DEFAULT) { - if (parent instanceof PhraseItem || current instanceof PhraseSegmentItem) { - composite = phraseSegment(current, indexName); - } else - composite = createAndSegment(current); - } else { - switch (current.getSegmentingRule()) { - case PHRASE: - composite = phraseSegment(current, indexName); - break; - case BOOLEAN_AND: - composite = createAndSegment(current); - break; + if (current.getSegmentingRule() == SegmentingRule.LANGUAGE_DEFAULT) + return chooseComposite(current, parent, indexName); + + switch (current.getSegmentingRule()) { // TODO: Why for CJK only? The segmentingRule says nothing about being for CJK only + case PHRASE: return createPhraseSegment(current, indexName); + case BOOLEAN_AND: return createAndSegment(current); default: - throw new IllegalArgumentException( - "Unknown segmenting rule: " - + current.getSegmentingRule() - + ". This is a bug in Vespa, as the implementation has gotten out of sync." - + " Please create a ticket as soon as possible."); - } + throw new IllegalArgumentException("Unknown segmenting rule: " + current.getSegmentingRule() + + ". This is a bug in Vespa, as the implementation has gotten out of sync." + + " Please create a ticket as soon as possible."); } - return composite; } private AndSegmentItem createAndSegment(BlockItem current) { return new AndSegmentItem(current.stringValue(), true, true); } - private CompositeItem phraseSegment(BlockItem current, String indexName) { - CompositeItem composite; - composite = new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true); + private CompositeItem createPhraseSegment(BlockItem current, String indexName) { + CompositeItem composite = new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true); composite.setIndexName(indexName); return composite; } diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java index 1e8f436a05a..25488aa7bbc 100644 --- a/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java @@ -44,4 +44,5 @@ public class VespaLowercasingSearcher extends LowercasingSearcher { Index index = indexFacts.getIndex(sb.toString()); return index.isLowercase() || index.isAttribute(); } + } |