summaryrefslogtreecommitdiffstats
path: root/container-search/src
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2020-01-31 15:18:55 +0100
committerGitHub <noreply@github.com>2020-01-31 15:18:55 +0100
commit444663f66cd968bb67d25b1533dfa57305563780 (patch)
tree2a71fca81a05dff0df5d5f9ba41dc6a09e88ba95 /container-search/src
parentb3655490b81adede60634fc6a11ab6d980c64968 (diff)
parent104aee7540e799efc4fc90c1658b0c209bfcf6ce (diff)
Merge pull request #12004 from vespa-engine/bratseth/respect-chosen-composite-in-stemming
Respect the chosen composite also if not CJK
Diffstat (limited to 'container-search/src')
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/Index.java13
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java6
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java20
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java54
-rw-r--r--container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java1
5 files changed, 36 insertions, 58 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/Index.java b/container-search/src/main/java/com/yahoo/prelude/Index.java
index 65d5879b004..365ee299ca4 100644
--- a/container-search/src/main/java/com/yahoo/prelude/Index.java
+++ b/container-search/src/main/java/com/yahoo/prelude/Index.java
@@ -26,6 +26,7 @@ import java.util.Set;
public class Index {
public static class Attribute {
+
private boolean tokenizedContent = false;
public final String name;
@@ -207,20 +208,12 @@ public class Index {
}
}
- /**
- * Whether terms in this field are lower cased when indexing.
- *
- * @param lowercase true if terms are lowercased
- */
+ /** Sets whether terms in this field are lowercased when indexing. */
public void setLowercase(boolean lowercase) {
this.lowercase = lowercase;
}
- /**
- * Whether terms in this field are lower cased when indexing.
- *
- * @return true if terms are lowercased
- */
+ /** Returns whether terms in this field are lowercased when indexing. */
public boolean isLowercase() {
return lowercase;
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java b/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java
index 13673144a0a..d0ffcd2d0e0 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java
@@ -3,10 +3,9 @@ package com.yahoo.prelude.query;
/**
- * An interface used for anything which represents a single block
- * of query input.
+ * An interface used for anything which represents a single block of query input.
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
public interface BlockItem extends HasIndexItem {
@@ -39,4 +38,5 @@ public interface BlockItem extends HasIndexItem {
* is necessary to change operator?
*/
SegmentingRule getSegmentingRule();
+
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java
index fdd6ad47a98..ce13045b518 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java
@@ -111,25 +111,17 @@ public class NormalizingSearcher extends Searcher {
}
private void normalizeAlternatives(Language language, Session indexFacts, WordAlternativesItem block) {
- if (!block.isNormalizable()) {
- return;
- }
- {
- Index index = indexFacts.getIndex(block.getIndexName());
- if (index.isAttribute()) {
- return;
- }
- if (!index.getNormalize()) {
- return;
- }
- }
+ if ( ! block.isNormalizable()) return;
+
+ Index index = indexFacts.getIndex(block.getIndexName());
+ if (index.isAttribute()) return;
+ if ( ! index.getNormalize()) return;
List<Alternative> terms = block.getAlternatives();
for (Alternative term : terms) {
String accentDropped = linguistics.getTransformer().accentDrop(term.word, language);
- if (!term.word.equals(accentDropped) && accentDropped.length() > 0) {
+ if ( ! term.word.equals(accentDropped) && accentDropped.length() > 0)
block.addTerm(accentDropped, term.exactness * .7d);
- }
}
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
index 655fbf6acc3..9a9044def2d 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
@@ -188,13 +188,10 @@ public class StemmingSearcher extends Searcher {
return (Item) w;
}
- if (context.isCJK) {
- composite = chooseCompositeForCJK(current,
- ((Item) current).getParent(),
- indexName);
- } else {
- composite = phraseSegment(current, indexName);
- }
+ if (context.isCJK)
+ composite = chooseCompositeForCJK(current, ((Item) current).getParent(), indexName);
+ else
+ composite = chooseComposite(current, ((Item) current).getParent(), indexName);
for (StemList segment : segments) {
TaggableItem w = singleWordSegment(current, segment, index, substring, context.insidePhrase);
@@ -331,39 +328,34 @@ public class StemmingSearcher extends Searcher {
}
}
+ private CompositeItem chooseComposite(BlockItem current, CompositeItem parent, String indexName) {
+ if (parent instanceof PhraseItem || current instanceof PhraseSegmentItem)
+ return createPhraseSegment(current, indexName);
+ else
+ return createAndSegment(current);
+
+ }
+
private CompositeItem chooseCompositeForCJK(BlockItem current, CompositeItem parent, String indexName) {
- CompositeItem composite;
- if (current.getSegmentingRule() == SegmentingRule.LANGUAGE_DEFAULT) {
- if (parent instanceof PhraseItem || current instanceof PhraseSegmentItem) {
- composite = phraseSegment(current, indexName);
- } else
- composite = createAndSegment(current);
- } else {
- switch (current.getSegmentingRule()) {
- case PHRASE:
- composite = phraseSegment(current, indexName);
- break;
- case BOOLEAN_AND:
- composite = createAndSegment(current);
- break;
+ if (current.getSegmentingRule() == SegmentingRule.LANGUAGE_DEFAULT)
+ return chooseComposite(current, parent, indexName);
+
+ switch (current.getSegmentingRule()) { // TODO: Why for CJK only? The segmentingRule says nothing about being for CJK only
+ case PHRASE: return createPhraseSegment(current, indexName);
+ case BOOLEAN_AND: return createAndSegment(current);
default:
- throw new IllegalArgumentException(
- "Unknown segmenting rule: "
- + current.getSegmentingRule()
- + ". This is a bug in Vespa, as the implementation has gotten out of sync."
- + " Please create a ticket as soon as possible.");
- }
+ throw new IllegalArgumentException("Unknown segmenting rule: " + current.getSegmentingRule() +
+ ". This is a bug in Vespa, as the implementation has gotten out of sync." +
+ " Please create a ticket as soon as possible.");
}
- return composite;
}
private AndSegmentItem createAndSegment(BlockItem current) {
return new AndSegmentItem(current.stringValue(), true, true);
}
- private CompositeItem phraseSegment(BlockItem current, String indexName) {
- CompositeItem composite;
- composite = new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true);
+ private CompositeItem createPhraseSegment(BlockItem current, String indexName) {
+ CompositeItem composite = new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true);
composite.setIndexName(indexName);
return composite;
}
diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java
index 1e8f436a05a..25488aa7bbc 100644
--- a/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/querytransform/VespaLowercasingSearcher.java
@@ -44,4 +44,5 @@ public class VespaLowercasingSearcher extends LowercasingSearcher {
Index index = indexFacts.getIndex(sb.toString());
return index.isLowercase() || index.isAttribute();
}
+
}