aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author MariusArhaug <mariusarhaug@hotmail.com> 2024-06-26 13:01:43 +0200
committer MariusArhaug <mariusarhaug@hotmail.com> 2024-06-26 13:01:43 +0200
commit ccd6b8758dee2aeebcaf4eb8b944de5b0a8c32b7 (patch)
tree efc709d5a9231b7464c56823098bd5820b1513ea
parent 28928d915eeaddc4cd1de7a94a285e467d473d74 (diff)
Handle implicit/explicit set languages for significance searcher
-rw-r--r-- container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java 59
-rw-r--r-- container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java 10
-rw-r--r-- container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java 168
3 files changed, 199 insertions, 38 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java
index 3f72e98f18a..8ba8f747019 100644
--- a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java
@@ -14,6 +14,7 @@ import com.yahoo.prelude.query.WordItem;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
+import com.yahoo.search.query.ranking.Significance;
import com.yahoo.search.result.ErrorMessage;
import com.yahoo.search.schema.RankProfile;
import com.yahoo.search.schema.Schema;
@@ -95,16 +96,60 @@ public class SignificanceSearcher extends Searcher {
}
+ /**
+ * Resolves the significance model for the query's language, applies it to the
+ * query tree through setIDF, and then passes the query on down the chain.
+ * If model resolution throws IllegalArgumentException the query is not passed on;
+ * a Result carrying an illegal-query error with the exception message is returned instead.
+ */
private Result calculateAndSetSignificance(Query query, Execution execution) {
- Language language = query.getModel().getParsingLanguage();
- Optional<SignificanceModel> model = significanceModelRegistry.getModel(language);
- log.log(Level.FINE, () -> "Got model for language %s: %s"
- .formatted(language, model.map(SignificanceModel::getId).orElse("<none>")));
+ try {
+ var significanceModel = getSignificanceModelFromQueryLanguage(query);
+ // NOTE(review): this logs the parsing language, while the model lookup prefers the
+ // explicitly set language when there is one - confirm the two cannot differ here.
+ log.log(Level.FINE, () -> "Got model for language %s: %s"
+ .formatted(query.getModel().getParsingLanguage(), significanceModel.getId()));
- if (model.isEmpty()) return execution.search(query);
+ setIDF(query.getModel().getQueryTree().getRoot(), significanceModel);
- setIDF(query.getModel().getQueryTree().getRoot(), model.get());
+ return execution.search(query);
+ } catch (IllegalArgumentException e) {
+ // No usable model: answer with an illegal-query error rather than searching.
+ var result = new Result(query);
+ result.hits().addError(
+ ErrorMessage.createIllegalQuery(e.getMessage()));
+ return result;
+ }
+ }
+
+ /**
+ * Picks the significance model to use for a query.
+ * The language returned by getLanguage() (called "explicit" here) takes precedence;
+ * the one returned by getParsingLanguage() (called "implicit") is only consulted
+ * when the explicit language is null. Language.UNKNOWN in either role is delegated
+ * to handleFallBackToUnknownLanguage().
+ *
+ * @param query the query whose model supplies the languages
+ * @return the significance model registered for the chosen language
+ * @throws IllegalArgumentException if both languages are null, or if the registry
+ *         has no model for the chosen language
+ */
+ private SignificanceModel getSignificanceModelFromQueryLanguage(Query query) throws IllegalArgumentException {
+ Language explicitLanguage = query.getModel().getLanguage();
+ Language implicitLanguage = query.getModel().getParsingLanguage();
+
+ if (explicitLanguage == null && implicitLanguage == null) {
+ throw new IllegalArgumentException("No language found in query");
+ }
+
+ // Explicit language wins: a missing model for it is an error, with no fallback
+ // to the implicit language.
+ if (explicitLanguage != null) {
+ if (explicitLanguage == Language.UNKNOWN) {
+ return handleFallBackToUnknownLanguage();
+ }
+ var model = significanceModelRegistry.getModel(explicitLanguage);
+ if (model.isEmpty()) {
+ throw new IllegalArgumentException("No significance model available for set language " + explicitLanguage);
+ }
+ return model.get();
+ }
+
+ // Only reached when no explicit language is set; implicitLanguage is non-null here.
+ if (implicitLanguage == Language.UNKNOWN) {
+ return handleFallBackToUnknownLanguage();
+ }
+ var model = significanceModelRegistry.getModel(implicitLanguage);
+ if (model.isEmpty()) {
+ throw new IllegalArgumentException("No significance model available for implicit language " + implicitLanguage);
+ }
+ return model.get();
+ }
+
+ /**
+ * Chooses a model when the query language is Language.UNKNOWN.
+ * Returns the model registered for Language.UNKNOWN if present, otherwise the one
+ * registered for Language.ENGLISH.
+ *
+ * @return the UNKNOWN model, or the ENGLISH model when no UNKNOWN model exists
+ * @throws IllegalArgumentException if the registry has neither model
+ */
+ private SignificanceModel handleFallBackToUnknownLanguage() throws IllegalArgumentException {
+ var unknownModel = significanceModelRegistry.getModel(Language.UNKNOWN);
+ var englishModel = significanceModelRegistry.getModel(Language.ENGLISH);
+
+ if (unknownModel.isEmpty() && englishModel.isEmpty()) {
+ throw new IllegalArgumentException("No significance model available for unknown or english language");
+ }
- return execution.search(query);
+ // englishModel.get() is only evaluated when unknownModel is empty, in which case
+ // the check above guarantees englishModel is present.
+ return unknownModel.orElseGet(englishModel::get);
}
private void setIDF(Item root, SignificanceModel significanceModel) {
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java
index a05124a42b1..45282de817b 100644
--- a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java
@@ -6,6 +6,7 @@ import com.yahoo.prelude.query.NotItem;
import com.yahoo.prelude.query.PhraseItem;
import com.yahoo.prelude.query.WordItem;
import com.yahoo.search.Query;
+import com.yahoo.search.query.Model;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -100,6 +101,15 @@ public class QueryLanguageTestCase {
private void assertLanguage(Language expectedLanguage, String languageParameter) {
Query query = new Query("?query=test&language=" + languageParameter);
+ Query query2 = new Query("?query=test");
+ Model model = query.getModel();
+ Model model2 = query2.getModel();
+
+ Language language1_0 = model.getParsingLanguage();
+ Language language1_1 = model.getLanguage();
+ Language language2_0 = model2.getParsingLanguage();
+ Language language2_1 = model2.getLanguage();
+
assertEquals(expectedLanguage, query.getModel().getParsingLanguage());
/*
diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java
index be68c87efb3..29e3c002c21 100644
--- a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java
+++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java
@@ -4,6 +4,12 @@ package com.yahoo.search.significance.test;
import com.yahoo.component.chain.Chain;
import com.yahoo.config.subscription.ConfigGetter;
import com.yahoo.language.Language;
+import com.yahoo.language.Linguistics;
+import com.yahoo.language.detect.Detection;
+import com.yahoo.language.detect.Detector;
+import com.yahoo.language.detect.Hint;
+import com.yahoo.language.opennlp.OpenNlpLinguistics;
+import com.yahoo.language.process.*;
import com.yahoo.language.significance.SignificanceModel;
import com.yahoo.language.significance.SignificanceModelRegistry;
import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry;
@@ -20,6 +26,7 @@ import com.yahoo.search.significance.SignificanceSearcher;
import com.yahoo.vespa.config.search.RankProfilesConfig;
import org.junit.jupiter.api.Test;
+import java.nio.ByteBuffer;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
@@ -27,6 +34,7 @@ import java.util.List;
import static com.yahoo.test.JunitCompat.assertEquals;
+import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Tests significance term in the search chain.
@@ -51,12 +59,90 @@ public class SignificanceSearcherTest {
searcher = new SignificanceSearcher(significanceModelRegistry, new SchemaInfo(List.of(schema.build()), List.of()));
}
+ /**
+ * Linguistics stub for these tests. Only getDetector() is usable: it returns a
+ * MockDetector built from the language passed to the constructor. Every other
+ * component getter throws UnsupportedOperationException.
+ */
+ private static class MockLinguistics implements Linguistics {
+
+ private final MockDetector mockDetector;
+ MockLinguistics(Language language) {
+ this.mockDetector = new MockDetector(language);
+ }
+
+ @Override
+ public Stemmer getStemmer() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ @Override
+ public Tokenizer getTokenizer() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ @Override
+ public Normalizer getNormalizer() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ @Override
+ public Transformer getTransformer() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ @Override
+ public Segmenter getSegmenter() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ // The only supported component: the detector fixed at construction time.
+ @Override
+ public Detector getDetector() {
+ return this.mockDetector;
+ }
+
+ @Override
+ public GramSplitter getGramSplitter() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ @Override
+ public CharacterClasses getCharacterClasses() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ // Never reports equality with another Linguistics instance.
+ @Override
+ public boolean equals(Linguistics other) {
+ return false;
+ }
+ }
+
+ /**
+ * Detector stub that reports the language given at construction for any String
+ * input, ignoring both the text and the hint. The byte[] and ByteBuffer overloads
+ * are not supported and throw UnsupportedOperationException.
+ */
+ private static class MockDetector implements Detector {
+
+ private Language detectionLanguage;
+ MockDetector(Language detectionLanguage) {
+ this.detectionLanguage = detectionLanguage;
+ }
+
+ @Override
+ public Detection detect(byte[] input, int offset, int length, Hint hint) {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ @Override
+ public Detection detect(ByteBuffer input, Hint hint) {
+ throw new UnsupportedOperationException("Not implemented");
+ }
+
+ // Always answers with the configured language and the UTF-8 charset name.
+ @Override
+ public Detection detect(String input, Hint hint) {
+ return new Detection(detectionLanguage, UTF_8.name(), false);
+ }
+ }
+
+ /** Creates an Execution over a chain holding only the given searcher, using a stub context. */
private Execution createExecution(SignificanceSearcher searcher) {
return new Execution(new Chain<>(searcher), Execution.Context.createContextStub());
}
- private Execution createExecution() {
- return new Execution(new Chain<>(), Execution.Context.createContextStub());
+ /**
+ * Creates an Execution over a chain holding only the given searcher, with a
+ * MockLinguistics for the given language installed on the stub context.
+ */
+ private Execution createExecution(SignificanceSearcher searcher, Language language) {
+ var context = Execution.Context.createContextStub();
+ context.setLinguistics(new MockLinguistics(language));
+ return new Execution(new Chain<>(searcher), context);
}
@Test
@@ -191,35 +277,6 @@ public class SignificanceSearcherTest {
}
- @Test
- void testSignificanceValueOnEmptyQuery() {
- Query q = new Query();
- q.getModel().setLanguage(Language.NORWEGIAN_BOKMAL);
- AndItem root = new AndItem();
- WordItem tmp;
- tmp = new WordItem("Hei", true);
- root.addItem(tmp);
- tmp = new WordItem("Verden", true);
- root.addItem(tmp);
-
-
- q.getModel().getQueryTree().setRoot(root);
- Result r = createExecution(searcher).search(q);
- root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot();
-
- WordItem w0 = (WordItem) root.getItem(0);
- WordItem w1 = (WordItem) root.getItem(1);
-
- Result r0 = createExecution().search(q);
- root = (AndItem) r0.getQuery().getModel().getQueryTree().getRoot();
-
- WordItem w0_0 = (WordItem) root.getItem(0);
- WordItem w0_1 = (WordItem) root.getItem(1);
-
- assertEquals(w0_0.getSignificance(), w0.getSignificance());
- assertEquals(w0_1.getSignificance(), w1.getSignificance());
-
- }
@Test
public void failsOnConflictingSignificanceConfiguration() {
@@ -252,4 +309,53 @@ public class SignificanceSearcherTest {
"(https://docs.vespa.ai/en/reference/schema-reference.html#significance).",
errorMessage.getDetailedMessage());
}
+
+ /**
+ * Exercises model selection in SignificanceSearcher for three queries:
+ * an explicit Language.UNKNOWN, an explicit Language.FRENCH, and no explicit
+ * language with a mock detector that reports Language.RUSSIAN.
+ */
+ @Test
+ public void testSignificanceSearcherWithExplictitAndImplictSetLanguages() {
+ // Case 1: explicit UNKNOWN. The expected significance is computed from the
+ // ENGLISH model's document frequency for "hello".
+ Query q = new Query();
+ q.getModel().setLanguage(Language.UNKNOWN);
+ q.getRanking().setProfile("significance-ranking");
+ AndItem root = new AndItem();
+ WordItem tmp;
+ tmp = new WordItem("hello", true);
+ root.addItem(tmp);
+
+ q.getModel().getQueryTree().setRoot(root);
+
+ SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH).get();
+ var helloFrequency = model.documentFrequency("hello");
+ var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency());
+ Result r = createExecution(searcher).search(q);
+
+ root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot();
+ WordItem w0 = (WordItem) root.getItem(0);
+ assertEquals(helloSignificanceValue, w0.getSignificance());
+
+
+ // Case 2: explicit FRENCH. Exactly one error is expected on the result -
+ // presumably because the test registry has no French model; verify against the setup.
+ Query q2 = new Query();
+ q2.getModel().setLanguage(Language.FRENCH);
+ q2.getRanking().setProfile("significance-ranking");
+ AndItem root2 = new AndItem();
+ WordItem tmp2;
+ tmp2 = new WordItem("hello", true);
+ root2.addItem(tmp2);
+
+ q2.getModel().getQueryTree().setRoot(root2);
+ Result r2 = createExecution(searcher).search(q2);
+
+ assertEquals(1, r2.hits().getErrorHit().errors().size());
+
+
+ // Case 3: no explicit language; the execution context carries a MockLinguistics
+ // whose detector reports RUSSIAN. Exactly one error is expected - presumably
+ // because no Russian model is registered; verify against the setup.
+ Query q3 = new Query();
+ q3.getRanking().setProfile("significance-ranking");
+ WordItem root3 = new WordItem("Я с детства хотел завести собаку, но родители мне не разрешали.", true);
+
+ q3.getModel().getQueryTree().setRoot(root3);
+ Execution execution = createExecution(searcher, Language.RUSSIAN);
+ Result r3 = execution.search(q3);
+
+ assertEquals(1, r3.hits().getErrorHit().errors().size());
+
+
+ }
}