diff options
Diffstat (limited to 'container-search/src/test/java/com')
2 files changed, 147 insertions, 31 deletions
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java
index a05124a42b1..45282de817b 100644
--- a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryLanguageTestCase.java
@@ -6,6 +6,7 @@ import com.yahoo.prelude.query.NotItem;
 import com.yahoo.prelude.query.PhraseItem;
 import com.yahoo.prelude.query.WordItem;
 import com.yahoo.search.Query;
+import com.yahoo.search.query.Model;
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -100,6 +101,15 @@ public class QueryLanguageTestCase {
 
     private void assertLanguage(Language expectedLanguage, String languageParameter) {
         Query query = new Query("?query=test&language=" + languageParameter);
+        Query query2 = new Query("?query=test");
+        Model model = query.getModel();
+        Model model2 = query2.getModel();
+
+        Language language1_0 = model.getParsingLanguage();
+        Language language1_1 = model.getLanguage();
+        Language language2_0 = model2.getParsingLanguage();
+        Language language2_1 = model2.getLanguage();
+
         assertEquals(expectedLanguage, query.getModel().getParsingLanguage());
 
         /*
diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java
index be68c87efb3..29e3c002c21 100644
--- a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java
+++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java
@@ -4,6 +4,12 @@ package com.yahoo.search.significance.test;
 import com.yahoo.component.chain.Chain;
 import com.yahoo.config.subscription.ConfigGetter;
 import com.yahoo.language.Language;
+import com.yahoo.language.Linguistics;
+import com.yahoo.language.detect.Detection;
+import com.yahoo.language.detect.Detector;
+import com.yahoo.language.detect.Hint;
+import com.yahoo.language.opennlp.OpenNlpLinguistics;
+import com.yahoo.language.process.*;
 import com.yahoo.language.significance.SignificanceModel;
 import com.yahoo.language.significance.SignificanceModelRegistry;
 import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry;
@@ -20,6 +26,7 @@ import com.yahoo.search.significance.SignificanceSearcher;
 import com.yahoo.vespa.config.search.RankProfilesConfig;
 import org.junit.jupiter.api.Test;
 
+import java.nio.ByteBuffer;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -27,6 +34,7 @@ import java.util.List;
 
 
 import static com.yahoo.test.JunitCompat.assertEquals;
+import static java.nio.charset.StandardCharsets.UTF_8;
 
 /**
  * Tests significance term in the search chain.
@@ -51,12 +59,90 @@ public class SignificanceSearcherTest {
         searcher = new SignificanceSearcher(significanceModelRegistry, new SchemaInfo(List.of(schema.build()), List.of()));
     }
 
+    private static class MockLinguistics implements Linguistics {
+
+        private final MockDetector mockDetector;
+        MockLinguistics(Language language) {
+            this.mockDetector = new MockDetector(language);
+        }
+
+        @Override
+        public Stemmer getStemmer() {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public Tokenizer getTokenizer() {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public Normalizer getNormalizer() {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public Transformer getTransformer() {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public Segmenter getSegmenter() {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public Detector getDetector() {
+            return this.mockDetector;
+        }
+
+        @Override
+        public GramSplitter getGramSplitter() {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public CharacterClasses getCharacterClasses() {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public boolean equals(Linguistics other) {
+            return false;
+        }
+    }
+
+    private static class MockDetector implements Detector {
+
+        private final Language detectionLanguage;
+        MockDetector(Language detectionLanguage) {
+            this.detectionLanguage = detectionLanguage;
+        }
+
+        @Override
+        public Detection detect(byte[] input, int offset, int length, Hint hint) {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public Detection detect(ByteBuffer input, Hint hint) {
+            throw new UnsupportedOperationException("Not implemented");
+        }
+
+        @Override
+        public Detection detect(String input, Hint hint) {
+            return new Detection(detectionLanguage, UTF_8.name(), false);
+        }
+    }
+
     private Execution createExecution(SignificanceSearcher searcher) {
         return new Execution(new Chain<>(searcher), Execution.Context.createContextStub());
     }
 
-    private Execution createExecution() {
-        return new Execution(new Chain<>(), Execution.Context.createContextStub());
+    private Execution createExecution(SignificanceSearcher searcher, Language language) {
+        var context = Execution.Context.createContextStub();
+        context.setLinguistics(new MockLinguistics(language));
+        return new Execution(new Chain<>(searcher), context);
     }
 
     @Test
@@ -191,35 +277,6 @@ public class SignificanceSearcherTest {
 
     }
 
-    @Test
-    void testSignificanceValueOnEmptyQuery() {
-        Query q = new Query();
-        q.getModel().setLanguage(Language.NORWEGIAN_BOKMAL);
-        AndItem root = new AndItem();
-        WordItem tmp;
-        tmp = new WordItem("Hei", true);
-        root.addItem(tmp);
-        tmp = new WordItem("Verden", true);
-        root.addItem(tmp);
-
-
-        q.getModel().getQueryTree().setRoot(root);
-        Result r = createExecution(searcher).search(q);
-        root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot();
-
-        WordItem w0 = (WordItem) root.getItem(0);
-        WordItem w1 = (WordItem) root.getItem(1);
-
-        Result r0 = createExecution().search(q);
-        root = (AndItem) r0.getQuery().getModel().getQueryTree().getRoot();
-
-        WordItem w0_0 = (WordItem) root.getItem(0);
-        WordItem w0_1 = (WordItem) root.getItem(1);
-
-        assertEquals(w0_0.getSignificance(), w0.getSignificance());
-        assertEquals(w0_1.getSignificance(), w1.getSignificance());
-
-    }
 
     @Test
     public void failsOnConflictingSignificanceConfiguration() {
@@ -252,4 +309,53 @@ public class SignificanceSearcherTest {
                 "(https://docs.vespa.ai/en/reference/schema-reference.html#significance).",
                 errorMessage.getDetailedMessage());
     }
+
+    @Test
+    public void testSignificanceSearcherWithExplicitAndImplicitSetLanguages() {
+        Query q = new Query();
+        q.getModel().setLanguage(Language.UNKNOWN);
+        q.getRanking().setProfile("significance-ranking");
+        AndItem root = new AndItem();
+        WordItem tmp;
+        tmp = new WordItem("hello", true);
+        root.addItem(tmp);
+
+        q.getModel().getQueryTree().setRoot(root);
+
+        SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH).get();
+        var helloFrequency = model.documentFrequency("hello");
+        var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency());
+        Result r = createExecution(searcher).search(q);
+
+        root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot();
+        WordItem w0 = (WordItem) root.getItem(0);
+        assertEquals(helloSignificanceValue, w0.getSignificance());
+
+
+        Query q2 = new Query();
+        q2.getModel().setLanguage(Language.FRENCH);
+        q2.getRanking().setProfile("significance-ranking");
+        AndItem root2 = new AndItem();
+        WordItem tmp2;
+        tmp2 = new WordItem("hello", true);
+        root2.addItem(tmp2);
+
+        q2.getModel().getQueryTree().setRoot(root2);
+        Result r2 = createExecution(searcher).search(q2);
+
+        assertEquals(1, r2.hits().getErrorHit().errors().size());
+
+
+        Query q3 = new Query();
+        q3.getRanking().setProfile("significance-ranking");
+        WordItem root3 = new WordItem("Я с детства хотел завести собаку, но родители мне не разрешали.", true);
+
+        q3.getModel().getQueryTree().setRoot(root3);
+        Execution execution = createExecution(searcher, Language.RUSSIAN);
+        Result r3 = execution.search(q3);
+
+        assertEquals(1, r3.hits().getErrorHit().errors().size());
+
+
+    }
 }