diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-01-22 12:08:57 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-01-22 12:08:57 +0100 |
commit | 76c5c6d2ea96842d81cdaa482335e3ddf17a5659 (patch) | |
tree | 080a39a899b8bd2df4ef9c2038c5cdf5a751e251 /container-search/src/test/java/com/yahoo | |
parent | 09caf52b327f6a48af8acf02872a49e08d75c9c9 (diff) |
Use text content only for query lang detection
Diffstat (limited to 'container-search/src/test/java/com/yahoo')
3 files changed, 53 insertions, 11 deletions
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java index 1257f2e2746..819684ad9a5 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java @@ -1916,7 +1916,7 @@ public class ParseTestCase { @Test public void testChineseSpecialTokens() { tester.assertParsed("AND \"cat tcp/ip zu\" \"foo dotnet bar dotnet dotnet c# c++ bar dotnet dotnet wiz\"", - "cattcp/ipzu foo.netbar.net.netC#c++bar.net.netwiz","",Query.Type.ALL,Language.CHINESE_SIMPLIFIED); + "cattcp/ipzu foo.netbar.net.netC#c++bar.net.netwiz","", Query.Type.ALL, Language.CHINESE_SIMPLIFIED); } /** diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseAbstractTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseAbstractTestCase.java index 87835c08127..7cc440e815e 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseAbstractTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseAbstractTestCase.java @@ -67,6 +67,7 @@ public abstract class RuleBaseAbstractTestCase extends junit.framework.TestCase } protected Query assertSemantics(String result, Query query) { + System.out.println(query.getModel().getQueryTree()); createExecution(searcher).search(query); assertEquals(result, query.getModel().getQueryTree().getRoot().toString()); return query; diff --git a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java index ec6d4f11369..eaaf87bc035 100644 --- a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java @@ -3,7 +3,14 @@ package com.yahoo.search.test; import com.yahoo.component.chain.Chain; import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.detect.Detection; +import com.yahoo.language.detect.Detector; +import com.yahoo.language.detect.Hint; +import com.yahoo.language.simple.SimpleDetector; import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.Highlight; import com.yahoo.prelude.query.IndexedItem; @@ -637,23 +644,57 @@ public class QueryTestCase { } @Test - public void testMultipleLanguages() { - { - Query q = new Query(httpEncode("/?query=headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\"")); - q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, null, new SimpleLinguistics()))); - assertEquals(Language.CHINESE_TRADITIONAL, q.getModel().getParsingLanguage()); + public void testHeuristicLanguageDetectionTextExtraction() { + assertDetectionText("b ", "a:b", "text:a", "text:default"); + assertDetectionText("b ", "b", "text:default"); + assertDetectionText("b ", "b","text:b", "text:default"); + assertDetectionText("a b ", "a:b","text:b", "text:default"); + assertDetectionText("foo bar fuz ", "foo a:bar --() fuz","text:a", "text:default"); + assertDetectionText(" 彭 博士 觀 風向 彭 博士 觀 風向 彭 博士 觀 風向 ","headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\" sddocname:contentindexing!0 embargo:<1484665288753!0 expires:>1484665288753!0", + "text:headline", "text:content", "text:description", "text:default", "nontext:tags", "nontext:sddocname", "nontext:embargo", "nontext:expires"); + } + + private void assertDetectionText(String expectedDetectionText, String queryString, String ... indexSpecs) { + Query q = new Query(httpEncode("/?query=" + queryString)); + IndexFacts indexFacts = new IndexFacts(); + for (String indexSpec : indexSpecs) { + String[] specParts = indexSpec.split(":"); + Index tokenIndex = new Index(specParts[1]); + if (specParts[0].equals("text")) + tokenIndex.setPlainTokens(true); + indexFacts.addIndex("testSearchDefinition", tokenIndex); } + MockLinguistics mockLinguistics = new MockLinguistics(); + q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, indexFacts, mockLinguistics))); + q.getModel().getQueryTree(); // cause parsing + assertEquals(expectedDetectionText, mockLinguistics.detector.lastDetectionText); + } + + /** A linguistics instance which records the last language detection text passed to it */ + private static class MockLinguistics extends SimpleLinguistics { - { - Query q = new Query(httpEncode("/?query=headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\" tags:ymedia:type=story tags:ymedia:type=blogpost tags:ymedia:type=slideshow tags:ymedia:type=cavideo tags:ymedia:type=photo -tags:ymedia:hosted=no sddocname:contentindexing!0 embargo:<1484665288753!0 expires:>1484665288753!0")); - q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, null, new SimpleLinguistics()))); - assertEquals(Language.CHINESE_TRADITIONAL, q.getModel().getParsingLanguage()); + final MockDetector detector = new MockDetector(); + + @Override + public Detector getDetector() { return detector; } + + } + + private static class MockDetector extends SimpleDetector { + + String lastDetectionText = null; + + @Override + public Detection detect(String input, Hint hint) { + lastDetectionText = input; + return super.detect(input, hint); } + } protected boolean contains(String lineSubstring,String[] lines) { for (String line : lines) - if (line.indexOf(lineSubstring)>=0) return true; + if (line.contains(lineSubstring)) return true; return false; } |