diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-01-20 13:37:49 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-01-20 13:37:49 +0100 |
commit | 262d072c1ac996b34f6c70efc95853be699ca935 (patch) | |
tree | 925e107e3024bc5ab4e1045a6625c3e4d17bf543 | |
parent | 80b7024763933d95edf39e726d1d78018672d050 (diff) |
Nonfunctional changes only
8 files changed, 53 insertions, 29 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java index fb56e10445a..5051108ea9b 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java @@ -196,14 +196,14 @@ public abstract class AbstractParser implements CustomParser { * @param input The string to normalize. * @return The normalized string. */ - protected String normalize(final String input) { + protected String normalize(String input) { if (input == null || input.length() == 0) { return input; } return environment.getLinguistics().getNormalizer().normalize(input); } - protected void setState(final Language queryLanguage, IndexFacts.Session indexFacts) { + protected void setState(Language queryLanguage, IndexFacts.Session indexFacts) { this.indexFacts = indexFacts; language = queryLanguage; submodes.reset(); @@ -229,18 +229,18 @@ public abstract class AbstractParser implements CustomParser { * @param unwashed The item whose phrases to simplify. * @return The simplified item. */ - public static Item simplifyPhrases(final Item unwashed) { + public static Item simplifyPhrases(Item unwashed) { if (unwashed == null) { return unwashed; } else if (unwashed instanceof PhraseItem) { return collapsePhrase((PhraseItem) unwashed); } else if (unwashed instanceof CompositeItem) { - final CompositeItem composite = (CompositeItem) unwashed; - final ListIterator<Item> i = composite.getItemIterator(); + CompositeItem composite = (CompositeItem) unwashed; + ListIterator<Item> i = composite.getItemIterator(); while (i.hasNext()) { - final Item original = i.next(); - final Item transformed = simplifyPhrases(original); + Item original = i.next(); + Item transformed = simplifyPhrases(original); if (original != transformed) { i.set(transformed); @@ -252,11 +252,10 @@ public abstract class AbstractParser implements CustomParser { } } - private static Item collapsePhrase(final PhraseItem phrase) { + private static Item collapsePhrase(PhraseItem phrase) { if (phrase.getItemCount() == 1 && phrase.getItem(0) instanceof WordItem) { // TODO: Other stuff which needs propagation? - final WordItem word = (WordItem) phrase.getItem(0); - + WordItem word = (WordItem) phrase.getItem(0); word.setWeight(phrase.getWeight()); return word; } else { @@ -270,8 +269,8 @@ public abstract class AbstractParser implements CustomParser { // internally // -JSB // TODO: Use segmenting for forced phrase searches? - protected Item segment(final Token token) { - final String normalizedToken = normalize(token.toString()); + protected Item segment(Token token) { + String normalizedToken = normalize(token.toString()); if (token.isSpecial()) { final WordItem w = new WordItem(token.toString(), true, token.substring); @@ -294,11 +293,10 @@ public abstract class AbstractParser implements CustomParser { return new WordItem(segments.get(0), "", true, token.substring); } - final CompositeItem composite = new PhraseSegmentItem(token.toString(), - normalizedToken, true, false, token.substring); + CompositeItem composite = new PhraseSegmentItem(token.toString(), normalizedToken, true, false, token.substring); int n = 0; - for (final String segment : segments) { - final WordItem w = new WordItem(segment, "", true, token.substring); + for (String segment : segments) { + WordItem w = new WordItem(segment, "", true, token.substring); w.setFromSegmented(true); w.setSegmentIndex(n++); w.setStemmed(false); diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java index 3043cb27247..e0089fb89ea 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java @@ -15,7 +15,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.*; /** * Parser for queries of type any. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public class AnyParser extends SimpleParser { diff --git a/container-search/src/main/java/com/yahoo/search/query/Model.java b/container-search/src/main/java/com/yahoo/search/query/Model.java index bf0939d17c1..c155ed4fbbd 100644 --- a/container-search/src/main/java/com/yahoo/search/query/Model.java +++ b/container-search/src/main/java/com/yahoo/search/query/Model.java @@ -125,7 +125,7 @@ public class Model implements Cloneable { * @return the language determined, never null */ // TODO: We can support multiple languages per query by changing searchers which call this - // to look up the query to use at each point form item.getLanguage + // to look up the query to use at each point from item.getLanguage // with this as fallback for query branches where no parent item specifies language public Language getParsingLanguage() { Language language = getLanguage(); diff --git a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java index 6a3180fc488..11cee99f471 100644 --- a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java @@ -2,6 +2,8 @@ package com.yahoo.search.test; import com.yahoo.component.chain.Chain; +import com.yahoo.language.Language; +import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.Highlight; import com.yahoo.prelude.query.IndexedItem; @@ -45,7 +47,7 @@ import static org.junit.Assert.assertNotSame; import static org.junit.Assert.fail; /** - * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author Arne Bergene Fossaa */ public class QueryTestCase { @@ -604,7 +606,7 @@ public class QueryTestCase { @Test public void testModelProperties() { { - Query query=new Query(); + Query query = new Query(); query.properties().set("model.searchPath", "foo"); assertEquals("Set dynamic get dynamic works","foo",query.properties().get("model.searchPath")); assertEquals("Set dynamic get static works","foo",query.getModel().getSearchPath()); @@ -628,11 +630,26 @@ public class QueryTestCase { @Test public void testPositiveTerms() { - Query q = new Query(QueryTestCase.httpEncode("/?query=-a \"b c\" d e")); + Query q = new Query(httpEncode("/?query=-a \"b c\" d e")); Item i = q.getModel().getQueryTree().getRoot(); List<IndexedItem> l = QueryTree.getPositiveTerms(i); assertEquals(3, l.size()); } + + @Test + public void testMultipleLanguages() { + { + Query q = new Query(httpEncode("/?query=headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\"")); + q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, null, new SimpleLinguistics()))); + assertEquals(Language.CHINESE_TRADITIONAL, q.getModel().getParsingLanguage()); + } + + { + Query q = new Query(httpEncode("/?query=headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\" tags:ymedia:type=story tags:ymedia:type=blogpost tags:ymedia:type=slideshow tags:ymedia:type=cavideo tags:ymedia:type=photo -tags:ymedia:hosted=no sddocname:contentindexing!0 embargo:<1484665288753!0 expires:>1484665288753!0")); + q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, null, new SimpleLinguistics()))); + assertEquals(Language.ENGLISH, q.getModel().getParsingLanguage()); + } + } protected boolean contains(String lineSubstring,String[] lines) { for (String line : lines) diff --git a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java b/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java index 901c9aa79d4..1fe62c2cd35 100644 --- a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java @@ -16,7 +16,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** - * @author <a href="mailto:ulf@yahoo-inc.com">Ulf Carlin</a> + * @author Ulf Carlin */ public class MetricsSearcherTestCase { private MetricsSearcher metricsSearcher = new MetricsSearcher(); diff --git a/linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java b/linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java index f80f876d248..d457f83c9fd 100644 --- a/linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java +++ b/linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java @@ -6,7 +6,7 @@ import com.yahoo.text.Utf8; import java.nio.ByteBuffer; /** - * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @author Simon Thoresen */ public abstract class AbstractDetector implements Detector { @@ -22,4 +22,5 @@ public abstract class AbstractDetector implements Detector { input.get(buf, 0, buf.length); return detect(buf, 0, buf.length, hint); } + } diff --git a/linguistics/src/main/java/com/yahoo/language/detect/Detector.java b/linguistics/src/main/java/com/yahoo/language/detect/Detector.java index 4962d761a5a..c18b41bc890 100644 --- a/linguistics/src/main/java/com/yahoo/language/detect/Detector.java +++ b/linguistics/src/main/java/com/yahoo/language/detect/Detector.java @@ -6,7 +6,7 @@ import java.nio.ByteBuffer; /** * Abstract superclass of all Detectors used for language and encoding detection. * - * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + * @author Einar M R Rosenvinge */ public interface Detector { @@ -20,7 +20,7 @@ public interface Detector { * @return an array of possible language/encoding pairs, sorted by decreasing confidence (possibly empty, but never null) * @throws DetectionException if detection fails */ - public abstract Detection detect(byte[] input, int offset, int length, Hint hint); + Detection detect(byte[] input, int offset, int length, Hint hint); /** * Detects language and encoding of the supplied ByteBuffer, possibly using a language/encoding hint. @@ -30,7 +30,7 @@ public interface Detector { * @return an array of possible language/encoding pairs, sorted by decreasing confidence (possibly empty, but never null) * @throws DetectionException if detection fails */ - public abstract Detection detect(ByteBuffer input, Hint hint); + Detection detect(ByteBuffer input, Hint hint); /** * Detects language of the supplied String, possibly using a language hint. @@ -40,5 +40,6 @@ public interface Detector { * @return an array of possible language/encoding pairs, sorted by decreasing confidence (possibly empty, but never null) * @throws DetectionException if detection fails */ - public abstract Detection detect(String input, Hint hint); + Detection detect(String input, Hint hint); + } diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java index eca35772296..24e1684d065 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java @@ -19,8 +19,8 @@ import java.nio.ByteBuffer; * Japanese or Chinese characters, so their presence is a good indication of Korean. If a string contains phonetic * japanese, this is a good indication of Japanese. However, Japanese and Chinese characters occupy many of the same * character blocks, so if there are no definitive signs of Japanese then it is assumed that the String is Chinese.</p> - - * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + * + * @author Rich Pito */ public class SimpleDetector implements Detector { @@ -46,6 +46,12 @@ public class SimpleDetector implements Detector { } public static Language guessLanguage(String input) { + Language language = guessLanguage2(input); + System.out.println("Detecting language of '" + input + "' as " + language); + return language; + } + + public static Language guessLanguage2(String input) { if (input == null || input.length() == 0) { return Language.UNKNOWN; } @@ -176,4 +182,5 @@ public class SimpleDetector implements Detector { return "ISO-8859-1"; } } + } |