summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java 30
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java 2
-rw-r--r-- container-search/src/main/java/com/yahoo/search/query/Model.java 2
-rw-r--r-- container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java 23
-rw-r--r-- container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java 3
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/detect/Detector.java 9
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java 11
8 files changed, 53 insertions, 29 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
index fb56e10445a..5051108ea9b 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
@@ -196,14 +196,14 @@ public abstract class AbstractParser implements CustomParser {
* @param input The string to normalize.
* @return The normalized string.
*/
- protected String normalize(final String input) {
+ protected String normalize(String input) {
if (input == null || input.length() == 0) {
return input;
}
return environment.getLinguistics().getNormalizer().normalize(input);
}
- protected void setState(final Language queryLanguage, IndexFacts.Session indexFacts) {
+ protected void setState(Language queryLanguage, IndexFacts.Session indexFacts) {
this.indexFacts = indexFacts;
language = queryLanguage;
submodes.reset();
@@ -229,18 +229,18 @@ public abstract class AbstractParser implements CustomParser {
* @param unwashed The item whose phrases to simplify.
* @return The simplified item.
*/
- public static Item simplifyPhrases(final Item unwashed) {
+ public static Item simplifyPhrases(Item unwashed) {
if (unwashed == null) {
return unwashed;
} else if (unwashed instanceof PhraseItem) {
return collapsePhrase((PhraseItem) unwashed);
} else if (unwashed instanceof CompositeItem) {
- final CompositeItem composite = (CompositeItem) unwashed;
- final ListIterator<Item> i = composite.getItemIterator();
+ CompositeItem composite = (CompositeItem) unwashed;
+ ListIterator<Item> i = composite.getItemIterator();
while (i.hasNext()) {
- final Item original = i.next();
- final Item transformed = simplifyPhrases(original);
+ Item original = i.next();
+ Item transformed = simplifyPhrases(original);
if (original != transformed) {
i.set(transformed);
@@ -252,11 +252,10 @@ public abstract class AbstractParser implements CustomParser {
}
}
- private static Item collapsePhrase(final PhraseItem phrase) {
+ private static Item collapsePhrase(PhraseItem phrase) {
if (phrase.getItemCount() == 1 && phrase.getItem(0) instanceof WordItem) {
// TODO: Other stuff which needs propagation?
- final WordItem word = (WordItem) phrase.getItem(0);
-
+ WordItem word = (WordItem) phrase.getItem(0);
word.setWeight(phrase.getWeight());
return word;
} else {
@@ -270,8 +269,8 @@ public abstract class AbstractParser implements CustomParser {
// internally
// -JSB
// TODO: Use segmenting for forced phrase searches?
- protected Item segment(final Token token) {
- final String normalizedToken = normalize(token.toString());
+ protected Item segment(Token token) {
+ String normalizedToken = normalize(token.toString());
if (token.isSpecial()) {
final WordItem w = new WordItem(token.toString(), true, token.substring);
@@ -294,11 +293,10 @@ public abstract class AbstractParser implements CustomParser {
return new WordItem(segments.get(0), "", true, token.substring);
}
- final CompositeItem composite = new PhraseSegmentItem(token.toString(),
- normalizedToken, true, false, token.substring);
+ CompositeItem composite = new PhraseSegmentItem(token.toString(), normalizedToken, true, false, token.substring);
int n = 0;
- for (final String segment : segments) {
- final WordItem w = new WordItem(segment, "", true, token.substring);
+ for (String segment : segments) {
+ WordItem w = new WordItem(segment, "", true, token.substring);
w.setFromSegmented(true);
w.setSegmentIndex(n++);
w.setStemmed(false);
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java
index 3043cb27247..e0089fb89ea 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java
@@ -15,7 +15,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.*;
/**
* Parser for queries of type any.
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
public class AnyParser extends SimpleParser {
diff --git a/container-search/src/main/java/com/yahoo/search/query/Model.java b/container-search/src/main/java/com/yahoo/search/query/Model.java
index bf0939d17c1..c155ed4fbbd 100644
--- a/container-search/src/main/java/com/yahoo/search/query/Model.java
+++ b/container-search/src/main/java/com/yahoo/search/query/Model.java
@@ -125,7 +125,7 @@ public class Model implements Cloneable {
* @return the language determined, never null
*/
// TODO: We can support multiple languages per query by changing searchers which call this
- // to look up the query to use at each point form item.getLanguage
+ // to look up the query to use at each point from item.getLanguage
// with this as fallback for query branches where no parent item specifies language
public Language getParsingLanguage() {
Language language = getLanguage();
diff --git a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
index 6a3180fc488..11cee99f471 100644
--- a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
@@ -2,6 +2,8 @@
package com.yahoo.search.test;
import com.yahoo.component.chain.Chain;
+import com.yahoo.language.Language;
+import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.prelude.query.AndItem;
import com.yahoo.prelude.query.Highlight;
import com.yahoo.prelude.query.IndexedItem;
@@ -45,7 +47,7 @@ import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.fail;
/**
- * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a>
+ * @author Arne Bergene Fossaa
*/
public class QueryTestCase {
@@ -604,7 +606,7 @@ public class QueryTestCase {
@Test
public void testModelProperties() {
{
- Query query=new Query();
+ Query query = new Query();
query.properties().set("model.searchPath", "foo");
assertEquals("Set dynamic get dynamic works","foo",query.properties().get("model.searchPath"));
assertEquals("Set dynamic get static works","foo",query.getModel().getSearchPath());
@@ -628,11 +630,26 @@ public class QueryTestCase {
@Test
public void testPositiveTerms() {
- Query q = new Query(QueryTestCase.httpEncode("/?query=-a \"b c\" d e"));
+ Query q = new Query(httpEncode("/?query=-a \"b c\" d e"));
Item i = q.getModel().getQueryTree().getRoot();
List<IndexedItem> l = QueryTree.getPositiveTerms(i);
assertEquals(3, l.size());
}
+
+ @Test
+ public void testMultipleLanguages() {
+ {
+ Query q = new Query(httpEncode("/?query=headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\""));
+ q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, null, new SimpleLinguistics())));
+ assertEquals(Language.CHINESE_TRADITIONAL, q.getModel().getParsingLanguage());
+ }
+
+ {
+ Query q = new Query(httpEncode("/?query=headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\" tags:ymedia:type=story tags:ymedia:type=blogpost tags:ymedia:type=slideshow tags:ymedia:type=cavideo tags:ymedia:type=photo -tags:ymedia:hosted=no sddocname:contentindexing!0 embargo:<1484665288753!0 expires:>1484665288753!0"));
+ q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, null, new SimpleLinguistics())));
+ assertEquals(Language.ENGLISH, q.getModel().getParsingLanguage());
+ }
+ }
protected boolean contains(String lineSubstring,String[] lines) {
for (String line : lines)
diff --git a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java b/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java
index 901c9aa79d4..1fe62c2cd35 100644
--- a/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/vespa/streamingvisitors/MetricsSearcherTestCase.java
@@ -16,7 +16,7 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
/**
- * @author <a href="mailto:ulf@yahoo-inc.com">Ulf Carlin</a>
+ * @author Ulf Carlin
*/
public class MetricsSearcherTestCase {
private MetricsSearcher metricsSearcher = new MetricsSearcher();
diff --git a/linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java b/linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java
index f80f876d248..d457f83c9fd 100644
--- a/linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/detect/AbstractDetector.java
@@ -6,7 +6,7 @@ import com.yahoo.text.Utf8;
import java.nio.ByteBuffer;
/**
- * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @author Simon Thoresen
*/
public abstract class AbstractDetector implements Detector {
@@ -22,4 +22,5 @@ public abstract class AbstractDetector implements Detector {
input.get(buf, 0, buf.length);
return detect(buf, 0, buf.length, hint);
}
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/detect/Detector.java b/linguistics/src/main/java/com/yahoo/language/detect/Detector.java
index 4962d761a5a..c18b41bc890 100644
--- a/linguistics/src/main/java/com/yahoo/language/detect/Detector.java
+++ b/linguistics/src/main/java/com/yahoo/language/detect/Detector.java
@@ -6,7 +6,7 @@ import java.nio.ByteBuffer;
/**
* Abstract superclass of all Detectors used for language and encoding detection.
*
- * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a>
+ * @author Einar M R Rosenvinge
*/
public interface Detector {
@@ -20,7 +20,7 @@ public interface Detector {
* @return an array of possible language/encoding pairs, sorted by decreasing confidence (possibly empty, but never null)
* @throws DetectionException if detection fails
*/
- public abstract Detection detect(byte[] input, int offset, int length, Hint hint);
+ Detection detect(byte[] input, int offset, int length, Hint hint);
/**
* Detects language and encoding of the supplied ByteBuffer, possibly using a language/encoding hint.
@@ -30,7 +30,7 @@ public interface Detector {
* @return an array of possible language/encoding pairs, sorted by decreasing confidence (possibly empty, but never null)
* @throws DetectionException if detection fails
*/
- public abstract Detection detect(ByteBuffer input, Hint hint);
+ Detection detect(ByteBuffer input, Hint hint);
/**
* Detects language of the supplied String, possibly using a language hint.
@@ -40,5 +40,6 @@ public interface Detector {
* @return an array of possible language/encoding pairs, sorted by decreasing confidence (possibly empty, but never null)
* @throws DetectionException if detection fails
*/
- public abstract Detection detect(String input, Hint hint);
+ Detection detect(String input, Hint hint);
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
index eca35772296..24e1684d065 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
@@ -19,8 +19,8 @@ import java.nio.ByteBuffer;
* Japanese or Chinese characters, so their presence is a good indication of Korean. If a string contains phonetic
* japanese, this is a good indication of Japanese. However, Japanese and Chinese characters occupy many of the same
* character blocks, so if there are no definitive signs of Japanese then it is assumed that the String is Chinese.</p>
-
- * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a>
+ *
+ * @author Rich Pito
*/
public class SimpleDetector implements Detector {
@@ -46,6 +46,12 @@ public class SimpleDetector implements Detector {
}
public static Language guessLanguage(String input) {
+ Language language = guessLanguage2(input);
+ System.out.println("Detecting language of '" + input + "' as " + language);
+ return language;
+ }
+
+ public static Language guessLanguage2(String input) {
if (input == null || input.length() == 0) {
return Language.UNKNOWN;
}
@@ -176,4 +182,5 @@ public class SimpleDetector implements Detector {
return "ISO-8859-1";
}
}
+
}