about | summary | refs | log | tree | commit | diff | stats
path: root/container-search/src
diff options
context:
space:
mode:
Diffstat (limited to 'container-search/src')
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/IndexFacts.java | 4
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java | 2
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java | 43
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java | 4
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java | 2
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java | 2
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java | 8
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java | 6
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java | 12
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java | 6
-rw-r--r-- container-search/src/main/java/com/yahoo/search/query/Model.java | 5
-rw-r--r-- container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java | 2
12 files changed, 45 insertions, 51 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
index 3631dedeffc..3f931c92489 100644
--- a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
+++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
@@ -18,11 +18,11 @@ import static com.yahoo.text.Lowercase.toLowerCase;
* session.getIndex(indexName).[get index info]
* </code></pre>
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
// TODO: We should replace this with a better representation of search definitions
// which is immutable, models clusters and search definitions inside clusters properly,
-// and uses better names.
+// and uses better names. -bratseth
public class IndexFacts {
private Map<String, List<String>> clusterByDocument;
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java
index 0dd8e1c36cc..38e2b82ea35 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java
@@ -10,7 +10,7 @@ import com.yahoo.prelude.query.textualrepresentation.Discloser;
* extend AndItem to avoid code using instanceof handling it as an
* AndItem.
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
public abstract class SegmentItem extends CompositeItem implements BlockItem {
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
index 5051108ea9b..da73aab3396 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java
@@ -134,14 +134,15 @@ public abstract class AbstractParser implements CustomParser {
@Override
public final Item parse(String queryToParse, String filterToParse, Language parsingLanguage,
IndexFacts.Session indexFacts, String defaultIndexName) {
- if (queryToParse == null) {
- return null;
- }
+ if (queryToParse == null) return null;
+
+ tokenize(queryToParse, defaultIndexName, indexFacts, parsingLanguage);
+
if (parsingLanguage == null) {
parsingLanguage = environment.getLinguistics().getDetector().detect(queryToParse, null).getLanguage();
}
setState(parsingLanguage, indexFacts);
- tokenize(queryToParse, defaultIndexName, indexFacts);
+
Item root = parseItems();
if (filterToParse != null) {
AnyParser filterParser = new AnyParser(environment);
@@ -167,25 +168,19 @@ public abstract class AbstractParser implements CustomParser {
* @param defaultIndex The default index to assign.
* @param item The item to check.
*/
- private static void assignDefaultIndex(final String defaultIndex,
- final Item item) {
- if (defaultIndex == null || item == null) {
- return;
- }
+ private static void assignDefaultIndex(final String defaultIndex, Item item) {
+ if (defaultIndex == null || item == null) return;
if (item instanceof IndexedItem) {
- final IndexedItem indexName = (IndexedItem) item;
+ IndexedItem indexName = (IndexedItem) item;
- if ("".equals(indexName.getIndexName())) {
+ if ("".equals(indexName.getIndexName()))
indexName.setIndexName(defaultIndex);
- }
- } else if (item instanceof CompositeItem) {
- final Iterator<Item> items = ((CompositeItem) item)
- .getItemIterator();
- while (items.hasNext()) {
- final Item i = items.next();
- assignDefaultIndex(defaultIndex, i);
- }
+ }
+ else if (item instanceof CompositeItem) {
+ Iterator<Item> items = ((CompositeItem)item).getItemIterator();
+ while (items.hasNext())
+ assignDefaultIndex(defaultIndex, items.next());
}
}
@@ -215,10 +210,11 @@ public abstract class AbstractParser implements CustomParser {
* @param query the string to tokenize.
* @param defaultIndexName the name of the index to use as default.
* @param indexFacts resolved information about the index we are searching
+ * @param language the language set for this query, or null if none
*/
- protected void tokenize(String query, String defaultIndexName, IndexFacts.Session indexFacts) {
+ protected void tokenize(String query, String defaultIndexName, IndexFacts.Session indexFacts, Language language) {
Tokenizer tokenizer = new Tokenizer(environment.getLinguistics());
- tokenizer.setSubstringSpecialTokens(language.isCjk());
+ tokenizer.setSubstringSpecialTokens(language != null && language.isCjk());
tokenizer.setSpecialTokens(environment.getSpecialTokens());
tokens.initialize(tokenizer.tokenize(query, defaultIndexName, indexFacts));
}
@@ -265,9 +261,8 @@ public abstract class AbstractParser implements CustomParser {
// TODO: The segmenting stuff is a mess now, this will fix it:
// - Make Segmenter a class which is instantiated per parsing
- // - Make the instance know the language, etc and do all dispatching
- // internally
- // -JSB
+ // - Make the instance know the language, etc and do all dispatching internally
+ // -bratseth
// TODO: Use segmenting for forced phrase searches?
protected Item segment(Token token) {
String normalizedToken = normalize(token.toString());
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java
index e0089fb89ea..95cce001469 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java
@@ -35,7 +35,7 @@ public class AnyParser extends SimpleParser {
Item filterRoot;
setState(queryLanguage, indexFacts);
- tokenize(filter, null, indexFacts);
+ tokenize(filter, null, indexFacts, queryLanguage);
filterRoot = anyItems(true);
@@ -134,7 +134,7 @@ public class AnyParser extends SimpleParser {
Item applyFilter(Item root, String filter, Language queryLanguage, IndexFacts.Session indexFacts) {
setState(queryLanguage, indexFacts);
- tokenize(filter, null, indexFacts);
+ tokenize(filter, null, indexFacts, queryLanguage);
return filterItems(root);
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java
index ba10b7b6ee1..dfd05ca0da5 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java
@@ -8,7 +8,7 @@ import com.yahoo.search.query.parser.ParserEnvironment;
/**
* Parser for queries of type phrase.
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
public class PhraseParser extends AbstractParser {
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java
index 6117e8e29ed..d1df74fcfa5 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java
@@ -13,7 +13,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.SPACE;
* Base class for parsers of the "simple" query languages (query types
* ANY and ALL).
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
abstract class SimpleParser extends StructuredParser {
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java
index eb35655e4ca..fd1617b5350 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java
@@ -15,7 +15,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.*;
* Base class for parsers of the query languages which can be used
* for structured queries (types ANY, ALL and ADVANCED).
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
abstract class StructuredParser extends AbstractParser {
@@ -430,9 +430,7 @@ abstract class StructuredParser extends AbstractParser {
Item item = null;
try {
- if (item == null) {
- item = word();
- }
+ item = word();
if (item == null && tokens.currentIs(NUMBER)) {
Token t = tokens.next();
@@ -542,7 +540,7 @@ abstract class StructuredParser extends AbstractParser {
PhraseItem phrase = null;
Item firstWord = null;
boolean starAfterFirst = false;
- boolean starBeforeFirst = false;
+ boolean starBeforeFirst;
if (tokens.skipMultiple(QUOTE)) {
quoted = !quoted;
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
index 582395bc738..ed0af8d5060 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
@@ -16,7 +16,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.*;
/**
* Query tokenizer. Singlethreaded.
*
- * @author bratseth
+ * @author bratseth
*/
public final class Tokenizer {
@@ -28,7 +28,7 @@ public final class Tokenizer {
private SpecialTokens specialTokens = null;
/** Whether to recognize tokens also as substrings of other tokens, needed for cjk */
- private boolean substringSpecialTokens=false;
+ private boolean substringSpecialTokens = false;
private final CharacterClasses characterClasses;
@@ -53,7 +53,7 @@ public final class Tokenizer {
/** Sets whether to recognize tokens also as substrings of other tokens, needed for cjk. Default false. */
public void setSubstringSpecialTokens(boolean substringSpecialTokens) {
- this.substringSpecialTokens=substringSpecialTokens;
+ this.substringSpecialTokens = substringSpecialTokens;
}
/**
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java
index 009c11ab1fd..b04ac2fcec5 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java
@@ -26,21 +26,21 @@ import com.yahoo.search.searchchain.Execution;
import com.yahoo.search.searchchain.PhaseNames;
/**
- * Search to do necessary transforms if the query is in segmented in
- * a "CJK language".
+ * Searcher to do necessary transforms if the query is segmented in a CJK language.
*
- * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author Steinar Knutsen
*/
@After(PhaseNames.UNBLENDED_RESULT)
@Before(STEMMING)
@Provides(CJKSearcher.TERM_ORDER_RELAXATION)
public class CJKSearcher extends Searcher {
+
public static final String TERM_ORDER_RELAXATION = "TermOrderRelaxation";
@Override
public Result search(Query query, Execution execution) {
- Language l = query.getModel().getParsingLanguage();
- if (!l.isCjk()) return execution.search(query);
+ Language language = query.getModel().getParsingLanguage();
+ if ( ! language.isCjk()) return execution.search(query);
QueryTree tree = query.getModel().getQueryTree();
tree.setRoot(transform(tree.getRoot()));
@@ -82,7 +82,6 @@ public class CJKSearcher extends Searcher {
return root;
}
-
private boolean hasOverlappingTokens(PhraseItem phrase) {
boolean has = false;
for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext(); ) {
@@ -108,4 +107,5 @@ public class CJKSearcher extends Searcher {
}
return segmentsLength > segments.getRawWord().length();
}
+
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
index ca8214f35d6..a4562892d0c 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
@@ -78,11 +78,11 @@ public class StemmingSearcher extends Searcher {
public String getFunctionName() { return "Stemming"; }
private Item replaceTerms(Query q, IndexFacts.Session indexFacts) {
- Language l = q.getModel().getParsingLanguage();
- if (l == Language.UNKNOWN) {
+ Language language = q.getModel().getParsingLanguage();
+ if (language == Language.UNKNOWN) {
return q.getModel().getQueryTree().getRoot();
}
- return scan(q.getModel().getQueryTree().getRoot(), l.isCjk(), l, indexFacts,
+ return scan(q.getModel().getQueryTree().getRoot(), language.isCjk(), language, indexFacts,
createReverseConnectivities(q.getModel().getQueryTree().getRoot()));
}
diff --git a/container-search/src/main/java/com/yahoo/search/query/Model.java b/container-search/src/main/java/com/yahoo/search/query/Model.java
index c155ed4fbbd..ca6f7efaa5e 100644
--- a/container-search/src/main/java/com/yahoo/search/query/Model.java
+++ b/container-search/src/main/java/com/yahoo/search/query/Model.java
@@ -74,7 +74,7 @@ public class Model implements Cloneable {
private String filter = null;
private Language language = null;
private Locale locale = null;
- private QueryTree queryTree = null; // The actual query. This is lazily created from the program
+ private QueryTree queryTree = null; // The query tree to execute. This is lazily created from the program
private String defaultIndex = null;
private Query.Type type = Query.Type.ALL;
private Query parent;
@@ -431,7 +431,8 @@ public class Model implements Cloneable {
return (Model)q.properties().get(argumentTypeName);
}
- public @Override String toString() {
+ @Override
+ public String toString() {
return "query representation [queryTree: " + queryTree + ", filter: " + filter + "]";
}
diff --git a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
index 11cee99f471..ec6d4f11369 100644
--- a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java
@@ -647,7 +647,7 @@ public class QueryTestCase {
{
Query q = new Query(httpEncode("/?query=headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\" tags:ymedia:type=story tags:ymedia:type=blogpost tags:ymedia:type=slideshow tags:ymedia:type=cavideo tags:ymedia:type=photo -tags:ymedia:hosted=no sddocname:contentindexing!0 embargo:<1484665288753!0 expires:>1484665288753!0"));
q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, null, new SimpleLinguistics())));
- assertEquals(Language.ENGLISH, q.getModel().getParsingLanguage());
+ assertEquals(Language.CHINESE_TRADITIONAL, q.getModel().getParsingLanguage());
}
}