diff options
Diffstat (limited to 'container-search/src')
12 files changed, 45 insertions, 51 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java index 3631dedeffc..3f931c92489 100644 --- a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java +++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java @@ -18,11 +18,11 @@ import static com.yahoo.text.Lowercase.toLowerCase; * session.getIndex(indexName).[get index info] * </code></pre> * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ // TODO: We should replace this with a better representation of search definitions // which is immutable, models clusters and search definitions inside clusters properly, -// and uses better names. +// and uses better names. -bratseth public class IndexFacts { private Map<String, List<String>> clusterByDocument; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java index 0dd8e1c36cc..38e2b82ea35 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java @@ -10,7 +10,7 @@ import com.yahoo.prelude.query.textualrepresentation.Discloser; * extend AndItem to avoid code using instanceof handling it as an * AndItem. 
* - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public abstract class SegmentItem extends CompositeItem implements BlockItem { diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java index 5051108ea9b..da73aab3396 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java @@ -134,14 +134,15 @@ public abstract class AbstractParser implements CustomParser { @Override public final Item parse(String queryToParse, String filterToParse, Language parsingLanguage, IndexFacts.Session indexFacts, String defaultIndexName) { - if (queryToParse == null) { - return null; - } + if (queryToParse == null) return null; + + tokenize(queryToParse, defaultIndexName, indexFacts, parsingLanguage); + if (parsingLanguage == null) { parsingLanguage = environment.getLinguistics().getDetector().detect(queryToParse, null).getLanguage(); } setState(parsingLanguage, indexFacts); - tokenize(queryToParse, defaultIndexName, indexFacts); + Item root = parseItems(); if (filterToParse != null) { AnyParser filterParser = new AnyParser(environment); @@ -167,25 +168,19 @@ public abstract class AbstractParser implements CustomParser { * @param defaultIndex The default index to assign. * @param item The item to check. 
*/ - private static void assignDefaultIndex(final String defaultIndex, - final Item item) { - if (defaultIndex == null || item == null) { - return; - } + private static void assignDefaultIndex(final String defaultIndex, Item item) { + if (defaultIndex == null || item == null) return; if (item instanceof IndexedItem) { - final IndexedItem indexName = (IndexedItem) item; + IndexedItem indexName = (IndexedItem) item; - if ("".equals(indexName.getIndexName())) { + if ("".equals(indexName.getIndexName())) indexName.setIndexName(defaultIndex); - } - } else if (item instanceof CompositeItem) { - final Iterator<Item> items = ((CompositeItem) item) - .getItemIterator(); - while (items.hasNext()) { - final Item i = items.next(); - assignDefaultIndex(defaultIndex, i); - } + } + else if (item instanceof CompositeItem) { + Iterator<Item> items = ((CompositeItem)item).getItemIterator(); + while (items.hasNext()) + assignDefaultIndex(defaultIndex, items.next()); } } @@ -215,10 +210,11 @@ public abstract class AbstractParser implements CustomParser { * @param query the string to tokenize. * @param defaultIndexName the name of the index to use as default. 
* @param indexFacts resolved information about the index we are searching + * @param language the language set for this query, or null if none */ - protected void tokenize(String query, String defaultIndexName, IndexFacts.Session indexFacts) { + protected void tokenize(String query, String defaultIndexName, IndexFacts.Session indexFacts, Language language) { Tokenizer tokenizer = new Tokenizer(environment.getLinguistics()); - tokenizer.setSubstringSpecialTokens(language.isCjk()); + tokenizer.setSubstringSpecialTokens(language != null && language.isCjk()); tokenizer.setSpecialTokens(environment.getSpecialTokens()); tokens.initialize(tokenizer.tokenize(query, defaultIndexName, indexFacts)); } @@ -265,9 +261,8 @@ public abstract class AbstractParser implements CustomParser { // TODO: The segmenting stuff is a mess now, this will fix it: // - Make Segmenter a class which is instantiated per parsing - // - Make the instance know the language, etc and do all dispatching - // internally - // -JSB + // - Make the instance know the language, etc and do all dispatching internally + // -bratseth // TODO: Use segmenting for forced phrase searches? 
protected Item segment(Token token) { String normalizedToken = normalize(token.toString()); diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java index e0089fb89ea..95cce001469 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java @@ -35,7 +35,7 @@ public class AnyParser extends SimpleParser { Item filterRoot; setState(queryLanguage, indexFacts); - tokenize(filter, null, indexFacts); + tokenize(filter, null, indexFacts, queryLanguage); filterRoot = anyItems(true); @@ -134,7 +134,7 @@ public class AnyParser extends SimpleParser { Item applyFilter(Item root, String filter, Language queryLanguage, IndexFacts.Session indexFacts) { setState(queryLanguage, indexFacts); - tokenize(filter, null, indexFacts); + tokenize(filter, null, indexFacts, queryLanguage); return filterItems(root); } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java index ba10b7b6ee1..dfd05ca0da5 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java @@ -8,7 +8,7 @@ import com.yahoo.search.query.parser.ParserEnvironment; /** * Parser for queries of type phrase. 
* - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public class PhraseParser extends AbstractParser { diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java index 6117e8e29ed..d1df74fcfa5 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java @@ -13,7 +13,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.SPACE; * Base class for parsers of the "simple" query languages (query types * ANY and ALL). * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ abstract class SimpleParser extends StructuredParser { diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java index eb35655e4ca..fd1617b5350 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java @@ -15,7 +15,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.*; * Base class for parsers of the query languages which can be used * for structured queries (types ANY, ALL and ADVANCED). 
* - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ abstract class StructuredParser extends AbstractParser { @@ -430,9 +430,7 @@ abstract class StructuredParser extends AbstractParser { Item item = null; try { - if (item == null) { - item = word(); - } + item = word(); if (item == null && tokens.currentIs(NUMBER)) { Token t = tokens.next(); @@ -542,7 +540,7 @@ abstract class StructuredParser extends AbstractParser { PhraseItem phrase = null; Item firstWord = null; boolean starAfterFirst = false; - boolean starBeforeFirst = false; + boolean starBeforeFirst; if (tokens.skipMultiple(QUOTE)) { quoted = !quoted; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java index 582395bc738..ed0af8d5060 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java @@ -16,7 +16,7 @@ import static com.yahoo.prelude.query.parser.Token.Kind.*; /** * Query tokenizer. Singlethreaded. * - * @author bratseth + * @author bratseth */ public final class Tokenizer { @@ -28,7 +28,7 @@ public final class Tokenizer { private SpecialTokens specialTokens = null; /** Whether to recognize tokens also as substrings of other tokens, needed for cjk */ - private boolean substringSpecialTokens=false; + private boolean substringSpecialTokens = false; private final CharacterClasses characterClasses; @@ -53,7 +53,7 @@ public final class Tokenizer { /** Sets whether to recognize tokens also as substrings of other tokens, needed for cjk. Default false. 
*/ public void setSubstringSpecialTokens(boolean substringSpecialTokens) { - this.substringSpecialTokens=substringSpecialTokens; + this.substringSpecialTokens = substringSpecialTokens; } /** diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java index 009c11ab1fd..b04ac2fcec5 100644 --- a/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java @@ -26,21 +26,21 @@ import com.yahoo.search.searchchain.Execution; import com.yahoo.search.searchchain.PhaseNames; /** - * Search to do necessary transforms if the query is in segmented in - * a "CJK language". + * Search to do necessary transforms if the query is segmented in a CJK language. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ @After(PhaseNames.UNBLENDED_RESULT) @Before(STEMMING) @Provides(CJKSearcher.TERM_ORDER_RELAXATION) public class CJKSearcher extends Searcher { + public static final String TERM_ORDER_RELAXATION = "TermOrderRelaxation"; @Override public Result search(Query query, Execution execution) { - Language l = query.getModel().getParsingLanguage(); - if (!l.isCjk()) return execution.search(query); + Language language = query.getModel().getParsingLanguage(); + if ( ! 
language.isCjk()) return execution.search(query); QueryTree tree = query.getModel().getQueryTree(); tree.setRoot(transform(tree.getRoot())); @@ -82,7 +82,6 @@ public class CJKSearcher extends Searcher { return root; } - private boolean hasOverlappingTokens(PhraseItem phrase) { boolean has = false; for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext(); ) { @@ -108,4 +107,5 @@ public class CJKSearcher extends Searcher { } return segmentsLength > segments.getRawWord().length(); } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java index ca8214f35d6..a4562892d0c 100644 --- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java @@ -78,11 +78,11 @@ public class StemmingSearcher extends Searcher { public String getFunctionName() { return "Stemming"; } private Item replaceTerms(Query q, IndexFacts.Session indexFacts) { - Language l = q.getModel().getParsingLanguage(); - if (l == Language.UNKNOWN) { + Language language = q.getModel().getParsingLanguage(); + if (language == Language.UNKNOWN) { return q.getModel().getQueryTree().getRoot(); } - return scan(q.getModel().getQueryTree().getRoot(), l.isCjk(), l, indexFacts, + return scan(q.getModel().getQueryTree().getRoot(), language.isCjk(), language, indexFacts, createReverseConnectivities(q.getModel().getQueryTree().getRoot())); } diff --git a/container-search/src/main/java/com/yahoo/search/query/Model.java b/container-search/src/main/java/com/yahoo/search/query/Model.java index c155ed4fbbd..ca6f7efaa5e 100644 --- a/container-search/src/main/java/com/yahoo/search/query/Model.java +++ b/container-search/src/main/java/com/yahoo/search/query/Model.java @@ -74,7 +74,7 @@ public class Model implements Cloneable { private String filter = null; private Language 
language = null; private Locale locale = null; - private QueryTree queryTree = null; // The actual query. This is lazily created from the program + private QueryTree queryTree = null; // The query tree to execute. This is lazily created from the program private String defaultIndex = null; private Query.Type type = Query.Type.ALL; private Query parent; @@ -431,7 +431,8 @@ public class Model implements Cloneable { return (Model)q.properties().get(argumentTypeName); } - public @Override String toString() { + @Override + public String toString() { return "query representation [queryTree: " + queryTree + ", filter: " + filter + "]"; } diff --git a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java index 11cee99f471..ec6d4f11369 100644 --- a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java @@ -647,7 +647,7 @@ public class QueryTestCase { { Query q = new Query(httpEncode("/?query=headline:\"彭 博士 觀 風向\" content:\"彭 博士 觀 風向\" description:\"彭 博士 觀 風向\" tags:ymedia:type=story tags:ymedia:type=blogpost tags:ymedia:type=slideshow tags:ymedia:type=cavideo tags:ymedia:type=photo -tags:ymedia:hosted=no sddocname:contentindexing!0 embargo:<1484665288753!0 expires:>1484665288753!0")); q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, null, new SimpleLinguistics()))); - assertEquals(Language.ENGLISH, q.getModel().getParsingLanguage()); + assertEquals(Language.CHINESE_TRADITIONAL, q.getModel().getParsingLanguage()); } } |