diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java |
Publish
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java | 74 |
1 files changed, 74 insertions, 0 deletions
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.querytransform;

import com.yahoo.component.ComponentId;
import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.search.Searcher;
import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.searchchain.Execution;

import java.util.List;

/**
 * <p>Detects and removes certain phrases from the query.</p>
 *
 * <p>The phrases to remove are recognized by a {@link PhraseMatcher} built from an automaton file.
 * When no automaton file is given, a null matcher is used instead. Removal is skipped for
 * queries whose <code>suggestonly</code> property is true.</p>
 *
 * @author bratseth
 */
@After("rawQuery")
@Before("transformedQuery")
public class NonPhrasingSearcher extends Searcher {

    /** Query property which, when true, makes this searcher leave the query untouched */
    private static final CompoundName suggestonly = new CompoundName("suggestonly");

    /** The matcher locating removable phrases. Assigned exactly once, during construction. */
    private final PhraseMatcher phraseMatcher;

    /**
     * Creates a nonphrasing searcher from configuration
     *
     * @param id the component id of this searcher
     * @param config the config from which the name of the phrase automaton file is read
     */
    public NonPhrasingSearcher(ComponentId id, QrSearchersConfig config) {
        super(id);
        phraseMatcher = createPhraseMatcher(
                config.com().yahoo().prelude().querytransform().NonPhrasingSearcher().automatonfile());
    }

    /**
     * Creates a nonphrasing searcher
     *
     * @param phraseAutomatonFile the file containing phrases which should be removed
     * @throws IllegalStateException if the automata component is unavailable
     *         in the current environment
     * @throws IllegalArgumentException if the file is not found
     */
    public NonPhrasingSearcher(String phraseAutomatonFile) {
        phraseMatcher = createPhraseMatcher(phraseAutomatonFile);
    }

    /**
     * Returns the matcher to use for the given automaton file name.
     *
     * @param phraseAutomatonFile the automaton file name, may be null or blank
     * @return the null matcher if the name is null or contains only whitespace,
     *         otherwise a matcher created from the named file
     */
    private static PhraseMatcher createPhraseMatcher(String phraseAutomatonFile) {
        if (phraseAutomatonFile == null || phraseAutomatonFile.trim().isEmpty()) {
            // No file, just use the dummy matcher
            return PhraseMatcher.getNullMatcher();
        }
        // Use a real matcher
        return new PhraseMatcher(phraseAutomatonFile);
    }

    /**
     * Matches phrases against the root of the query tree and removes the matches,
     * unless no match list is returned or the <code>suggestonly</code> property is true,
     * then passes the query on down the chain.
     *
     * @param query the query to transform
     * @param execution the execution used to invoke the rest of the chain
     * @return the result of executing the rest of the chain
     */
    @Override
    public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) {
        List<PhraseMatcher.Phrase> phrases =
                phraseMatcher.matchPhrases(query.getModel().getQueryTree().getRoot());
        if (phrases != null && !query.properties().getBoolean(suggestonly, false)) {
            remove(phrases);
            query.trace("Removing stop words", true, 2);
        }
        return execution.search(query);
    }

    /**
     * Removes the given phrases from their owners.
     *
     * @param phrases the matched phrases, processed from the last to the first
     */
    private void remove(List<PhraseMatcher.Phrase> phrases) {
        // Walk backwards: removing the later phrases first preserves the
        // start index of each earlier phrase until it is removed itself
        for (int i = phrases.size() - 1; i >= 0; i--) {
            PhraseMatcher.Phrase phrase = phrases.get(i);
            // Don't remove a phrase which covers every item of its owner
            if (phrase.getLength() < phrase.getOwner().getItemCount()) {
                phrase.remove();
            }
        }
    }

}