aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
committer: Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
commit: 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree: 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java
Publish
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java')
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java 74
1 files changed, 74 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java
new file mode 100644
index 00000000000..c79933dbbd0
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.querytransform;
+
+import com.yahoo.component.ComponentId;
+import com.yahoo.component.chain.dependencies.After;
+import com.yahoo.component.chain.dependencies.Before;
+import com.yahoo.container.QrSearchersConfig;
+import com.yahoo.search.Searcher;
+import com.yahoo.processing.request.CompoundName;
+import com.yahoo.search.searchchain.Execution;
+
+import java.util.List;
+
+/**
+ * <p>Detects and removes certain phrases from the query.</p>
+ *
+ * @author bratseth
+ */
+@After("rawQuery")
+@Before("transformedQuery")
+public class NonPhrasingSearcher extends Searcher {
+
+    /** Request property which disables phrase removal when set to true */
+    private static final CompoundName suggestonly = new CompoundName("suggestonly");
+
+    /** Set exactly once by the constructors, hence final */
+    private final PhraseMatcher phraseMatcher;
+
+    /** Creates a nonphrasing searcher which takes its automaton file name from the given config */
+    public NonPhrasingSearcher(ComponentId id, QrSearchersConfig config) {
+        super(id);
+        phraseMatcher = createMatcher(config.com().yahoo().prelude().querytransform().NonPhrasingSearcher().automatonfile());
+    }
+
+    /**
+     * Creates a nonphrasing searcher
+     *
+     * @param phraseAutomatonFile the file containing phrases which should be removed
+     * @throws IllegalStateException if the automata component is unavailable
+     *         in the current environment
+     * @throws IllegalArgumentException if the file is not found
+     */
+    public NonPhrasingSearcher(String phraseAutomatonFile) {
+        phraseMatcher = createMatcher(phraseAutomatonFile);
+    }
+
+    /** Returns the null matcher if the file name is null or blank, a matcher reading the file otherwise */
+    private static PhraseMatcher createMatcher(String phraseAutomatonFile) {
+        boolean noFile = phraseAutomatonFile == null || phraseAutomatonFile.trim().isEmpty();
+        return noFile ? PhraseMatcher.getNullMatcher() : new PhraseMatcher(phraseAutomatonFile);
+    }
+
+    /** Removes the matched phrases, unless "suggestonly" is set, before passing the query down the chain */
+    @Override
+    public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) {
+        List<PhraseMatcher.Phrase> phrases = phraseMatcher.matchPhrases(query.getModel().getQueryTree().getRoot());
+        if (phrases != null && !query.properties().getBoolean(suggestonly, false)) {
+            remove(phrases);
+            query.trace("Removing stop words", true, 2);
+        }
+        return execution.search(query);
+    }
+
+    /** Removes every phrase which is shorter than the item count of its owner */
+    private void remove(List<PhraseMatcher.Phrase> phrases) {
+        // Walk backwards so that removing a phrase does not shift
+        // the start index of the phrases which are yet to be removed
+        for (int i = phrases.size() - 1; i >= 0; i--) {
+            PhraseMatcher.Phrase phrase = phrases.get(i);
+            if (phrase.getLength() < phrase.getOwner().getItemCount()) // Don't remove everything
+                phrase.remove();
+        }
+    }
+}