diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-11-29 13:31:57 -0800 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-11-29 13:31:57 -0800 |
commit | e6676955559a9fabb7d9eb75c276692ddcd0cf35 (patch) | |
tree | c381891132aca61ce086adefdf6f305332a868da /container-search | |
parent | 2beb15e133638977ab5aa2b2b905a04959e3cca8 (diff) |
Clean up some Steinarmess
Diffstat (limited to 'container-search')
5 files changed, 131 insertions, 131 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java index f52d711ff3d..8f61807d2dd 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java @@ -1,22 +1,19 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.query; - import com.yahoo.search.Query; import com.yahoo.search.query.QueryTree; import java.util.*; - /** - * A class which canonicalizes and validates queries. - * This class is multithread safe. + * Query normalizer and sanity checker. * * @author bratseth */ public class QueryCanonicalizer { - /** The name of the operation performed by this (for use in search chain ordering) */ + /** The name of the operation performed by this, for use in search chain ordering */ public static final String queryCanonicalization = "queryCanonicalization"; /** @@ -26,74 +23,42 @@ public class QueryCanonicalizer { * @return null if the query is valid, an error message if it is invalid */ public static String canonicalize(Query query) { - Item root = query.getModel().getQueryTree().getRoot(); - return canonicalize(query, root); + return canonicalize(query.getModel().getQueryTree()); } /** - * Validates this query and carries out possible operations on this query - * which simplifies it without changing its semantics. - * + * Canonicalize this query + * * @return null if the query is valid, an error message if it is invalid */ public static String canonicalize(QueryTree query) { - QueryWrapper q = new QueryWrapper(); - q.setRoot(query.getRoot()); // Could get rid of the wrapper... - treeCanonicalize(q, query.getRoot(), null); - query.setRoot(q.root); - return q.error; - } - - /** - * Validates this query and - * carries out possible operations on this query which simplifies it - * without changing its semantics. - * - * @param item the item to canonicalize - * @return null if the query is valid, an error message if it is invalid - */ - private static String canonicalize(Query query, Item item) { - QueryWrapper q = new QueryWrapper(); - q.setRoot(item); - treeCanonicalize(q, query.getModel().getQueryTree().getRoot(), null); - if (q.root == null) - q.root = new NullItem(); - query.getModel().getQueryTree().setRoot(q.root); - return q.error; + CanonicalizationResult result = treeCanonicalize(query.getRoot(), null); + if (result.newRoot().isPresent()) + query.setRoot(result.newRoot().get()); + return result.error().orElse(null); } /** - * @param bag wrapper for error message and query root + * Canonicalize this query + * * @param item the item to canonicalize - * @param iterator iterator for the above item if pertinent - * @return whether the query could be canonicalized into something + * @param parentIterator iterator for the parent of this item, or null if there is no parent + * @return true if the given query is valid, false otherwise */ - public static boolean treeCanonicalize(QueryWrapper bag, Item item, ListIterator<Item> iterator) { - if (iterator == null && (item == null || item instanceof NullItem)) { - bag.setError("No query"); - return false; - } + public static CanonicalizationResult treeCanonicalize(Item item, ListIterator<Item> parentIterator) { + if (parentIterator == null && (item == null || item instanceof NullItem)) + return CanonicalizationResult.error("No query"); - if (item instanceof TermItem) { - return true; - } + if (item instanceof TermItem) return CanonicalizationResult.success(); - if (item instanceof NullItem) { - iterator.remove(); - } + if (item instanceof NullItem) parentIterator.remove(); - if ( ! (item instanceof CompositeItem)) { - return true; - } // Impossible yet - CompositeItem composite = (CompositeItem) item; + if ( ! (item instanceof CompositeItem)) return CanonicalizationResult.success(); + CompositeItem composite = (CompositeItem)item; for (ListIterator<Item> i = composite.getItemIterator(); i.hasNext();) { - Item child = i.next(); - boolean subtreeOK = treeCanonicalize(bag, child, i); - - if (!subtreeOK) { - return false; - } + CanonicalizationResult childResult = treeCanonicalize(i.next(), i); + if (childResult.isError()) return childResult; } if (composite instanceof EquivItem) { @@ -103,34 +68,29 @@ public class QueryCanonicalizer { makeDuplicatesCheap((RankItem)composite); } else if (composite instanceof NotItem) { - if (((NotItem) composite).getPositiveItem() == null) { - bag.setError("Can not search for only negative items"); - return false; - } + if (((NotItem) composite).getPositiveItem() == null) + return CanonicalizationResult.error("Can not search for only negative items"); } if (composite.getItemCount() == 0) { - if (iterator == null) { - bag.setRoot(new NullItem()); - bag.setError("No query: Contained an empty " + composite.getName() + " only"); - return false; - } else { - iterator.remove(); - } + if (parentIterator == null) + return CanonicalizationResult.error("No query: Contained an empty " + composite.getName() + " only"); + else + parentIterator.remove(); } if (composite.getItemCount() == 1 && ! (composite instanceof NonReducibleCompositeItem)) { if (composite instanceof PhraseItem || composite instanceof PhraseSegmentItem) { composite.getItem(0).setWeight(composite.getWeight()); } - if (iterator == null) { - bag.setRoot(composite.getItem(0)); + if (parentIterator == null) { + return CanonicalizationResult.successWithRoot(composite.getItem(0)); } else { - iterator.set(composite.getItem(0)); + parentIterator.set(composite.getItem(0)); } } - return true; + return CanonicalizationResult.success(); } private static void removeDuplicates(EquivItem composite) { @@ -173,8 +133,7 @@ public class QueryCanonicalizer { * the term during matching, as it will be made available anyway for matches * by the same term in the rank part. * - * @param rankItem - * an item which will be simplified in place + * @param rankItem an item which will be simplified in place */ private static void makeDuplicatesCheap(RankItem rankItem) { // Collect terms used for ranking @@ -199,19 +158,36 @@ public class QueryCanonicalizer { } } - public static class QueryWrapper { - private Item root = null; - private String error = null; + public static class CanonicalizationResult { + + private final Optional<Item> newRoot; + private final Optional<String> error; - public Item getRoot() { return root; } - public void setRoot(Item root) { - this.root = root; + private CanonicalizationResult(Optional<Item> newRoot, Optional<String> error) { + this.newRoot = newRoot; + this.error = error; } - public String getError() { + + /** Returns the new root after canonicalization, or empty if the root is unchanged */ + public Optional<Item> newRoot() { return newRoot; } + + /** Returns the error of this query, or empty if it is a valid query */ + public Optional<String> error() { return error; } - public void setError(String error) { - this.error = error; + + public static CanonicalizationResult error(String error) { + return new CanonicalizationResult(Optional.of(new NullItem()), Optional.of(error)); + } + + public static CanonicalizationResult success() { + return new CanonicalizationResult(Optional.empty(), Optional.empty()); + } + + public boolean isError() { return error.isPresent(); } + + static CanonicalizationResult successWithRoot(Item newRoot) { + return new CanonicalizationResult(Optional.of(newRoot), Optional.empty()); } } diff --git a/container-search/src/main/java/com/yahoo/search/federation/vespa/VespaSearcher.java b/container-search/src/main/java/com/yahoo/search/federation/vespa/VespaSearcher.java index 26c9b8ad2cd..65864468b85 100644 --- a/container-search/src/main/java/com/yahoo/search/federation/vespa/VespaSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/federation/vespa/VespaSearcher.java @@ -1,7 +1,6 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.federation.vespa; -import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URI; @@ -28,11 +27,9 @@ import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.cache.QrBinaryCacheConfig; import com.yahoo.search.cache.QrBinaryCacheRegionConfig; -import com.yahoo.search.federation.FederationSearcher; import com.yahoo.search.federation.ProviderConfig; import com.yahoo.search.federation.http.ConfiguredHTTPProviderSearcher; import com.yahoo.search.federation.http.Connection; -import com.yahoo.search.intent.model.IntentModel; import com.yahoo.search.query.QueryTree; import com.yahoo.search.query.textserialize.TextSerialize; import com.yahoo.search.yql.MinimalQueryInserter; @@ -46,28 +43,25 @@ import edu.umd.cs.findbugs.annotations.Nullable; * <p>If the "sources" argument should be honored on an external cluster * when using YQL+, override {@link #chooseYqlSources(Set)}.</p> * - * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Arne Bergene Fossaa + * @author Steinar Knutsen */ @Provides("Vespa") @After("*") public class VespaSearcher extends ConfiguredHTTPProviderSearcher { + private final ThreadLocal<XMLReader> readerHolder = new ThreadLocal<>(); private final Query.Type queryType; private final Tuple2<String, Version> segmenterVersion; private static final CompoundName select = new CompoundName("select"); - private static final CompoundName streamingUserid = new CompoundName( - "streaming.userid"); - private static final CompoundName streamingGroupname = new CompoundName( - "streaming.groupname"); - private static final CompoundName streamingSelection = new CompoundName( - "streaming.selection"); + private static final CompoundName streamingUserid = new CompoundName("streaming.userid"); + private static final CompoundName streamingGroupname = new CompoundName("streaming.groupname"); + private static final CompoundName streamingSelection = new CompoundName("streaming.selection"); /** Create an instance from configuration */ - public VespaSearcher(ComponentId id, ProviderConfig config, - QrBinaryCacheConfig c, QrBinaryCacheRegionConfig r, - Statistics statistics) { + public VespaSearcher(ComponentId id, ProviderConfig config, QrBinaryCacheConfig c, + QrBinaryCacheRegionConfig r, Statistics statistics) { this(id, config, c, r, statistics, null); } @@ -147,8 +141,7 @@ public class VespaSearcher extends ConfiguredHTTPProviderSearcher { } else if (providerQueryType == ProviderConfig.QueryType.YQL) { return Query.Type.YQL; } else { - throw new RuntimeException("Query type " + providerQueryType - + " unsupported."); + throw new RuntimeException("Query type " + providerQueryType + " unsupported."); } } @@ -169,8 +162,7 @@ public class VespaSearcher extends ConfiguredHTTPProviderSearcher { Query workQuery = query.clone(); String error = QueryCanonicalizer.canonicalize(workQuery); if (error != null) { - getLogger().log(LogLevel.WARNING, - "Could not normalize [" + query.toString() + "]: " + error); + getLogger().log(LogLevel.WARNING,"Could not normalize [" + query.toString() + "]: " + error); // Just returning null here is the pattern from existing code... return null; } @@ -180,26 +172,18 @@ public class VespaSearcher extends ConfiguredHTTPProviderSearcher { } public String marshalQuery(QueryTree root) { - QueryCanonicalizer.QueryWrapper qw = new QueryCanonicalizer.QueryWrapper(); - root = root.clone(); - qw.setRoot(root.getRoot()); - boolean could = QueryCanonicalizer.treeCanonicalize(qw, root.getRoot(), - null); - if (!could) { - return null; - } - return marshalRoot(qw.getRoot()); + QueryTree rootClone = root.clone(); // TODO: Why? + QueryCanonicalizer.CanonicalizationResult result = QueryCanonicalizer.treeCanonicalize(rootClone, null); + if (result.isError()) return null; + + return marshalRoot(result.newRoot().orElse(rootClone.getRoot())); } private String marshalRoot(Item root) { switch (queryType) { - case ADVANCED: - QueryMarshaller marshaller = new QueryMarshaller(); - return marshaller.marshal(root); - case PROGRAMMATIC: - return TextSerialize.serialize(root); - default: - throw new RuntimeException("Unsupported query type."); + case ADVANCED: return new QueryMarshaller().marshal(root); + case PROGRAMMATIC: return TextSerialize.serialize(root); + default: throw new RuntimeException("Unsupported query type."); } } @@ -223,8 +207,7 @@ public class VespaSearcher extends ConfiguredHTTPProviderSearcher { /** Returns the canonical Vespa ping URI, http://host:port/status.html */ @Override - public URI getPingURI(Connection connection) throws MalformedURLException, - URISyntaxException { + public URI getPingURI(Connection connection) throws MalformedURLException, URISyntaxException { return new URL(getParameters().getSchema(), connection.getHost(), connection.getPort(), "/status.html").toURI(); } @@ -267,4 +250,5 @@ public class VespaSearcher extends ConfiguredHTTPProviderSearcher { protected void chooseYqlSummaryFields(Set<String> summaryFields) { summaryFields.clear(); } + } diff --git a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java index 3a501853388..621b972dd2b 100644 --- a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java +++ b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java @@ -22,7 +22,7 @@ import java.util.List; * * <p>This is also the home of accessor methods which eases querying into and manipulation of the query tree.</p> * - * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + * @author Arne Bergene Fossaa */ public class QueryTree extends CompositeItem { diff --git a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java index 56a59e44fba..9b2f05d8804 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java @@ -4,16 +4,19 @@ package com.yahoo.prelude.query.test; import com.yahoo.prelude.query.*; import com.yahoo.search.Query; import com.yahoo.search.query.QueryTree; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; /** * @author bratseth */ -public class QueryCanonicalizerTestCase extends junit.framework.TestCase { - - public QueryCanonicalizerTestCase(String name) { - super(name); - } +public class QueryCanonicalizerTestCase { + @Test public void testSingleLevelSingleItemComposite() { CompositeItem root = new AndItem(); @@ -21,6 +24,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("word", null, root); } + @Test public void testSingleLevelSingleItemNonReducibleComposite() { CompositeItem root = new WeakAndItem(); @@ -28,6 +32,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("WAND(100) word", null, root); } + @Test public void testMultilevelSingleItemComposite() { CompositeItem root = new AndItem(); CompositeItem and1 = new AndItem(); @@ -39,6 +44,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("word", null, root); } + @Test public void testMultilevelComposite() { // AND (RANK (AND a b c)) WAND(25,0.0,1.0) AndItem and = new AndItem(); @@ -55,6 +61,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("AND (AND a b c) WAND(100,0.0,1.0) default}", null, and); } + @Test public void testMultilevelEmptyComposite() { CompositeItem root = new AndItem(); CompositeItem and1 = new AndItem(); @@ -65,6 +72,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("NULL", "No query", new Query()); } + @Test public void testMultilevelMultiBranchEmptyComposite() { CompositeItem root = new AndItem(); CompositeItem and1 = new AndItem(); @@ -81,6 +89,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("NULL", "No query", new Query()); } + @Test public void testMultilevelMultiBranchSingleItemComposite() { CompositeItem root = new AndItem(); CompositeItem and1 = new AndItem(); @@ -98,10 +107,12 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("word", null, new Query("?query=word")); } + @Test public void testNullRoot() { assertCanonicalized("NULL", "No query", new Query()); } + @Test public void testNestedNull() { CompositeItem root = new AndItem(); CompositeItem or = new AndItem(); @@ -116,6 +127,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("NULL", "No query: Contained an empty AND only", root); } + @Test public void testNestedNullItem() { CompositeItem root = new AndItem(); CompositeItem or = new AndItem(); @@ -132,6 +144,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("NULL", "No query: Contained an empty AND only", root); } + @Test public void testNestedNullAndSingle() { CompositeItem root = new AndItem(); CompositeItem or = new OrItem(); @@ -144,6 +157,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("word", null, root); } + @Test public void testRemovalOfUnnecessaryComposites() { CompositeItem root = new AndItem(); CompositeItem or = new OrItem(); @@ -158,6 +172,27 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("OR word1 word2 word3", null, root); } + /** Multiple levels of the same composite should collapse */ + @Test + public void testMultilevelCollapsing() { + CompositeItem root = new AndItem(); + CompositeItem child = new AndItem(); + CompositeItem grandchild = new AndItem(); + CompositeItem grandgrandchild = new AndItem(); + + root.addItem(child); + child.addItem(new WordItem("childItem")); + + child.addItem(grandchild); + grandchild.addItem(new WordItem("grandchildItem")); + + grandchild.addItem(grandgrandchild); + grandgrandchild.addItem(new WordItem("grandgrandchildItem")); + + assertCanonicalized("AND childItem grandchildItem grandgrandchildItem", null, root); + } + + @Test public void testNegativeMustHaveNegatives() { CompositeItem root = new NotItem(); @@ -165,14 +200,15 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("positive", null, root); } + @Test public void testNegativeMustHavePositive() { NotItem root = new NotItem(); root.addNegativeItem(new WordItem("negative")); - assertCanonicalized("+(null) -negative", - "Can not search for only negative items", root); + assertCanonicalized("NULL","Can not search for only negative items", root); } + @Test public void testNegativeMustHavePositiveNested() { CompositeItem root = new AndItem(); NotItem not = new NotItem(); @@ -180,14 +216,14 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { root.addItem(not); root.addItem(new WordItem("word")); not.addNegativeItem(new WordItem("negative")); - assertCanonicalized("AND (+(null) -negative) word", - "Can not search for only negative items", root); + assertCanonicalized("NULL","Can not search for only negative items", root); } /** * Tests that connexity is preserved by cloning and transferred to rank properties by preparing the query * (which strictly is an implementation detail which we should rather hide). */ + @Test public void testConnexityAndCloning() { Query q = new Query("?query=a%20b"); CompositeItem root = (CompositeItem) q.getModel().getQueryTree().getRoot(); @@ -207,6 +243,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { * Tests that significance is transferred to rank properties by preparing the query * (which strictly is an implementation detail which we should rather hide). */ + @Test public void testSignificance() { Query q = new Query("?query=a%20b"); CompositeItem root = (CompositeItem) q.getModel().getQueryTree().getRoot(); @@ -217,6 +254,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertEquals("0.95", q.getRanking().getProperties().get("vespa.term.2.significance").get(0)); } + @Test public void testPhraseWeight() { PhraseItem root = new PhraseItem(); root.setWeight(200); @@ -224,6 +262,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { assertCanonicalized("a!200", null, root); } + @Test public void testEquivDuplicateRemoval() { { EquivItem root = new EquivItem(); @@ -289,6 +328,7 @@ public class QueryCanonicalizerTestCase extends junit.framework.TestCase { } } + @Test public void testRankDuplicateCheapification() { AndItem and = new AndItem(); WordItem shoe = new WordItem("shoe", "prod"); diff --git a/container-search/src/test/java/com/yahoo/search/federation/vespa/test/VespaSearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/federation/vespa/test/VespaSearcherTestCase.java index 63da6adca77..b6aec9262ca 100644 --- a/container-search/src/test/java/com/yahoo/search/federation/vespa/test/VespaSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/federation/vespa/test/VespaSearcherTestCase.java @@ -17,9 +17,9 @@ import java.io.IOException; import java.net.URI; /** - * Check query marshaling in VespaSearcher works... and stuff... + * Check query marshaling in VespaSearcher works. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public class VespaSearcherTestCase extends TestCase { |