aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Arne H Juul <arnej27959@users.noreply.github.com> 2020-05-28 15:45:04 +0200
committer: GitHub <noreply@github.com> 2020-05-28 15:45:04 +0200
commit: 74fb334833045254e32721a20ed96d69f9c8cfed (patch)
tree: f279d04929aff8074c6419b2774f2a28c90b4d8c
parent: d4b9ae47ae9233ce9d852dba1ac20c00f4c66879 (diff)
parent: d2cec706ee0f3511ed2b6d40ca392563b59fa291 (diff)
Merge pull request #13412 from vespa-engine/arnej/create-filter-in-intermediates
Arnej/create filter in intermediates
-rw-r--r--searchlib/src/tests/queryeval/queryeval.cpp46
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp141
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h6
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h2
-rw-r--r--vespalib/src/vespa/vespalib/regex/regex.cpp3
-rw-r--r--vespalib/src/vespa/vespalib/regex/regex.h5
7 files changed, 170 insertions, 44 deletions
diff --git a/searchlib/src/tests/queryeval/queryeval.cpp b/searchlib/src/tests/queryeval/queryeval.cpp
index 5601baa9113..29cdd6a4b84 100644
--- a/searchlib/src/tests/queryeval/queryeval.cpp
+++ b/searchlib/src/tests/queryeval/queryeval.cpp
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/regex/regex.h>
#include <vespa/searchlib/test/initrange.h>
#include <vespa/searchlib/queryeval/andnotsearch.h>
#include <vespa/searchlib/queryeval/andsearch.h>
@@ -34,6 +35,9 @@ using search::test::InitRangeVerifier;
//-----------------------------------------------------------------------------
+constexpr auto lower_bound = Blueprint::FilterConstraint::LOWER_BOUND;
+constexpr auto upper_bound = Blueprint::FilterConstraint::UPPER_BOUND;
+
template <typename T, typename V=std::vector<T> >
class Collect
{
@@ -213,6 +217,15 @@ TEST("test that non-strict andnot search does NOT forward to its greedy first ch
EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get());
}
+/**
+ * Test helper: expects that 'regexp' partially matches 'input'
+ * (checked with Regex::partial_match).
+ *
+ * The pattern is compiled with Regex::Options::DotMatchesNewline so a
+ * '.' in the pattern may also consume line breaks in multi-line input.
+ * When the expectation fails, the pattern and the complete input are
+ * written to stderr.
+ *
+ * Both arguments are taken by const reference; the helper only reads
+ * them, so there is no reason to copy the input text on every call.
+ *
+ * @param input  text to search in
+ * @param regexp regular expression expected to match part of 'input'
+ */
+void expect_match(const std::string &input, const std::string &regexp) {
+    using vespalib::Regex;
+    Regex pattern = Regex::from_pattern(regexp, Regex::Options::DotMatchesNewline);
+    if (! EXPECT_TRUE(pattern.partial_match(input))) {
+        fprintf(stderr, "no match for pattern: >>>%s<<< in input:\n>>>\n%s\n<<<\n",
+                regexp.c_str(), input.c_str());
+    }
+}
+
TEST("testAnd") {
SimpleResult a;
SimpleResult b;
@@ -232,8 +245,19 @@ TEST("testAnd") {
res.search(*and_ab);
SimpleResult expect;
expect.addHit(5).addHit(30);
+ EXPECT_EQUAL(res, expect);
+ SearchIterator::UP filter_ab = and_b->createFilterSearch(true, upper_bound);
+ SimpleResult filter_res;
+ filter_res.search(*filter_ab);
EXPECT_EQUAL(res, expect);
+ std::string dump = filter_ab->asString();
+ expect_match(dump, "upper");
+ expect_match(dump, "AndSearchStrict.*NoUnpack.*SimpleSearch.*upper.*SimpleSearch.*upper");
+ filter_ab = and_b->createFilterSearch(false, lower_bound);
+ dump = filter_ab->asString();
+ expect_match(dump, "lower");
+ expect_match(dump, "AndSearchNoStrict.*NoUnpack.*SimpleSearch.*lower.*SimpleSearch.*lower");
}
TEST("mutisearch and initRange") {
@@ -257,8 +281,19 @@ TEST("testOr") {
res.search(*or_ab);
SimpleResult expect;
expect.addHit(5).addHit(10).addHit(17).addHit(30);
+ EXPECT_EQUAL(res, expect);
+ SearchIterator::UP filter_ab = or_b->createFilterSearch(true, upper_bound);
+ SimpleResult filter_res;
+ filter_res.search(*filter_ab);
EXPECT_EQUAL(res, expect);
+ std::string dump = filter_ab->asString();
+ expect_match(dump, "upper");
+ expect_match(dump, "OrLikeSearch.true.*NoUnpack.*SimpleSearch.*upper.*SimpleSearch.*upper");
+ filter_ab = or_b->createFilterSearch(false, lower_bound);
+ dump = filter_ab->asString();
+ expect_match(dump, "lower");
+ expect_match(dump, "OrLikeSearch.false.*NoUnpack.*SimpleSearch.*lower.*SimpleSearch.*lower");
}
{
TermFieldMatchData tfmd;
@@ -371,8 +406,19 @@ TEST("testAndNot") {
res.search(*andnot_ab);
SimpleResult expect;
expect.addHit(10);
+ EXPECT_EQUAL(res, expect);
+ SearchIterator::UP filter_ab = andnot_b->createFilterSearch(true, upper_bound);
+ SimpleResult filter_res;
+ filter_res.search(*filter_ab);
EXPECT_EQUAL(res, expect);
+ std::string dump = filter_ab->asString();
+ expect_match(dump, "upper");
+ expect_match(dump, "AndNotSearch.*SimpleSearch.*<strict,upper>.*SimpleSearch.*<nostrict,lower>");
+ filter_ab = andnot_b->createFilterSearch(false, lower_bound);
+ dump = filter_ab->asString();
+ expect_match(dump, "lower");
+ expect_match(dump, "AndNotSearch.*SimpleSearch.*<nostrict,lower>.*SimpleSearch.*<nostrict,upper>");
}
{
SimpleResult a;
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 430bc3956e7..624b9a1a368 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -149,12 +149,12 @@ AndNotBlueprint::inheritStrict(size_t i) const
}
SearchIterator::UP
-AndNotBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+AndNotBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches,
bool strict, search::fef::MatchData &md) const
{
- UnpackInfo unpackInfo(calculateUnpackInfo(md));
- if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) {
- TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo);
+ UnpackInfo unpack_info(calculateUnpackInfo(md));
+ if (should_do_termwise_eval(unpack_info, md.get_termwise_limit())) {
+ TermwiseBlueprintHelper helper(*this, sub_searches, unpack_info);
bool termwise_strict = (strict && inheritStrict(helper.first_termwise));
auto termwise_search = (helper.first_termwise == 0)
? SearchIterator::UP(AndNotSearch::create(helper.termwise, termwise_strict))
@@ -165,7 +165,34 @@ AndNotBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearch
}
return SearchIterator::UP(AndNotSearch::create(helper.children, strict));
}
- return SearchIterator::UP(AndNotSearch::create(subSearches, strict));
+ return SearchIterator::UP(AndNotSearch::create(sub_searches, strict));
+}
+
+namespace {
+
+/**
+ * Maps UPPER_BOUND to LOWER_BOUND and LOWER_BOUND to UPPER_BOUND.
+ * Any other value terminates the process through abort().
+ */
+Blueprint::FilterConstraint invert(Blueprint::FilterConstraint constraint) {
+    switch (constraint) {
+    case Blueprint::FilterConstraint::UPPER_BOUND:
+        return Blueprint::FilterConstraint::LOWER_BOUND;
+    case Blueprint::FilterConstraint::LOWER_BOUND:
+        return Blueprint::FilterConstraint::UPPER_BOUND;
+    default:
+        break;
+    }
+    // Reached only for a value that is neither of the two bounds.
+    abort();
+}
+
+} // namespace <unnamed>
+
+/**
+ * Builds a filter iterator for this AND-NOT node from the filter
+ * iterators of its children.
+ *
+ * Child 0 is asked for a filter with the requested constraint; every
+ * other child is asked for one with the inverted constraint (see
+ * invert()). Presumably this is because the later children are
+ * subtracted from the first one -- confirm against the
+ * FilterConstraint contract.
+ *
+ * @param strict     strictness of the combined iterator; a child is
+ *                   only strict when inheritStrict() also allows it
+ * @param constraint bound requested for the combined filter
+ */
+SearchIterator::UP
+AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
+{
+    const size_t num_children = childCnt();
+    MultiSearch::Children children;
+    children.reserve(num_children);
+    for (size_t idx = 0; idx < num_children; ++idx) {
+        const bool strict_child = strict && inheritStrict(idx);
+        const FilterConstraint child_constraint = (idx == 0) ? constraint : invert(constraint);
+        // Ownership of the released raw pointer moves into 'children'.
+        children.push_back(getChild(idx).createFilterSearch(strict_child, child_constraint).release());
+    }
+    return SearchIterator::UP(AndNotSearch::create(children, strict));
}
//-----------------------------------------------------------------------------
@@ -221,13 +248,13 @@ AndBlueprint::inheritStrict(size_t i) const
}
SearchIterator::UP
-AndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+AndBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches,
bool strict, search::fef::MatchData & md) const
{
- UnpackInfo unpackInfo(calculateUnpackInfo(md));
+ UnpackInfo unpack_info(calculateUnpackInfo(md));
AndSearch * search = 0;
- if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) {
- TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo);
+ if (should_do_termwise_eval(unpack_info, md.get_termwise_limit())) {
+ TermwiseBlueprintHelper helper(*this, sub_searches, unpack_info);
bool termwise_strict = (strict && inheritStrict(helper.first_termwise));
auto termwise_search = SearchIterator::UP(AndSearch::create(helper.termwise, termwise_strict));
helper.insert_termwise(std::move(termwise_search), termwise_strict);
@@ -237,12 +264,28 @@ AndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
search = AndSearch::create(helper.children, strict, helper.termwise_unpack);
}
} else {
- search = AndSearch::create(subSearches, strict, unpackInfo);
+ search = AndSearch::create(sub_searches, strict, unpack_info);
}
search->estimate(getState().estimate().estHits);
return SearchIterator::UP(search);
}
+/**
+ * Builds a filter iterator for this AND node: each child contributes
+ * its own filter iterator, created with the same constraint, and the
+ * results are combined in an AndSearch.
+ *
+ * @param strict     strictness of the combined iterator; a child is
+ *                   only strict when inheritStrict() also allows it
+ * @param constraint bound forwarded unchanged to every child
+ */
+SearchIterator::UP
+AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
+{
+    const size_t num_children = childCnt();
+    MultiSearch::Children children;
+    children.reserve(num_children);
+    for (size_t idx = 0; idx < num_children; ++idx) {
+        const bool strict_child = strict && inheritStrict(idx);
+        // Ownership of the released raw pointer moves into 'children'.
+        children.push_back(getChild(idx).createFilterSearch(strict_child, constraint).release());
+    }
+    // A default-constructed UnpackInfo is passed on to AndSearch::create.
+    UnpackInfo default_unpack;
+    AndSearch *and_search = AndSearch::create(children, strict, default_unpack);
+    and_search->estimate(getState().estimate().estHits);
+    return SearchIterator::UP(and_search);
+}
+
double
AndBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const {
return hitRate * child.hit_ratio();
@@ -303,12 +346,12 @@ OrBlueprint::inheritStrict(size_t) const
}
SearchIterator::UP
-OrBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+OrBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches,
bool strict, search::fef::MatchData & md) const
{
- UnpackInfo unpackInfo(calculateUnpackInfo(md));
- if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) {
- TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo);
+ UnpackInfo unpack_info(calculateUnpackInfo(md));
+ if (should_do_termwise_eval(unpack_info, md.get_termwise_limit())) {
+ TermwiseBlueprintHelper helper(*this, sub_searches, unpack_info);
bool termwise_strict = (strict && inheritStrict(helper.first_termwise));
auto termwise_search = SearchIterator::UP(OrSearch::create(helper.termwise, termwise_strict));
helper.insert_termwise(std::move(termwise_search), termwise_strict);
@@ -317,7 +360,21 @@ OrBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
}
return SearchIterator::UP(OrSearch::create(helper.children, strict, helper.termwise_unpack));
}
- return SearchIterator::UP(OrSearch::create(subSearches, strict, unpackInfo));
+ return SearchIterator::UP(OrSearch::create(sub_searches, strict, unpack_info));
+}
+
+/**
+ * Builds a filter iterator for this OR node: each child contributes
+ * its own filter iterator, created with the same constraint, and the
+ * results are combined in an OrSearch.
+ *
+ * @param strict     strictness of the combined iterator; a child is
+ *                   only strict when inheritStrict() also allows it
+ * @param constraint bound forwarded unchanged to every child
+ */
+SearchIterator::UP
+OrBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
+{
+    const size_t num_children = childCnt();
+    MultiSearch::Children children;
+    children.reserve(num_children);
+    for (size_t idx = 0; idx < num_children; ++idx) {
+        const bool strict_child = strict && inheritStrict(idx);
+        // Ownership of the released raw pointer moves into 'children'.
+        children.push_back(getChild(idx).createFilterSearch(strict_child, constraint).release());
+    }
+    // A default-constructed UnpackInfo is passed on to OrSearch::create.
+    UnpackInfo default_unpack;
+    return SearchIterator::UP(OrSearch::create(children, strict, default_unpack));
}
//-----------------------------------------------------------------------------
@@ -359,14 +416,14 @@ WeakAndBlueprint::always_needs_unpack() const
}
SearchIterator::UP
-WeakAndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+WeakAndBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches,
bool strict, search::fef::MatchData &) const
{
WeakAndSearch::Terms terms;
- assert(subSearches.size() == childCnt());
+ assert(sub_searches.size() == childCnt());
assert(_weights.size() == childCnt());
- for (size_t i = 0; i < subSearches.size(); ++i) {
- terms.push_back(wand::Term(subSearches[i],
+ for (size_t i = 0; i < sub_searches.size(); ++i) {
+ terms.push_back(wand::Term(sub_searches[i],
_weights[i],
getChild(i).getState().estimate().estHits));
}
@@ -407,7 +464,7 @@ NearBlueprint::createSearch(fef::MatchData &md, bool strict) const
}
SearchIterator::UP
-NearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+NearBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches,
bool strict, search::fef::MatchData &md) const
{
search::fef::TermFieldMatchDataArray tfmda;
@@ -417,7 +474,7 @@ NearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches
tfmda.add(cs.field(j).resolve(md));
}
}
- return SearchIterator::UP(new NearSearch(subSearches, tfmda, _window, strict));
+ return SearchIterator::UP(new NearSearch(sub_searches, tfmda, _window, strict));
}
//-----------------------------------------------------------------------------
@@ -455,7 +512,7 @@ ONearBlueprint::createSearch(fef::MatchData &md, bool strict) const
}
SearchIterator::UP
-ONearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+ONearBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches,
bool strict, search::fef::MatchData &md) const
{
search::fef::TermFieldMatchDataArray tfmda;
@@ -465,9 +522,9 @@ ONearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearche
tfmda.add(cs.field(j).resolve(md));
}
}
- // could sort subSearches here
+ // could sort sub_searches here
// but then strictness inheritance would also need to be fixed
- return SearchIterator::UP(new ONearSearch(subSearches, tfmda, _window, strict));
+ return SearchIterator::UP(new ONearSearch(sub_searches, tfmda, _window, strict));
}
//-----------------------------------------------------------------------------
@@ -520,27 +577,27 @@ RankBlueprint::inheritStrict(size_t i) const
}
SearchIterator::UP
-RankBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+RankBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches,
bool strict, search::fef::MatchData & md) const
{
- UnpackInfo unpackInfo(calculateUnpackInfo(md));
- if (unpackInfo.unpackAll()) {
- return SearchIterator::UP(RankSearch::create(subSearches, strict));
+ UnpackInfo unpack_info(calculateUnpackInfo(md));
+ if (unpack_info.unpackAll()) {
+ return SearchIterator::UP(RankSearch::create(sub_searches, strict));
} else {
- MultiSearch::Children requireUnpack;
- requireUnpack.reserve(subSearches.size());
- requireUnpack.push_back(subSearches[0]);
- for (size_t i(1); i < subSearches.size(); i++) {
- if (unpackInfo.needUnpack(i)) {
- requireUnpack.push_back(subSearches[i]);
+ MultiSearch::Children require_unpack;
+ require_unpack.reserve(sub_searches.size());
+ require_unpack.push_back(sub_searches[0]);
+ for (size_t i(1); i < sub_searches.size(); i++) {
+ if (unpack_info.needUnpack(i)) {
+ require_unpack.push_back(sub_searches[i]);
} else {
- delete subSearches[i];
+ delete sub_searches[i];
}
}
- if (requireUnpack.size() == 1) {
- return SearchIterator::UP(requireUnpack[0]);
+ if (require_unpack.size() == 1) {
+ return SearchIterator::UP(require_unpack[0]);
} else {
- return SearchIterator::UP(RankSearch::create(requireUnpack, strict));
+ return SearchIterator::UP(RankSearch::create(require_unpack, strict));
}
}
}
@@ -597,13 +654,13 @@ SourceBlenderBlueprint::findSource(uint32_t sourceId) const
}
SearchIterator::UP
-SourceBlenderBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches,
+SourceBlenderBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches,
bool strict, search::fef::MatchData &) const
{
SourceBlenderSearch::Children children;
- assert(subSearches.size() == childCnt());
- for (size_t i = 0; i < subSearches.size(); ++i) {
- children.push_back(SourceBlenderSearch::Child(subSearches[i],
+ assert(sub_searches.size() == childCnt());
+ for (size_t i = 0; i < sub_searches.size(); ++i) {
+ children.push_back(SourceBlenderSearch::Child(sub_searches[i],
getChild(i).getSourceId()));
assert(children.back().sourceId != 0xffffffff);
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index 3a6b5e1a31a..ecad9431a81 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -24,6 +24,8 @@ public:
SearchIterator::UP
createIntermediateSearch(const MultiSearch::Children &subSearches,
bool strict, fef::MatchData &md) const override;
+ SearchIterator::UP
+ createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
bool isPositive(size_t index) const override { return index == 0; }
};
@@ -48,6 +50,8 @@ public:
SearchIterator::UP
createIntermediateSearch(const MultiSearch::Children &subSearches,
bool strict, fef::MatchData &md) const override;
+ SearchIterator::UP
+ createFilterSearch(bool strict, FilterConstraint constraint) const override;
};
//-----------------------------------------------------------------------------
@@ -65,6 +69,8 @@ public:
SearchIterator::UP
createIntermediateSearch(const MultiSearch::Children &subSearches,
bool strict, fef::MatchData &md) const override;
+ SearchIterator::UP
+ createFilterSearch(bool strict, FilterConstraint constraint) const override;
};
//-----------------------------------------------------------------------------
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
index 94cec9a63c5..e605f36c87f 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
@@ -42,6 +42,17 @@ SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &,
return search;
}
+/**
+ * Creates a SimpleSearch over the stored result and tags it with the
+ * blueprint tag followed by "<strict,"/"<nostrict," and
+ * "upper>"/"lower>", so the requested strictness and constraint show
+ * up in the tag of the created iterator.
+ *
+ * @param strict     selects the "<strict," or "<nostrict," tag part
+ * @param constraint UPPER_BOUND selects "upper>", anything else "lower>"
+ */
+SearchIterator::UP
+SimpleBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
+{
+    const char *strict_part = strict ? "<strict," : "<nostrict,";
+    const char *bound_part = (constraint == FilterConstraint::UPPER_BOUND) ? "upper>" : "lower>";
+    SimpleSearch *simple = new SimpleSearch(_result);
+    // Hand the iterator to its owner before doing anything else with it.
+    SearchIterator::UP owner(simple);
+    simple->tag(_tag + strict_part + bound_part);
+    return owner;
+}
+
SimpleBlueprint::SimpleBlueprint(const SimpleResult &result)
: SimpleLeafBlueprint(FieldSpecBaseList()),
_tag(),
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
index 2dc2d938bb6..b99be4a9353 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
@@ -34,6 +34,8 @@ private:
protected:
SearchIterator::UP
createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, bool strict) const override;
+ SearchIterator::UP
+ createFilterSearch(bool strict, FilterConstraint constraint) const override;
public:
SimpleBlueprint(const SimpleResult &result);
diff --git a/vespalib/src/vespa/vespalib/regex/regex.cpp b/vespalib/src/vespa/vespalib/regex/regex.cpp
index 3229365b753..ebdbb256d19 100644
--- a/vespalib/src/vespa/vespalib/regex/regex.cpp
+++ b/vespalib/src/vespa/vespalib/regex/regex.cpp
@@ -58,6 +58,9 @@ Regex Regex::from_pattern(std::string_view pattern, uint32_t opt_mask) {
if ((opt_mask & Options::IgnoreCase) != 0) {
opts.set_case_sensitive(false);
}
+ if ((opt_mask & Options::DotMatchesNewline) != 0) {
+ opts.set_dot_nl(true);
+ }
return Regex(std::make_shared<const Impl>(pattern, opts));
}
diff --git a/vespalib/src/vespa/vespalib/regex/regex.h b/vespalib/src/vespa/vespalib/regex/regex.h
index 4382d057252..0c80f4e5d3a 100644
--- a/vespalib/src/vespa/vespalib/regex/regex.h
+++ b/vespalib/src/vespa/vespalib/regex/regex.h
@@ -43,8 +43,9 @@ class Regex {
public:
// TODO consider using type-safe parameter instead.
enum Options {
- None = 0,
- IgnoreCase = 1
+ None = 0,
+ IgnoreCase = 1,
+ DotMatchesNewline = 2
};
~Regex();