diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-05-28 15:45:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-28 15:45:04 +0200 |
commit | 74fb334833045254e32721a20ed96d69f9c8cfed (patch) | |
tree | f279d04929aff8074c6419b2774f2a28c90b4d8c | |
parent | d4b9ae47ae9233ce9d852dba1ac20c00f4c66879 (diff) | |
parent | d2cec706ee0f3511ed2b6d40ca392563b59fa291 (diff) |
Merge pull request #13412 from vespa-engine/arnej/create-filter-in-intermediates
Arnej/create filter in intermediates
7 files changed, 170 insertions, 44 deletions
diff --git a/searchlib/src/tests/queryeval/queryeval.cpp b/searchlib/src/tests/queryeval/queryeval.cpp index 5601baa9113..29cdd6a4b84 100644 --- a/searchlib/src/tests/queryeval/queryeval.cpp +++ b/searchlib/src/tests/queryeval/queryeval.cpp @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/regex/regex.h> #include <vespa/searchlib/test/initrange.h> #include <vespa/searchlib/queryeval/andnotsearch.h> #include <vespa/searchlib/queryeval/andsearch.h> @@ -34,6 +35,9 @@ using search::test::InitRangeVerifier; //----------------------------------------------------------------------------- +constexpr auto lower_bound = Blueprint::FilterConstraint::LOWER_BOUND; +constexpr auto upper_bound = Blueprint::FilterConstraint::UPPER_BOUND; + template <typename T, typename V=std::vector<T> > class Collect { @@ -213,6 +217,15 @@ TEST("test that non-strict andnot search does NOT forward to its greedy first ch EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get()); } +void expect_match(std::string input, std::string regexp) { + using vespalib::Regex; + Regex pattern = Regex::from_pattern(regexp, Regex::Options::DotMatchesNewline); + if (! EXPECT_TRUE(pattern.partial_match(input))) { + fprintf(stderr, "no match for pattern: >>>%s<<< in input:\n>>>\n%s\n<<<\n", + regexp.c_str(), input.c_str()); + } +} + TEST("testAnd") { SimpleResult a; SimpleResult b; @@ -232,8 +245,19 @@ TEST("testAnd") { res.search(*and_ab); SimpleResult expect; expect.addHit(5).addHit(30); + EXPECT_EQUAL(res, expect); + SearchIterator::UP filter_ab = and_b->createFilterSearch(true, upper_bound); + SimpleResult filter_res; + filter_res.search(*filter_ab); EXPECT_EQUAL(res, expect); + std::string dump = filter_ab->asString(); + expect_match(dump, "upper"); + expect_match(dump, "AndSearchStrict.*NoUnpack.*SimpleSearch.*upper.*SimpleSearch.*upper"); + filter_ab = and_b->createFilterSearch(false, lower_bound); + dump = filter_ab->asString(); + expect_match(dump, "lower"); + expect_match(dump, "AndSearchNoStrict.*NoUnpack.*SimpleSearch.*lower.*SimpleSearch.*lower"); } TEST("mutisearch and initRange") { @@ -257,8 +281,19 @@ TEST("testOr") { res.search(*or_ab); SimpleResult expect; expect.addHit(5).addHit(10).addHit(17).addHit(30); + EXPECT_EQUAL(res, expect); + SearchIterator::UP filter_ab = or_b->createFilterSearch(true, upper_bound); + SimpleResult filter_res; + filter_res.search(*filter_ab); EXPECT_EQUAL(res, expect); + std::string dump = filter_ab->asString(); + expect_match(dump, "upper"); + expect_match(dump, "OrLikeSearch.true.*NoUnpack.*SimpleSearch.*upper.*SimpleSearch.*upper"); + filter_ab = or_b->createFilterSearch(false, lower_bound); + dump = filter_ab->asString(); + expect_match(dump, "lower"); + expect_match(dump, "OrLikeSearch.false.*NoUnpack.*SimpleSearch.*lower.*SimpleSearch.*lower"); } { TermFieldMatchData tfmd; @@ -371,8 +406,19 @@ TEST("testAndNot") { res.search(*andnot_ab); SimpleResult expect; expect.addHit(10); + EXPECT_EQUAL(res, expect); + SearchIterator::UP filter_ab = andnot_b->createFilterSearch(true, upper_bound); + SimpleResult filter_res; + filter_res.search(*filter_ab); EXPECT_EQUAL(res, expect); + std::string dump = filter_ab->asString(); + expect_match(dump, "upper"); + expect_match(dump, "AndNotSearch.*SimpleSearch.*<strict,upper>.*SimpleSearch.*<nostrict,lower>"); + filter_ab = andnot_b->createFilterSearch(false, lower_bound); + dump = filter_ab->asString(); + expect_match(dump, "lower"); + expect_match(dump, "AndNotSearch.*SimpleSearch.*<nostrict,lower>.*SimpleSearch.*<nostrict,upper>"); } { SimpleResult a; diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 430bc3956e7..624b9a1a368 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -149,12 +149,12 @@ AndNotBlueprint::inheritStrict(size_t i) const } SearchIterator::UP -AndNotBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, +AndNotBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches, bool strict, search::fef::MatchData &md) const { - UnpackInfo unpackInfo(calculateUnpackInfo(md)); - if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) { - TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo); + UnpackInfo unpack_info(calculateUnpackInfo(md)); + if (should_do_termwise_eval(unpack_info, md.get_termwise_limit())) { + TermwiseBlueprintHelper helper(*this, sub_searches, unpack_info); bool termwise_strict = (strict && inheritStrict(helper.first_termwise)); auto termwise_search = (helper.first_termwise == 0) ? SearchIterator::UP(AndNotSearch::create(helper.termwise, termwise_strict)) @@ -165,7 +165,34 @@ AndNotBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearch } return SearchIterator::UP(AndNotSearch::create(helper.children, strict)); } - return SearchIterator::UP(AndNotSearch::create(subSearches, strict)); + return SearchIterator::UP(AndNotSearch::create(sub_searches, strict)); +} + +namespace { +Blueprint::FilterConstraint invert(Blueprint::FilterConstraint constraint) { + if (constraint == Blueprint::FilterConstraint::UPPER_BOUND) { + return Blueprint::FilterConstraint::LOWER_BOUND; + } + if (constraint == Blueprint::FilterConstraint::LOWER_BOUND) { + return Blueprint::FilterConstraint::UPPER_BOUND; + } + abort(); +} +} // namespace <unnamed> + +SearchIterator::UP +AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const +{ + MultiSearch::Children sub_searches; + sub_searches.reserve(childCnt()); + for (size_t i = 0; i < childCnt(); ++i) { + bool child_strict = strict && inheritStrict(i); + auto search = (i == 0) + ? getChild(i).createFilterSearch(child_strict, constraint) + : getChild(i).createFilterSearch(child_strict, invert(constraint)); + sub_searches.push_back(search.release()); + } + return SearchIterator::UP(AndNotSearch::create(sub_searches, strict)); } //----------------------------------------------------------------------------- @@ -221,13 +248,13 @@ AndBlueprint::inheritStrict(size_t i) const } SearchIterator::UP -AndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, +AndBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches, bool strict, search::fef::MatchData & md) const { - UnpackInfo unpackInfo(calculateUnpackInfo(md)); + UnpackInfo unpack_info(calculateUnpackInfo(md)); AndSearch * search = 0; - if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) { - TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo); + if (should_do_termwise_eval(unpack_info, md.get_termwise_limit())) { + TermwiseBlueprintHelper helper(*this, sub_searches, unpack_info); bool termwise_strict = (strict && inheritStrict(helper.first_termwise)); auto termwise_search = SearchIterator::UP(AndSearch::create(helper.termwise, termwise_strict)); helper.insert_termwise(std::move(termwise_search), termwise_strict); @@ -237,12 +264,28 @@ AndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, search = AndSearch::create(helper.children, strict, helper.termwise_unpack); } } else { - search = AndSearch::create(subSearches, strict, unpackInfo); + search = AndSearch::create(sub_searches, strict, unpack_info); } search->estimate(getState().estimate().estHits); return SearchIterator::UP(search); } +SearchIterator::UP +AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const +{ + MultiSearch::Children sub_searches; + sub_searches.reserve(childCnt()); + for (size_t i = 0; i < childCnt(); ++i) { + bool child_strict = strict && inheritStrict(i); + auto search = getChild(i).createFilterSearch(child_strict, constraint); + sub_searches.push_back(search.release()); + } + UnpackInfo unpack_info; + AndSearch * search = AndSearch::create(sub_searches, strict, unpack_info); + search->estimate(getState().estimate().estHits); + return SearchIterator::UP(search); +} + double AndBlueprint::computeNextHitRate(const Blueprint & child, double hitRate) const { return hitRate * child.hit_ratio(); @@ -303,12 +346,12 @@ OrBlueprint::inheritStrict(size_t) const } SearchIterator::UP -OrBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, +OrBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches, bool strict, search::fef::MatchData & md) const { - UnpackInfo unpackInfo(calculateUnpackInfo(md)); - if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) { - TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo); + UnpackInfo unpack_info(calculateUnpackInfo(md)); + if (should_do_termwise_eval(unpack_info, md.get_termwise_limit())) { + TermwiseBlueprintHelper helper(*this, sub_searches, unpack_info); bool termwise_strict = (strict && inheritStrict(helper.first_termwise)); auto termwise_search = SearchIterator::UP(OrSearch::create(helper.termwise, termwise_strict)); helper.insert_termwise(std::move(termwise_search), termwise_strict); @@ -317,7 +360,21 @@ OrBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, } return SearchIterator::UP(OrSearch::create(helper.children, strict, helper.termwise_unpack)); } - return SearchIterator::UP(OrSearch::create(subSearches, strict, unpackInfo)); + return SearchIterator::UP(OrSearch::create(sub_searches, strict, unpack_info)); +} + +SearchIterator::UP +OrBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const +{ + MultiSearch::Children sub_searches; + sub_searches.reserve(childCnt()); + for (size_t i = 0; i < childCnt(); ++i) { + bool child_strict = strict && inheritStrict(i); + auto search = getChild(i).createFilterSearch(child_strict, constraint); + sub_searches.push_back(search.release()); + } + UnpackInfo unpack_info; + return SearchIterator::UP(OrSearch::create(sub_searches, strict, unpack_info)); } //----------------------------------------------------------------------------- @@ -359,14 +416,14 @@ WeakAndBlueprint::always_needs_unpack() const } SearchIterator::UP -WeakAndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, +WeakAndBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches, bool strict, search::fef::MatchData &) const { WeakAndSearch::Terms terms; - assert(subSearches.size() == childCnt()); + assert(sub_searches.size() == childCnt()); assert(_weights.size() == childCnt()); - for (size_t i = 0; i < subSearches.size(); ++i) { - terms.push_back(wand::Term(subSearches[i], + for (size_t i = 0; i < sub_searches.size(); ++i) { + terms.push_back(wand::Term(sub_searches[i], _weights[i], getChild(i).getState().estimate().estHits)); } @@ -407,7 +464,7 @@ NearBlueprint::createSearch(fef::MatchData &md, bool strict) const } SearchIterator::UP -NearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, +NearBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches, bool strict, search::fef::MatchData &md) const { search::fef::TermFieldMatchDataArray tfmda; @@ -417,7 +474,7 @@ NearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches tfmda.add(cs.field(j).resolve(md)); } } - return SearchIterator::UP(new NearSearch(subSearches, tfmda, _window, strict)); + return SearchIterator::UP(new NearSearch(sub_searches, tfmda, _window, strict)); } //----------------------------------------------------------------------------- @@ -455,7 +512,7 @@ ONearBlueprint::createSearch(fef::MatchData &md, bool strict) const } SearchIterator::UP -ONearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, +ONearBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches, bool strict, search::fef::MatchData &md) const { search::fef::TermFieldMatchDataArray tfmda; @@ -465,9 +522,9 @@ ONearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearche tfmda.add(cs.field(j).resolve(md)); } } - // could sort subSearches here + // could sort sub_searches here // but then strictness inheritance would also need to be fixed - return SearchIterator::UP(new ONearSearch(subSearches, tfmda, _window, strict)); + return SearchIterator::UP(new ONearSearch(sub_searches, tfmda, _window, strict)); } //----------------------------------------------------------------------------- @@ -520,27 +577,27 @@ RankBlueprint::inheritStrict(size_t i) const } SearchIterator::UP -RankBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, +RankBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches, bool strict, search::fef::MatchData & md) const { - UnpackInfo unpackInfo(calculateUnpackInfo(md)); - if (unpackInfo.unpackAll()) { - return SearchIterator::UP(RankSearch::create(subSearches, strict)); + UnpackInfo unpack_info(calculateUnpackInfo(md)); + if (unpack_info.unpackAll()) { + return SearchIterator::UP(RankSearch::create(sub_searches, strict)); } else { - MultiSearch::Children requireUnpack; - requireUnpack.reserve(subSearches.size()); - requireUnpack.push_back(subSearches[0]); - for (size_t i(1); i < subSearches.size(); i++) { - if (unpackInfo.needUnpack(i)) { - requireUnpack.push_back(subSearches[i]); + MultiSearch::Children require_unpack; + require_unpack.reserve(sub_searches.size()); + require_unpack.push_back(sub_searches[0]); + for (size_t i(1); i < sub_searches.size(); i++) { + if (unpack_info.needUnpack(i)) { + require_unpack.push_back(sub_searches[i]); } else { - delete subSearches[i]; + delete sub_searches[i]; } } - if (requireUnpack.size() == 1) { - return SearchIterator::UP(requireUnpack[0]); + if (require_unpack.size() == 1) { + return SearchIterator::UP(require_unpack[0]); } else { - return SearchIterator::UP(RankSearch::create(requireUnpack, strict)); + return SearchIterator::UP(RankSearch::create(require_unpack, strict)); } } } @@ -597,13 +654,13 @@ SourceBlenderBlueprint::findSource(uint32_t sourceId) const } SearchIterator::UP -SourceBlenderBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, +SourceBlenderBlueprint::createIntermediateSearch(const MultiSearch::Children &sub_searches, bool strict, search::fef::MatchData &) const { SourceBlenderSearch::Children children; - assert(subSearches.size() == childCnt()); - for (size_t i = 0; i < subSearches.size(); ++i) { - children.push_back(SourceBlenderSearch::Child(subSearches[i], + assert(sub_searches.size() == childCnt()); + for (size_t i = 0; i < sub_searches.size(); ++i) { + children.push_back(SourceBlenderSearch::Child(sub_searches[i], getChild(i).getSourceId())); assert(children.back().sourceId != 0xffffffff); } diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 3a6b5e1a31a..ecad9431a81 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -24,6 +24,8 @@ public: SearchIterator::UP createIntermediateSearch(const MultiSearch::Children &subSearches, bool strict, fef::MatchData &md) const override; + SearchIterator::UP + createFilterSearch(bool strict, FilterConstraint constraint) const override; private: bool isPositive(size_t index) const override { return index == 0; } }; @@ -48,6 +50,8 @@ public: SearchIterator::UP createIntermediateSearch(const MultiSearch::Children &subSearches, bool strict, fef::MatchData &md) const override; + SearchIterator::UP + createFilterSearch(bool strict, FilterConstraint constraint) const override; }; //----------------------------------------------------------------------------- @@ -65,6 +69,8 @@ public: SearchIterator::UP createIntermediateSearch(const MultiSearch::Children &subSearches, bool strict, fef::MatchData &md) const override; + SearchIterator::UP + createFilterSearch(bool strict, FilterConstraint constraint) const override; }; //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp index 94cec9a63c5..e605f36c87f 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp @@ -42,6 +42,17 @@ SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, return search; } +SearchIterator::UP +SimpleBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const +{ + SimpleSearch *ss = new SimpleSearch(_result); + SearchIterator::UP search(ss); + ss->tag(_tag + + (strict ? "<strict," : "<nostrict,") + + (constraint == FilterConstraint::UPPER_BOUND ? "upper>" : "lower>")); + return search; +} + SimpleBlueprint::SimpleBlueprint(const SimpleResult &result) : SimpleLeafBlueprint(FieldSpecBaseList()), _tag(), diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h index 2dc2d938bb6..b99be4a9353 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h @@ -34,6 +34,8 @@ private: protected: SearchIterator::UP createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, bool strict) const override; + SearchIterator::UP + createFilterSearch(bool strict, FilterConstraint constraint) const override; public: SimpleBlueprint(const SimpleResult &result); diff --git a/vespalib/src/vespa/vespalib/regex/regex.cpp b/vespalib/src/vespa/vespalib/regex/regex.cpp index 3229365b753..ebdbb256d19 100644 --- a/vespalib/src/vespa/vespalib/regex/regex.cpp +++ b/vespalib/src/vespa/vespalib/regex/regex.cpp @@ -58,6 +58,9 @@ Regex Regex::from_pattern(std::string_view pattern, uint32_t opt_mask) { if ((opt_mask & Options::IgnoreCase) != 0) { opts.set_case_sensitive(false); } + if ((opt_mask & Options::DotMatchesNewline) != 0) { + opts.set_dot_nl(true); + } return Regex(std::make_shared<const Impl>(pattern, opts)); } diff --git a/vespalib/src/vespa/vespalib/regex/regex.h b/vespalib/src/vespa/vespalib/regex/regex.h index 4382d057252..0c80f4e5d3a 100644 --- a/vespalib/src/vespa/vespalib/regex/regex.h +++ b/vespalib/src/vespa/vespalib/regex/regex.h @@ -43,8 +43,9 @@ class Regex { public: // TODO consider using type-safe parameter instead. enum Options { - None = 0, - IgnoreCase = 1 + None = 0, + IgnoreCase = 1, + DotMatchesNewline = 2 }; ~Regex(); |