diff options
Diffstat (limited to 'searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp')
-rw-r--r-- | searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp | 370 |
1 files changed, 370 insertions, 0 deletions
diff --git a/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp new file mode 100644 index 00000000000..5954d51ec9b --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp @@ -0,0 +1,370 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_search. + +#include <vespa/log/log.h> +LOG_SETUP("predicate_search_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/queryeval/predicate_search.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/arraysize.h> + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using namespace search::queryeval; +using namespace search::predicate; +using std::pair; +using std::vector; +using vespalib::arraysize; + +namespace { + +class MyPostingList : public PredicatePostingList { + vector<pair<uint32_t, uint32_t>> _entries; + size_t _index; + uint32_t _interval; + + void setInterval(uint32_t interval) { _interval = interval; } + +public: + MyPostingList(const vector<pair<uint32_t, uint32_t>> &entries) + : _entries(entries), + _index(0) { + } + MyPostingList(std::initializer_list<pair<uint32_t, uint32_t>> ilist) + : _entries(ilist.begin(), ilist.end()), + _index(0) { + } + + bool next(uint32_t doc_id) override { + if (_index < _entries.size()) { + while (_entries[_index].first <= doc_id) { + ++_index; + if (_index == _entries.size()) { + setDocId(search::endDocId); + return false; + } + } + setDocId(_entries[_index].first); + setInterval(_entries[_index].second); + return true; + } + setDocId(search::endDocId); + return false; + } + + bool nextInterval() override { + if (_index + 1 < _entries.size() && + _entries[_index].first == _entries[_index + 1].first) { + ++_index; + setInterval(_entries[_index].second); + return true; + } + return false; + } + uint32_t getInterval() const override { return _interval; } +}; + +template <int N> +vector<PredicatePostingList::UP> +make_posting_lists_vector(MyPostingList (&plists)[N]) { + vector<PredicatePostingList::UP> posting_lists; + for (int i = 0; i < N; ++i) { + posting_lists.emplace_back(std::make_unique<MyPostingList>(plists[i])); + } + return posting_lists; +} + +TermFieldMatchDataArray tfmda; +typedef std::vector<uint8_t> CV; +typedef std::vector<uint8_t> MF; +typedef std::vector<uint16_t> IR; + +TEST("Require that the skipping is efficient") { + const uint8_t min_feature[] = { 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7}; + const uint8_t kv[] = { 6,7,6,7,6,7,6,8,6,5,6,7,6,0,6,7, + 7,6,7,6,6,6,6,7,7,7,8,7,8,7,7,7,6,7}; + SkipMinFeature::UP skip = SkipMinFeature::create(min_feature, kv, 34); + EXPECT_EQUAL(1u, skip->next()); + EXPECT_EQUAL(3u, skip->next()); + EXPECT_EQUAL(5u, skip->next()); + EXPECT_EQUAL(7u, skip->next()); + EXPECT_EQUAL(11u, skip->next()); + EXPECT_EQUAL(15u, skip->next()); + EXPECT_EQUAL(16u, skip->next()); + EXPECT_EQUAL(18u, skip->next()); + EXPECT_EQUAL(23u, skip->next()); + EXPECT_EQUAL(24u, skip->next()); + EXPECT_EQUAL(25u, skip->next()); + EXPECT_EQUAL(26u, skip->next()); + EXPECT_EQUAL(27u, skip->next()); + EXPECT_EQUAL(28u, skip->next()); + EXPECT_EQUAL(29u, skip->next()); + EXPECT_EQUAL(30u, skip->next()); + EXPECT_EQUAL(31u, skip->next()); + EXPECT_EQUAL(33u, skip->next()); +} + +TEST("require that empty search yields no results") { + vector<PredicatePostingList::UP> posting_lists; + MF mf(3); CV cv(3); IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, std::move(posting_lists), tfmda); + search.initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId()); + EXPECT_FALSE(search.seek(2)); + EXPECT_TRUE(search.isAtEnd()); +} + +TEST("require that simple search yields result") { + MyPostingList plists[] = {{{2, 0x0001ffff}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 1}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId()); + EXPECT_FALSE(search.seek(1)); + EXPECT_EQUAL(2u, search.getDocId()); + EXPECT_TRUE(search.seek(2)); + EXPECT_EQUAL(2u, search.getDocId()); + EXPECT_FALSE(search.seek(3)); + EXPECT_TRUE(search.isAtEnd()); +} + +TEST("require that minFeature (K) is used to prune results") { + MyPostingList plists[] = {{{2, 0x0001ffff}}, + {{5, 0x0001ffff}}}; + MF mf{0, 0, 3, 0, 0, 0}; + CV cv{1, 0, 0, 0, 0, 1}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); + EXPECT_EQUAL(5u, search.getDocId()); +} + +TEST("require that a high K (min_feature - 1) can yield results") { + MyPostingList plists[] = {{{2, 0x00010001}}, + {{2, 0x0002ffff}}}; + MF mf{0, 0, 2}; + CV cv{0, 0, 2}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that we can skip past entries") { + MyPostingList plists[] = {{{2, 0x0001ffff}, + {5, 0x0001ffff}}}; + MF mf{0, 0, 0, 0, 0, 0}; + CV cv{0, 0, 1, 0, 0, 1}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(5)); +} + +TEST("require that posting lists are sorted after advancing") { + MyPostingList plists[] = {{{1, 0x0001ffff}, + {5, 0x0001ffff}}, + {{2, 0x0001ffff}, + {4, 0x0001ffff}}}; + MF mf{0, 2, 0, 0, 0, 0}; + CV cv{0, 1, 1, 0, 1, 1}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(1)); + EXPECT_FALSE(search.seek(3)); + EXPECT_TRUE(search.seek(4)); +} + +TEST("require that short interval ranges works") { + MyPostingList plists[] = {{{1, 0x00010001}, + {5, 0x00010001}}, + {{2, 0x00010001}, + {4, 0x00010001}}}; + MF mf{0, 2, 0, 0, 0, 0}; + CV cv{0, 1, 1, 0, 1, 1}; + IR ir(6, 0x0001); + PredicateSearch search(&mf[0], &ir[0], 0x1, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(1)); + EXPECT_FALSE(search.seek(3)); + EXPECT_TRUE(search.seek(4)); +} + +TEST("require that empty posting lists work") { + MyPostingList plists[] = {{}}; + MF mf(3); CV cv(3); IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId()); + EXPECT_FALSE(search.seek(2)); + EXPECT_TRUE(search.isAtEnd()); +} + +TEST("require that shorter posting list ending is ok") { + MyPostingList plists[] = {{{1, 0x0001ffff}, + {2, 0x0001ffff}}, + {{4, 0x0001ffff}}}; + MF mf{0, 0, 0, 0, 0}; + CV cv{0, 1, 1, 0, 1}; + IR ir(5, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(1)); + EXPECT_TRUE(search.seek(4)); +} + +TEST("require that sorting works for many posting lists") { + MyPostingList plists[] = {{{1, 0x0001ffff}, + {2, 0x0001ffff}}, + {{2, 0x0001ffff}, + {4, 0x0001ffff}}, + {{2, 0x0001ffff}, + {5, 0x0001ffff}}, + {{2, 0x0001ffff}, + {4, 0x0001ffff}}, + {{2, 0x0001ffff}, + {5, 0x0001ffff}}}; + MF mf{0, 1, 5, 0, 2, 2}; + CV cv{0, 1, 5, 0, 2, 2}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(1)); + EXPECT_TRUE(search.seek(2)); + EXPECT_TRUE(search.seek(4)); + EXPECT_TRUE(search.seek(5)); +} + +TEST("require that insufficient interval coverage prevents match") { + MyPostingList plists[] = {{{2, 0x00010001}, + {3, 0x0002ffff}}}; + MF mf{0, 0, 0, 0}; + CV cv{0, 0, 1, 1}; + IR ir(4, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); + EXPECT_FALSE(search.seek(3)); +} + +TEST("require that intervals are sorted") { + MyPostingList plists[] = {{{2, 0x00010001}}, + {{2, 0x0003ffff}}, + {{2, 0x00020002}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 3}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that NOT is supported - no match") { + MyPostingList plists[] = {{{2, 0x00010001}}, // [l, r] + {{2, 0x00010000}, // [l, r]* + {2, 0xffff0001}}}; // [r+1, r+1]* + MF mf{0, 0, 0}; + CV cv{0, 0, 3}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); +} + +TEST("require that NOT is supported - match") { + MyPostingList plists[] = {{{2, 0x00010000}, // [l, r]* + {2, 0xffff0001}}}; // [r+1, r+1]* + MF mf{0, 0, 0}; + CV cv{0, 0, 2}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that NOT is supported - no match because of previous term") { + MyPostingList plists[] = {{{2, 0x00020001}, // [l, r]* + {2, 0xffff0002}}}; // [r+1, r+1]* + MF mf{0, 0, 0}; + CV cv{0, 0, 2}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); +} + +TEST("require that NOT is supported - subqueries") { + MyPostingList plists[] = {{{2, 0x00010001}}, // [l, r] + {{2, 0x00010000}, // [l, r]* + {2, 0xffff0001}}}; // [r+1, r+1]* + plists[0].setSubquery(0xffff); + MF mf{0, 0, 0}; + CV cv{0, 0, 3}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that there can be many intervals") { + MyPostingList plists[] = {{{2, 0x00010001}, + {2, 0x00020002}, + {2, 0x00030003}, + {2, 0x0001ffff}, + {2, 0x00040004}, + {2, 0x00050005}, + {2, 0x00060006}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 7}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that match can require multiple postinglists.") { + MyPostingList plists[] = {{{2, 0x00010001}}, + {{2, 0x0002000b}, + {2, 0x00030003}}, + {{2, 0x00040003}}, + {{2, 0x00050004}}, + {{2, 0x00010008}, + {2, 0x00060006}}, + {{2, 0x00020002}, + {2, 0x0007ffff}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 9}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that subquery bitmap is unpacked to subqueries.") { + MyPostingList plists[] = {{{2, 0x0001ffff}}}; + TermFieldMatchDataArray array; + TermFieldMatchData data; + array.add(&data); + MF mf{0, 0, 0}; + CV cv{0, 0, 1}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), array); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); + search.unpack(2); + EXPECT_EQUAL(0xffffffffffffffffULL, + static_cast<unsigned long long>(data.getSubqueries())); +} + + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } |