diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-11-20 21:31:40 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-11-21 17:02:10 +0000 |
commit | c0f6dc28c836f554870a47ce385ca202ee81579a (patch) | |
tree | 5db57f3275e8bd9dff603c39fe7d5cb82a5ca527 /searchlib/src | |
parent | ab75d86b43fb1027c0f8684e4dbeacb28632f997 (diff) |
If limit not reached after a certain amount iterators, estimate how many iterators are needed and
step iterator forward.
Diffstat (limited to 'searchlib/src')
5 files changed, 98 insertions, 16 deletions
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index 343a5c8e38b..c701d5ac19f 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -233,6 +233,7 @@ private: void testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values); void testRangeSearch(); void testRangeSearchLimited(); + void testRangeSearchLimitedHugeDictionary(); // test case insensitive search @@ -504,7 +505,7 @@ SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, ASSERT_TRUE(array != nullptr); uint32_t i = 0; for (auto iter = expected.begin(); iter != expected.end(); ++iter, ++i) { - EXPECT_TRUE(array[i].getDocId() == *iter); + EXPECT_EQUAL(array[i].getDocId(), *iter); } } } @@ -1176,6 +1177,42 @@ SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, s } } +DocSet +createDocs(uint32_t from, int32_t count) { + DocSet docs; + if (count >= 0) { + for (int32_t i(0); i < count; i++) { + docs.put(from + i); + } + } else { + for (int32_t i(0); i > count; i--) { + docs.put(from + i); + } + } + return docs; +} + +void +SearchContextTest::testRangeSearchLimitedHugeDictionary() { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + std::vector<int32_t> v; + v.reserve(2000); + for (size_t i(0); i < v.capacity(); i++) { + v.push_back(i); + } + auto ptr = AttributeBuilder("limited-int32", cfg).fill(v).get(); + auto& vec = dynamic_cast<IntegerAttribute &>(*ptr); + + performRangeSearch(vec, "[1;9;1200]", createDocs(2, 9)); + performRangeSearch(vec, "[1;1109;1200]", createDocs(2, 1109)); + performRangeSearch(vec, "[1;3009;1200]", createDocs(2, 1200)); + + performRangeSearch(vec, "[1;9;-1200]", createDocs(2, 9)); + performRangeSearch(vec, "[1;1109;-1200]", createDocs(2, 1109)); + performRangeSearch(vec, "[1;3009;-1200]", createDocs(2000, -1200)); +} + void SearchContextTest::testRangeSearchLimited() { @@ -1980,6 +2017,7 @@ SearchContextTest::Main() testSearchIterator(); testRangeSearch(); testRangeSearchLimited(); + testRangeSearchLimitedHugeDictionary(); testCaseInsensitiveSearch(); testRegexSearch(); testPrefixSearch(); diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index e4d0aeeccb0..f5683546eea 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -34,6 +34,7 @@ protected: using FrozenDictionary = Dictionary::FrozenView; using EntryRef = vespalib::datastore::EntryRef; using EnumIndex = IEnumStore::Index; + static constexpr uint32_t max_posting_lists_to_count = 1000; const IEnumStoreDictionary& _dictionary; const ISearchContext& _baseSearchCtx; @@ -113,7 +114,7 @@ protected: unsigned int singleHits() const; unsigned int approximateHits() const override; - void applyRangeLimit(int rangeLimit); + void applyRangeLimit(long rangeLimit); }; @@ -451,14 +452,14 @@ NumericPostingSearchContext<BaseSC, AttrT, DataT>::calc_estimated_hits_in_range( { size_t exact_sum = 0; size_t estimated_sum = 0; - constexpr uint32_t max_posting_lists_to_count = 1000; + auto it = this->_lowerDictItr; - for (uint32_t count = 0; (it != this->_upperDictItr) && (count < max_posting_lists_to_count); ++it, ++count) { + for (uint32_t count = 0; (it != this->_upperDictItr) && (count < this->max_posting_lists_to_count); ++it, ++count) { exact_sum += this->_posting_store.frozenSize(it.getData().load_acquire()); } if (it != this->_upperDictItr) { uint32_t remaining_posting_lists = this->_upperDictItr - it; - float hits_per_posting_list = static_cast<float>(exact_sum) / static_cast<float>(max_posting_lists_to_count); + float hits_per_posting_list = static_cast<float>(exact_sum) / static_cast<float>(this->max_posting_lists_to_count); estimated_sum = remaining_posting_lists * hits_per_posting_list; } return exact_sum + estimated_sum; diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index 5476ddd1793..27ef06565a6 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -243,25 +243,46 @@ PostingListSearchContextT<DataT>::approximateHits() const template <typename DataT> void -PostingListSearchContextT<DataT>::applyRangeLimit(int rangeLimit) +PostingListSearchContextT<DataT>::applyRangeLimit(long rangeLimit) { + long n = 0; + size_t count = 0; if (rangeLimit > 0) { DictionaryConstIterator middle = _lowerDictItr; - for (int n(0); (n < rangeLimit) && (middle != _upperDictItr); ++middle) { + for (; (n < rangeLimit) && (count < max_posting_lists_to_count) && (middle != _upperDictItr); ++middle, count++) { n += _posting_store.frozenSize(middle.getData().load_acquire()); } - _upperDictItr = middle; - _uniqueValues = _upperDictItr - _lowerDictItr; + if (middle == _upperDictItr) { + // All there is + } else if (n >= rangeLimit) { + _upperDictItr = middle; + } else { + size_t offset = ((rangeLimit - n) * count)/n; + middle += offset; + if (middle.valid() && ((_upperDictItr - middle) > 0)) { + _upperDictItr = middle; + } + } } else if ((rangeLimit < 0) && (_lowerDictItr != _upperDictItr)) { rangeLimit = -rangeLimit; DictionaryConstIterator middle = _upperDictItr; - for (int n(0); (n < rangeLimit) && (middle != _lowerDictItr); ) { + for (; (n < rangeLimit) && (count < max_posting_lists_to_count) && (middle != _lowerDictItr); count++) { --middle; n += _posting_store.frozenSize(middle.getData().load_acquire()); } - _lowerDictItr = middle; - _uniqueValues = _upperDictItr - _lowerDictItr; + if (middle == _lowerDictItr) { + // All there is + } else if (n >= rangeLimit) { + _lowerDictItr = middle; + } else { + size_t offset = ((rangeLimit - n) * count)/n; + middle -= offset; + if (middle.valid() && ((middle - _lowerDictItr) > 0)) { + _lowerDictItr = middle; + } + } } + _uniqueValues = std::abs(_upperDictItr - _lowerDictItr); } diff --git a/searchlib/src/vespa/searchlib/test/attribute_builder.cpp b/searchlib/src/vespa/searchlib/test/attribute_builder.cpp index 9a88d44c72e..d6a8b0f4fdf 100644 --- a/searchlib/src/vespa/searchlib/test/attribute_builder.cpp +++ b/searchlib/src/vespa/searchlib/test/attribute_builder.cpp @@ -32,14 +32,27 @@ add_docs(AttributeVector& attr, size_t num_docs) attr.addDocs(num_docs); } + template <typename AttrType, typename ValueType> void -fill_helper(AttributeVector& attr, std::initializer_list<ValueType> values) +fill_helper(AttributeVector& attr, std::span<ValueType> values) { add_docs(attr, values.size()); auto& real = dynamic_cast<AttrType&>(attr); uint32_t docid = 1; - for (auto value : values) { + for (const auto& value : values) { + real.update(docid++, value); + } + attr.commit(true); +} + +template <typename AttrType, typename ValueType> +void +fill_helper(AttributeVector& attr, std::initializer_list<ValueType> values) { + add_docs(attr, values.size()); + auto& real = dynamic_cast<AttrType&>(attr); + uint32_t docid = 1; + for (const auto& value : values) { real.update(docid++, value); } attr.commit(true); @@ -54,7 +67,7 @@ fill_array_helper(AttributeVector& attr, std::initializer_list<std::initializer_ auto& real = dynamic_cast<AttrType&>(attr); uint32_t docid = 1; for (auto value : values) { - for (auto elem : value) { + for (const auto& elem : value) { real.append(docid, elem, 1); } ++docid; @@ -71,7 +84,7 @@ fill_wset_helper(AttributeVector& attr, std::initializer_list<std::initializer_l auto& real = dynamic_cast<AttrType&>(attr); uint32_t docid = 1; for (auto value : values) { - for (auto elem : value) { + for (const auto& elem : value) { real.append(docid, elem.first, elem.second); } ++docid; @@ -89,6 +102,13 @@ AttributeBuilder::docs(size_t num_docs) } AttributeBuilder& +AttributeBuilder::fill(std::span<int32_t> values) +{ + fill_helper<IntegerAttribute, int32_t>(_attr, values); + return *this; +} + +AttributeBuilder& AttributeBuilder::fill(std::initializer_list<int32_t> values) { fill_helper<IntegerAttribute, int32_t>(_attr, values); diff --git a/searchlib/src/vespa/searchlib/test/attribute_builder.h b/searchlib/src/vespa/searchlib/test/attribute_builder.h index 473003bafc9..cdbed838327 100644 --- a/searchlib/src/vespa/searchlib/test/attribute_builder.h +++ b/searchlib/src/vespa/searchlib/test/attribute_builder.h @@ -8,6 +8,7 @@ #include <memory> #include <utility> #include <vector> +#include <span> namespace search { class AttributeVector; } namespace search::attribute { class Config; } @@ -38,6 +39,7 @@ public: AttributeBuilder& docs(size_t num_docs); // Fill functions for integer attributes + AttributeBuilder& fill(std::span<int32_t> values); AttributeBuilder& fill(std::initializer_list<int32_t> values); AttributeBuilder& fill(std::initializer_list<int64_t> values); AttributeBuilder& fill_array(std::initializer_list<IntList> values); |