summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-11-20 21:31:40 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2023-11-21 17:02:10 +0000
commitc0f6dc28c836f554870a47ce385ca202ee81579a (patch)
tree5db57f3275e8bd9dff603c39fe7d5cb82a5ca527
parentab75d86b43fb1027c0f8684e4dbeacb28632f997 (diff)
If limit not reached after a certain amount iterators, estimate how many iterators are needed and
step iterator forward.
-rw-r--r--searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp40
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h9
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp35
-rw-r--r--searchlib/src/vespa/searchlib/test/attribute_builder.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/test/attribute_builder.h2
5 files changed, 98 insertions, 16 deletions
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 343a5c8e38b..c701d5ac19f 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -233,6 +233,7 @@ private:
void testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values);
void testRangeSearch();
void testRangeSearchLimited();
+ void testRangeSearchLimitedHugeDictionary();
// test case insensitive search
@@ -504,7 +505,7 @@ SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected,
ASSERT_TRUE(array != nullptr);
uint32_t i = 0;
for (auto iter = expected.begin(); iter != expected.end(); ++iter, ++i) {
- EXPECT_TRUE(array[i].getDocId() == *iter);
+ EXPECT_EQUAL(array[i].getDocId(), *iter);
}
}
}
@@ -1176,6 +1177,42 @@ SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, s
}
}
+DocSet
+createDocs(uint32_t from, int32_t count) {
+ DocSet docs;
+ if (count >= 0) {
+ for (int32_t i(0); i < count; i++) {
+ docs.put(from + i);
+ }
+ } else {
+ for (int32_t i(0); i > count; i--) {
+ docs.put(from + i);
+ }
+ }
+ return docs;
+}
+
+void
+SearchContextTest::testRangeSearchLimitedHugeDictionary() {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ std::vector<int32_t> v;
+ v.reserve(2000);
+ for (size_t i(0); i < v.capacity(); i++) {
+ v.push_back(i);
+ }
+ auto ptr = AttributeBuilder("limited-int32", cfg).fill(v).get();
+ auto& vec = dynamic_cast<IntegerAttribute &>(*ptr);
+
+ performRangeSearch(vec, "[1;9;1200]", createDocs(2, 9));
+ performRangeSearch(vec, "[1;1109;1200]", createDocs(2, 1109));
+ performRangeSearch(vec, "[1;3009;1200]", createDocs(2, 1200));
+
+ performRangeSearch(vec, "[1;9;-1200]", createDocs(2, 9));
+ performRangeSearch(vec, "[1;1109;-1200]", createDocs(2, 1109));
+ performRangeSearch(vec, "[1;3009;-1200]", createDocs(2000, -1200));
+}
+
void
SearchContextTest::testRangeSearchLimited()
{
@@ -1980,6 +2017,7 @@ SearchContextTest::Main()
testSearchIterator();
testRangeSearch();
testRangeSearchLimited();
+ testRangeSearchLimitedHugeDictionary();
testCaseInsensitiveSearch();
testRegexSearch();
testPrefixSearch();
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index e4d0aeeccb0..f5683546eea 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -34,6 +34,7 @@ protected:
using FrozenDictionary = Dictionary::FrozenView;
using EntryRef = vespalib::datastore::EntryRef;
using EnumIndex = IEnumStore::Index;
+ static constexpr uint32_t max_posting_lists_to_count = 1000;
const IEnumStoreDictionary& _dictionary;
const ISearchContext& _baseSearchCtx;
@@ -113,7 +114,7 @@ protected:
unsigned int singleHits() const;
unsigned int approximateHits() const override;
- void applyRangeLimit(int rangeLimit);
+ void applyRangeLimit(long rangeLimit);
};
@@ -451,14 +452,14 @@ NumericPostingSearchContext<BaseSC, AttrT, DataT>::calc_estimated_hits_in_range(
{
size_t exact_sum = 0;
size_t estimated_sum = 0;
- constexpr uint32_t max_posting_lists_to_count = 1000;
+
auto it = this->_lowerDictItr;
- for (uint32_t count = 0; (it != this->_upperDictItr) && (count < max_posting_lists_to_count); ++it, ++count) {
+ for (uint32_t count = 0; (it != this->_upperDictItr) && (count < this->max_posting_lists_to_count); ++it, ++count) {
exact_sum += this->_posting_store.frozenSize(it.getData().load_acquire());
}
if (it != this->_upperDictItr) {
uint32_t remaining_posting_lists = this->_upperDictItr - it;
- float hits_per_posting_list = static_cast<float>(exact_sum) / static_cast<float>(max_posting_lists_to_count);
+ float hits_per_posting_list = static_cast<float>(exact_sum) / static_cast<float>(this->max_posting_lists_to_count);
estimated_sum = remaining_posting_lists * hits_per_posting_list;
}
return exact_sum + estimated_sum;
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index 5476ddd1793..27ef06565a6 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -243,25 +243,46 @@ PostingListSearchContextT<DataT>::approximateHits() const
template <typename DataT>
void
-PostingListSearchContextT<DataT>::applyRangeLimit(int rangeLimit)
+PostingListSearchContextT<DataT>::applyRangeLimit(long rangeLimit)
{
+ long n = 0;
+ size_t count = 0;
if (rangeLimit > 0) {
DictionaryConstIterator middle = _lowerDictItr;
- for (int n(0); (n < rangeLimit) && (middle != _upperDictItr); ++middle) {
+ for (; (n < rangeLimit) && (count < max_posting_lists_to_count) && (middle != _upperDictItr); ++middle, count++) {
n += _posting_store.frozenSize(middle.getData().load_acquire());
}
- _upperDictItr = middle;
- _uniqueValues = _upperDictItr - _lowerDictItr;
+ if (middle == _upperDictItr) {
+ // All there is
+ } else if (n >= rangeLimit) {
+ _upperDictItr = middle;
+ } else {
+ size_t offset = ((rangeLimit - n) * count)/n;
+ middle += offset;
+ if (middle.valid() && ((_upperDictItr - middle) > 0)) {
+ _upperDictItr = middle;
+ }
+ }
} else if ((rangeLimit < 0) && (_lowerDictItr != _upperDictItr)) {
rangeLimit = -rangeLimit;
DictionaryConstIterator middle = _upperDictItr;
- for (int n(0); (n < rangeLimit) && (middle != _lowerDictItr); ) {
+ for (; (n < rangeLimit) && (count < max_posting_lists_to_count) && (middle != _lowerDictItr); count++) {
--middle;
n += _posting_store.frozenSize(middle.getData().load_acquire());
}
- _lowerDictItr = middle;
- _uniqueValues = _upperDictItr - _lowerDictItr;
+ if (middle == _lowerDictItr) {
+ // All there is
+ } else if (n >= rangeLimit) {
+ _lowerDictItr = middle;
+ } else {
+ size_t offset = ((rangeLimit - n) * count)/n;
+ middle -= offset;
+ if (middle.valid() && ((middle - _lowerDictItr) > 0)) {
+ _lowerDictItr = middle;
+ }
+ }
}
+ _uniqueValues = std::abs(_upperDictItr - _lowerDictItr);
}
diff --git a/searchlib/src/vespa/searchlib/test/attribute_builder.cpp b/searchlib/src/vespa/searchlib/test/attribute_builder.cpp
index 9a88d44c72e..d6a8b0f4fdf 100644
--- a/searchlib/src/vespa/searchlib/test/attribute_builder.cpp
+++ b/searchlib/src/vespa/searchlib/test/attribute_builder.cpp
@@ -32,14 +32,27 @@ add_docs(AttributeVector& attr, size_t num_docs)
attr.addDocs(num_docs);
}
+
template <typename AttrType, typename ValueType>
void
-fill_helper(AttributeVector& attr, std::initializer_list<ValueType> values)
+fill_helper(AttributeVector& attr, std::span<ValueType> values)
{
add_docs(attr, values.size());
auto& real = dynamic_cast<AttrType&>(attr);
uint32_t docid = 1;
- for (auto value : values) {
+ for (const auto& value : values) {
+ real.update(docid++, value);
+ }
+ attr.commit(true);
+}
+
+template <typename AttrType, typename ValueType>
+void
+fill_helper(AttributeVector& attr, std::initializer_list<ValueType> values) {
+ add_docs(attr, values.size());
+ auto& real = dynamic_cast<AttrType&>(attr);
+ uint32_t docid = 1;
+ for (const auto& value : values) {
real.update(docid++, value);
}
attr.commit(true);
@@ -54,7 +67,7 @@ fill_array_helper(AttributeVector& attr, std::initializer_list<std::initializer_
auto& real = dynamic_cast<AttrType&>(attr);
uint32_t docid = 1;
for (auto value : values) {
- for (auto elem : value) {
+ for (const auto& elem : value) {
real.append(docid, elem, 1);
}
++docid;
@@ -71,7 +84,7 @@ fill_wset_helper(AttributeVector& attr, std::initializer_list<std::initializer_l
auto& real = dynamic_cast<AttrType&>(attr);
uint32_t docid = 1;
for (auto value : values) {
- for (auto elem : value) {
+ for (const auto& elem : value) {
real.append(docid, elem.first, elem.second);
}
++docid;
@@ -89,6 +102,13 @@ AttributeBuilder::docs(size_t num_docs)
}
AttributeBuilder&
+AttributeBuilder::fill(std::span<int32_t> values)
+{
+ fill_helper<IntegerAttribute, int32_t>(_attr, values);
+ return *this;
+}
+
+AttributeBuilder&
AttributeBuilder::fill(std::initializer_list<int32_t> values)
{
fill_helper<IntegerAttribute, int32_t>(_attr, values);
diff --git a/searchlib/src/vespa/searchlib/test/attribute_builder.h b/searchlib/src/vespa/searchlib/test/attribute_builder.h
index 473003bafc9..cdbed838327 100644
--- a/searchlib/src/vespa/searchlib/test/attribute_builder.h
+++ b/searchlib/src/vespa/searchlib/test/attribute_builder.h
@@ -8,6 +8,7 @@
#include <memory>
#include <utility>
#include <vector>
+#include <span>
namespace search { class AttributeVector; }
namespace search::attribute { class Config; }
@@ -38,6 +39,7 @@ public:
AttributeBuilder& docs(size_t num_docs);
// Fill functions for integer attributes
+ AttributeBuilder& fill(std::span<int32_t> values);
AttributeBuilder& fill(std::initializer_list<int32_t> values);
AttributeBuilder& fill(std::initializer_list<int64_t> values);
AttributeBuilder& fill_array(std::initializer_list<IntList> values);