diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-07-27 17:36:08 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-27 17:36:08 +0800 |
commit | 2b43a46817cc779dccedd82ea8460802367a448a (patch) | |
tree | c921accbcf27cf83c7ff77f4e77e91ca22c84c80 | |
parent | c9b14b6c8d8f2ab0d704ed2c11a578ed1c077e62 (diff) | |
parent | 5bff22e3bb7ffa9cb4b77d57bc71d77975f332de (diff) |
Merge pull request #27817 from vespa-engine/revert-27773-revert-27643-balder/use-direct-weighted-set-also-for-filter-fields
Revert "Revert "- Consolidate on isFilter.""
20 files changed, 122 insertions, 205 deletions
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp index 6e622c840b6..5fa8889a01d 100644 --- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -53,15 +53,9 @@ struct BitVectorTest StringAttribute & asString(AttributePtr &v); FloatingPointAttribute & asFloat(AttributePtr &v); - AttributePtr - make(Config cfg, - const vespalib::string &pref, - bool fastSearch, - bool enableOnlyBitVector, - bool filter); + AttributePtr make(Config cfg, const vespalib::string &pref, bool fastSearch, bool filter); - void - addDocs(const AttributePtr &v, size_t sz); + void addDocs(const AttributePtr &v, size_t sz); template <typename VectorType> void populate(VectorType &v, uint32_t low, uint32_t high, bool set); @@ -69,22 +63,16 @@ struct BitVectorTest template <typename VectorType> void populateAll(VectorType &v, uint32_t low, uint32_t high, bool set); - void - buildTermQuery(std::vector<char> & buffer, - const vespalib::string & index, - const vespalib::string & term, bool prefix); + void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term, bool prefix); template <typename V> - vespalib::string - getSearchStr(); + vespalib::string getSearchStr(); template <typename V, typename T> - SearchContextPtr - getSearch(const V & vec, const T & term, bool prefix, bool useBitVector); + SearchContextPtr getSearch(const V & vec, const T & term, bool prefix, bool useBitVector); template <typename V> - SearchContextPtr - getSearch(const V & vec, bool useBitVector); + SearchContextPtr getSearch(const V & vec, bool useBitVector); void checkSearch(AttributePtr v, @@ -107,10 +95,7 @@ struct BitVectorTest template <typename VectorType, typename BufferType> void - test(BasicType bt, CollectionType ct, const vespalib::string &pref, - bool fastSearch, - bool enableOnlyBitVector, - bool filter); + test(BasicType bt, CollectionType ct, const vespalib::string &pref, bool fastSearch, bool filter); template <typename VectorType, typename BufferType> void @@ -195,8 +180,7 @@ BitVectorTest::getSearchStr<StringAttribute>() template <typename V, typename T> SearchContextPtr -BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, - bool useBitVector) +BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, bool useBitVector) { std::vector<char> query; vespalib::asciistream ss; @@ -211,8 +195,7 @@ BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, template <> SearchContextPtr -BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, - bool useBitVector) +BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, bool useBitVector) { return getSearch<IntegerAttribute>(v, "[-42;-42]", false, useBitVector); } @@ -220,32 +203,23 @@ BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, template <> SearchContextPtr BitVectorTest:: -getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v, - bool useBitVector) +getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v, bool useBitVector) { - return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false, - useBitVector); + return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false, useBitVector); } template <> SearchContextPtr -BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v, - bool useBitVector) +BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v, bool useBitVector) { - return getSearch<StringAttribute, const vespalib::string &> - (v, "foo", false, useBitVector); + return getSearch<StringAttribute, const vespalib::string &>(v, "foo", false, useBitVector); } BitVectorTest::AttributePtr -BitVectorTest::make(Config cfg, - const vespalib::string &pref, - bool fastSearch, - bool enableOnlyBitVector, - bool filter) +BitVectorTest::make(Config cfg, const vespalib::string &pref, bool fastSearch, bool filter) { cfg.setFastSearch(fastSearch); - cfg.setEnableOnlyBitVector(enableOnlyBitVector); cfg.setIsFilter(filter); AttributePtr v = AttributeFactory::createAttribute(pref, cfg); return v; @@ -267,11 +241,9 @@ BitVectorTest::addDocs(const AttributePtr &v, size_t sz) template <> void -BitVectorTest::populate(IntegerAttribute &v, - uint32_t low, uint32_t high, - bool set) +BitVectorTest::populate(IntegerAttribute &v, uint32_t low, uint32_t high, bool set) { - for(size_t i(low), m(high); i < m; i+= 5) { + for (size_t i(low), m(high); i < m; i+= 5) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -288,11 +260,9 @@ BitVectorTest::populate(IntegerAttribute &v, template <> void -BitVectorTest::populate(FloatingPointAttribute &v, - uint32_t low, uint32_t high, - bool set) +BitVectorTest::populate(FloatingPointAttribute &v, uint32_t low, uint32_t high, bool set) { - for(size_t i(low), m(high); i < m; i+= 5) { + for (size_t i(low), m(high); i < m; i+= 5) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -309,11 +279,9 @@ BitVectorTest::populate(FloatingPointAttribute &v, template <> void -BitVectorTest::populate(StringAttribute &v, - uint32_t low, uint32_t high, - bool set) +BitVectorTest::populate(StringAttribute &v, uint32_t low, uint32_t high, bool set) { - for(size_t i(low), m(high); i < m; i+= 5) { + for (size_t i(low), m(high); i < m; i+= 5) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -329,11 +297,9 @@ BitVectorTest::populate(StringAttribute &v, template <> void -BitVectorTest::populateAll(IntegerAttribute &v, - uint32_t low, uint32_t high, - bool set) +BitVectorTest::populateAll(IntegerAttribute &v, uint32_t low, uint32_t high, bool set) { - for(size_t i(low), m(high); i < m; ++i) { + for (size_t i(low), m(high); i < m; ++i) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -351,11 +317,9 @@ BitVectorTest::populateAll(IntegerAttribute &v, template <> void -BitVectorTest::populateAll(FloatingPointAttribute &v, - uint32_t low, uint32_t high, - bool set) +BitVectorTest::populateAll(FloatingPointAttribute &v, uint32_t low, uint32_t high, bool set) { - for(size_t i(low), m(high); i < m; ++i) { + for (size_t i(low), m(high); i < m; ++i) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -373,11 +337,9 @@ BitVectorTest::populateAll(FloatingPointAttribute &v, template <> void -BitVectorTest::populateAll(StringAttribute &v, - uint32_t low, uint32_t high, - bool set) +BitVectorTest::populateAll(StringAttribute &v, uint32_t low, uint32_t high, bool set) { - for(size_t i(low), m(high); i < m; ++i) { + for (size_t i(low), m(high); i < m; ++i) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -416,8 +378,7 @@ BitVectorTest::checkSearch(AttributePtr v, assert(!checkStride || (docId % 5) == 2u); sb->unpack(docId); EXPECT_EQUAL(md.getDocId(), docId); - if (v->getCollectionType() == CollectionType::SINGLE || - !weights) { + if (v->getCollectionType() == CollectionType::SINGLE || !weights) { EXPECT_EQUAL(1, md.getWeight()); } else if (v->getCollectionType() == CollectionType::ARRAY) { EXPECT_EQUAL(2, md.getWeight()); @@ -456,15 +417,10 @@ BitVectorTest::checkSearch(AttributePtr v, template <typename VectorType, typename BufferType> void -BitVectorTest::test(BasicType bt, - CollectionType ct, - const vespalib::string &pref, - bool fastSearch, - bool enableOnlyBitVector, - bool filter) +BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pref, bool fastSearch, bool filter) { Config cfg(bt, ct); - AttributePtr v = make(cfg, pref, fastSearch, enableOnlyBitVector, filter); + AttributePtr v = make(cfg, pref, fastSearch, filter); addDocs(v, 1024); auto &tv = as<VectorType>(v); populate(tv, 2, 1023, true); @@ -472,7 +428,7 @@ BitVectorTest::test(BasicType bt, SearchContextPtr sc = getSearch<VectorType>(tv, true); checkSearch(v, std::move(sc), 2, 1022, 205, !fastSearch && !filter, true); sc = getSearch<VectorType>(tv, false); - checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && !filter, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true); const search::IDocumentWeightAttribute *dwa = v->asDocumentWeightAttribute(); if (dwa != nullptr) { search::IDocumentWeightAttribute::LookupResult lres = @@ -481,8 +437,8 @@ BitVectorTest::test(BasicType bt, using SI = search::queryeval::SearchIterator; TermFieldMatchData md; SI::UP dwsi(new DWSI(md, *dwa, lres)); - if (!enableOnlyBitVector) { - checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true); + if (!filter) { + TEST_DO(checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true)); } else { dwsi->initRange(1, v->getCommittedDocIdLimit()); EXPECT_TRUE(dwsi->isAtEnd()); @@ -490,13 +446,13 @@ BitVectorTest::test(BasicType bt, } populate(tv, 2, 973, false); sc = getSearch<VectorType>(tv, true); - checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector &&!filter, true); + checkSearch(v, std::move(sc), 977, 1022, 10, !filter, true); populate(tv, 2, 973, true); sc = getSearch<VectorType>(tv, true); checkSearch(v, std::move(sc), 2, 1022, 205, !fastSearch && !filter, true); addDocs(v, 15000); sc = getSearch<VectorType>(tv, true); - checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && !filter, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true); populateAll(tv, 10, 15000, true); sc = getSearch<VectorType>(tv, true); checkSearch(v, std::move(sc), 2, 14999, 14992, !fastSearch && !filter, false); @@ -508,85 +464,65 @@ void BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pref) { LOG(info, "test run, pref is %s", pref.c_str()); - test<VectorType, BufferType>(bt, ct, pref, false, false, false); - test<VectorType, BufferType>(bt, ct, pref, false, false, true); - test<VectorType, BufferType>(bt, ct, pref, true, false, false); - test<VectorType, BufferType>(bt, ct, pref, true, false, true); - test<VectorType, BufferType>(bt, ct, pref, true, true, false); - test<VectorType, BufferType>(bt, ct, pref, true, true, true); + test<VectorType, BufferType>(bt, ct, pref, false, false); + test<VectorType, BufferType>(bt, ct, pref, false, true); + test<VectorType, BufferType>(bt, ct, pref, true, false); + test<VectorType, BufferType>(bt, ct, pref, true, true); } TEST_F("Test bitvectors with single value int32", BitVectorTest) { f.template test<IntegerAttribute, - IntegerAttribute::largeint_t>(BasicType::INT32, - CollectionType::SINGLE, - "int32_sv"); + IntegerAttribute::largeint_t>(BasicType::INT32, CollectionType::SINGLE, "int32_sv"); } TEST_F("Test bitvectors with array value int32", BitVectorTest) { f.template test<IntegerAttribute, - IntegerAttribute::largeint_t>(BasicType::INT32, - CollectionType::ARRAY, - "int32_a"); + IntegerAttribute::largeint_t>(BasicType::INT32, CollectionType::ARRAY, "int32_a"); } TEST_F("Test bitvectors with weighted set value int32", BitVectorTest) { f.template test<IntegerAttribute, - IntegerAttribute::WeightedInt>(BasicType::INT32, - CollectionType::WSET, - "int32_sv"); + IntegerAttribute::WeightedInt>(BasicType::INT32, CollectionType::WSET, "int32_sv"); } TEST_F("Test bitvectors with single value double", BitVectorTest) { f.template test<FloatingPointAttribute, - double>(BasicType::DOUBLE, - CollectionType::SINGLE, - "double_sv"); + double>(BasicType::DOUBLE, CollectionType::SINGLE, "double_sv"); } TEST_F("Test bitvectors with array value double", BitVectorTest) { f.template test<FloatingPointAttribute, - double>(BasicType::DOUBLE, - CollectionType::ARRAY, - "double_a"); + double>(BasicType::DOUBLE, CollectionType::ARRAY, "double_a"); } TEST_F("Test bitvectors with weighted set value double", BitVectorTest) { f.template test<FloatingPointAttribute, - FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE, - CollectionType::WSET, - "double_ws"); + FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE, CollectionType::WSET, "double_ws"); } TEST_F("Test bitvectors with single value string", BitVectorTest) { f.template test<StringAttribute, - vespalib::string>(BasicType::STRING, - CollectionType::SINGLE, - "string_sv"); + vespalib::string>(BasicType::STRING, CollectionType::SINGLE, "string_sv"); } TEST_F("Test bitvectors with array value string", BitVectorTest) { f.template test<StringAttribute, - vespalib::string>(BasicType::STRING, - CollectionType::ARRAY, - "string_a"); + vespalib::string>(BasicType::STRING, CollectionType::ARRAY, "string_a"); } TEST_F("Test bitvectors with weighted set value string", BitVectorTest) { f.template test<StringAttribute, - StringAttribute::WeightedString>(BasicType::STRING, - CollectionType::WSET, - "string_ws"); + StringAttribute::WeightedString>(BasicType::STRING, CollectionType::WSET, "string_ws"); } @@ -633,5 +569,4 @@ TEST("Test that bitvector iterators adheres to SearchIterator requirements") { } } - TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp index 57029f92111..227dbfadbc0 100644 --- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp +++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp @@ -4,9 +4,7 @@ #include <vespa/searchcommon/attribute/status.h> #include <vespa/searchlib/attribute/postingstore.h> #include <vespa/searchlib/attribute/enumstore.hpp> -#include <vespa/vespalib/btree/btreenodeallocator.hpp> #include <vespa/vespalib/btree/btreerootbase.hpp> -#include <vespa/vespalib/btree/btreeroot.hpp> #include <vespa/searchlib/attribute/postingstore.hpp> #include <vespa/vespalib/datastore/buffer_type.hpp> #include <vespa/vespalib/gtest/gtest.h> @@ -42,7 +40,7 @@ std::ostream& operator<<(std::ostream& os, const PostingStoreSetup setup) Config make_config(PostingStoreSetup param) { Config cfg; - cfg.setEnableOnlyBitVector(param.enable_only_bitvector); + cfg.setIsFilter(param.enable_only_bitvector); return cfg; } @@ -212,8 +210,7 @@ PostingStoreTest::test_compact_btree_nodes(uint32_t sequence_length) EXPECT_EQ(make_exp_sequence(4, 4 + sequence_length), get_sequence(ref1)); EXPECT_EQ(make_exp_sequence(5, 5 + sequence_length), get_sequence(ref2)); auto usage_after = store.getMemoryUsage(); - if (sequence_length < huge_sequence_length || - !_config.getEnableOnlyBitVector()) { + if ((sequence_length < huge_sequence_length) || !_config.getIsFilter()) { EXPECT_GT(usage_before.deadBytes(), usage_after.deadBytes()); } else { EXPECT_EQ(usage_before.deadBytes(), usage_after.deadBytes()); diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp index 00e2a82d24e..25de1105973 100644 --- a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp +++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp @@ -471,7 +471,7 @@ PostingListAttributeTest::checkPostingList(const VectorType & vec, const std::ve } EXPECT_EQ(doc, docEnd); } else { - EXPECT_TRUE(has_bitvector && vec.getEnableOnlyBitVector()); + EXPECT_TRUE(has_bitvector && vec.getIsFilter()); numHits = postingList.getBitVectorEntry(find_result.second)->_bv->reader().countTrueBits(); } if (has_bitvector) { @@ -612,21 +612,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n { Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("sint32", cfg); testPostingList<Int32PostingListAttribute>(ptr1, numDocs, values); } { Config cfg(Config(BasicType::INT32, CollectionType::ARRAY)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("aint32", cfg); testPostingList<Int32ArrayPostingListAttribute>(ptr1, numDocs, values); } { Config cfg(Config(BasicType::INT32, CollectionType::WSET)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("wsint32", cfg); testPostingList<Int32WsetPostingListAttribute>(ptr1, numDocs, values); } @@ -640,21 +640,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n { Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("sfloat", cfg); testPostingList<FloatPostingListAttribute>(ptr1, numDocs, values); } { Config cfg(Config(BasicType::FLOAT, CollectionType::ARRAY)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("afloat", cfg); testPostingList<FloatArrayPostingListAttribute>(ptr1, numDocs, values); } { Config cfg(Config(BasicType::FLOAT, CollectionType::WSET)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("wsfloat", cfg); testPostingList<FloatWsetPostingListAttribute>(ptr1, numDocs, values); } @@ -674,21 +674,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n { Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("sstr", cfg); testPostingList<StringPostingListAttribute>(ptr1, numDocs, charValues); } { Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("astr", cfg); testPostingList<StringArrayPostingListAttribute>(ptr1, numDocs, charValues); } { Config cfg(Config(BasicType::STRING, CollectionType::WSET)); cfg.setFastSearch(true); - cfg.setEnableOnlyBitVector(enable_only_bitvector); + cfg.setIsFilter(enable_only_bitvector); AttributePtr ptr1 = create_attribute("wsstr", cfg); testPostingList<StringWsetPostingListAttribute>(ptr1, numDocs, charValues); } diff --git a/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp b/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp index 986848c39b7..dc9c68c4539 100644 --- a/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp +++ b/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp @@ -31,7 +31,6 @@ TEST_F("test default attribute config", Fixture) EXPECT_EQUAL(CollectionType::Type::SINGLE, f._config.collectionType().type()); EXPECT_TRUE(!f._config.fastSearch()); - EXPECT_TRUE(!f._config.getEnableOnlyBitVector()); EXPECT_TRUE(!f._config.getIsFilter()); EXPECT_TRUE(!f._config.fastAccess()); EXPECT_TRUE(f._config.tensorType().is_error()); @@ -43,7 +42,6 @@ TEST_F("test integer weightedset attribute config", EXPECT_EQUAL(BasicType::Type::INT32, f._config.basicType().type()); EXPECT_EQUAL(CollectionType::Type::WSET, f._config.collectionType().type()); EXPECT_TRUE(!f._config.fastSearch()); - EXPECT_TRUE(!f._config.getEnableOnlyBitVector()); EXPECT_TRUE(!f._config.getIsFilter()); EXPECT_TRUE(!f._config.fastAccess()); EXPECT_TRUE(f._config.tensorType().is_error()); diff --git a/searchlib/src/vespa/searchcommon/attribute/config.cpp b/searchlib/src/vespa/searchcommon/attribute/config.cpp index 91495025dee..af29bd64ad6 100644 --- a/searchlib/src/vespa/searchcommon/attribute/config.cpp +++ b/searchlib/src/vespa/searchcommon/attribute/config.cpp @@ -19,7 +19,6 @@ Config::Config(BasicType bt, CollectionType ct, bool fastSearch_) noexcept : _basicType(bt), _type(ct), _fastSearch(fastSearch_), - _enableOnlyBitVector(false), _isFilter(false), _fastAccess(false), _mutable(false), @@ -48,7 +47,6 @@ Config::operator==(const Config &b) const return _basicType == b._basicType && _type == b._type && _fastSearch == b._fastSearch && - _enableOnlyBitVector == b._enableOnlyBitVector && _isFilter == b._isFilter && _fastAccess == b._fastAccess && _mutable == b._mutable && diff --git a/searchlib/src/vespa/searchcommon/attribute/config.h b/searchlib/src/vespa/searchcommon/attribute/config.h index 32cac7ec9d6..b368885240c 100644 --- a/searchlib/src/vespa/searchcommon/attribute/config.h +++ b/searchlib/src/vespa/searchcommon/attribute/config.h @@ -46,8 +46,6 @@ public: * Check if attribute posting list can consist of only a bitvector with * no corresponding btree. */ - bool getEnableOnlyBitVector() const { return _enableOnlyBitVector; } - bool getIsFilter() const { return _isFilter; } bool isMutable() const { return _mutable; } @@ -83,14 +81,6 @@ public: * document frequency goes down, since recreated btree representation * will then have lost weight information. */ - Config & setEnableOnlyBitVector(bool enableOnlyBitVector) { - _enableOnlyBitVector = enableOnlyBitVector; - return *this; - } - - /** - * Hide weight information when searching in attributes. - */ Config & setIsFilter(bool isFilter) { _isFilter = isFilter; return *this; } Config & setMutable(bool isMutable) { _mutable = isMutable; return *this; } Config & setPaged(bool paged_in) { _paged = paged_in; return *this; } @@ -112,7 +102,6 @@ private: BasicType _basicType; CollectionType _type; bool _fastSearch; - bool _enableOnlyBitVector; bool _isFilter; bool _fastAccess; bool _mutable; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 6cb5dbf7889..be631be6dca 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -639,7 +639,11 @@ public: return bitvector_iterator; } } - return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); + if (_attr.has_weight_iterator(_dict_entry.posting_idx)) { + return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); + } else { + return _attr.make_bitvector_iterator(_dict_entry.posting_idx, get_docid_limit(), *tfmda[0], strict); + } } SearchIteratorUP createFilterSearch(bool strict, FilterConstraint constraint) const override { diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index 56f877a7546..859c607e9cb 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -128,7 +128,6 @@ bool AttributeVector::hasArrayType() const { return _config->collectionType().is bool AttributeVector::getIsFilter() const { return _config->getIsFilter(); } bool AttributeVector::getIsFastSearch() const { return _config->fastSearch(); } bool AttributeVector::isMutable() const { return _config->isMutable(); } -bool AttributeVector::getEnableOnlyBitVector() const { return _config->getEnableOnlyBitVector(); } bool AttributeVector::isEnumerated(const vespalib::GenericHeader &header) diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index e3a7fdeb2c3..5fd6cb915fa 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -288,7 +288,6 @@ public: bool getIsFilter() const override final; bool getIsFastSearch() const override final; bool isMutable() const; - bool getEnableOnlyBitVector() const; const Config &getConfig() const noexcept { return *_config; } void update_config(const Config& cfg); diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp index 7f04efd940b..4e300fe3800 100644 --- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp @@ -104,7 +104,6 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg) Config retval(bType, cType); PredicateParams predicateParams; retval.setFastSearch(cfg.fastsearch); - retval.setEnableOnlyBitVector(cfg.enableonlybitvector); retval.setIsFilter(cfg.enableonlybitvector); retval.setFastAccess(cfg.fastaccess); retval.setMutable(cfg.ismutable); diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h index be36bcd185a..d6499708b76 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h @@ -43,6 +43,7 @@ struct IDocumentWeightAttribute virtual void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const = 0; virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const = 0; virtual DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const = 0; + virtual bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept = 0; virtual std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const = 0; virtual ~IDocumentWeightAttribute() = default; }; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h index 71c50ccb270..f45ba3c8773 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h @@ -41,6 +41,7 @@ private: void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override; DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const override; std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override; + bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override; }; DocumentWeightAttributeAdapter _document_weight_attribute_adapter; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp index 1009fa2fb5f..5f3219e4d88 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -146,12 +146,17 @@ MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::make_bi } template <typename B, typename M> +bool +MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept +{ + return self.getPostingList().has_btree(idx); +} + +template <typename B, typename M> const IDocumentWeightAttribute * MultiValueNumericPostingAttribute<B, M>::asDocumentWeightAttribute() const { - if (this->hasWeightedSetType() && - this->getBasicType() == AttributeVector::BasicType::INT64 && - !this->getConfig().getIsFilter()) { + if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64) && !this->getIsFilter()) { return &_document_weight_attribute_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h index b25c31a7dea..5c4d97660f6 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h @@ -39,6 +39,7 @@ private: void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override; DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const override; std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override; + bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override; }; DocumentWeightAttributeAdapter _document_weight_attribute_adapter; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index 19840b5a474..fe52b785fa7 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -159,6 +159,13 @@ MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::create( } template <typename B, typename M> +bool +MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept +{ + return self.getPostingList().has_btree(idx); +} + +template <typename B, typename M> std::unique_ptr<queryeval::SearchIterator> MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const { @@ -169,9 +176,8 @@ template <typename B, typename T> const IDocumentWeightAttribute * MultiValueStringPostingAttributeT<B, T>::asDocumentWeightAttribute() const { - if (this->hasWeightedSetType() && - this->getBasicType() == AttributeVector::BasicType::STRING && - !this->getConfig().getIsFilter()) { + // TODO: Add support for handling bit vectors too, and lift restriction on isFilter. + if (this->hasWeightedSetType() && this->isStringType() && ! this->getIsFilter()) { return &_document_weight_attribute_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index d32d8cde7ea..725491c4702 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -154,7 +154,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) DocIt postings; vespalib::ConstArrayRef<Posting> array = _merger.getArray(); postings.set(&array[0], &array[array.size()]); - if (_postingList._isFilter) { + if (_postingList.isFilter()) { return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, postings); } else { return std::make_unique<AttributePostingListIteratorT<DocIt>>(_baseSearchCtx, _hasWeight, matchData, postings); @@ -182,7 +182,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) DocIt postings; const Posting *array = postingList.getKeyDataEntry(_pidx, clusterSize); postings.set(array, array + clusterSize); - if (postingList._isFilter) { + if (postingList.isFilter()) { return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, postings); } else { return std::make_unique<AttributePostingListIteratorT<DocIt>>(_baseSearchCtx, _hasWeight, matchData, postings); @@ -191,7 +191,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) typename PostingList::BTreeType::FrozenView frozen(_frozenRoot, postingList.getAllocator()); using DocIt = typename PostingList::ConstIterator; - if (_postingList._isFilter) { + if (_postingList.isFilter()) { return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, frozen.getRoot(), frozen.getAllocator()); } else { return std::make_unique<AttributePostingListIteratorT<DocIt>> (_baseSearchCtx, _hasWeight, matchData, frozen.getRoot(), frozen.getAllocator()); diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp index 2703201b292..09af15e35d5 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp @@ -19,9 +19,7 @@ using vespalib::btree::BTreeNoLeafData; using vespalib::datastore::EntryRefFilter; PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &status, const Config &config) - : _enableOnlyBitVector(config.getEnableOnlyBitVector()), - _isFilter(config.getIsFilter()), - _bvSize(64u), + : _bvSize(64u), _bvCapacity(128u), _minBvDocFreq(64), _maxBvDocFreq(std::numeric_limits<uint32_t>::max()), @@ -29,9 +27,9 @@ PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &s _dictionary(dictionary), _status(status), _bvExtraBytes(0), - _compaction_spec() -{ -} + _compaction_spec(), + _isFilter(config.getIsFilter()) +{ } PostingStoreBase2::~PostingStoreBase2() = default; @@ -60,8 +58,7 @@ PostingStoreBase2::resizeBitVectors(uint32_t newSize, uint32_t newCapacity) template <typename DataT> -PostingStore<DataT>::PostingStore(IEnumStoreDictionary& dictionary, Status &status, - const Config &config) +PostingStore<DataT>::PostingStore(IEnumStoreDictionary& dictionary, Status &status, const Config &config) : Parent(false), PostingStoreBase2(dictionary, status, config), _bvType(1, 1024u, RefType::offsetSize()) @@ -185,8 +182,7 @@ PostingStore<DataT>::applyNew(EntryRef &ref, AddIter a, AddIter ae) template <typename DataT> void -PostingStore<DataT>::makeDegradedTree(EntryRef &ref, - const BitVector &bv) +PostingStore<DataT>::makeDegradedTree(EntryRef &ref, const BitVector &bv) { assert(!ref.valid()); BTreeTypeRefPair tPair(allocBTree()); @@ -264,7 +260,7 @@ PostingStore<DataT>::makeBitVector(EntryRef &ref) assert(bv.countTrueBits() == expDocFreq); BitVectorRefPair bPair(allocBitVector()); BitVectorEntry *bve = bPair.data; - if (_enableOnlyBitVector) { + if (isFilter()) { BTreeType *tree = getWTreeEntry(iRef); tree->clear(_allocator); _store.hold_entry(ref); @@ -301,7 +297,7 @@ PostingStore<DataT>::applyNewBitVector(EntryRef &ref, AddIter aOrg, AddIter ae) assert(bv.countTrueBits() == expDocFreq); BitVectorRefPair bPair(allocBitVector()); BitVectorEntry *bve = bPair.data; - if (!_enableOnlyBitVector) { + if (!isFilter()) { applyNewTree(bve->_tree, aOrg, ae, CompareT()); } bve->_bv = bvsp; @@ -315,11 +311,7 @@ PostingStore<DataT>::applyNewBitVector(EntryRef &ref, AddIter aOrg, AddIter ae) template <typename DataT> void -PostingStore<DataT>::apply(BitVector &bv, - AddIter a, - AddIter ae, - RemoveIter r, - RemoveIter re) +PostingStore<DataT>::apply(BitVector &bv, AddIter a, AddIter ae, RemoveIter r, RemoveIter re) { while (a != ae || r != re) { if (r != re && (a == ae || *r < a->_key)) { @@ -345,11 +337,7 @@ PostingStore<DataT>::apply(BitVector &bv, template <typename DataT> void -PostingStore<DataT>::apply(EntryRef &ref, - AddIter a, - AddIter ae, - RemoveIter r, - RemoveIter re) +PostingStore<DataT>::apply(EntryRef &ref, AddIter a, AddIter ae, RemoveIter r, RemoveIter re) { if (!ref.valid()) { // No old data @@ -506,11 +494,9 @@ PostingStore<DataT>::beginFrozen(const EntryRef ref) const return ConstIterator(shortArray, clusterSize, _allocator, _aggrCalc); } - template <typename DataT> void -PostingStore<DataT>::beginFrozen(const EntryRef ref, - std::vector<ConstIterator> &where) const +PostingStore<DataT>::beginFrozen(const EntryRef ref, std::vector<ConstIterator> &where) const { if (!ref.valid()) { where.emplace_back(); @@ -742,8 +728,7 @@ PostingStore<DataT>::compact_worst_buffers(CompactionSpec compaction_spec, const filter.add_buffers(_bvType.get_active_buffers()); } _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs) - { return move(refs); }, - filter); + { return move(refs); }, filter); compacting_buffers->finish(); } diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h index 57f523acefe..8c6ed3d9497 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.h +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h @@ -27,7 +27,7 @@ public: std::shared_ptr<GrowableBitVector> _bv; // bitvector public: - BitVectorEntry() + BitVectorEntry() noexcept : _tree(), _bv() { } @@ -36,25 +36,22 @@ public: class PostingStoreBase2 { -public: - bool _enableOnlyBitVector; - bool _isFilter; protected: + static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u; uint32_t _bvSize; uint32_t _bvCapacity; -public: uint32_t _minBvDocFreq; // Less than this ==> destroy bv uint32_t _maxBvDocFreq; // Greater than or equal to this ==> create bv -protected: - std::set<uint32_t> _bvs; // Current bitvectors - IEnumStoreDictionary& _dictionary; - Status &_status; - uint64_t _bvExtraBytes; + std::set<uint32_t> _bvs; // Current bitvectors + IEnumStoreDictionary& _dictionary; + Status &_status; + uint64_t _bvExtraBytes; PostingStoreCompactionSpec _compaction_spec; - - static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u; +private: + bool _isFilter; public: + bool isFilter() const noexcept { return _isFilter; } PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &status, const Config &config); virtual ~PostingStoreBase2(); bool resizeBitVectors(uint32_t newSize, uint32_t newCapacity); @@ -111,7 +108,7 @@ public: bool removeSparseBitVectors() override; void consider_remove_sparse_bitvector(std::vector<EntryRef> &refs); - static bool isBitVector(uint32_t typeId) { return typeId == BUFFERTYPE_BITVECTOR; } + static bool isBitVector(uint32_t typeId) noexcept { return typeId == BUFFERTYPE_BITVECTOR; } void applyNew(EntryRef &ref, AddIter a, AddIter ae); @@ -186,6 +183,9 @@ public: BitVectorEntry *getWBitVectorEntry(RefType ref) { return _store.template getEntry<BitVectorEntry>(ref); } + bool has_btree(const EntryRef ref) const noexcept { + return !ref.valid() || !isBitVector(getTypeId(RefType(ref))) || !isFilter(); + } std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(RefType ref, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const; diff --git a/vespalib/src/vespa/vespalib/datastore/bufferstate.h b/vespalib/src/vespa/vespalib/datastore/bufferstate.h index 01439586f5b..3de821928b8 100644 --- a/vespalib/src/vespa/vespalib/datastore/bufferstate.h +++ b/vespalib/src/vespa/vespalib/datastore/bufferstate.h @@ -134,21 +134,21 @@ public: class BufferAndMeta { public: - BufferAndMeta() : BufferAndMeta(nullptr, nullptr, 0, 0) { } + BufferAndMeta() noexcept : BufferAndMeta(nullptr, nullptr, 0, 0) { } std::atomic<void*>& get_atomic_buffer() noexcept { return _buffer; } void* get_buffer_relaxed() noexcept { return _buffer.load(std::memory_order_relaxed); } const void* get_buffer_acquire() const noexcept { return _buffer.load(std::memory_order_acquire); } - uint32_t getTypeId() const { return _typeId; } - uint32_t get_array_size() const { return _array_size; } - BufferState * get_state_relaxed() { return _state.load(std::memory_order_relaxed); } + uint32_t getTypeId() const noexcept { return _typeId; } + uint32_t get_array_size() const noexcept { return _array_size; } + BufferState * get_state_relaxed() noexcept { return _state.load(std::memory_order_relaxed); } const BufferState * get_state_acquire() const { return _state.load(std::memory_order_acquire); } uint32_t get_entry_size() const noexcept { return _entry_size; } - void setTypeId(uint32_t typeId) { _typeId = typeId; } - void set_array_size(uint32_t arraySize) { _array_size = arraySize; } + void setTypeId(uint32_t typeId) noexcept { _typeId = typeId; } + void set_array_size(uint32_t arraySize) noexcept { _array_size = arraySize; } void set_entry_size(uint32_t entry_size) noexcept { _entry_size = entry_size; } - void set_state(BufferState * state) { _state.store(state, std::memory_order_release); } + void set_state(BufferState * state) noexcept { _state.store(state, std::memory_order_release); } private: - BufferAndMeta(void* buffer, BufferState * state, uint32_t typeId, uint32_t arraySize) + BufferAndMeta(void* buffer, BufferState * state, uint32_t typeId, uint32_t arraySize) noexcept : _buffer(buffer), _state(state), _typeId(typeId), diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.h b/vespalib/src/vespa/vespalib/datastore/datastorebase.h index dbcdbeb12b9..b91d6c7cfa6 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastorebase.h +++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.h @@ -72,9 +72,9 @@ public: /** * Get the primary buffer id for the given type id. */ - uint32_t primary_buffer_id(uint32_t typeId) const { return _primary_buffer_ids[typeId]; } + uint32_t primary_buffer_id(uint32_t typeId) const noexcept { return _primary_buffer_ids[typeId]; } BufferState &getBufferState(uint32_t buffer_id) noexcept; - const BufferAndMeta & getBufferMeta(uint32_t buffer_id) const { return _buffers[buffer_id]; } + const BufferAndMeta & getBufferMeta(uint32_t buffer_id) const noexcept { return _buffers[buffer_id]; } uint32_t getMaxNumBuffers() const noexcept { return _buffers.size(); } uint32_t get_bufferid_limit_acquire() const noexcept { return _bufferIdLimit.load(std::memory_order_acquire); } uint32_t get_bufferid_limit_relaxed() noexcept { return _bufferIdLimit.load(std::memory_order_relaxed); } |