diff options
Diffstat (limited to 'searchlib')
26 files changed, 680 insertions, 267 deletions
diff --git a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp index 3c8c9ff17e0..16a04a746f3 100644 --- a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp +++ b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp @@ -49,7 +49,7 @@ void verify_roundtrip_serialization(const HnswIPO& hnsw_params_in) { auto gen_header = populate_header(hnsw_params_in); - auto attr_header = AttributeHeader::extractTags(gen_header); + auto attr_header = AttributeHeader::extractTags(gen_header, file_name); EXPECT_EQ(tensor_cfg.basicType(), attr_header.getBasicType()); EXPECT_EQ(tensor_cfg.collectionType(), attr_header.getCollectionType()); diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 9c25429932b..40ff25ff976 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -8,6 +8,21 @@ LOG_SETUP("enumstore_test"); using Type = search::DictionaryConfig::Type; using vespalib::datastore::EntryRef; +using vespalib::datastore::EntryRefFilter; +using RefT = vespalib::datastore::EntryRefT<22>; + +namespace vespalib::datastore { + +/* + * Print EntryRef as RefT which is used by test_normalize_posting_lists and + * test_foreach_posting_list to differentiate between buffers + */ +void PrintTo(const EntryRef &ref, std::ostream* os) { + RefT iref(ref); + *os << "RefT(" << iref.offset() << "," << iref.bufferId() << ")"; +} + +} namespace search { @@ -597,6 +612,11 @@ public: void update_posting_idx(EnumIndex enum_idx, EntryRef old_posting_idx, EntryRef new_posting_idx); EnumIndex insert_value(size_t value_idx); + void populate_sample_data(uint32_t cnt); + std::vector<EntryRef> get_sample_values(uint32_t cnt); + void clear_sample_values(uint32_t cnt); + void test_normalize_posting_lists(bool use_filter, bool one_filter); + void test_foreach_posting_list(bool one_filter); static EntryRef fake_pidx() { return EntryRef(42); } }; @@ -620,6 +640,149 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::insert_value(size_t val return enum_idx; } +namespace { +/* + * large_population should trigger multiple callbacks from normalize_values + * and foreach_value + */ +constexpr uint32_t large_population = 1200; + +uint32_t select_buffer(uint32_t i) { + if ((i % 2) == 0) { + return 0; + } + if ((i % 3) == 0) { + return 1; + } + if ((i % 5) == 0) { + return 2; + } + return 3; +} + +EntryRef make_fake_pidx(uint32_t i) { return RefT(i + 200, select_buffer(i)); } +EntryRef make_fake_adjusted_pidx(uint32_t i) { return RefT(i + 500, select_buffer(i)); } +EntryRef adjust_fake_pidx(EntryRef ref) { RefT iref(ref); return RefT(iref.offset() + 300, iref.bufferId()); } + +} + + +template <typename EnumStoreTypeAndDictionaryType> +void +EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::populate_sample_data(uint32_t cnt) +{ + auto& dict = store.get_dictionary(); + for (uint32_t i = 0; i < cnt; ++i) { + auto enum_idx = store.insert(i); + EXPECT_TRUE(enum_idx.valid()); + EntryRef posting_idx(make_fake_pidx(i)); + dict.update_posting_list(enum_idx, store.get_comparator(), [posting_idx](EntryRef) noexcept -> EntryRef { return posting_idx; }); + } +} + +template <typename EnumStoreTypeAndDictionaryType> +std::vector<EntryRef> +EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::get_sample_values(uint32_t cnt) +{ + std::vector<EntryRef> result; + result.reserve(cnt); + store.freeze_dictionary(); + auto& dict = store.get_dictionary(); + for (uint32_t i = 0; i < cnt; ++i) { + auto compare = store.make_comparator(i); + auto enum_idx = dict.find(compare); + EXPECT_TRUE(enum_idx.valid()); + EntryRef posting_idx; + dict.update_posting_list(enum_idx, compare, [&posting_idx](EntryRef ref) noexcept { posting_idx = ref; return ref; });; + auto find_result = dict.find_posting_list(compare, dict.get_frozen_root()); + EXPECT_EQ(enum_idx, find_result.first); + EXPECT_EQ(posting_idx, find_result.second); + result.emplace_back(find_result.second); + } + return result; +} + +template <typename EnumStoreTypeAndDictionaryType> +void +EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::clear_sample_values(uint32_t cnt) +{ + auto& dict = store.get_dictionary(); + for (uint32_t i = 0; i < cnt; ++i) { + auto comparator = store.make_comparator(i); + auto enum_idx = dict.find(comparator); + EXPECT_TRUE(enum_idx.valid()); + dict.update_posting_list(enum_idx, comparator, [](EntryRef) noexcept -> EntryRef { return EntryRef(); }); + } +} + +namespace { + +EntryRefFilter make_entry_ref_filter(bool one_filter) +{ + if (one_filter) { + EntryRefFilter filter(RefT::numBuffers(), RefT::offset_bits); + filter.add_buffer(3); + return filter; + } + return EntryRefFilter::create_all_filter(RefT::numBuffers(), RefT::offset_bits); +} + +} + +template <typename EnumStoreTypeAndDictionaryType> +void +EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_normalize_posting_lists(bool use_filter, bool one_filter) +{ + populate_sample_data(large_population); + auto& dict = store.get_dictionary(); + std::vector<EntryRef> exp_refs; + std::vector<EntryRef> exp_adjusted_refs; + exp_refs.reserve(large_population); + exp_adjusted_refs.reserve(large_population); + for (uint32_t i = 0; i < large_population; ++i) { + exp_refs.emplace_back(make_fake_pidx(i)); + if (!use_filter || !one_filter || select_buffer(i) == 3) { + exp_adjusted_refs.emplace_back(make_fake_adjusted_pidx(i)); + } else { + exp_adjusted_refs.emplace_back(make_fake_pidx(i)); + } + } + EXPECT_EQ(exp_refs, get_sample_values(large_population)); + if (use_filter) { + auto filter = make_entry_ref_filter(one_filter); + auto dummy = [](std::vector<EntryRef>&) noexcept { }; + auto adjust_refs = [](std::vector<EntryRef> &refs) noexcept { for (auto &ref : refs) { ref = adjust_fake_pidx(ref); } }; + EXPECT_FALSE(dict.normalize_posting_lists(dummy, filter)); + EXPECT_EQ(exp_refs, get_sample_values(large_population)); + EXPECT_TRUE(dict.normalize_posting_lists(adjust_refs, filter)); + } else { + auto dummy = [](EntryRef posting_idx) noexcept { return posting_idx; }; + auto adjust_refs = [](EntryRef ref) noexcept { return adjust_fake_pidx(ref); }; + EXPECT_FALSE(dict.normalize_posting_lists(dummy)); + EXPECT_EQ(exp_refs, get_sample_values(large_population)); + EXPECT_TRUE(dict.normalize_posting_lists(adjust_refs)); + } + EXPECT_EQ(exp_adjusted_refs, get_sample_values(large_population)); + clear_sample_values(large_population); +} + +template <typename EnumStoreTypeAndDictionaryType> +void +EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_foreach_posting_list(bool one_filter) +{ + auto filter = make_entry_ref_filter(one_filter); + populate_sample_data(large_population); + auto& dict = store.get_dictionary(); + std::vector<EntryRef> exp_refs; + auto save_exp_refs = [&exp_refs](std::vector<EntryRef>& refs) { exp_refs.insert(exp_refs.end(), refs.begin(), refs.end()); }; + EXPECT_FALSE(dict.normalize_posting_lists(save_exp_refs, filter)); + std::vector<EntryRef> act_refs; + auto save_act_refs = [&act_refs](const std::vector<EntryRef>& refs) { act_refs.insert(act_refs.end(), refs.begin(), refs.end()); }; + dict.foreach_posting_list(save_act_refs, filter); + EXPECT_EQ(exp_refs, act_refs); + clear_sample_values(large_population); +} + // Disable warnings emitted by gtest generated files when using typed tests #pragma GCC diagnostic push #ifndef __clang__ @@ -678,26 +841,27 @@ TYPED_TEST(EnumStoreDictionaryTest, find_posting_list_works) TYPED_TEST(EnumStoreDictionaryTest, normalize_posting_lists_works) { - auto value_0_idx = this->insert_value(0); - this->update_posting_idx(value_0_idx, EntryRef(), this->fake_pidx()); - this->store.freeze_dictionary(); - auto& dict = this->store.get_dictionary(); - auto root = dict.get_frozen_root(); - auto find_result = dict.find_posting_list(this->make_bound_comparator(0), root); - EXPECT_EQ(value_0_idx, find_result.first); - EXPECT_EQ(this->fake_pidx(), find_result.second); - auto dummy = [](EntryRef posting_idx) noexcept { return posting_idx; }; - std::vector<EntryRef> saved_refs; - auto save_refs_and_clear = [&saved_refs](EntryRef posting_idx) { saved_refs.push_back(posting_idx); return EntryRef(); }; - EXPECT_FALSE(dict.normalize_posting_lists(dummy)); - EXPECT_TRUE(dict.normalize_posting_lists(save_refs_and_clear)); - EXPECT_FALSE(dict.normalize_posting_lists(save_refs_and_clear)); - EXPECT_EQ((std::vector<EntryRef>{ this->fake_pidx(), EntryRef() }), saved_refs); - this->store.freeze_dictionary(); - root = dict.get_frozen_root(); - find_result = dict.find_posting_list(this->make_bound_comparator(0), root); - EXPECT_EQ(value_0_idx, find_result.first); - EXPECT_EQ(EntryRef(), find_result.second); + this->test_normalize_posting_lists(false, false); +} + +TYPED_TEST(EnumStoreDictionaryTest, normalize_posting_lists_with_all_filter_works) +{ + this->test_normalize_posting_lists(true, false); +} + +TYPED_TEST(EnumStoreDictionaryTest, normalize_posting_lists_with_one_filter_works) +{ + this->test_normalize_posting_lists(true, true); +} + +TYPED_TEST(EnumStoreDictionaryTest, foreach_posting_list_with_all_filter_works) +{ + this->test_foreach_posting_list(false); +} + +TYPED_TEST(EnumStoreDictionaryTest, foreach_posting_list_with_one_filter_works) +{ + this->test_foreach_posting_list(true); } namespace { diff --git a/searchlib/src/tests/transactionlog/CMakeLists.txt b/searchlib/src/tests/transactionlog/CMakeLists.txt index b09271eefe2..0904dc3ee36 100644 --- a/searchlib/src/tests/transactionlog/CMakeLists.txt +++ b/searchlib/src/tests/transactionlog/CMakeLists.txt @@ -5,8 +5,7 @@ vespa_add_executable(searchlib_translogclient_test_app TEST DEPENDS searchlib ) -vespa_add_test(NAME searchlib_translogclient_test_app COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/translogclient_test.sh - DEPENDS searchlib_translogclient_test_app COST 100) +vespa_add_test(NAME searchlib_translogclient_test_app COMMAND searchlib_translogclient_test_app) vespa_add_executable(searchlib_translog_chunks_test_app TEST SOURCES diff --git a/searchlib/src/tests/transactionlog/translogclient_test.cpp b/searchlib/src/tests/transactionlog/translogclient_test.cpp index de8f9e5c462..5740eeb610d 100644 --- a/searchlib/src/tests/transactionlog/translogclient_test.cpp +++ b/searchlib/src/tests/transactionlog/translogclient_test.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchlib/transactionlog/translogclient.h> #include <vespa/searchlib/transactionlog/translogserver.h> +#include <vespa/searchlib/test/directory_handler.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/objects/identifiable.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> @@ -34,8 +35,8 @@ void fillDomainTest(Session * s1, size_t numPackets, size_t numEntries, size_t e uint32_t countFiles(const vespalib::string &dir); void checkFilledDomainTest(Session &s1, size_t numEntries); bool visitDomainTest(TransLogClient & tls, Session * s1, const vespalib::string & name); -void createAndFillDomain(const vespalib::string & name, Encoding encoding, size_t preExistingDomains); -void verifyDomain(const vespalib::string & name); +void createAndFillDomain(const vespalib::string & dir, const vespalib::string & name, Encoding encoding, size_t preExistingDomains); +void verifyDomain(const vespalib::string & dir, const vespalib::string & name); vespalib::string myhex(const void * b, size_t sz) @@ -357,7 +358,7 @@ void fillDomainTest(Session * s1, size_t numPackets, size_t numEntries, size_t entrySize) { size_t value(0); - std::vector<char> entryBuffer(entrySize); + std::vector<char> entryBuffer(entrySize); for(size_t i=0; i < numPackets; i++) { std::unique_ptr<Packet> p(new Packet(DEFAULT_PACKET_SIZE)); for(size_t j=0; j < numEntries; j++, value++) { @@ -464,10 +465,11 @@ getMaxSessionRunTime(TransLogServer &tls, const vespalib::string &domain) return tls.getDomainStats()[domain].maxSessionRunTime.count(); } -void createAndFillDomain(const vespalib::string & name, Encoding encoding, size_t preExistingDomains) +void +createAndFillDomain(const vespalib::string & dir, const vespalib::string & name, Encoding encoding, size_t preExistingDomains) { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test13", 18377, ".", fileHeaderContext, + TransLogServer tlss(dir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000).setEncoding(encoding), 4); TransLogClient tls("tcp/localhost:18377"); @@ -476,19 +478,21 @@ void createAndFillDomain(const vespalib::string & name, Encoding encoding, size_ fillDomainTest(s1.get(), name); } -void verifyDomain(const vespalib::string & name) { +void +verifyDomain(const vespalib::string & dir, const vespalib::string & name) { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test13", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); + TransLogServer tlss(dir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); TransLogClient tls("tcp/localhost:18377"); auto s1 = openDomainTest(tls, name); visitDomainTest(tls, s1.get(), name); } -} -TEST("testVisitOverGeneratedDomain") { + +void +testVisitOverGeneratedDomain(const vespalib::string & testDir) { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test7", 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); + TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); TransLogClient tls("tcp/localhost:18377"); vespalib::string name("test1"); @@ -502,10 +506,11 @@ TEST("testVisitOverGeneratedDomain") { EXPECT_GREATER(maxSessionRunTime, 0); } -TEST("testVisitOverPreExistingDomain") { +void +testVisitOverPreExistingDomain(const vespalib::string & testDir) { // Depends on Test::testVisitOverGeneratedDomain() DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test7", 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); + TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); TransLogClient tls("tcp/localhost:18377"); vespalib::string name("test1"); @@ -513,9 +518,10 @@ TEST("testVisitOverPreExistingDomain") { visitDomainTest(tls, s1.get(), name); } -TEST("partialUpdateTest") { +void +partialUpdateTest(const vespalib::string & testDir) { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test7", 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); + TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); TransLogClient tls("tcp/localhost:18377"); auto s1 = openDomainTest(tls, "test1"); @@ -568,21 +574,33 @@ TEST("partialUpdateTest") { ASSERT_TRUE( ca3.hasSerial(7) ); } +} + +TEST("testVisitAndUpdates") { + test::DirectoryHandler testDir("test7"); + testVisitOverGeneratedDomain(testDir.getDir()); + testVisitOverPreExistingDomain(testDir.getDir()); + partialUpdateTest(testDir.getDir()); +} + + TEST("testCrcVersions") { + test::DirectoryHandler testDir("test13"); try { - createAndFillDomain("ccitt_crc32", Encoding(Encoding::Crc::ccitt_crc32, Encoding::Compression::none), 0); + createAndFillDomain(testDir.getDir(),"ccitt_crc32", Encoding(Encoding::Crc::ccitt_crc32, Encoding::Compression::none), 0); ASSERT_TRUE(false); } catch (vespalib::IllegalArgumentException & e) { EXPECT_TRUE(e.getMessage().find("Compression:none is not allowed for the tls") != vespalib::string::npos); } - createAndFillDomain("xxh64", Encoding(Encoding::Crc::xxh64, Encoding::Compression::zstd), 0); + createAndFillDomain(testDir.getDir(), "xxh64", Encoding(Encoding::Crc::xxh64, Encoding::Compression::zstd), 0); - verifyDomain("xxh64"); + verifyDomain(testDir.getDir(), "xxh64"); } TEST("testRemove") { + test::DirectoryHandler testDir("testremove"); DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("testremove", 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); + TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); TransLogClient tls("tcp/localhost:18377"); vespalib::string name("test-delete"); @@ -629,14 +647,15 @@ assertStatus(Session &s, SerialNum expFirstSerial, SerialNum expLastSerial, uint } -TEST("test sending a lot of data") { +void + testSendingAlotOfDataSync(const vespalib::string & testDir) { const unsigned int NUM_PACKETS = 1000; const unsigned int NUM_ENTRIES = 100; const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; const vespalib::string MANY("many"); { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x80000)); + TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x80000)); TransLogClient tls("tcp/localhost:18377"); createDomainTest(tls, MANY, 0); @@ -659,7 +678,7 @@ TEST("test sending a lot of data") { } { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); + TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); TransLogClient tls("tcp/localhost:18377"); auto s1 = openDomainTest(tls, "many"); @@ -680,7 +699,7 @@ TEST("test sending a lot of data") { } { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); + TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); TransLogClient tls("tcp/localhost:18377"); auto s1 = openDomainTest(tls, MANY); @@ -701,14 +720,14 @@ TEST("test sending a lot of data") { } } -TEST("test sending a lot of data async") { +void testSendingAlotOfDataAsync(const vespalib::string & testDir) { const unsigned int NUM_PACKETS = 1000; const unsigned int NUM_ENTRIES = 100; const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; const vespalib::string MANY("many-async"); { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x80000)); + TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x80000)); TransLogClient tls("tcp/localhost:18377"); createDomainTest(tls, MANY, 1); auto s1 = openDomainTest(tls, MANY); @@ -730,7 +749,7 @@ TEST("test sending a lot of data async") { } { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); + TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); TransLogClient tls("tcp/localhost:18377"); auto s1 = openDomainTest(tls, MANY); @@ -751,16 +770,21 @@ TEST("test sending a lot of data async") { } } - +TEST("test sending a lot of data both sync and async") { + test::DirectoryHandler testDir("test8"); + testSendingAlotOfDataSync(testDir.getDir()); + testSendingAlotOfDataAsync(testDir.getDir()); +} TEST("testErase") { const unsigned int NUM_PACKETS = 1000; const unsigned int NUM_ENTRIES = 100; const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; + test::DirectoryHandler testDir("test12"); { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test12", 18377, ".", fileHeaderContext, createDomainConfig(0x80000)); + TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x80000)); TransLogClient tls("tcp/localhost:18377"); createDomainTest(tls, "erase", 0); @@ -769,7 +793,7 @@ TEST("testErase") { } { DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test12", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); + TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); TransLogClient tls("tcp/localhost:18377"); auto s1 = openDomainTest(tls, "erase"); @@ -856,7 +880,8 @@ TEST("testSync") { const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; DummyFileHeaderContext fileHeaderContext; - TransLogServer tlss("test9", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); + test::DirectoryHandler testDir("test9"); + TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); TransLogClient tls("tcp/localhost:18377"); createDomainTest(tls, "sync", 0); @@ -877,8 +902,9 @@ TEST("test truncate on version mismatch") { uint64_t fromOld(0), toOld(0); size_t countOld(0); DummyFileHeaderContext fileHeaderContext; + test::DirectoryHandler testDir("test11"); { - TransLogServer tlss("test11", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); + TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x1000000)); TransLogClient tls("tcp/localhost:18377"); createDomainTest(tls, "sync", 0); @@ -890,7 +916,7 @@ TEST("test truncate on version mismatch") { EXPECT_TRUE(s1->sync(2, syncedTo)); EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES); } - FastOS_File f("test11/sync/sync-0000000000000000"); + FastOS_File f((testDir.getDir() + "/sync/sync-0000000000000000").c_str()); EXPECT_TRUE(f.OpenWriteOnlyExisting()); EXPECT_TRUE(f.SetPosition(f.GetSize())); @@ -899,7 +925,7 @@ TEST("test truncate on version mismatch") { EXPECT_EQUAL(static_cast<ssize_t>(sizeof(tmp)), f.Write2(tmp, sizeof(tmp))); EXPECT_TRUE(f.Close()); { - TransLogServer tlss("test11", 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); + TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x10000)); TransLogClient tls("tcp/localhost:18377"); auto s1 = openDomainTest(tls, "sync"); uint64_t from(0), to(0); @@ -916,15 +942,16 @@ TEST("test truncation after short read") { const unsigned int NUM_ENTRIES = 1; const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; const unsigned int ENTRYSIZE = 4080; - vespalib::string topdir("test10"); + test::DirectoryHandler topdir("test10"); vespalib::string domain("truncate"); - vespalib::string dir(topdir + "/" + domain); + vespalib::string dir(topdir.getDir() + "/" + domain); vespalib::string tlsspec("tcp/localhost:18377"); + DomainConfig domainConfig = createDomainConfig(0x10000); DummyFileHeaderContext fileHeaderContext; { - TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, domainConfig); + TransLogServer tlss(topdir.getDir(), 18377, ".", fileHeaderContext, domainConfig); TransLogClient tls(tlsspec); createDomainTest(tls, domain, 0); @@ -938,7 +965,7 @@ TEST("test truncation after short read") { } EXPECT_EQUAL(2u, countFiles(dir)); { - TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, domainConfig); + TransLogServer tlss(topdir.getDir(), 18377, ".", fileHeaderContext, domainConfig); TransLogClient tls(tlsspec); auto s1 = openDomainTest(tls, domain); checkFilledDomainTest(*s1, TOTAL_NUM_ENTRIES); @@ -952,7 +979,7 @@ TEST("test truncation after short read") { trfile.Close(); } { - TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, domainConfig); + TransLogServer tlss(topdir.getDir(), 18377, ".", fileHeaderContext, domainConfig); TransLogClient tls(tlsspec); auto s1 = openDomainTest(tls, domain); checkFilledDomainTest(*s1, TOTAL_NUM_ENTRIES - 1); diff --git a/searchlib/src/tests/transactionlog/translogclient_test.sh b/searchlib/src/tests/transactionlog/translogclient_test.sh deleted file mode 100755 index 50d7c73fd6a..00000000000 --- a/searchlib/src/tests/transactionlog/translogclient_test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -set -e -rm -rf test7 test8 test9 test10 test11 test12 test13 testremove -$VALGRIND ./searchlib_translogclient_test_app -rm -rf test7 test8 test9 test10 test11 test12 test13 testremove diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp index b68923b90bf..e40717e6375 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp @@ -191,9 +191,9 @@ AttributeHeader::internalExtractTags(const vespalib::GenericHeader &header) } AttributeHeader -AttributeHeader::extractTags(const vespalib::GenericHeader &header) +AttributeHeader::extractTags(const vespalib::GenericHeader &header, const vespalib::string &file_name) { - AttributeHeader result; + AttributeHeader result(file_name); result.internalExtractTags(header); return result; } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.h b/searchlib/src/vespa/searchlib/attribute/attribute_header.h index 00da28baf80..7c0b8f3084b 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.h +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.h @@ -69,7 +69,7 @@ public: bool getPredicateParamsSet() const { return _predicateParamsSet; } bool getCollectionTypeParamsSet() const { return _collectionTypeParamsSet; } const std::optional<HnswIndexParams>& get_hnsw_index_params() const { return _hnsw_index_params; } - static AttributeHeader extractTags(const vespalib::GenericHeader &header); + static AttributeHeader extractTags(const vespalib::GenericHeader &header, const vespalib::string &file_name); void addTags(vespalib::GenericHeader &header) const; }; diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp index 6c929ad5981..8bc28abc238 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -311,6 +311,165 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists( } template <> +bool +EnumStoreDictionary<EnumTree>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)>, const EntryRefFilter&) +{ + LOG_ABORT("should not be reached"); +} + +namespace { + +template <typename HashDictionaryT> +class ChangeWriterBase +{ +protected: + HashDictionaryT* _hash_dict; + static constexpr bool has_hash_dictionary = true; + ChangeWriterBase() + : _hash_dict(nullptr) + { + } +public: + void set_hash_dict(HashDictionaryT &hash_dict) { _hash_dict = &hash_dict; } +}; + +template <> +class ChangeWriterBase<vespalib::datastore::NoHashDictionary> +{ +protected: + static constexpr bool has_hash_dictionary = false; + ChangeWriterBase() = default; +}; + +template <typename HashDictionaryT> +class ChangeWriter : public ChangeWriterBase<HashDictionaryT> { + using Parent = ChangeWriterBase<HashDictionaryT>; + using Parent::has_hash_dictionary; + std::vector<std::pair<EntryRef,uint32_t*>> _tree_refs; +public: + ChangeWriter(uint32_t capacity); + ~ChangeWriter(); + bool write(const std::vector<EntryRef>& refs); + void emplace_back(EntryRef key, uint32_t& tree_ref) { _tree_refs.emplace_back(std::make_pair(key, &tree_ref)); } +}; + +template <typename HashDictionaryT> +ChangeWriter<HashDictionaryT>::ChangeWriter(uint32_t capacity) + : ChangeWriterBase<HashDictionaryT>(), + _tree_refs() +{ + _tree_refs.reserve(capacity); +} + +template <typename HashDictionaryT> +ChangeWriter<HashDictionaryT>::~ChangeWriter() = default; + +template <typename HashDictionaryT> +bool +ChangeWriter<HashDictionaryT>::write(const std::vector<EntryRef> &refs) +{ + bool changed = false; + assert(refs.size() == _tree_refs.size()); + auto tree_ref = _tree_refs.begin(); + for (auto ref : refs) { + EntryRef old_ref(*tree_ref->second); + if (ref != old_ref) { + if (!changed) { + // Note: Needs review when porting to other platforms + // Assumes that other CPUs observes stores from this CPU in order + std::atomic_thread_fence(std::memory_order_release); + changed = true; + } + *tree_ref->second = ref.ref(); + if constexpr (has_hash_dictionary) { + auto find_result = this->_hash_dict->find(this->_hash_dict->get_default_comparator(), tree_ref->first); + assert(find_result != nullptr && find_result->first.load_relaxed() == tree_ref->first); + assert(find_result->second.load_relaxed() == old_ref); + find_result->second.store_release(ref); + } + } + ++tree_ref; + } + assert(tree_ref == _tree_refs.end()); + _tree_refs.clear(); + return changed; +} + +} + +template <typename BTreeDictionaryT, typename HashDictionaryT> +bool +EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) +{ + if constexpr (has_btree_dictionary) { + std::vector<EntryRef> refs; + refs.reserve(1024); + bool changed = false; + ChangeWriter<HashDictionaryT> change_writer(refs.capacity()); + if constexpr (has_hash_dictionary) { + change_writer.set_hash_dict(this->_hash_dict); + } + auto& dict = this->_btree_dict; + for (auto itr = dict.begin(); itr.valid(); ++itr) { + EntryRef ref(itr.getData()); + if (ref.valid()) { + if (filter.has(ref)) { + refs.emplace_back(ref); + change_writer.emplace_back(itr.getKey(), itr.getWData()); + if (refs.size() >= refs.capacity()) { + normalize(refs); + changed |= change_writer.write(refs); + refs.clear(); + } + } + } + } + if (!refs.empty()) { + normalize(refs); + changed |= change_writer.write(refs); + } + return changed; + } else { + return this->_hash_dict.normalize_values(normalize, filter); + } +} + +template <> +void +EnumStoreDictionary<EnumTree>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)>, const EntryRefFilter&) +{ + LOG_ABORT("should not be reached"); +} + +template <typename BTreeDictionaryT, typename HashDictionaryT> +void +EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) +{ + if constexpr (has_btree_dictionary) { + std::vector<EntryRef> refs; + refs.reserve(1024); + auto& dict = this->_btree_dict; + for (auto itr = dict.begin(); itr.valid(); ++itr) { + EntryRef ref(itr.getData()); + if (ref.valid()) { + if (filter.has(ref)) { + refs.emplace_back(ref); + if (refs.size() >= refs.capacity()) { + callback(refs); + refs.clear(); + } + } + } + } + if (!refs.empty()) { + callback(refs); + } + } else { + this->_hash_dict.foreach_value(callback, filter); + } +} + +template <> const EnumPostingTree & EnumStoreDictionary<EnumTree>::get_posting_dictionary() const { diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h index 4d0509c0eb1..db1176c5484 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h @@ -16,6 +16,7 @@ template <typename BTreeDictionaryT, typename HashDictionaryT = vespalib::datast class EnumStoreDictionary : public vespalib::datastore::UniqueStoreDictionary<BTreeDictionaryT, IEnumStoreDictionary, HashDictionaryT> { protected: using EntryRef = IEnumStoreDictionary::EntryRef; + using EntryRefFilter = IEnumStoreDictionary::EntryRefFilter; using Index = IEnumStoreDictionary::Index; using BTreeDictionaryType = BTreeDictionaryT; using EntryComparator = IEnumStoreDictionary::EntryComparator; @@ -54,6 +55,8 @@ public: void clear_all_posting_lists(std::function<void(EntryRef)> clearer) override; void update_posting_list(Index idx, const EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) override; bool normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) override; + bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) override; + void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) override; const EnumPostingTree& get_posting_dictionary() const override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h index a8cf6881b86..a9716ec5d05 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h @@ -30,6 +30,7 @@ class IEnumStoreDictionary : public vespalib::datastore::IUniqueStoreDictionary public: using EntryRef = vespalib::datastore::EntryRef; using EntryComparator = vespalib::datastore::EntryComparator; + using EntryRefFilter = vespalib::datastore::EntryRefFilter; using EnumVector = IEnumStore::EnumVector; using Index = IEnumStore::Index; using IndexList = IEnumStore::IndexList; @@ -52,7 +53,25 @@ public: virtual Index remap_index(Index idx) = 0; virtual void clear_all_posting_lists(std::function<void(EntryRef)> clearer) = 0; virtual void update_posting_list(Index idx, const EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) = 0; + /* + * Scan dictionary and call normalize function for each value. If + * returned value is different then write back the modified value to + * the dictionary. Only used by unit tests. + */ virtual bool normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) = 0; + /* + * Scan dictionary and call normalize function for batches of values + * that pass the filter. Write back modified values to the dictionary. + * Used by compaction of posting lists when moving short arrays, + * bitvectors or btree roots. + */ + virtual bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) = 0; + /* + * Scan dictionary and call callback function for batches of values + * that pass the filter. Used by compaction of posting lists when + * moving btree nodes. + */ + virtual void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) = 0; virtual const EnumPostingTree& get_posting_dictionary() const = 0; }; diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp index b114a355bb4..8790bdd9885 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp @@ -30,13 +30,17 @@ remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v, att v.logEnumStoreEvent("compactfixup", "drain"); { AttributeVector::EnumModifier enum_guard(v.getEnumModifier()); + auto& filter = remapper.get_entry_ref_filter(); v.logEnumStoreEvent("compactfixup", "start"); for (uint32_t doc = 0; doc < v.getNumDocs(); ++doc) { vespalib::ConstArrayRef<WeightedIndex> indicesRef(multi_value_mapping.get(doc)); WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend()); for (uint32_t i = 0; i < indices.size(); ++i) { - EnumIndex oldIndex = indices[i].value(); - indices[i] = WeightedIndex(remapper.remap(oldIndex), indices[i].weight()); + EnumIndex ref = indices[i].value(); + if (ref.valid() && filter.has(ref)) { + ref = remapper.remap(ref); + } + indices[i] = WeightedIndex(ref, indices[i].weight()); } std::atomic_thread_fence(std::memory_order_release); multi_value_mapping.replace(doc, indices); diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp index 3451c2b0456..8ed8a0cfbee 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp @@ -7,11 +7,13 @@ #include <vespa/vespalib/btree/btreeiterator.hpp> #include <vespa/vespalib/btree/btreerootbase.cpp> #include <vespa/vespalib/datastore/datastore.hpp> +#include <vespa/vespalib/datastore/entry_ref_filter.h> #include <vespa/vespalib/datastore/buffer_type.hpp> namespace search::attribute { using vespalib::btree::BTreeNoLeafData; +using vespalib::datastore::EntryRefFilter; // #define FORCE_BITVECTORS @@ -127,45 +129,47 @@ PostingStore<DataT>::removeSparseBitVectors() } } if (needscan) { - res = _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef - { return consider_remove_sparse_bitvector(posting_idx); }); + EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits); + filter.add_buffers(_bvType.get_active_buffers()); + res = _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs) + { consider_remove_sparse_bitvector(refs); }, + filter); } return res; } template <typename DataT> -typename PostingStore<DataT>::EntryRef -PostingStore<DataT>::consider_remove_sparse_bitvector(EntryRef ref) +void +PostingStore<DataT>::consider_remove_sparse_bitvector(std::vector<EntryRef>& refs) { - if (!ref.valid() || !isBitVector(getTypeId(EntryRef(ref)))) { - return ref; - } - RefType iRef(ref); - uint32_t typeId = getTypeId(iRef); - assert(isBitVector(typeId)); - assert(_bvs.find(ref.ref() )!= _bvs.end()); - BitVectorEntry *bve = getWBitVectorEntry(iRef); - BitVector &bv = *bve->_bv.get(); - uint32_t docFreq = bv.countTrueBits(); - if (bve->_tree.valid()) { - RefType iRef2(bve->_tree); - assert(isBTree(iRef2)); - const BTreeType *tree = getTreeEntry(iRef2); - assert(tree->size(_allocator) == docFreq); - (void) tree; - } - if (docFreq < _minBvDocFreq) { - dropBitVector(ref); - if (ref.valid()) { + for (auto& ref : refs) { + RefType iRef(ref); + assert(iRef.valid()); + uint32_t typeId = getTypeId(iRef); + assert(isBitVector(typeId)); + assert(_bvs.find(iRef.ref()) != _bvs.end()); + BitVectorEntry *bve = getWBitVectorEntry(iRef); + BitVector &bv = *bve->_bv.get(); + uint32_t docFreq = bv.countTrueBits(); + if (bve->_tree.valid()) { + RefType iRef2(bve->_tree); + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + assert(tree->size(_allocator) == docFreq); + (void) tree; + } + if (docFreq < _minBvDocFreq) { + dropBitVector(ref); iRef = ref; - typeId = getTypeId(iRef); - if (isBTree(typeId)) { - BTreeType *tree = getWTreeEntry(iRef); - normalizeTree(ref, tree, false); + if (iRef.valid()) { + typeId = getTypeId(iRef); + if (isBTree(typeId)) { + BTreeType *tree = getWTreeEntry(iRef); + normalizeTree(ref, tree, false); + } } } } - return ref; } template <typename DataT> @@ -647,74 +651,75 @@ PostingStore<DataT>::update_stat() template <typename DataT> void -PostingStore<DataT>::move_btree_nodes(EntryRef ref) +PostingStore<DataT>::move_btree_nodes(const std::vector<EntryRef>& refs) { - if (ref.valid()) { + for (auto ref : refs) { RefType iRef(ref); + assert(iRef.valid()); uint32_t typeId = getTypeId(iRef); uint32_t clusterSize = getClusterSize(typeId); - if (clusterSize == 0) { - if (isBitVector(typeId)) { - BitVectorEntry *bve = getWBitVectorEntry(iRef); - RefType iRef2(bve->_tree); - if (iRef2.valid()) { - assert(isBTree(iRef2)); - BTreeType *tree = getWTreeEntry(iRef2); - tree->move_nodes(_allocator); - } - } else { - BTreeType *tree = getWTreeEntry(iRef); + assert(clusterSize == 0); + if (isBitVector(typeId)) { + BitVectorEntry *bve = getWBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + BTreeType *tree = getWTreeEntry(iRef2); tree->move_nodes(_allocator); } + } else { + assert(isBTree(typeId)); + BTreeType *tree = getWTreeEntry(iRef); + tree->move_nodes(_allocator); } } } template <typename DataT> -typename PostingStore<DataT>::EntryRef -PostingStore<DataT>::move(EntryRef ref) +void +PostingStore<DataT>::move(std::vector<EntryRef>& refs) { - if (!ref.valid()) { - return EntryRef(); - } - RefType iRef(ref); - uint32_t typeId = getTypeId(iRef); - uint32_t clusterSize = getClusterSize(typeId); - if (clusterSize == 0) { - if (isBitVector(typeId)) { - BitVectorEntry *bve = getWBitVectorEntry(iRef); - RefType iRef2(bve->_tree); - if (iRef2.valid()) { - assert(isBTree(iRef2)); - if (_store.getCompacting(iRef2)) { - BTreeType *tree = getWTreeEntry(iRef2); - auto ref_and_ptr = allocBTreeCopy(*tree); - tree->prepare_hold(); - bve->_tree = ref_and_ptr.ref; + for (auto& ref : refs) { + RefType iRef(ref); + assert(iRef.valid()); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + BitVectorEntry *bve = getWBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + if (_store.getCompacting(iRef2)) { + BTreeType *tree = getWTreeEntry(iRef2); + auto ref_and_ptr = allocBTreeCopy(*tree); + tree->prepare_hold(); + // Note: Needs review when porting to other platforms + // Assumes that other CPUs observes stores from this CPU in order + std::atomic_thread_fence(std::memory_order_release); + bve->_tree = ref_and_ptr.ref; + } } + if (_store.getCompacting(iRef)) { + auto new_ref = allocBitVectorCopy(*bve).ref; + _bvs.erase(iRef.ref()); + _bvs.insert(new_ref.ref()); + ref = new_ref; + } + } else { + assert(isBTree(typeId)); + assert(_store.getCompacting(iRef)); + BTreeType *tree = getWTreeEntry(iRef); + auto ref_and_ptr = allocBTreeCopy(*tree); + tree->prepare_hold(); + ref = ref_and_ptr.ref; } - if (!_store.getCompacting(ref)) { - return ref; - } - auto new_ref = allocBitVectorCopy(*bve).ref; - _bvs.erase(ref.ref()); - _bvs.insert(new_ref.ref()); - return new_ref; } else { - if (!_store.getCompacting(ref)) { - return ref; - } - BTreeType *tree = getWTreeEntry(iRef); - auto ref_and_ptr = allocBTreeCopy(*tree); - tree->prepare_hold(); - return ref_and_ptr.ref; + assert(_store.getCompacting(iRef)); + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + ref = allocKeyDataCopy(shortArray, clusterSize).ref; } } - if (!_store.getCompacting(ref)) { - return ref; - } - const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); - return allocKeyDataCopy(shortArray, clusterSize).ref; } template <typename DataT> @@ -722,11 +727,12 @@ void PostingStore<DataT>::compact_worst_btree_nodes() { auto to_hold = this->start_compact_worst_btree_nodes(); - _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef - { - move_btree_nodes(posting_idx); - return posting_idx; - }); + EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits); + // Only look at buffers containing bitvectors and btree roots + filter.add_buffers(this->_treeType.get_active_buffers()); + filter.add_buffers(_bvType.get_active_buffers()); + _dictionary.foreach_posting_list([this](const std::vector<EntryRef>& refs) + { move_btree_nodes(refs); }, filter); this->finish_compact_worst_btree_nodes(to_hold); } @@ -735,8 +741,23 @@ void PostingStore<DataT>::compact_worst_buffers() { auto to_hold = this->start_compact_worst_buffers(); - _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef - { return move(posting_idx); }); + bool compact_btree_roots = false; + EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits); + filter.add_buffers(to_hold); + // Start with looking at buffers being compacted + for (uint32_t buffer_id : to_hold) { + if (isBTree(_store.getBufferState(buffer_id).getTypeId())) { + compact_btree_roots = true; + } + } + if (compact_btree_roots) { + // If we are compacting btree roots then we also have to look at bitvector + // buffers + filter.add_buffers(_bvType.get_active_buffers()); + } + _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs) + { return move(refs); }, + filter); this->finishCompact(to_hold); } diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h index a0f0be1c430..2b119a55158 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.h +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h @@ -89,6 +89,7 @@ public: using Parent::getWTreeEntry; using Parent::getTreeEntry; using Parent::getKeyDataEntry; + using Parent::isBTree; using Parent::clusterLimit; using Parent::allocBTree; using Parent::allocBTreeCopy; @@ -105,10 +106,8 @@ public: ~PostingStore(); bool removeSparseBitVectors() override; - EntryRef consider_remove_sparse_bitvector(EntryRef ref); + void consider_remove_sparse_bitvector(std::vector<EntryRef> &refs); static bool isBitVector(uint32_t typeId) { return typeId == BUFFERTYPE_BITVECTOR; } - static bool isBTree(uint32_t typeId) { return typeId == BUFFERTYPE_BTREE; } - bool isBTree(RefType ref) const { return isBTree(getTypeId(ref)); } void applyNew(EntryRef &ref, AddIter a, AddIter ae); @@ -188,8 +187,8 @@ public: vespalib::MemoryUsage getMemoryUsage() const; vespalib::MemoryUsage update_stat(); - void move_btree_nodes(EntryRef ref); - EntryRef move(EntryRef ref); + void move_btree_nodes(const std::vector<EntryRef> &refs); + void move(std::vector<EntryRef>& refs); void compact_worst_btree_nodes(); void compact_worst_buffers(); diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp index d9024af724b..6268a6da701 100644 --- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp @@ -194,7 +194,7 @@ PredicateAttribute::onLoad(vespalib::Executor *) buffer.moveFreeToData(size); const GenericHeader &header = loaded_buffer->getHeader(); - auto attributeHeader = attribute::AttributeHeader::extractTags(header); + auto attributeHeader = attribute::AttributeHeader::extractTags(header, getBaseFileName()); uint32_t version = attributeHeader.getVersion(); setCreateSerialNum(attributeHeader.getCreateSerialNum()); diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp index 4323e57f6b1..18805a7b20f 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp @@ -49,13 +49,16 @@ SingleValueEnumAttributeBase::remap_enum_store_refs(const EnumIndexRemapper& rem { // update _enumIndices with new EnumIndex values after enum store has been compacted. v.logEnumStoreEvent("reenumerate", "reserved"); - auto new_indexes = std::make_unique<vespalib::Array<EnumIndex>>(); - new_indexes->reserve(_enumIndices.capacity()); + vespalib::Array<EnumIndex> new_indexes; + new_indexes.reserve(_enumIndices.capacity()); v.logEnumStoreEvent("reenumerate", "start"); + auto& filter = remapper.get_entry_ref_filter(); for (uint32_t i = 0; i < _enumIndices.size(); ++i) { - EnumIndex old_index = _enumIndices[i]; - EnumIndex new_index = remapper.remap(old_index); - new_indexes->push_back_fast(new_index); + EnumIndex ref = _enumIndices[i]; + if (ref.valid() && filter.has(ref)) { + ref = remapper.remap(ref); + } + new_indexes.push_back_fast(ref); } v.logEnumStoreEvent("compactfixup", "drain"); { diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.cpp b/searchlib/src/vespa/searchlib/docstore/compacter.cpp index 38f3fbef0b0..26fb79f8a4e 100644 --- a/searchlib/src/vespa/searchlib/docstore/compacter.cpp +++ b/searchlib/src/vespa/searchlib/docstore/compacter.cpp @@ -26,7 +26,7 @@ BucketCompacter::BucketCompacter(size_t maxSignificantBucketBits, const Compress _bucketizer(bucketizer), _writeCount(0), _maxBucketGuardDuration(vespalib::duration::zero()), - _lastSample(), + _lastSample(vespalib::steady_clock::now()), _lock(), _backingMemory(Alloc::alloc(0x40000000), &_lock), _tmpStore(), diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 0f4326aac40..5217c44df97 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -75,7 +75,7 @@ BlobSequenceReader::BlobSequenceReader(AttributeVector& attr, bool has_index) : ReaderBase(attr), _use_index_file(has_index && has_index_file(attr) && can_use_index_save_file(attr.getConfig(), - search::attribute::AttributeHeader::extractTags(getDatHeader()))), + search::attribute::AttributeHeader::extractTags(getDatHeader(), attr.getBaseFileName()))), _index_file(_use_index_file ? attribute::LoadUtils::openFile(attr, DenseTensorAttributeSaver::index_file_suffix()) : std::unique_ptr<Fast_BufferedFile>()) diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp index d3c2998333a..86090f2ac92 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp @@ -79,12 +79,6 @@ DenseTensorStore::~DenseTensorStore() _store.dropBuffers(); } -const void * -DenseTensorStore::getRawBuffer(RefType ref) const -{ - return _store.getEntryArray<char>(ref, _bufferType.getArraySize()); -} - namespace { void clearPadAreaAfterBuffer(char *buffer, size_t bufSize, size_t alignedBufSize) { @@ -136,15 +130,6 @@ DenseTensorStore::getTensor(EntryRef ref) const return std::make_unique<vespalib::eval::DenseValueView>(_type, cells_ref); } -vespalib::eval::TypedCells -DenseTensorStore::get_typed_cells(EntryRef ref) const -{ - if (!ref.valid()) { - return vespalib::eval::TypedCells(&_emptySpace[0], _type.cell_type(), getNumCells()); - } - return vespalib::eval::TypedCells(getRawBuffer(ref), _type.cell_type(), getNumCells()); -} - template <class TensorType> TensorStore::EntryRef DenseTensorStore::setDenseTensor(const TensorType &tensor) diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h index 3b7cb71863e..06492596f70 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h @@ -50,12 +50,9 @@ private: ValueType _type; // type of dense tensor std::vector<char> _emptySpace; - size_t unboundCells(const void *buffer) const; - template <class TensorType> TensorStore::EntryRef setDenseTensor(const TensorType &tensor); - public: DenseTensorStore(const ValueType &type, std::unique_ptr<vespalib::alloc::MemoryAllocator> allocator); ~DenseTensorStore() override; @@ -63,12 +60,17 @@ public: const ValueType &type() const { return _type; } size_t getNumCells() const { return _tensorSizeCalc._numCells; } size_t getBufSize() const { return _tensorSizeCalc.bufSize(); } - const void *getRawBuffer(RefType ref) const; + const void *getRawBuffer(RefType ref) const { + return _store.getEntryArray<char>(ref, _bufferType.getArraySize()); + } vespalib::datastore::Handle<char> allocRawBuffer(); void holdTensor(EntryRef ref) override; EntryRef move(EntryRef ref) override; std::unique_ptr<vespalib::eval::Value> getTensor(EntryRef ref) const; - vespalib::eval::TypedCells get_typed_cells(EntryRef ref) const; + vespalib::eval::TypedCells get_typed_cells(EntryRef ref) const { + return vespalib::eval::TypedCells(ref.valid() ? getRawBuffer(ref) : &_emptySpace[0], + _type.cell_type(), getNumCells()); + } EntryRef setTensor(const vespalib::eval::Value &tensor); // The following method is meant to be used only for unit tests. uint32_t getArraySize() const { return _bufferType.getArraySize(); } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index 7f9f20e07c4..43596478a6f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -43,4 +43,13 @@ HammingDistance::calc(const vespalib::eval::TypedCells& lhs, } } +double +HammingDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs, + const vespalib::eval::TypedCells& rhs, + double) const +{ + // consider optimizing: + return calc(lhs, rhs); +} + } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h index f0b7b159b90..c64fc5b532d 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h @@ -15,7 +15,7 @@ namespace search::tensor { * or (for int8 cells, aka binary data only) * "number of bits that are different" */ -class HammingDistance : public DistanceFunction { +class HammingDistance final : public DistanceFunction { public: HammingDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; @@ -26,13 +26,7 @@ public: double score = 1.0 / (1.0 + distance); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double) const override - { - // consider optimizing: - return calc(lhs, rhs); - } + double calc_with_limit(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs, double) const override; }; } diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp index aa49a2d2954..7fa87f3ec09 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp @@ -56,11 +56,13 @@ Domain::Domain(const string &domainName, const string & baseDir, Executor & exec _fileHeaderContext(fileHeaderContext), _markedDeleted(false) { - int retval(0); - if ((retval = makeDirectory(_baseDir.c_str())) != 0) { + assert(_config.getEncoding().getCompression() != Encoding::Compression::none); + int retval = makeDirectory(_baseDir.c_str()); + if (retval != 0) { throw runtime_error(fmt("Failed creating basedirectory %s r(%d), e(%d)", _baseDir.c_str(), retval, errno)); } - if ((retval = makeDirectory(dir().c_str())) != 0) { + retval = makeDirectory(dir().c_str()); + if (retval != 0) { throw runtime_error(fmt("Failed creating domaindir %s r(%d), e(%d)", dir().c_str(), retval, errno)); } SerialNumList partIdVector = scanDir(); @@ -76,8 +78,7 @@ Domain::Domain(const string &domainName, const string & baseDir, Executor & exec } pending.waitForZeroRefCount(); if (_parts.empty() || _parts.crbegin()->second->isClosed()) { - _parts[lastPart] = std::make_shared<DomainPart>(_name, dir(), lastPart, _config.getEncoding(), - _config.getCompressionlevel(), _fileHeaderContext, false); + _parts[lastPart] = std::make_shared<DomainPart>(_name, dir(), lastPart, _fileHeaderContext, false); vespalib::File::sync(dir()); } _lastSerial = end(); @@ -86,13 +87,13 @@ Domain::Domain(const string &domainName, const string & baseDir, Executor & exec Domain & Domain::setConfig(const DomainConfig & cfg) { _config = cfg; + assert(_config.getEncoding().getCompression() != Encoding::Compression::none); return *this; } void Domain::addPart(SerialNum partId, bool isLastPart) { - auto dp = std::make_shared<DomainPart>(_name, dir(), partId, _config.getEncoding(), - _config.getCompressionlevel(), _fileHeaderContext, isLastPart); + auto dp = std::make_shared<DomainPart>(_name, dir(), partId, _fileHeaderContext, isLastPart); if (dp->size() == 0) { // Only last domain part is allowed to be truncated down to // empty size. @@ -331,8 +332,7 @@ Domain::optionallyRotateFile(SerialNum serialNum) { triggerSyncNow({}); waitPendingSync(_syncMonitor, _syncCond, _pendingSync); dp->close(); - dp = std::make_shared<DomainPart>(_name, dir(), serialNum, _config.getEncoding(), - _config.getCompressionlevel(), _fileHeaderContext, false); + dp = std::make_shared<DomainPart>(_name, dir(), serialNum, _fileHeaderContext, false); { std::lock_guard guard(_lock); _parts[serialNum] = dp; @@ -399,17 +399,16 @@ Domain::commitChunk(std::unique_ptr<CommitChunk> chunk, const UniqueLock & chunk })); } + + void Domain::doCommit(std::unique_ptr<CommitChunk> chunk) { const Packet & packet = chunk->getPacket(); if (packet.empty()) return; - - vespalib::nbostream_longlivedbuf is(packet.getHandle().data(), packet.getHandle().size()); - Packet::Entry entry; - entry.deserialize(is); - assert(entry.serial() == packet.range().from()); - DomainPart::SP dp = optionallyRotateFile(entry.serial()); - dp->commit(entry.serial(), packet); + + SerializedChunk serialized(packet, _config.getEncoding(), _config.getCompressionlevel()); + DomainPart::SP dp = optionallyRotateFile(packet.range().from()); + dp->commit(serialized); if (_config.getFSyncOnCommit()) { dp->sync(); } diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp index 3dad67df177..2ca2f15545d 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp @@ -247,11 +247,9 @@ DomainPart::buildPacketMapping(bool allowTruncate) return currPos; } -DomainPart::DomainPart(const string & name, const string & baseDir, SerialNum s, Encoding encoding, - uint8_t compressionLevel, const FileHeaderContext &fileHeaderContext, bool allowTruncate) - : _encoding(encoding), - _compressionLevel(compressionLevel), - _lock(), +DomainPart::DomainPart(const string & name, const string & baseDir, SerialNum s, + const FileHeaderContext &fileHeaderContext, bool allowTruncate) + : _lock(), _fileLock(), _range(s), _sz(0), @@ -379,35 +377,21 @@ DomainPart::erase(SerialNum to) } void -DomainPart::commit(SerialNum firstSerial, const Packet &packet) +DomainPart::commit(const SerializedChunk & serialized) { + SerialNumRange range = serialized.range(); + int64_t firstPos(byteSize()); - nbostream_longlivedbuf h(packet.getHandle().data(), packet.getHandle().size()); + assert(_range.to() < range.to()); + _sz += serialized.getNumEntries(); + _range.to(range.to()); if (_range.from() == 0) { - _range.from(firstSerial); - } - IChunk::UP chunk = IChunk::create(_encoding, _compressionLevel); - for (size_t i(0); h.size() > 0; i++) { - //LOG(spam, - //"Pos(%d) Len(%d), Lim(%d), Remaining(%d)", - //h.getPos(), h.getLength(), h.getLimit(), h.getRemaining()); - Packet::Entry entry; - entry.deserialize(h); - if (_range.to() < entry.serial()) { - chunk->add(entry); - assert(_encoding.getCompression() != Encoding::Compression::none); - _sz++; - _range.to(entry.serial()); - } else { - throw runtime_error(fmt("Incoming serial number(%" PRIu64 ") must be bigger than the last one (%" PRIu64 ").", - entry.serial(), _range.to())); - } - } - if ( ! chunk->getEntries().empty()) { - write(*_transLog, *chunk); + _range.from(range.from()); } + + write(*_transLog, range, serialized.getData()); std::lock_guard guard(_lock); - _skipList.emplace_back(firstSerial, firstPos); + _skipList.emplace_back(range.from(), firstPos); } void @@ -442,26 +426,15 @@ DomainPart::visit(FastOS_FileInterface &file, SerialNumRange &r, Packet &packet) } void -DomainPart::write(FastOS_FileInterface &file, const IChunk & chunk) +DomainPart::write(FastOS_FileInterface &file, SerialNumRange range, vespalib::ConstBufferRef buf) { - nbostream os; - size_t begin = os.wp(); - os << _encoding.getRaw(); // Placeholder for encoding - os << uint32_t(0); // Placeholder for size - Encoding realEncoding = chunk.encode(os); - size_t end = os.wp(); - os.wp(0); - os << realEncoding.getRaw(); //Patching real encoding - os << uint32_t(end - (begin + sizeof(uint32_t) + sizeof(uint8_t))); // Patching actual size. - os.wp(end); std::lock_guard guard(_writeLock); - if ( ! file.CheckedWrite(os.data(), os.size()) ) { - throw runtime_error(handleWriteError("Failed writing the entry.", file, byteSize(), chunk.range(), os.size())); + if ( ! file.CheckedWrite(buf.data(), buf.size()) ) { + throw runtime_error(handleWriteError("Failed writing the entry.", file, byteSize(), range, buf.size())); } - LOG(debug, "Wrote chunk with %zu entries and %zu bytes, range[%" PRIu64 ", %" PRIu64 "] encoding(wanted=%x, real=%x)", - chunk.getEntries().size(), os.size(), chunk.range().from(), chunk.range().to(), _encoding.getRaw(), realEncoding.getRaw()); - _writtenSerial = chunk.range().to(); - _byteSize.fetch_add(os.size(), std::memory_order_release); + LOG(debug, "Wrote chunk with and %zu bytes, range[%" PRIu64 ", %" PRIu64 "]", buf.size(), range.from(), range.to()); + _writtenSerial = range.to(); + _byteSize.fetch_add(buf.size(), std::memory_order_release); } bool diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.h b/searchlib/src/vespa/searchlib/transactionlog/domainpart.h index 9ab0db54391..ea5290c433b 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/domainpart.h +++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.h @@ -19,13 +19,13 @@ public: using SP = std::shared_ptr<DomainPart>; DomainPart(const DomainPart &) = delete; DomainPart& operator=(const DomainPart &) = delete; - DomainPart(const vespalib::string &name, const vespalib::string &baseDir, SerialNum s, Encoding defaultEncoding, - uint8_t compressionLevel, const common::FileHeaderContext &FileHeaderContext, bool allowTruncate); + DomainPart(const vespalib::string &name, const vespalib::string &baseDir, SerialNum s, + const common::FileHeaderContext &FileHeaderContext, bool allowTruncate); ~DomainPart(); const vespalib::string &fileName() const { return _fileName; } - void commit(SerialNum firstSerial, const Packet &packet); + void commit(const SerializedChunk & serialized); bool erase(SerialNum to); bool visit(FastOS_FileInterface &file, SerialNumRange &r, Packet &packet); bool close(); @@ -49,7 +49,7 @@ private: static Packet readPacket(FastOS_FileInterface & file, SerialNumRange wanted, size_t targetSize, bool allowTruncate); static bool read(FastOS_FileInterface &file, IChunk::UP & chunk, Alloc &buf, bool allowTruncate); - void write(FastOS_FileInterface &file, const IChunk & entry); + void write(FastOS_FileInterface &file, SerialNumRange range, vespalib::ConstBufferRef buf); void writeHeader(const common::FileHeaderContext &fileHeaderContext); class SkipInfo @@ -69,8 +69,6 @@ private: SerialNum _id; uint64_t _pos; }; - const Encoding _encoding; - const uint8_t _compressionLevel; std::mutex _lock; std::mutex _fileLock; SerialNumRange _range; diff --git a/searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp b/searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp index ee1631ea8c2..e3d98cd576d 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp @@ -8,6 +8,9 @@ #include <cassert> #include <ostream> +#include <vespa/log/log.h> +LOG_SETUP(".searchlib.transactionlog.ichunk"); + using std::make_unique; using vespalib::make_string_short::fmt; using vespalib::nbostream_longlivedbuf; @@ -115,4 +118,46 @@ std::ostream & operator << (std::ostream & os, Encoding e) { return os << "crc=" << e.getCrc() << " compression=" << e.getCompression(); } + +void +encode(vespalib::nbostream & os, const IChunk & chunk, Encoding encoding) { + size_t begin = os.wp(); + os << encoding.getRaw(); // Placeholder for encoding + os << uint32_t(0); // Placeholder for size + Encoding realEncoding = chunk.encode(os); + size_t end = os.wp(); + os.wp(0); + os << realEncoding.getRaw(); //Patching real encoding + os << uint32_t(end - (begin + sizeof(uint32_t) + sizeof(uint8_t))); // Patching actual size. + os.wp(end); + SerialNumRange range = chunk.range(); + LOG(spam, "Encoded chunk with %zu entries and %zu bytes, range[%" PRIu64 ", %" PRIu64 "] encoding(wanted=%x, real=%x)", + chunk.getEntries().size(), os.size(), range.from(), range.to(), encoding.getRaw(), realEncoding.getRaw()); +} + +SerializedChunk::SerializedChunk(const Packet & packet, Encoding encoding, uint8_t compressionLevel) + : _os(), + _range(packet.range()), + _numEntries(packet.size()) +{ + nbostream_longlivedbuf h(packet.getHandle().data(), packet.getHandle().size()); + + IChunk::UP chunk = IChunk::create(encoding, compressionLevel); + SerialNum prev = 0; + for (size_t i(0); h.size() > 0; i++) { + //LOG(spam, + //"Pos(%d) Len(%d), Lim(%d), Remaining(%d)", + //h.getPos(), h.getLength(), h.getLimit(), h.getRemaining()); + Packet::Entry entry; + entry.deserialize(h); + assert (prev < entry.serial()); + chunk->add(entry); + prev = entry.serial(); + } + assert(! chunk->getEntries().empty()); + encode(_os, *chunk, encoding); +} +vespalib::ConstBufferRef SerializedChunk::getData() const { + return vespalib::ConstBufferRef(_os.data(), _os.size()); +} } diff --git a/searchlib/src/vespa/searchlib/transactionlog/ichunk.h b/searchlib/src/vespa/searchlib/transactionlog/ichunk.h index 02bd0ce9426..dccfd6617f5 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/ichunk.h +++ b/searchlib/src/vespa/searchlib/transactionlog/ichunk.h @@ -33,6 +33,22 @@ private: std::ostream & operator << (std::ostream & os, Encoding e); /** + * Represents a completely encoded chunk with a buffer ready to be persisted, + * and the range and number of entries it covers. + */ +class SerializedChunk { +public: + SerializedChunk(const Packet & packet, Encoding encoding, uint8_t compressionLevel); + vespalib::ConstBufferRef getData() const; + SerialNumRange range() const { return _range; } + size_t getNumEntries() const { return _numEntries; } +private: + vespalib::nbostream _os; + SerialNumRange _range; + size_t _numEntries; +}; + +/** * Interface for different chunk formats. * Format specifies both crc type, and compression type. */ |