summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp2
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp204
-rw-r--r--searchlib/src/tests/transactionlog/CMakeLists.txt3
-rw-r--r--searchlib/src/tests/transactionlog/translogclient_test.cpp101
-rwxr-xr-xsearchlib/src/tests/transactionlog/translogclient_test.sh6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_header.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_header.h2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp159
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h19
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.cpp195
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.h9
-rw-r--r--searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp13
-rw-r--r--searchlib/src/vespa/searchlib/docstore/compacter.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h12
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.h10
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domain.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp65
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domainpart.h10
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp45
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/ichunk.h16
26 files changed, 680 insertions, 267 deletions
diff --git a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp
index 3c8c9ff17e0..16a04a746f3 100644
--- a/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp
+++ b/searchlib/src/tests/attribute/attribute_header/attribute_header_test.cpp
@@ -49,7 +49,7 @@ void
verify_roundtrip_serialization(const HnswIPO& hnsw_params_in)
{
auto gen_header = populate_header(hnsw_params_in);
- auto attr_header = AttributeHeader::extractTags(gen_header);
+ auto attr_header = AttributeHeader::extractTags(gen_header, file_name);
EXPECT_EQ(tensor_cfg.basicType(), attr_header.getBasicType());
EXPECT_EQ(tensor_cfg.collectionType(), attr_header.getCollectionType());
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index 9c25429932b..40ff25ff976 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -8,6 +8,21 @@ LOG_SETUP("enumstore_test");
using Type = search::DictionaryConfig::Type;
using vespalib::datastore::EntryRef;
+using vespalib::datastore::EntryRefFilter;
+using RefT = vespalib::datastore::EntryRefT<22>;
+
+namespace vespalib::datastore {
+
+/*
+ * Print EntryRef as RefT which is used by test_normalize_posting_lists and
+ * test_foreach_posting_list to differentiate between buffers
+ */
+void PrintTo(const EntryRef &ref, std::ostream* os) {
+ RefT iref(ref);
+ *os << "RefT(" << iref.offset() << "," << iref.bufferId() << ")";
+}
+
+}
namespace search {
@@ -597,6 +612,11 @@ public:
void update_posting_idx(EnumIndex enum_idx, EntryRef old_posting_idx, EntryRef new_posting_idx);
EnumIndex insert_value(size_t value_idx);
+ void populate_sample_data(uint32_t cnt);
+ std::vector<EntryRef> get_sample_values(uint32_t cnt);
+ void clear_sample_values(uint32_t cnt);
+ void test_normalize_posting_lists(bool use_filter, bool one_filter);
+ void test_foreach_posting_list(bool one_filter);
static EntryRef fake_pidx() { return EntryRef(42); }
};
@@ -620,6 +640,149 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::insert_value(size_t val
return enum_idx;
}
+namespace {
+/*
+ * large_population should trigger multiple callbacks from normalize_values
+ * and foreach_value
+ */
+constexpr uint32_t large_population = 1200;
+
+uint32_t select_buffer(uint32_t i) {
+ if ((i % 2) == 0) {
+ return 0;
+ }
+ if ((i % 3) == 0) {
+ return 1;
+ }
+ if ((i % 5) == 0) {
+ return 2;
+ }
+ return 3;
+}
+
+EntryRef make_fake_pidx(uint32_t i) { return RefT(i + 200, select_buffer(i)); }
+EntryRef make_fake_adjusted_pidx(uint32_t i) { return RefT(i + 500, select_buffer(i)); }
+EntryRef adjust_fake_pidx(EntryRef ref) { RefT iref(ref); return RefT(iref.offset() + 300, iref.bufferId()); }
+
+}
+
+
+template <typename EnumStoreTypeAndDictionaryType>
+void
+EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::populate_sample_data(uint32_t cnt)
+{
+ auto& dict = store.get_dictionary();
+ for (uint32_t i = 0; i < cnt; ++i) {
+ auto enum_idx = store.insert(i);
+ EXPECT_TRUE(enum_idx.valid());
+ EntryRef posting_idx(make_fake_pidx(i));
+ dict.update_posting_list(enum_idx, store.get_comparator(), [posting_idx](EntryRef) noexcept -> EntryRef { return posting_idx; });
+ }
+}
+
+template <typename EnumStoreTypeAndDictionaryType>
+std::vector<EntryRef>
+EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::get_sample_values(uint32_t cnt)
+{
+ std::vector<EntryRef> result;
+ result.reserve(cnt);
+ store.freeze_dictionary();
+ auto& dict = store.get_dictionary();
+ for (uint32_t i = 0; i < cnt; ++i) {
+ auto compare = store.make_comparator(i);
+ auto enum_idx = dict.find(compare);
+ EXPECT_TRUE(enum_idx.valid());
+ EntryRef posting_idx;
+ dict.update_posting_list(enum_idx, compare, [&posting_idx](EntryRef ref) noexcept { posting_idx = ref; return ref; });;
+ auto find_result = dict.find_posting_list(compare, dict.get_frozen_root());
+ EXPECT_EQ(enum_idx, find_result.first);
+ EXPECT_EQ(posting_idx, find_result.second);
+ result.emplace_back(find_result.second);
+ }
+ return result;
+}
+
+template <typename EnumStoreTypeAndDictionaryType>
+void
+EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::clear_sample_values(uint32_t cnt)
+{
+ auto& dict = store.get_dictionary();
+ for (uint32_t i = 0; i < cnt; ++i) {
+ auto comparator = store.make_comparator(i);
+ auto enum_idx = dict.find(comparator);
+ EXPECT_TRUE(enum_idx.valid());
+ dict.update_posting_list(enum_idx, comparator, [](EntryRef) noexcept -> EntryRef { return EntryRef(); });
+ }
+}
+
+namespace {
+
+EntryRefFilter make_entry_ref_filter(bool one_filter)
+{
+ if (one_filter) {
+ EntryRefFilter filter(RefT::numBuffers(), RefT::offset_bits);
+ filter.add_buffer(3);
+ return filter;
+ }
+ return EntryRefFilter::create_all_filter(RefT::numBuffers(), RefT::offset_bits);
+}
+
+}
+
+template <typename EnumStoreTypeAndDictionaryType>
+void
+EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_normalize_posting_lists(bool use_filter, bool one_filter)
+{
+ populate_sample_data(large_population);
+ auto& dict = store.get_dictionary();
+ std::vector<EntryRef> exp_refs;
+ std::vector<EntryRef> exp_adjusted_refs;
+ exp_refs.reserve(large_population);
+ exp_adjusted_refs.reserve(large_population);
+ for (uint32_t i = 0; i < large_population; ++i) {
+ exp_refs.emplace_back(make_fake_pidx(i));
+ if (!use_filter || !one_filter || select_buffer(i) == 3) {
+ exp_adjusted_refs.emplace_back(make_fake_adjusted_pidx(i));
+ } else {
+ exp_adjusted_refs.emplace_back(make_fake_pidx(i));
+ }
+ }
+ EXPECT_EQ(exp_refs, get_sample_values(large_population));
+ if (use_filter) {
+ auto filter = make_entry_ref_filter(one_filter);
+ auto dummy = [](std::vector<EntryRef>&) noexcept { };
+ auto adjust_refs = [](std::vector<EntryRef> &refs) noexcept { for (auto &ref : refs) { ref = adjust_fake_pidx(ref); } };
+ EXPECT_FALSE(dict.normalize_posting_lists(dummy, filter));
+ EXPECT_EQ(exp_refs, get_sample_values(large_population));
+ EXPECT_TRUE(dict.normalize_posting_lists(adjust_refs, filter));
+ } else {
+ auto dummy = [](EntryRef posting_idx) noexcept { return posting_idx; };
+ auto adjust_refs = [](EntryRef ref) noexcept { return adjust_fake_pidx(ref); };
+ EXPECT_FALSE(dict.normalize_posting_lists(dummy));
+ EXPECT_EQ(exp_refs, get_sample_values(large_population));
+ EXPECT_TRUE(dict.normalize_posting_lists(adjust_refs));
+ }
+ EXPECT_EQ(exp_adjusted_refs, get_sample_values(large_population));
+ clear_sample_values(large_population);
+}
+
+template <typename EnumStoreTypeAndDictionaryType>
+void
+EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_foreach_posting_list(bool one_filter)
+{
+ auto filter = make_entry_ref_filter(one_filter);
+ populate_sample_data(large_population);
+ auto& dict = store.get_dictionary();
+ std::vector<EntryRef> exp_refs;
+ auto save_exp_refs = [&exp_refs](std::vector<EntryRef>& refs) { exp_refs.insert(exp_refs.end(), refs.begin(), refs.end()); };
+ EXPECT_FALSE(dict.normalize_posting_lists(save_exp_refs, filter));
+ std::vector<EntryRef> act_refs;
+ auto save_act_refs = [&act_refs](const std::vector<EntryRef>& refs) { act_refs.insert(act_refs.end(), refs.begin(), refs.end()); };
+ dict.foreach_posting_list(save_act_refs, filter);
+ EXPECT_EQ(exp_refs, act_refs);
+ clear_sample_values(large_population);
+}
+
// Disable warnings emitted by gtest generated files when using typed tests
#pragma GCC diagnostic push
#ifndef __clang__
@@ -678,26 +841,27 @@ TYPED_TEST(EnumStoreDictionaryTest, find_posting_list_works)
TYPED_TEST(EnumStoreDictionaryTest, normalize_posting_lists_works)
{
- auto value_0_idx = this->insert_value(0);
- this->update_posting_idx(value_0_idx, EntryRef(), this->fake_pidx());
- this->store.freeze_dictionary();
- auto& dict = this->store.get_dictionary();
- auto root = dict.get_frozen_root();
- auto find_result = dict.find_posting_list(this->make_bound_comparator(0), root);
- EXPECT_EQ(value_0_idx, find_result.first);
- EXPECT_EQ(this->fake_pidx(), find_result.second);
- auto dummy = [](EntryRef posting_idx) noexcept { return posting_idx; };
- std::vector<EntryRef> saved_refs;
- auto save_refs_and_clear = [&saved_refs](EntryRef posting_idx) { saved_refs.push_back(posting_idx); return EntryRef(); };
- EXPECT_FALSE(dict.normalize_posting_lists(dummy));
- EXPECT_TRUE(dict.normalize_posting_lists(save_refs_and_clear));
- EXPECT_FALSE(dict.normalize_posting_lists(save_refs_and_clear));
- EXPECT_EQ((std::vector<EntryRef>{ this->fake_pidx(), EntryRef() }), saved_refs);
- this->store.freeze_dictionary();
- root = dict.get_frozen_root();
- find_result = dict.find_posting_list(this->make_bound_comparator(0), root);
- EXPECT_EQ(value_0_idx, find_result.first);
- EXPECT_EQ(EntryRef(), find_result.second);
+ this->test_normalize_posting_lists(false, false);
+}
+
+TYPED_TEST(EnumStoreDictionaryTest, normalize_posting_lists_with_all_filter_works)
+{
+ this->test_normalize_posting_lists(true, false);
+}
+
+TYPED_TEST(EnumStoreDictionaryTest, normalize_posting_lists_with_one_filter_works)
+{
+ this->test_normalize_posting_lists(true, true);
+}
+
+TYPED_TEST(EnumStoreDictionaryTest, foreach_posting_list_with_all_filter_works)
+{
+ this->test_foreach_posting_list(false);
+}
+
+TYPED_TEST(EnumStoreDictionaryTest, foreach_posting_list_with_one_filter_works)
+{
+ this->test_foreach_posting_list(true);
}
namespace {
diff --git a/searchlib/src/tests/transactionlog/CMakeLists.txt b/searchlib/src/tests/transactionlog/CMakeLists.txt
index b09271eefe2..0904dc3ee36 100644
--- a/searchlib/src/tests/transactionlog/CMakeLists.txt
+++ b/searchlib/src/tests/transactionlog/CMakeLists.txt
@@ -5,8 +5,7 @@ vespa_add_executable(searchlib_translogclient_test_app TEST
DEPENDS
searchlib
)
-vespa_add_test(NAME searchlib_translogclient_test_app COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/translogclient_test.sh
- DEPENDS searchlib_translogclient_test_app COST 100)
+vespa_add_test(NAME searchlib_translogclient_test_app COMMAND searchlib_translogclient_test_app)
vespa_add_executable(searchlib_translog_chunks_test_app TEST
SOURCES
diff --git a/searchlib/src/tests/transactionlog/translogclient_test.cpp b/searchlib/src/tests/transactionlog/translogclient_test.cpp
index de8f9e5c462..5740eeb610d 100644
--- a/searchlib/src/tests/transactionlog/translogclient_test.cpp
+++ b/searchlib/src/tests/transactionlog/translogclient_test.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchlib/transactionlog/translogclient.h>
#include <vespa/searchlib/transactionlog/translogserver.h>
+#include <vespa/searchlib/test/directory_handler.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/objects/identifiable.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
@@ -34,8 +35,8 @@ void fillDomainTest(Session * s1, size_t numPackets, size_t numEntries, size_t e
uint32_t countFiles(const vespalib::string &dir);
void checkFilledDomainTest(Session &s1, size_t numEntries);
bool visitDomainTest(TransLogClient & tls, Session * s1, const vespalib::string & name);
-void createAndFillDomain(const vespalib::string & name, Encoding encoding, size_t preExistingDomains);
-void verifyDomain(const vespalib::string & name);
+void createAndFillDomain(const vespalib::string & dir, const vespalib::string & name, Encoding encoding, size_t preExistingDomains);
+void verifyDomain(const vespalib::string & dir, const vespalib::string & name);
vespalib::string
myhex(const void * b, size_t sz)
@@ -357,7 +358,7 @@ void
fillDomainTest(Session * s1, size_t numPackets, size_t numEntries, size_t entrySize)
{
size_t value(0);
- std::vector<char> entryBuffer(entrySize);
+ std::vector<char> entryBuffer(entrySize);
for(size_t i=0; i < numPackets; i++) {
std::unique_ptr<Packet> p(new Packet(DEFAULT_PACKET_SIZE));
for(size_t j=0; j < numEntries; j++, value++) {
@@ -464,10 +465,11 @@ getMaxSessionRunTime(TransLogServer &tls, const vespalib::string &domain)
return tls.getDomainStats()[domain].maxSessionRunTime.count();
}
-void createAndFillDomain(const vespalib::string & name, Encoding encoding, size_t preExistingDomains)
+void
+createAndFillDomain(const vespalib::string & dir, const vespalib::string & name, Encoding encoding, size_t preExistingDomains)
{
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test13", 18377, ".", fileHeaderContext,
+ TransLogServer tlss(dir, 18377, ".", fileHeaderContext,
createDomainConfig(0x1000000).setEncoding(encoding), 4);
TransLogClient tls("tcp/localhost:18377");
@@ -476,19 +478,21 @@ void createAndFillDomain(const vespalib::string & name, Encoding encoding, size_
fillDomainTest(s1.get(), name);
}
-void verifyDomain(const vespalib::string & name) {
+void
+verifyDomain(const vespalib::string & dir, const vespalib::string & name) {
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test13", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
+ TransLogServer tlss(dir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
TransLogClient tls("tcp/localhost:18377");
auto s1 = openDomainTest(tls, name);
visitDomainTest(tls, s1.get(), name);
}
-}
-TEST("testVisitOverGeneratedDomain") {
+
+void
+testVisitOverGeneratedDomain(const vespalib::string & testDir) {
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test7", 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
+ TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
TransLogClient tls("tcp/localhost:18377");
vespalib::string name("test1");
@@ -502,10 +506,11 @@ TEST("testVisitOverGeneratedDomain") {
EXPECT_GREATER(maxSessionRunTime, 0);
}
-TEST("testVisitOverPreExistingDomain") {
+void
+testVisitOverPreExistingDomain(const vespalib::string & testDir) {
// Depends on Test::testVisitOverGeneratedDomain()
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test7", 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
+ TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
TransLogClient tls("tcp/localhost:18377");
vespalib::string name("test1");
@@ -513,9 +518,10 @@ TEST("testVisitOverPreExistingDomain") {
visitDomainTest(tls, s1.get(), name);
}
-TEST("partialUpdateTest") {
+void
+partialUpdateTest(const vespalib::string & testDir) {
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test7", 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
+ TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
TransLogClient tls("tcp/localhost:18377");
auto s1 = openDomainTest(tls, "test1");
@@ -568,21 +574,33 @@ TEST("partialUpdateTest") {
ASSERT_TRUE( ca3.hasSerial(7) );
}
+}
+
+TEST("testVisitAndUpdates") {
+ test::DirectoryHandler testDir("test7");
+ testVisitOverGeneratedDomain(testDir.getDir());
+ testVisitOverPreExistingDomain(testDir.getDir());
+ partialUpdateTest(testDir.getDir());
+}
+
+
TEST("testCrcVersions") {
+ test::DirectoryHandler testDir("test13");
try {
- createAndFillDomain("ccitt_crc32", Encoding(Encoding::Crc::ccitt_crc32, Encoding::Compression::none), 0);
+ createAndFillDomain(testDir.getDir(),"ccitt_crc32", Encoding(Encoding::Crc::ccitt_crc32, Encoding::Compression::none), 0);
ASSERT_TRUE(false);
} catch (vespalib::IllegalArgumentException & e) {
EXPECT_TRUE(e.getMessage().find("Compression:none is not allowed for the tls") != vespalib::string::npos);
}
- createAndFillDomain("xxh64", Encoding(Encoding::Crc::xxh64, Encoding::Compression::zstd), 0);
+ createAndFillDomain(testDir.getDir(), "xxh64", Encoding(Encoding::Crc::xxh64, Encoding::Compression::zstd), 0);
- verifyDomain("xxh64");
+ verifyDomain(testDir.getDir(), "xxh64");
}
TEST("testRemove") {
+ test::DirectoryHandler testDir("testremove");
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("testremove", 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
+ TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
TransLogClient tls("tcp/localhost:18377");
vespalib::string name("test-delete");
@@ -629,14 +647,15 @@ assertStatus(Session &s, SerialNum expFirstSerial, SerialNum expLastSerial, uint
}
-TEST("test sending a lot of data") {
+void
+ testSendingAlotOfDataSync(const vespalib::string & testDir) {
const unsigned int NUM_PACKETS = 1000;
const unsigned int NUM_ENTRIES = 100;
const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
const vespalib::string MANY("many");
{
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x80000));
+ TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x80000));
TransLogClient tls("tcp/localhost:18377");
createDomainTest(tls, MANY, 0);
@@ -659,7 +678,7 @@ TEST("test sending a lot of data") {
}
{
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
+ TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
TransLogClient tls("tcp/localhost:18377");
auto s1 = openDomainTest(tls, "many");
@@ -680,7 +699,7 @@ TEST("test sending a lot of data") {
}
{
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
+ TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
TransLogClient tls("tcp/localhost:18377");
auto s1 = openDomainTest(tls, MANY);
@@ -701,14 +720,14 @@ TEST("test sending a lot of data") {
}
}
-TEST("test sending a lot of data async") {
+void testSendingAlotOfDataAsync(const vespalib::string & testDir) {
const unsigned int NUM_PACKETS = 1000;
const unsigned int NUM_ENTRIES = 100;
const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
const vespalib::string MANY("many-async");
{
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x80000));
+ TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x80000));
TransLogClient tls("tcp/localhost:18377");
createDomainTest(tls, MANY, 1);
auto s1 = openDomainTest(tls, MANY);
@@ -730,7 +749,7 @@ TEST("test sending a lot of data async") {
}
{
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test8", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
+ TransLogServer tlss(testDir, 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
TransLogClient tls("tcp/localhost:18377");
auto s1 = openDomainTest(tls, MANY);
@@ -751,16 +770,21 @@ TEST("test sending a lot of data async") {
}
}
-
+TEST("test sending a lot of data both sync and async") {
+ test::DirectoryHandler testDir("test8");
+ testSendingAlotOfDataSync(testDir.getDir());
+ testSendingAlotOfDataAsync(testDir.getDir());
+}
TEST("testErase") {
const unsigned int NUM_PACKETS = 1000;
const unsigned int NUM_ENTRIES = 100;
const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+ test::DirectoryHandler testDir("test12");
{
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test12", 18377, ".", fileHeaderContext, createDomainConfig(0x80000));
+ TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x80000));
TransLogClient tls("tcp/localhost:18377");
createDomainTest(tls, "erase", 0);
@@ -769,7 +793,7 @@ TEST("testErase") {
}
{
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test12", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
+ TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
TransLogClient tls("tcp/localhost:18377");
auto s1 = openDomainTest(tls, "erase");
@@ -856,7 +880,8 @@ TEST("testSync") {
const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
DummyFileHeaderContext fileHeaderContext;
- TransLogServer tlss("test9", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
+ test::DirectoryHandler testDir("test9");
+ TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
TransLogClient tls("tcp/localhost:18377");
createDomainTest(tls, "sync", 0);
@@ -877,8 +902,9 @@ TEST("test truncate on version mismatch") {
uint64_t fromOld(0), toOld(0);
size_t countOld(0);
DummyFileHeaderContext fileHeaderContext;
+ test::DirectoryHandler testDir("test11");
{
- TransLogServer tlss("test11", 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
+ TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x1000000));
TransLogClient tls("tcp/localhost:18377");
createDomainTest(tls, "sync", 0);
@@ -890,7 +916,7 @@ TEST("test truncate on version mismatch") {
EXPECT_TRUE(s1->sync(2, syncedTo));
EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES);
}
- FastOS_File f("test11/sync/sync-0000000000000000");
+ FastOS_File f((testDir.getDir() + "/sync/sync-0000000000000000").c_str());
EXPECT_TRUE(f.OpenWriteOnlyExisting());
EXPECT_TRUE(f.SetPosition(f.GetSize()));
@@ -899,7 +925,7 @@ TEST("test truncate on version mismatch") {
EXPECT_EQUAL(static_cast<ssize_t>(sizeof(tmp)), f.Write2(tmp, sizeof(tmp)));
EXPECT_TRUE(f.Close());
{
- TransLogServer tlss("test11", 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
+ TransLogServer tlss(testDir.getDir(), 18377, ".", fileHeaderContext, createDomainConfig(0x10000));
TransLogClient tls("tcp/localhost:18377");
auto s1 = openDomainTest(tls, "sync");
uint64_t from(0), to(0);
@@ -916,15 +942,16 @@ TEST("test truncation after short read") {
const unsigned int NUM_ENTRIES = 1;
const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
const unsigned int ENTRYSIZE = 4080;
- vespalib::string topdir("test10");
+ test::DirectoryHandler topdir("test10");
vespalib::string domain("truncate");
- vespalib::string dir(topdir + "/" + domain);
+ vespalib::string dir(topdir.getDir() + "/" + domain);
vespalib::string tlsspec("tcp/localhost:18377");
+
DomainConfig domainConfig = createDomainConfig(0x10000);
DummyFileHeaderContext fileHeaderContext;
{
- TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, domainConfig);
+ TransLogServer tlss(topdir.getDir(), 18377, ".", fileHeaderContext, domainConfig);
TransLogClient tls(tlsspec);
createDomainTest(tls, domain, 0);
@@ -938,7 +965,7 @@ TEST("test truncation after short read") {
}
EXPECT_EQUAL(2u, countFiles(dir));
{
- TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, domainConfig);
+ TransLogServer tlss(topdir.getDir(), 18377, ".", fileHeaderContext, domainConfig);
TransLogClient tls(tlsspec);
auto s1 = openDomainTest(tls, domain);
checkFilledDomainTest(*s1, TOTAL_NUM_ENTRIES);
@@ -952,7 +979,7 @@ TEST("test truncation after short read") {
trfile.Close();
}
{
- TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, domainConfig);
+ TransLogServer tlss(topdir.getDir(), 18377, ".", fileHeaderContext, domainConfig);
TransLogClient tls(tlsspec);
auto s1 = openDomainTest(tls, domain);
checkFilledDomainTest(*s1, TOTAL_NUM_ENTRIES - 1);
diff --git a/searchlib/src/tests/transactionlog/translogclient_test.sh b/searchlib/src/tests/transactionlog/translogclient_test.sh
deleted file mode 100755
index 50d7c73fd6a..00000000000
--- a/searchlib/src/tests/transactionlog/translogclient_test.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-set -e
-rm -rf test7 test8 test9 test10 test11 test12 test13 testremove
-$VALGRIND ./searchlib_translogclient_test_app
-rm -rf test7 test8 test9 test10 test11 test12 test13 testremove
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
index b68923b90bf..e40717e6375 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
@@ -191,9 +191,9 @@ AttributeHeader::internalExtractTags(const vespalib::GenericHeader &header)
}
AttributeHeader
-AttributeHeader::extractTags(const vespalib::GenericHeader &header)
+AttributeHeader::extractTags(const vespalib::GenericHeader &header, const vespalib::string &file_name)
{
- AttributeHeader result;
+ AttributeHeader result(file_name);
result.internalExtractTags(header);
return result;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.h b/searchlib/src/vespa/searchlib/attribute/attribute_header.h
index 00da28baf80..7c0b8f3084b 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.h
@@ -69,7 +69,7 @@ public:
bool getPredicateParamsSet() const { return _predicateParamsSet; }
bool getCollectionTypeParamsSet() const { return _collectionTypeParamsSet; }
const std::optional<HnswIndexParams>& get_hnsw_index_params() const { return _hnsw_index_params; }
- static AttributeHeader extractTags(const vespalib::GenericHeader &header);
+ static AttributeHeader extractTags(const vespalib::GenericHeader &header, const vespalib::string &file_name);
void addTags(vespalib::GenericHeader &header) const;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
index 6c929ad5981..8bc28abc238 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
@@ -311,6 +311,165 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(
}
template <>
+bool
+EnumStoreDictionary<EnumTree>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)>, const EntryRefFilter&)
+{
+ LOG_ABORT("should not be reached");
+}
+
+namespace {
+
+template <typename HashDictionaryT>
+class ChangeWriterBase
+{
+protected:
+ HashDictionaryT* _hash_dict;
+ static constexpr bool has_hash_dictionary = true;
+ ChangeWriterBase()
+ : _hash_dict(nullptr)
+ {
+ }
+public:
+ void set_hash_dict(HashDictionaryT &hash_dict) { _hash_dict = &hash_dict; }
+};
+
+template <>
+class ChangeWriterBase<vespalib::datastore::NoHashDictionary>
+{
+protected:
+ static constexpr bool has_hash_dictionary = false;
+ ChangeWriterBase() = default;
+};
+
+template <typename HashDictionaryT>
+class ChangeWriter : public ChangeWriterBase<HashDictionaryT> {
+ using Parent = ChangeWriterBase<HashDictionaryT>;
+ using Parent::has_hash_dictionary;
+ std::vector<std::pair<EntryRef,uint32_t*>> _tree_refs;
+public:
+ ChangeWriter(uint32_t capacity);
+ ~ChangeWriter();
+ bool write(const std::vector<EntryRef>& refs);
+ void emplace_back(EntryRef key, uint32_t& tree_ref) { _tree_refs.emplace_back(std::make_pair(key, &tree_ref)); }
+};
+
+template <typename HashDictionaryT>
+ChangeWriter<HashDictionaryT>::ChangeWriter(uint32_t capacity)
+ : ChangeWriterBase<HashDictionaryT>(),
+ _tree_refs()
+{
+ _tree_refs.reserve(capacity);
+}
+
+template <typename HashDictionaryT>
+ChangeWriter<HashDictionaryT>::~ChangeWriter() = default;
+
+template <typename HashDictionaryT>
+bool
+ChangeWriter<HashDictionaryT>::write(const std::vector<EntryRef> &refs)
+{
+ bool changed = false;
+ assert(refs.size() == _tree_refs.size());
+ auto tree_ref = _tree_refs.begin();
+ for (auto ref : refs) {
+ EntryRef old_ref(*tree_ref->second);
+ if (ref != old_ref) {
+ if (!changed) {
+ // Note: Needs review when porting to other platforms
+ // Assumes that other CPUs observes stores from this CPU in order
+ std::atomic_thread_fence(std::memory_order_release);
+ changed = true;
+ }
+ *tree_ref->second = ref.ref();
+ if constexpr (has_hash_dictionary) {
+ auto find_result = this->_hash_dict->find(this->_hash_dict->get_default_comparator(), tree_ref->first);
+ assert(find_result != nullptr && find_result->first.load_relaxed() == tree_ref->first);
+ assert(find_result->second.load_relaxed() == old_ref);
+ find_result->second.store_release(ref);
+ }
+ }
+ ++tree_ref;
+ }
+ assert(tree_ref == _tree_refs.end());
+ _tree_refs.clear();
+ return changed;
+}
+
+}
+
+template <typename BTreeDictionaryT, typename HashDictionaryT>
+bool
+EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter)
+{
+ if constexpr (has_btree_dictionary) {
+ std::vector<EntryRef> refs;
+ refs.reserve(1024);
+ bool changed = false;
+ ChangeWriter<HashDictionaryT> change_writer(refs.capacity());
+ if constexpr (has_hash_dictionary) {
+ change_writer.set_hash_dict(this->_hash_dict);
+ }
+ auto& dict = this->_btree_dict;
+ for (auto itr = dict.begin(); itr.valid(); ++itr) {
+ EntryRef ref(itr.getData());
+ if (ref.valid()) {
+ if (filter.has(ref)) {
+ refs.emplace_back(ref);
+ change_writer.emplace_back(itr.getKey(), itr.getWData());
+ if (refs.size() >= refs.capacity()) {
+ normalize(refs);
+ changed |= change_writer.write(refs);
+ refs.clear();
+ }
+ }
+ }
+ }
+ if (!refs.empty()) {
+ normalize(refs);
+ changed |= change_writer.write(refs);
+ }
+ return changed;
+ } else {
+ return this->_hash_dict.normalize_values(normalize, filter);
+ }
+}
+
+template <>
+void
+EnumStoreDictionary<EnumTree>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)>, const EntryRefFilter&)
+{
+ LOG_ABORT("should not be reached");
+}
+
+template <typename BTreeDictionaryT, typename HashDictionaryT>
+void
+EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter)
+{
+ if constexpr (has_btree_dictionary) {
+ std::vector<EntryRef> refs;
+ refs.reserve(1024);
+ auto& dict = this->_btree_dict;
+ for (auto itr = dict.begin(); itr.valid(); ++itr) {
+ EntryRef ref(itr.getData());
+ if (ref.valid()) {
+ if (filter.has(ref)) {
+ refs.emplace_back(ref);
+ if (refs.size() >= refs.capacity()) {
+ callback(refs);
+ refs.clear();
+ }
+ }
+ }
+ }
+ if (!refs.empty()) {
+ callback(refs);
+ }
+ } else {
+ this->_hash_dict.foreach_value(callback, filter);
+ }
+}
+
+template <>
const EnumPostingTree &
EnumStoreDictionary<EnumTree>::get_posting_dictionary() const
{
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
index 4d0509c0eb1..db1176c5484 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
@@ -16,6 +16,7 @@ template <typename BTreeDictionaryT, typename HashDictionaryT = vespalib::datast
class EnumStoreDictionary : public vespalib::datastore::UniqueStoreDictionary<BTreeDictionaryT, IEnumStoreDictionary, HashDictionaryT> {
protected:
using EntryRef = IEnumStoreDictionary::EntryRef;
+ using EntryRefFilter = IEnumStoreDictionary::EntryRefFilter;
using Index = IEnumStoreDictionary::Index;
using BTreeDictionaryType = BTreeDictionaryT;
using EntryComparator = IEnumStoreDictionary::EntryComparator;
@@ -54,6 +55,8 @@ public:
void clear_all_posting_lists(std::function<void(EntryRef)> clearer) override;
void update_posting_list(Index idx, const EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) override;
bool normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) override;
+ bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) override;
+ void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) override;
const EnumPostingTree& get_posting_dictionary() const override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
index a8cf6881b86..a9716ec5d05 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
@@ -30,6 +30,7 @@ class IEnumStoreDictionary : public vespalib::datastore::IUniqueStoreDictionary
public:
using EntryRef = vespalib::datastore::EntryRef;
using EntryComparator = vespalib::datastore::EntryComparator;
+ using EntryRefFilter = vespalib::datastore::EntryRefFilter;
using EnumVector = IEnumStore::EnumVector;
using Index = IEnumStore::Index;
using IndexList = IEnumStore::IndexList;
@@ -52,7 +53,25 @@ public:
virtual Index remap_index(Index idx) = 0;
virtual void clear_all_posting_lists(std::function<void(EntryRef)> clearer) = 0;
virtual void update_posting_list(Index idx, const EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) = 0;
+ /*
+ * Scan dictionary and call normalize function for each value. If
+ * returned value is different then write back the modified value to
+ * the dictionary. Only used by unit tests.
+ */
virtual bool normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) = 0;
+ /*
+ * Scan dictionary and call normalize function for batches of values
+ * that pass the filter. Write back modified values to the dictionary.
+ * Used by compaction of posting lists when moving short arrays,
+ * bitvectors or btree roots.
+ */
+ virtual bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) = 0;
+ /*
+ * Scan dictionary and call callback function for batches of values
+ * that pass the filter. Used by compaction of posting lists when
+ * moving btree nodes.
+ */
+ virtual void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) = 0;
virtual const EnumPostingTree& get_posting_dictionary() const = 0;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
index b114a355bb4..8790bdd9885 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
@@ -30,13 +30,17 @@ remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v, att
v.logEnumStoreEvent("compactfixup", "drain");
{
AttributeVector::EnumModifier enum_guard(v.getEnumModifier());
+ auto& filter = remapper.get_entry_ref_filter();
v.logEnumStoreEvent("compactfixup", "start");
for (uint32_t doc = 0; doc < v.getNumDocs(); ++doc) {
vespalib::ConstArrayRef<WeightedIndex> indicesRef(multi_value_mapping.get(doc));
WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend());
for (uint32_t i = 0; i < indices.size(); ++i) {
- EnumIndex oldIndex = indices[i].value();
- indices[i] = WeightedIndex(remapper.remap(oldIndex), indices[i].weight());
+ EnumIndex ref = indices[i].value();
+ if (ref.valid() && filter.has(ref)) {
+ ref = remapper.remap(ref);
+ }
+ indices[i] = WeightedIndex(ref, indices[i].weight());
}
std::atomic_thread_fence(std::memory_order_release);
multi_value_mapping.replace(doc, indices);
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index 3451c2b0456..8ed8a0cfbee 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -7,11 +7,13 @@
#include <vespa/vespalib/btree/btreeiterator.hpp>
#include <vespa/vespalib/btree/btreerootbase.cpp>
#include <vespa/vespalib/datastore/datastore.hpp>
+#include <vespa/vespalib/datastore/entry_ref_filter.h>
#include <vespa/vespalib/datastore/buffer_type.hpp>
namespace search::attribute {
using vespalib::btree::BTreeNoLeafData;
+using vespalib::datastore::EntryRefFilter;
// #define FORCE_BITVECTORS
@@ -127,45 +129,47 @@ PostingStore<DataT>::removeSparseBitVectors()
}
}
if (needscan) {
- res = _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
- { return consider_remove_sparse_bitvector(posting_idx); });
+ EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits);
+ filter.add_buffers(_bvType.get_active_buffers());
+ res = _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
+ { consider_remove_sparse_bitvector(refs); },
+ filter);
}
return res;
}
template <typename DataT>
-typename PostingStore<DataT>::EntryRef
-PostingStore<DataT>::consider_remove_sparse_bitvector(EntryRef ref)
+void
+PostingStore<DataT>::consider_remove_sparse_bitvector(std::vector<EntryRef>& refs)
{
- if (!ref.valid() || !isBitVector(getTypeId(EntryRef(ref)))) {
- return ref;
- }
- RefType iRef(ref);
- uint32_t typeId = getTypeId(iRef);
- assert(isBitVector(typeId));
- assert(_bvs.find(ref.ref() )!= _bvs.end());
- BitVectorEntry *bve = getWBitVectorEntry(iRef);
- BitVector &bv = *bve->_bv.get();
- uint32_t docFreq = bv.countTrueBits();
- if (bve->_tree.valid()) {
- RefType iRef2(bve->_tree);
- assert(isBTree(iRef2));
- const BTreeType *tree = getTreeEntry(iRef2);
- assert(tree->size(_allocator) == docFreq);
- (void) tree;
- }
- if (docFreq < _minBvDocFreq) {
- dropBitVector(ref);
- if (ref.valid()) {
+ for (auto& ref : refs) {
+ RefType iRef(ref);
+ assert(iRef.valid());
+ uint32_t typeId = getTypeId(iRef);
+ assert(isBitVector(typeId));
+ assert(_bvs.find(iRef.ref()) != _bvs.end());
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ BitVector &bv = *bve->_bv.get();
+ uint32_t docFreq = bv.countTrueBits();
+ if (bve->_tree.valid()) {
+ RefType iRef2(bve->_tree);
+ assert(isBTree(iRef2));
+ const BTreeType *tree = getTreeEntry(iRef2);
+ assert(tree->size(_allocator) == docFreq);
+ (void) tree;
+ }
+ if (docFreq < _minBvDocFreq) {
+ dropBitVector(ref);
iRef = ref;
- typeId = getTypeId(iRef);
- if (isBTree(typeId)) {
- BTreeType *tree = getWTreeEntry(iRef);
- normalizeTree(ref, tree, false);
+ if (iRef.valid()) {
+ typeId = getTypeId(iRef);
+ if (isBTree(typeId)) {
+ BTreeType *tree = getWTreeEntry(iRef);
+ normalizeTree(ref, tree, false);
+ }
}
}
}
- return ref;
}
template <typename DataT>
@@ -647,74 +651,75 @@ PostingStore<DataT>::update_stat()
template <typename DataT>
void
-PostingStore<DataT>::move_btree_nodes(EntryRef ref)
+PostingStore<DataT>::move_btree_nodes(const std::vector<EntryRef>& refs)
{
- if (ref.valid()) {
+ for (auto ref : refs) {
RefType iRef(ref);
+ assert(iRef.valid());
uint32_t typeId = getTypeId(iRef);
uint32_t clusterSize = getClusterSize(typeId);
- if (clusterSize == 0) {
- if (isBitVector(typeId)) {
- BitVectorEntry *bve = getWBitVectorEntry(iRef);
- RefType iRef2(bve->_tree);
- if (iRef2.valid()) {
- assert(isBTree(iRef2));
- BTreeType *tree = getWTreeEntry(iRef2);
- tree->move_nodes(_allocator);
- }
- } else {
- BTreeType *tree = getWTreeEntry(iRef);
+ assert(clusterSize == 0);
+ if (isBitVector(typeId)) {
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ BTreeType *tree = getWTreeEntry(iRef2);
tree->move_nodes(_allocator);
}
+ } else {
+ assert(isBTree(typeId));
+ BTreeType *tree = getWTreeEntry(iRef);
+ tree->move_nodes(_allocator);
}
}
}
template <typename DataT>
-typename PostingStore<DataT>::EntryRef
-PostingStore<DataT>::move(EntryRef ref)
+void
+PostingStore<DataT>::move(std::vector<EntryRef>& refs)
{
- if (!ref.valid()) {
- return EntryRef();
- }
- RefType iRef(ref);
- uint32_t typeId = getTypeId(iRef);
- uint32_t clusterSize = getClusterSize(typeId);
- if (clusterSize == 0) {
- if (isBitVector(typeId)) {
- BitVectorEntry *bve = getWBitVectorEntry(iRef);
- RefType iRef2(bve->_tree);
- if (iRef2.valid()) {
- assert(isBTree(iRef2));
- if (_store.getCompacting(iRef2)) {
- BTreeType *tree = getWTreeEntry(iRef2);
- auto ref_and_ptr = allocBTreeCopy(*tree);
- tree->prepare_hold();
- bve->_tree = ref_and_ptr.ref;
+ for (auto& ref : refs) {
+ RefType iRef(ref);
+ assert(iRef.valid());
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ if (_store.getCompacting(iRef2)) {
+ BTreeType *tree = getWTreeEntry(iRef2);
+ auto ref_and_ptr = allocBTreeCopy(*tree);
+ tree->prepare_hold();
+ // Note: Needs review when porting to other platforms
+ // Assumes that other CPUs observes stores from this CPU in order
+ std::atomic_thread_fence(std::memory_order_release);
+ bve->_tree = ref_and_ptr.ref;
+ }
}
+ if (_store.getCompacting(iRef)) {
+ auto new_ref = allocBitVectorCopy(*bve).ref;
+ _bvs.erase(iRef.ref());
+ _bvs.insert(new_ref.ref());
+ ref = new_ref;
+ }
+ } else {
+ assert(isBTree(typeId));
+ assert(_store.getCompacting(iRef));
+ BTreeType *tree = getWTreeEntry(iRef);
+ auto ref_and_ptr = allocBTreeCopy(*tree);
+ tree->prepare_hold();
+ ref = ref_and_ptr.ref;
}
- if (!_store.getCompacting(ref)) {
- return ref;
- }
- auto new_ref = allocBitVectorCopy(*bve).ref;
- _bvs.erase(ref.ref());
- _bvs.insert(new_ref.ref());
- return new_ref;
} else {
- if (!_store.getCompacting(ref)) {
- return ref;
- }
- BTreeType *tree = getWTreeEntry(iRef);
- auto ref_and_ptr = allocBTreeCopy(*tree);
- tree->prepare_hold();
- return ref_and_ptr.ref;
+ assert(_store.getCompacting(iRef));
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ ref = allocKeyDataCopy(shortArray, clusterSize).ref;
}
}
- if (!_store.getCompacting(ref)) {
- return ref;
- }
- const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
- return allocKeyDataCopy(shortArray, clusterSize).ref;
}
template <typename DataT>
@@ -722,11 +727,12 @@ void
PostingStore<DataT>::compact_worst_btree_nodes()
{
auto to_hold = this->start_compact_worst_btree_nodes();
- _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
- {
- move_btree_nodes(posting_idx);
- return posting_idx;
- });
+ EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits);
+ // Only look at buffers containing bitvectors and btree roots
+ filter.add_buffers(this->_treeType.get_active_buffers());
+ filter.add_buffers(_bvType.get_active_buffers());
+ _dictionary.foreach_posting_list([this](const std::vector<EntryRef>& refs)
+ { move_btree_nodes(refs); }, filter);
this->finish_compact_worst_btree_nodes(to_hold);
}
@@ -735,8 +741,23 @@ void
PostingStore<DataT>::compact_worst_buffers()
{
auto to_hold = this->start_compact_worst_buffers();
- _dictionary.normalize_posting_lists([this](EntryRef posting_idx) -> EntryRef
- { return move(posting_idx); });
+ bool compact_btree_roots = false;
+ EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits);
+ filter.add_buffers(to_hold);
+ // Start with looking at buffers being compacted
+ for (uint32_t buffer_id : to_hold) {
+ if (isBTree(_store.getBufferState(buffer_id).getTypeId())) {
+ compact_btree_roots = true;
+ }
+ }
+ if (compact_btree_roots) {
+ // If we are compacting btree roots then we also have to look at bitvector
+ // buffers
+ filter.add_buffers(_bvType.get_active_buffers());
+ }
+ _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
+ { return move(refs); },
+ filter);
this->finishCompact(to_hold);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index a0f0be1c430..2b119a55158 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -89,6 +89,7 @@ public:
using Parent::getWTreeEntry;
using Parent::getTreeEntry;
using Parent::getKeyDataEntry;
+ using Parent::isBTree;
using Parent::clusterLimit;
using Parent::allocBTree;
using Parent::allocBTreeCopy;
@@ -105,10 +106,8 @@ public:
~PostingStore();
bool removeSparseBitVectors() override;
- EntryRef consider_remove_sparse_bitvector(EntryRef ref);
+ void consider_remove_sparse_bitvector(std::vector<EntryRef> &refs);
static bool isBitVector(uint32_t typeId) { return typeId == BUFFERTYPE_BITVECTOR; }
- static bool isBTree(uint32_t typeId) { return typeId == BUFFERTYPE_BTREE; }
- bool isBTree(RefType ref) const { return isBTree(getTypeId(ref)); }
void applyNew(EntryRef &ref, AddIter a, AddIter ae);
@@ -188,8 +187,8 @@ public:
vespalib::MemoryUsage getMemoryUsage() const;
vespalib::MemoryUsage update_stat();
- void move_btree_nodes(EntryRef ref);
- EntryRef move(EntryRef ref);
+ void move_btree_nodes(const std::vector<EntryRef> &refs);
+ void move(std::vector<EntryRef>& refs);
void compact_worst_btree_nodes();
void compact_worst_buffers();
diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp
index d9024af724b..6268a6da701 100644
--- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp
@@ -194,7 +194,7 @@ PredicateAttribute::onLoad(vespalib::Executor *)
buffer.moveFreeToData(size);
const GenericHeader &header = loaded_buffer->getHeader();
- auto attributeHeader = attribute::AttributeHeader::extractTags(header);
+ auto attributeHeader = attribute::AttributeHeader::extractTags(header, getBaseFileName());
uint32_t version = attributeHeader.getVersion();
setCreateSerialNum(attributeHeader.getCreateSerialNum());
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
index 4323e57f6b1..18805a7b20f 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
@@ -49,13 +49,16 @@ SingleValueEnumAttributeBase::remap_enum_store_refs(const EnumIndexRemapper& rem
{
// update _enumIndices with new EnumIndex values after enum store has been compacted.
v.logEnumStoreEvent("reenumerate", "reserved");
- auto new_indexes = std::make_unique<vespalib::Array<EnumIndex>>();
- new_indexes->reserve(_enumIndices.capacity());
+ vespalib::Array<EnumIndex> new_indexes;
+ new_indexes.reserve(_enumIndices.capacity());
v.logEnumStoreEvent("reenumerate", "start");
+ auto& filter = remapper.get_entry_ref_filter();
for (uint32_t i = 0; i < _enumIndices.size(); ++i) {
- EnumIndex old_index = _enumIndices[i];
- EnumIndex new_index = remapper.remap(old_index);
- new_indexes->push_back_fast(new_index);
+ EnumIndex ref = _enumIndices[i];
+ if (ref.valid() && filter.has(ref)) {
+ ref = remapper.remap(ref);
+ }
+ new_indexes.push_back_fast(ref);
}
v.logEnumStoreEvent("compactfixup", "drain");
{
diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.cpp b/searchlib/src/vespa/searchlib/docstore/compacter.cpp
index 38f3fbef0b0..26fb79f8a4e 100644
--- a/searchlib/src/vespa/searchlib/docstore/compacter.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/compacter.cpp
@@ -26,7 +26,7 @@ BucketCompacter::BucketCompacter(size_t maxSignificantBucketBits, const Compress
_bucketizer(bucketizer),
_writeCount(0),
_maxBucketGuardDuration(vespalib::duration::zero()),
- _lastSample(),
+ _lastSample(vespalib::steady_clock::now()),
_lock(),
_backingMemory(Alloc::alloc(0x40000000), &_lock),
_tmpStore(),
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index 0f4326aac40..5217c44df97 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -75,7 +75,7 @@ BlobSequenceReader::BlobSequenceReader(AttributeVector& attr, bool has_index)
: ReaderBase(attr),
_use_index_file(has_index && has_index_file(attr) &&
can_use_index_save_file(attr.getConfig(),
- search::attribute::AttributeHeader::extractTags(getDatHeader()))),
+ search::attribute::AttributeHeader::extractTags(getDatHeader(), attr.getBaseFileName()))),
_index_file(_use_index_file ?
attribute::LoadUtils::openFile(attr, DenseTensorAttributeSaver::index_file_suffix()) :
std::unique_ptr<Fast_BufferedFile>())
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
index d3c2998333a..86090f2ac92 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
@@ -79,12 +79,6 @@ DenseTensorStore::~DenseTensorStore()
_store.dropBuffers();
}
-const void *
-DenseTensorStore::getRawBuffer(RefType ref) const
-{
- return _store.getEntryArray<char>(ref, _bufferType.getArraySize());
-}
-
namespace {
void clearPadAreaAfterBuffer(char *buffer, size_t bufSize, size_t alignedBufSize) {
@@ -136,15 +130,6 @@ DenseTensorStore::getTensor(EntryRef ref) const
return std::make_unique<vespalib::eval::DenseValueView>(_type, cells_ref);
}
-vespalib::eval::TypedCells
-DenseTensorStore::get_typed_cells(EntryRef ref) const
-{
- if (!ref.valid()) {
- return vespalib::eval::TypedCells(&_emptySpace[0], _type.cell_type(), getNumCells());
- }
- return vespalib::eval::TypedCells(getRawBuffer(ref), _type.cell_type(), getNumCells());
-}
-
template <class TensorType>
TensorStore::EntryRef
DenseTensorStore::setDenseTensor(const TensorType &tensor)
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
index 3b7cb71863e..06492596f70 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
@@ -50,12 +50,9 @@ private:
ValueType _type; // type of dense tensor
std::vector<char> _emptySpace;
- size_t unboundCells(const void *buffer) const;
-
template <class TensorType>
TensorStore::EntryRef
setDenseTensor(const TensorType &tensor);
-
public:
DenseTensorStore(const ValueType &type, std::unique_ptr<vespalib::alloc::MemoryAllocator> allocator);
~DenseTensorStore() override;
@@ -63,12 +60,17 @@ public:
const ValueType &type() const { return _type; }
size_t getNumCells() const { return _tensorSizeCalc._numCells; }
size_t getBufSize() const { return _tensorSizeCalc.bufSize(); }
- const void *getRawBuffer(RefType ref) const;
+ const void *getRawBuffer(RefType ref) const {
+ return _store.getEntryArray<char>(ref, _bufferType.getArraySize());
+ }
vespalib::datastore::Handle<char> allocRawBuffer();
void holdTensor(EntryRef ref) override;
EntryRef move(EntryRef ref) override;
std::unique_ptr<vespalib::eval::Value> getTensor(EntryRef ref) const;
- vespalib::eval::TypedCells get_typed_cells(EntryRef ref) const;
+ vespalib::eval::TypedCells get_typed_cells(EntryRef ref) const {
+ return vespalib::eval::TypedCells(ref.valid() ? getRawBuffer(ref) : &_emptySpace[0],
+ _type.cell_type(), getNumCells());
+ }
EntryRef setTensor(const vespalib::eval::Value &tensor);
// The following method is meant to be used only for unit tests.
uint32_t getArraySize() const { return _bufferType.getArraySize(); }
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index 7f9f20e07c4..43596478a6f 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -43,4 +43,13 @@ HammingDistance::calc(const vespalib::eval::TypedCells& lhs,
}
}
+double
+HammingDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs,
+ double) const
+{
+ // consider optimizing:
+ return calc(lhs, rhs);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index f0b7b159b90..c64fc5b532d 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -15,7 +15,7 @@ namespace search::tensor {
* or (for int8 cells, aka binary data only)
* "number of bits that are different"
*/
-class HammingDistance : public DistanceFunction {
+class HammingDistance final : public DistanceFunction {
public:
HammingDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
@@ -26,13 +26,7 @@ public:
double score = 1.0 / (1.0 + distance);
return score;
}
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double) const override
- {
- // consider optimizing:
- return calc(lhs, rhs);
- }
+ double calc_with_limit(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs, double) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp
index aa49a2d2954..7fa87f3ec09 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp
@@ -56,11 +56,13 @@ Domain::Domain(const string &domainName, const string & baseDir, Executor & exec
_fileHeaderContext(fileHeaderContext),
_markedDeleted(false)
{
- int retval(0);
- if ((retval = makeDirectory(_baseDir.c_str())) != 0) {
+ assert(_config.getEncoding().getCompression() != Encoding::Compression::none);
+ int retval = makeDirectory(_baseDir.c_str());
+ if (retval != 0) {
throw runtime_error(fmt("Failed creating basedirectory %s r(%d), e(%d)", _baseDir.c_str(), retval, errno));
}
- if ((retval = makeDirectory(dir().c_str())) != 0) {
+ retval = makeDirectory(dir().c_str());
+ if (retval != 0) {
throw runtime_error(fmt("Failed creating domaindir %s r(%d), e(%d)", dir().c_str(), retval, errno));
}
SerialNumList partIdVector = scanDir();
@@ -76,8 +78,7 @@ Domain::Domain(const string &domainName, const string & baseDir, Executor & exec
}
pending.waitForZeroRefCount();
if (_parts.empty() || _parts.crbegin()->second->isClosed()) {
- _parts[lastPart] = std::make_shared<DomainPart>(_name, dir(), lastPart, _config.getEncoding(),
- _config.getCompressionlevel(), _fileHeaderContext, false);
+ _parts[lastPart] = std::make_shared<DomainPart>(_name, dir(), lastPart, _fileHeaderContext, false);
vespalib::File::sync(dir());
}
_lastSerial = end();
@@ -86,13 +87,13 @@ Domain::Domain(const string &domainName, const string & baseDir, Executor & exec
Domain &
Domain::setConfig(const DomainConfig & cfg) {
_config = cfg;
+ assert(_config.getEncoding().getCompression() != Encoding::Compression::none);
return *this;
}
void
Domain::addPart(SerialNum partId, bool isLastPart) {
- auto dp = std::make_shared<DomainPart>(_name, dir(), partId, _config.getEncoding(),
- _config.getCompressionlevel(), _fileHeaderContext, isLastPart);
+ auto dp = std::make_shared<DomainPart>(_name, dir(), partId, _fileHeaderContext, isLastPart);
if (dp->size() == 0) {
// Only last domain part is allowed to be truncated down to
// empty size.
@@ -331,8 +332,7 @@ Domain::optionallyRotateFile(SerialNum serialNum) {
triggerSyncNow({});
waitPendingSync(_syncMonitor, _syncCond, _pendingSync);
dp->close();
- dp = std::make_shared<DomainPart>(_name, dir(), serialNum, _config.getEncoding(),
- _config.getCompressionlevel(), _fileHeaderContext, false);
+ dp = std::make_shared<DomainPart>(_name, dir(), serialNum, _fileHeaderContext, false);
{
std::lock_guard guard(_lock);
_parts[serialNum] = dp;
@@ -399,17 +399,16 @@ Domain::commitChunk(std::unique_ptr<CommitChunk> chunk, const UniqueLock & chunk
}));
}
+
+
void
Domain::doCommit(std::unique_ptr<CommitChunk> chunk) {
const Packet & packet = chunk->getPacket();
if (packet.empty()) return;
-
- vespalib::nbostream_longlivedbuf is(packet.getHandle().data(), packet.getHandle().size());
- Packet::Entry entry;
- entry.deserialize(is);
- assert(entry.serial() == packet.range().from());
- DomainPart::SP dp = optionallyRotateFile(entry.serial());
- dp->commit(entry.serial(), packet);
+
+ SerializedChunk serialized(packet, _config.getEncoding(), _config.getCompressionlevel());
+ DomainPart::SP dp = optionallyRotateFile(packet.range().from());
+ dp->commit(serialized);
if (_config.getFSyncOnCommit()) {
dp->sync();
}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
index 3dad67df177..2ca2f15545d 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
@@ -247,11 +247,9 @@ DomainPart::buildPacketMapping(bool allowTruncate)
return currPos;
}
-DomainPart::DomainPart(const string & name, const string & baseDir, SerialNum s, Encoding encoding,
- uint8_t compressionLevel, const FileHeaderContext &fileHeaderContext, bool allowTruncate)
- : _encoding(encoding),
- _compressionLevel(compressionLevel),
- _lock(),
+DomainPart::DomainPart(const string & name, const string & baseDir, SerialNum s,
+ const FileHeaderContext &fileHeaderContext, bool allowTruncate)
+ : _lock(),
_fileLock(),
_range(s),
_sz(0),
@@ -379,35 +377,21 @@ DomainPart::erase(SerialNum to)
}
void
-DomainPart::commit(SerialNum firstSerial, const Packet &packet)
+DomainPart::commit(const SerializedChunk & serialized)
{
+ SerialNumRange range = serialized.range();
+
int64_t firstPos(byteSize());
- nbostream_longlivedbuf h(packet.getHandle().data(), packet.getHandle().size());
+ assert(_range.to() < range.to());
+ _sz += serialized.getNumEntries();
+ _range.to(range.to());
if (_range.from() == 0) {
- _range.from(firstSerial);
- }
- IChunk::UP chunk = IChunk::create(_encoding, _compressionLevel);
- for (size_t i(0); h.size() > 0; i++) {
- //LOG(spam,
- //"Pos(%d) Len(%d), Lim(%d), Remaining(%d)",
- //h.getPos(), h.getLength(), h.getLimit(), h.getRemaining());
- Packet::Entry entry;
- entry.deserialize(h);
- if (_range.to() < entry.serial()) {
- chunk->add(entry);
- assert(_encoding.getCompression() != Encoding::Compression::none);
- _sz++;
- _range.to(entry.serial());
- } else {
- throw runtime_error(fmt("Incoming serial number(%" PRIu64 ") must be bigger than the last one (%" PRIu64 ").",
- entry.serial(), _range.to()));
- }
- }
- if ( ! chunk->getEntries().empty()) {
- write(*_transLog, *chunk);
+ _range.from(range.from());
}
+
+ write(*_transLog, range, serialized.getData());
std::lock_guard guard(_lock);
- _skipList.emplace_back(firstSerial, firstPos);
+ _skipList.emplace_back(range.from(), firstPos);
}
void
@@ -442,26 +426,15 @@ DomainPart::visit(FastOS_FileInterface &file, SerialNumRange &r, Packet &packet)
}
void
-DomainPart::write(FastOS_FileInterface &file, const IChunk & chunk)
+DomainPart::write(FastOS_FileInterface &file, SerialNumRange range, vespalib::ConstBufferRef buf)
{
- nbostream os;
- size_t begin = os.wp();
- os << _encoding.getRaw(); // Placeholder for encoding
- os << uint32_t(0); // Placeholder for size
- Encoding realEncoding = chunk.encode(os);
- size_t end = os.wp();
- os.wp(0);
- os << realEncoding.getRaw(); //Patching real encoding
- os << uint32_t(end - (begin + sizeof(uint32_t) + sizeof(uint8_t))); // Patching actual size.
- os.wp(end);
std::lock_guard guard(_writeLock);
- if ( ! file.CheckedWrite(os.data(), os.size()) ) {
- throw runtime_error(handleWriteError("Failed writing the entry.", file, byteSize(), chunk.range(), os.size()));
+ if ( ! file.CheckedWrite(buf.data(), buf.size()) ) {
+ throw runtime_error(handleWriteError("Failed writing the entry.", file, byteSize(), range, buf.size()));
}
- LOG(debug, "Wrote chunk with %zu entries and %zu bytes, range[%" PRIu64 ", %" PRIu64 "] encoding(wanted=%x, real=%x)",
- chunk.getEntries().size(), os.size(), chunk.range().from(), chunk.range().to(), _encoding.getRaw(), realEncoding.getRaw());
- _writtenSerial = chunk.range().to();
- _byteSize.fetch_add(os.size(), std::memory_order_release);
+ LOG(debug, "Wrote chunk with and %zu bytes, range[%" PRIu64 ", %" PRIu64 "]", buf.size(), range.from(), range.to());
+ _writtenSerial = range.to();
+ _byteSize.fetch_add(buf.size(), std::memory_order_release);
}
bool
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.h b/searchlib/src/vespa/searchlib/transactionlog/domainpart.h
index 9ab0db54391..ea5290c433b 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/domainpart.h
+++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.h
@@ -19,13 +19,13 @@ public:
using SP = std::shared_ptr<DomainPart>;
DomainPart(const DomainPart &) = delete;
DomainPart& operator=(const DomainPart &) = delete;
- DomainPart(const vespalib::string &name, const vespalib::string &baseDir, SerialNum s, Encoding defaultEncoding,
- uint8_t compressionLevel, const common::FileHeaderContext &FileHeaderContext, bool allowTruncate);
+ DomainPart(const vespalib::string &name, const vespalib::string &baseDir, SerialNum s,
+ const common::FileHeaderContext &FileHeaderContext, bool allowTruncate);
~DomainPart();
const vespalib::string &fileName() const { return _fileName; }
- void commit(SerialNum firstSerial, const Packet &packet);
+ void commit(const SerializedChunk & serialized);
bool erase(SerialNum to);
bool visit(FastOS_FileInterface &file, SerialNumRange &r, Packet &packet);
bool close();
@@ -49,7 +49,7 @@ private:
static Packet readPacket(FastOS_FileInterface & file, SerialNumRange wanted, size_t targetSize, bool allowTruncate);
static bool read(FastOS_FileInterface &file, IChunk::UP & chunk, Alloc &buf, bool allowTruncate);
- void write(FastOS_FileInterface &file, const IChunk & entry);
+ void write(FastOS_FileInterface &file, SerialNumRange range, vespalib::ConstBufferRef buf);
void writeHeader(const common::FileHeaderContext &fileHeaderContext);
class SkipInfo
@@ -69,8 +69,6 @@ private:
SerialNum _id;
uint64_t _pos;
};
- const Encoding _encoding;
- const uint8_t _compressionLevel;
std::mutex _lock;
std::mutex _fileLock;
SerialNumRange _range;
diff --git a/searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp b/searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp
index ee1631ea8c2..e3d98cd576d 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/ichunk.cpp
@@ -8,6 +8,9 @@
#include <cassert>
#include <ostream>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.transactionlog.ichunk");
+
using std::make_unique;
using vespalib::make_string_short::fmt;
using vespalib::nbostream_longlivedbuf;
@@ -115,4 +118,46 @@ std::ostream &
operator << (std::ostream & os, Encoding e) {
return os << "crc=" << e.getCrc() << " compression=" << e.getCompression();
}
+
+void
+encode(vespalib::nbostream & os, const IChunk & chunk, Encoding encoding) {
+ size_t begin = os.wp();
+ os << encoding.getRaw(); // Placeholder for encoding
+ os << uint32_t(0); // Placeholder for size
+ Encoding realEncoding = chunk.encode(os);
+ size_t end = os.wp();
+ os.wp(0);
+ os << realEncoding.getRaw(); //Patching real encoding
+ os << uint32_t(end - (begin + sizeof(uint32_t) + sizeof(uint8_t))); // Patching actual size.
+ os.wp(end);
+ SerialNumRange range = chunk.range();
+ LOG(spam, "Encoded chunk with %zu entries and %zu bytes, range[%" PRIu64 ", %" PRIu64 "] encoding(wanted=%x, real=%x)",
+ chunk.getEntries().size(), os.size(), range.from(), range.to(), encoding.getRaw(), realEncoding.getRaw());
+}
+
+SerializedChunk::SerializedChunk(const Packet & packet, Encoding encoding, uint8_t compressionLevel)
+ : _os(),
+ _range(packet.range()),
+ _numEntries(packet.size())
+{
+ nbostream_longlivedbuf h(packet.getHandle().data(), packet.getHandle().size());
+
+ IChunk::UP chunk = IChunk::create(encoding, compressionLevel);
+ SerialNum prev = 0;
+ for (size_t i(0); h.size() > 0; i++) {
+ //LOG(spam,
+ //"Pos(%d) Len(%d), Lim(%d), Remaining(%d)",
+ //h.getPos(), h.getLength(), h.getLimit(), h.getRemaining());
+ Packet::Entry entry;
+ entry.deserialize(h);
+ assert (prev < entry.serial());
+ chunk->add(entry);
+ prev = entry.serial();
+ }
+ assert(! chunk->getEntries().empty());
+ encode(_os, *chunk, encoding);
+}
+vespalib::ConstBufferRef SerializedChunk::getData() const {
+ return vespalib::ConstBufferRef(_os.data(), _os.size());
+}
}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/ichunk.h b/searchlib/src/vespa/searchlib/transactionlog/ichunk.h
index 02bd0ce9426..dccfd6617f5 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/ichunk.h
+++ b/searchlib/src/vespa/searchlib/transactionlog/ichunk.h
@@ -33,6 +33,22 @@ private:
std::ostream & operator << (std::ostream & os, Encoding e);
/**
+ * Represents a completely encoded chunk with a buffer ready to be persisted,
+ * and the range and number of entries it covers.
+ */
+class SerializedChunk {
+public:
+ SerializedChunk(const Packet & packet, Encoding encoding, uint8_t compressionLevel);
+ vespalib::ConstBufferRef getData() const;
+ SerialNumRange range() const { return _range; }
+ size_t getNumEntries() const { return _numEntries; }
+private:
+ vespalib::nbostream _os;
+ SerialNumRange _range;
+ size_t _numEntries;
+};
+
+/**
* Interface for different chunk formats.
* Format specifies both crc type, and compression type.
*/