summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp4
-rw-r--r--searchlib/src/tests/attribute/attribute_test.cpp8
-rw-r--r--searchlib/src/tests/attribute/comparator/comparator_test.cpp10
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp591
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.cpp25
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.hpp52
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.cpp76
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.h298
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.hpp463
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_enum_store.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp4
-rw-r--r--vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store.h7
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store.hpp15
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_builder.h1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp17
21 files changed, 289 insertions, 1297 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp
index a1d5f72bc9d..5a199c529b6 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp
+++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp
@@ -77,8 +77,8 @@ void
convertEnumStoreToSlime(const IEnumStore &enumStore, Cursor &object)
{
object.setLong("numUniques", enumStore.getNumUniques());
- convertMemoryUsageToSlime(enumStore.getMemoryUsage(), object.setObject("memoryUsage"));
- convertMemoryUsageToSlime(enumStore.getTreeMemoryUsage(), object.setObject("treeMemoryUsage"));
+ convertMemoryUsageToSlime(enumStore.getValuesMemoryUsage(), object.setObject("valuesMemoryUsage"));
+ convertMemoryUsageToSlime(enumStore.getDictionaryMemoryUsage(), object.setObject("dictionaryMemoryUsage"));
}
void
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
index 4e520e86707..98caf39dace 100644
--- a/searchlib/src/tests/attribute/attribute_test.cpp
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -2036,11 +2036,11 @@ AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool
AddressSpaceUsage after = attrPtr->getAddressSpaceUsage();
if (attrPtr->hasEnum()) {
LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str());
- EXPECT_EQUAL(before.enumStoreUsage().used(), 16u);
- EXPECT_EQUAL(before.enumStoreUsage().dead(), 16u);
+ EXPECT_EQUAL(before.enumStoreUsage().used(), 1u);
+ EXPECT_EQUAL(before.enumStoreUsage().dead(), 1u);
EXPECT_GREATER(after.enumStoreUsage().used(), before.enumStoreUsage().used());
- EXPECT_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit());
- EXPECT_EQUAL(34359738368u, after.enumStoreUsage().limit()); // EnumStoreBase::DataStoreType::RefType::offsetSize()
+ EXPECT_GREATER_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit());
+ EXPECT_GREATER(after.enumStoreUsage().limit(), 4200000000u);
} else {
LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT enum", attrName.c_str());
EXPECT_EQUAL(before.enumStoreUsage().used(), 0u);
diff --git a/searchlib/src/tests/attribute/comparator/comparator_test.cpp b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
index a2000c48423..7bd6f3ca013 100644
--- a/searchlib/src/tests/attribute/comparator/comparator_test.cpp
+++ b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
@@ -47,7 +47,7 @@ public:
void
Test::requireThatNumericComparatorIsWorking()
{
- NumericEnumStore es(1024, false);
+ NumericEnumStore es(false);
EnumIndex e1, e2;
es.addEnum(10, e1);
es.addEnum(30, e2);
@@ -63,7 +63,7 @@ Test::requireThatNumericComparatorIsWorking()
void
Test::requireThatFloatComparatorIsWorking()
{
- FloatEnumStore es(1024, false);
+ FloatEnumStore es(false);
EnumIndex e1, e2, e3;
es.addEnum(10.5, e1);
es.addEnum(30.5, e2);
@@ -83,7 +83,7 @@ Test::requireThatFloatComparatorIsWorking()
void
Test::requireThatStringComparatorIsWorking()
{
- StringEnumStore es(1024, false);
+ StringEnumStore es(false);
EnumIndex e1, e2, e3;
es.addEnum("Aa", e1);
es.addEnum("aa", e2);
@@ -102,7 +102,7 @@ Test::requireThatStringComparatorIsWorking()
void
Test::requireThatComparatorWithTreeIsWorking()
{
- NumericEnumStore es(2048, false);
+ NumericEnumStore es(false);
vespalib::GenerationHandler g;
TreeType t;
NodeAllocator m;
@@ -129,7 +129,7 @@ Test::requireThatComparatorWithTreeIsWorking()
void
Test::requireThatFoldedComparatorIsWorking()
{
- StringEnumStore es(1024, false);
+ StringEnumStore es(false);
EnumIndex e1, e2, e3, e4;
es.addEnum("Aa", e1);
es.addEnum("aa", e2);
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index c4ba8eecf43..f61211283a4 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -17,7 +17,8 @@ size_t enumStoreAlign(size_t size)
// IEnumStore::Index(0,0) is reserved thus 16 bytes are reserved in buffer 0
const uint32_t RESERVED_BYTES = 16u;
-typedef EnumStoreT<NumericEntryType<uint32_t> > NumericEnumStore;
+using NumericEnumStore = EnumStoreT<NumericEntryType<uint32_t> >;
+using generation_t = vespalib::GenerationHandler::generation_t;
class EnumStoreTest : public vespalib::TestApp
{
@@ -27,15 +28,6 @@ private:
typedef EnumStoreT<NumericEntryType<double> > DoubleEnumStore;
typedef IEnumStore::Index EnumIndex;
- typedef vespalib::GenerationHandler::generation_t generation_t;
-
- void testIndex();
- void fillDataBuffer(char * data, uint32_t refCount,
- const std::string & string);
- void fillDataBuffer(char * data, uint32_t refCount,
- uint32_t value);
- void testStringEntry();
- void testNumericEntry();
template <typename EnumStoreType, typename T>
void testFloatEnumStore(EnumStoreType & es);
@@ -51,27 +43,11 @@ private:
testUniques(const EnumStoreType &ses,
const std::vector<std::string> &unique);
-
- void testCompaction();
- template <typename EnumStoreType>
- void testCompaction(bool hasPostings);
-
- void testReset();
- template <typename EnumStoreType>
- void testReset(bool hasPostings);
-
void testHoldListAndGeneration();
- void testMemoryUsage();
void requireThatAddressSpaceUsageIsReported();
- void testBufferLimit();
// helper methods
typedef std::vector<std::string> StringVector;
- template <typename T>
- T random(T low, T high);
- std::string getRandomString(uint32_t minLen, uint32_t maxLen);
- StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen);
- StringVector sortRandomStrings(StringVector & strings);
struct StringEntry {
StringEntry(uint32_t r, const std::string & s) :
@@ -107,123 +83,6 @@ EnumStoreTest::Reader::Reader(uint32_t generation, const IndexVector & indices,
{}
EnumStoreTest::Reader::~Reader() { }
-void
-EnumStoreTest::testIndex()
-{
- {
- StringEnumStore::Index idx;
- EXPECT_TRUE( ! idx.valid());
- EXPECT_EQUAL(idx.offset(), 0u);
- EXPECT_TRUE(idx.bufferId() == 0);
- }
- {
- StringEnumStore::Index idx(enumStoreAlign(1000), 0);
- EXPECT_TRUE(idx.offset() == enumStoreAlign(1000));
- EXPECT_TRUE(idx.bufferId() == 0);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 31)- RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 31) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 33) - RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 33) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 35) - RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 35) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- // Change offsets when alignment changes.
- StringEnumStore::Index idx1(48, 0);
- StringEnumStore::Index idx2(80, 0);
- StringEnumStore::Index idx3(48, 0);
- EXPECT_TRUE(!(idx1 == idx2));
- EXPECT_TRUE(idx1 == idx3);
- }
- {
- EXPECT_TRUE(StringEnumStore::Index::numBuffers() == 2);
- }
-}
-
-void
-EnumStoreTest::fillDataBuffer(char * data, uint32_t refCount,
- const std::string & string)
-{
- StringEnumStore::insertEntry(data, refCount, string.c_str());
-}
-
-void
-EnumStoreTest::fillDataBuffer(char * data, uint32_t refCount,
- uint32_t value)
-{
- NumericEnumStore::insertEntry(data, refCount, value);
-}
-
-void
-EnumStoreTest::testStringEntry()
-{
- {
- char data[9];
- fillDataBuffer(data, 0, "");
- StringEnumStore::Entry e(data);
- EXPECT_TRUE(StringEnumStore::getEntrySize("") ==
- StringEnumStore::alignEntrySize(8 + 1));
-
- EXPECT_TRUE(e.getRefCount() == 0);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 1);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 0);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
- }
- {
- char data[18];
- fillDataBuffer(data, 5, "enumstore");
- StringEnumStore::Entry e(data);
- EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") ==
- StringEnumStore::alignEntrySize(8 + 1 + 9));
-
- EXPECT_TRUE(e.getRefCount() == 5);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 6);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 5);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
- }
-}
-
-void
-EnumStoreTest::testNumericEntry()
-{
- {
- char data[12];
- fillDataBuffer(data, 20, 30);
- NumericEnumStore::Entry e(data);
- EXPECT_TRUE(NumericEnumStore::getEntrySize(30) ==
- NumericEnumStore::alignEntrySize(8 + 4));
-
- EXPECT_TRUE(e.getRefCount() == 20);
- EXPECT_TRUE(e.getValue() == 30);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 21);
- EXPECT_TRUE(e.getValue() == 30);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 20);
- EXPECT_TRUE(e.getValue() == 30);
- }
-}
-
template <typename EnumStoreType, typename T>
void
EnumStoreTest::testFloatEnumStore(EnumStoreType & es)
@@ -256,11 +115,11 @@ void
EnumStoreTest::testFloatEnumStore()
{
{
- FloatEnumStore fes(1000, false);
+ FloatEnumStore fes(false);
testFloatEnumStore<FloatEnumStore, float>(fes);
}
{
- DoubleEnumStore des(1000, false);
+ DoubleEnumStore des(false);
testFloatEnumStore<DoubleEnumStore, double>(des);
}
}
@@ -268,7 +127,7 @@ EnumStoreTest::testFloatEnumStore()
void
EnumStoreTest::testFindFolded()
{
- StringEnumStore ses(100, false);
+ StringEnumStore ses(false);
std::vector<EnumIndex> indices;
std::vector<std::string> unique({"", "one", "two", "TWO", "Two", "three"});
for (std::string &str : unique) {
@@ -308,15 +167,10 @@ template <typename EnumStoreType>
void
EnumStoreTest::testAddEnum(bool hasPostings)
{
- EnumStoreType ses(100, hasPostings);
- EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES,
- ses.getBuffer(0).capacity());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size());
- EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
+ // TODO: Rewrite test to use BatchUpdater
+ EnumStoreType ses(hasPostings);
EnumIndex idx;
- uint64_t offset = ses.getBuffer(0).size();
std::vector<EnumIndex> indices;
std::vector<std::string> unique;
unique.push_back("");
@@ -326,12 +180,9 @@ EnumStoreTest::testAddEnum(bool hasPostings)
for (uint32_t i = 0; i < unique.size(); ++i) {
ses.addEnum(unique[i].c_str(), idx);
- EXPECT_EQUAL(offset, idx.offset());
- EXPECT_EQUAL(0u, idx.bufferId());
ses.incRefCount(idx);
EXPECT_EQUAL(1u, ses.getRefCount(idx));
indices.push_back(idx);
- offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8);
EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx));
}
ses.freezeTree();
@@ -374,197 +225,11 @@ EnumStoreTest::testUniques
EXPECT_EQUAL(static_cast<uint32_t>(unique.size()), i);
}
-
-void
-EnumStoreTest::testCompaction()
-{
- testCompaction<StringEnumStore>(false);
- testCompaction<StringEnumStore>(true);
-}
-
-template <typename EnumStoreType>
-void
-EnumStoreTest::testCompaction(bool hasPostings)
-{
- // entrySize = 15 before alignment
- uint32_t entrySize = EnumStoreType::alignEntrySize(15);
- uint32_t initBufferSize = entrySize * 5;
- EnumStoreType ses(initBufferSize, hasPostings);
- // Note: Sizes of underlying data store buffers are power of 2.
- uint32_t adjustedBufferSize = vespalib::roundUp2inN(initBufferSize) - RESERVED_BYTES;
- EnumIndex idx;
- std::vector<EnumIndex> indices;
- typename EnumStoreType::Type t = "foo";
- std::vector<std::string> uniques;
- uniques.push_back("enum00");
- uniques.push_back("enum01");
- uniques.push_back("enum02");
- uniques.push_back("enum03");
- uniques.push_back("enum04");
-
- // fill with unique values
- for (uint32_t i = 0; i < 5; ++i) {
- size_t expRemaining = adjustedBufferSize - i * entrySize;
- EXPECT_EQUAL(expRemaining, ses.getRemaining());
- ses.addEnum(uniques[i].c_str(), idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- indices.push_back(idx);
- }
- EXPECT_EQUAL(32u, ses.getRemaining());
- EXPECT_EQUAL(32u, ses.getBuffer(0).remaining());
- EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
- uint32_t failEntrySize = ses.getEntrySize("enum05");
- EXPECT_EQUAL(16u, failEntrySize);
-
- // change from enum00 -> enum01
- ses.decRefCount(indices[0]);
- ses.incRefCount(indices[1]);
- indices[0] = indices[1];
-
- // check correct refcount
- for (uint32_t i = 0; i < 5; ++i) {
- EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
- uint32_t refCount = ses.getRefCount(idx);
- if (i == 0) {
- EXPECT_TRUE(refCount == 0);
- } else if (i == 1) {
- EXPECT_TRUE(refCount == 2);
- } else {
- EXPECT_TRUE(refCount == 1);
- }
- }
-
- // free unused enums
- ses.freeUnusedEnums(true);
- EXPECT_TRUE(!ses.findIndex("enum00", idx));
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
- auto &data_store_base = ses.get_data_store_base();
- auto old_compaction_count = data_store_base.get_compaction_count();
-
- // perform compaction
- IEnumStore::EnumIndexMap old2New;
- EXPECT_TRUE(ses.performCompaction(3 * entrySize, old2New));
- EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize);
- EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize);
- EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4);
- EXPECT_TRUE(ses.getBuffer(1).getDeadElems() == 0);
-
- EXPECT_NOT_EQUAL(old_compaction_count, data_store_base.get_compaction_count());
-
- // add new unique strings
- ses.addEnum("enum05", idx);
- ses.addEnum("enum06", idx);
- ses.addEnum("enum00", idx);
-
- // compare old and new indices
- for (uint32_t i = 0; i < indices.size(); ++i) {
- idx = old2New[indices[i]];
- EXPECT_TRUE(indices[i].bufferId() == 0);
- EXPECT_TRUE(idx.bufferId() == 1);
- EXPECT_TRUE(ses.getValue(indices[i], t));
- typename EnumStoreType::Type s = "bar";
- EXPECT_TRUE(ses.getValue(idx, s));
- EXPECT_TRUE(strcmp(t, s) == 0);
- }
- // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0
- idx = old2New[indices[0]];
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset());
- EXPECT_EQUAL(0u, idx.offset());
- idx = old2New[indices[1]];
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset());
- EXPECT_EQUAL(0u, idx.offset());
- idx = old2New[indices[2]];
- EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset());
- EXPECT_EQUAL(entrySize, idx.offset());
- idx = old2New[indices[3]];
- EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset());
- EXPECT_EQUAL(2 * entrySize, idx.offset());
- idx = old2New[indices[4]];
- EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset());
- EXPECT_EQUAL(3 * entrySize, idx.offset());
-}
-
-void
-EnumStoreTest::testReset()
-{
- testReset<StringEnumStore>(false);
-
- testReset<StringEnumStore>(true);
-}
-
-template <typename EnumStoreType>
-void
-EnumStoreTest::testReset(bool hasPostings)
-{
- uint32_t numUniques = 10000;
- srand(123456789);
- StringVector rndStrings = fillRandomStrings(numUniques, 10, 15);
- EXPECT_EQUAL(rndStrings.size(), size_t(numUniques));
- StringVector uniques = sortRandomStrings(rndStrings);
- EXPECT_EQUAL(uniques.size(), size_t(numUniques));
- // max entrySize = 25 before alignment
- uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16);
- EnumStoreType ses(numUniques * maxEntrySize, hasPostings);
- EnumIndex idx;
-
- uint32_t cnt = 0;
- // add new unique strings
- for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) {
- ses.addEnum(iter->c_str(), idx);
- EXPECT_EQUAL(ses.getNumUniques(), ++cnt);
- }
-
- // check for unique strings
- for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
- EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
- }
-
- EXPECT_EQUAL(ses.getNumUniques(), numUniques);
- if (hasPostings) {
- testUniques<EnumStoreType, EnumPostingTree>(ses, uniques);
- } else {
- testUniques<EnumStoreType, EnumTree>(ses, uniques);
- }
-
- rndStrings = fillRandomStrings(numUniques, 15, 20);
- StringVector newUniques = sortRandomStrings(rndStrings);
-
- typename EnumStoreType::Builder builder;
- for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
- builder.insert(iter->c_str());
- }
-
- ses.reset(builder);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(524288u, ses.getCapacity());
- EXPECT_EQUAL(204272u, ses.getRemaining());
-
- // check for old unique strings
- for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
- EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx));
- }
-
- // check for new unique strings
- for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
- EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
- }
-
- EXPECT_EQUAL(ses.getNumUniques(), numUniques);
- if (hasPostings) {
- testUniques<EnumStoreType, EnumPostingTree>(ses, newUniques);
- } else {
- testUniques<EnumStoreType, EnumTree>(ses, newUniques);
- }
-}
-
void
EnumStoreTest::testHoldListAndGeneration()
{
- uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6);
- StringEnumStore ses(100 * entrySize, false);
+ // TODO: Rewrite test to use BatchUpdater
+ StringEnumStore ses(false);
StringEnumStore::Index idx;
StringVector uniques;
generation_t sesGen = 0u;
@@ -597,11 +262,11 @@ EnumStoreTest::testHoldListAndGeneration()
for (uint32_t j = i - 9; j <= i; ++j) {
EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx));
indices.push_back(idx);
- StringEnumStore::Entry entry = ses.getEntry(idx);
- EXPECT_TRUE(entry.getRefCount() == 1);
- EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0);
- expected.push_back(StringEntry(entry.getRefCount(),
- std::string(entry.getValue())));
+ uint32_t ref_count = ses.getRefCount(idx);
+ std::string value(ses.getValue(idx));
+ EXPECT_EQUAL(1u, ref_count);
+ EXPECT_EQUAL(uniques[j], value);
+ expected.emplace_back(ref_count, value);
}
EXPECT_TRUE(indices.size() == 10);
EXPECT_TRUE(expected.size() == 10);
@@ -611,10 +276,6 @@ EnumStoreTest::testHoldListAndGeneration()
}
}
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(432u, ses.getRemaining());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
// remove all uniques
for (uint32_t i = 0; i < 100; ++i) {
EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
@@ -622,117 +283,12 @@ EnumStoreTest::testHoldListAndGeneration()
EXPECT_EQUAL(0u, ses.getRefCount(idx));
}
ses.freeUnusedEnums(true);
- EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
- // perform compaction
- uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8);
- IEnumStore::EnumIndexMap old2New;
- EXPECT_TRUE(ses.performCompaction(5 * newEntrySize, old2New));
// check readers again
checkReaders(ses, sesGen, readers);
- // fill up buffer
- uint32_t i = 0;
- while (ses.getRemaining() >= newEntrySize) {
- //LOG(info, "fill: %s", newUniques[i].c_str());
- ses.addEnum(newUniques[i++].c_str(), idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- }
- EXPECT_LESS(ses.getRemaining(), newEntrySize);
- // buffer on hold list
- old2New.clear();
- EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize, old2New));
-
- checkReaders(ses, sesGen, readers);
- ses.transferHoldLists(sesGen);
- ses.trimHoldLists(sesGen + 1);
-
- // buffer no longer on hold list
- EXPECT_LESS(ses.getRemaining(), newEntrySize);
- old2New.clear();
- EXPECT_TRUE(ses.performCompaction(5 * newEntrySize, old2New));
- EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize);
-}
-
-void
-EnumStoreTest::testMemoryUsage()
-{
- StringEnumStore ses(200, false);
- StringEnumStore::Index idx;
- uint32_t num = 8;
- std::vector<StringEnumStore::Index> indices;
- std::vector<std::string> uniques;
- for (uint32_t i = 0; i < num; ++i) {
- std::stringstream ss;
- ss << "enum" << i;
- uniques.push_back(ss.str());
- }
- generation_t sesGen = 0u;
- uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx")
-
- // usage before inserting enums
- vespalib::MemoryUsage usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0));
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- for (uint32_t i = 0; i < num; ++i) {
- ses.addEnum(uniques[i].c_str(), idx);
- indices.push_back(idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- }
-
- // usage after inserting enums
- usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- // assign new enum for num / 2 of indices
- for (uint32_t i = 0; i < num / 2; ++i) {
- ses.decRefCount(indices[i]);
- EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx));
- ses.incRefCount(idx);
- indices[i] = idx;
- }
- ses.freeUnusedEnums(true);
-
- // usage after removing enums
- usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num / 2);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- IEnumStore::EnumIndexMap old2New;
- ses.performCompaction(400, old2New);
-
- // usage after compaction
- vespalib::MemoryUsage usage2 = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num / 2);
- EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes());
- EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes());
- EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold());
-
ses.transferHoldLists(sesGen);
ses.trimHoldLists(sesGen + 1);
-
- // usage after hold list trimming
- vespalib::MemoryUsage usage3 = ses.getMemoryUsage();
- EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes());
- EXPECT_EQUAL(0u, usage3.deadBytes());
- EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold());
}
namespace {
@@ -747,10 +303,13 @@ addEnum(NumericEnumStore &store, uint32_t value)
}
void
-decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx)
+decRefCount(NumericEnumStore& store, NumericEnumStore::Index idx)
{
store.decRefCount(idx);
store.freeUnusedEnums(false);
+ generation_t gen = 5;
+ store.transferHoldLists(gen);
+ store.trimHoldLists(gen + 1);
}
}
@@ -758,106 +317,21 @@ decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx)
void
EnumStoreTest::requireThatAddressSpaceUsageIsReported()
{
- const size_t ADDRESS_LIMIT = 34359738368; // NumericEnumStore::DataStoreType::RefType::offsetSize()
- NumericEnumStore store(200, false);
+ // TODO: Rewrite test to use BatchUpdater
+ const size_t ADDRESS_LIMIT = 4290772994; // Max allocated elements in un-allocated buffers + allocated elements in allocated buffers.
+ NumericEnumStore store(false);
using vespalib::AddressSpace;
- EXPECT_EQUAL(AddressSpace(16, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(1, 1, ADDRESS_LIMIT), store.getAddressSpaceUsage());
NumericEnumStore::Index idx1 = addEnum(store, 10);
- EXPECT_EQUAL(AddressSpace(32, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(2, 1, ADDRESS_LIMIT), store.getAddressSpaceUsage());
NumericEnumStore::Index idx2 = addEnum(store, 20);
- EXPECT_EQUAL(AddressSpace(48, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ // Address limit increases because buffer is re-sized.
+ EXPECT_EQUAL(AddressSpace(3, 1, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
decRefCount(store, idx1);
- EXPECT_EQUAL(AddressSpace(48, 32, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(3, 2, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
decRefCount(store, idx2);
- EXPECT_EQUAL(AddressSpace(48, 48, ADDRESS_LIMIT), store.getAddressSpaceUsage());
-}
-
-size_t
-digits(size_t num)
-{
- size_t digits = 1;
- while (num / 10 > 0) {
- num /= 10;
- digits++;
- }
- return digits;
-}
-
-void
-EnumStoreTest::testBufferLimit()
-{
- size_t enumSize = StringEnumStore::Index::offsetSize();
- StringEnumStore es(enumSize, false);
-
- size_t strLen = 65536;
- char str[strLen + 1];
- for (size_t i = 0; i < strLen; ++i) {
- str[i] = 'X';
- }
- str[strLen] = 0;
-
- size_t entrySize = StringEnumStore::getEntrySize(str);
- size_t numUniques = enumSize / entrySize;
- size_t uniqDigits = digits(numUniques);
-
- EnumIndex idx;
- EnumIndex lastIdx;
- for (size_t i = 0; i < numUniques; ++i) {
- sprintf(str, "%0*zu", (int)uniqDigits, i);
- str[uniqDigits] = 'X';
- es.addEnum(str, idx);
- if (i % (numUniques / 32) == 1) {
- EXPECT_TRUE(idx.offset() > lastIdx.offset());
- EXPECT_EQUAL(i + 1, es.getNumUniques());
- std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
- }
- lastIdx = idx;
- }
- EXPECT_EQUAL(idx.offset(), lastIdx.offset());
- EXPECT_EQUAL(numUniques, es.getNumUniques());
- std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
-}
-
-template <typename T>
-T
-EnumStoreTest::random(T low, T high)
-{
- return (rand() % (high - low)) + low;
-}
-
-std::string
-EnumStoreTest::getRandomString(uint32_t minLen, uint32_t maxLen)
-{
- uint32_t len = random(minLen, maxLen);
- std::string retval;
- for (uint32_t i = 0; i < len; ++i) {
- char c = random('a', 'z');
- retval.push_back(c);
- }
- return retval;
-}
-
-EnumStoreTest::StringVector
-EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen)
-{
- StringVector retval;
- retval.reserve(numStrings);
- for (uint32_t i = 0; i < numStrings; ++i) {
- retval.push_back(getRandomString(minLen, maxLen));
- }
- return retval;
-}
-
-EnumStoreTest::StringVector
-EnumStoreTest::sortRandomStrings(StringVector & strings)
-{
- std::sort(strings.begin(), strings.end());
- std::vector<std::string> retval;
- retval.reserve(strings.size());
- std::vector<std::string>::iterator pos = std::unique(strings.begin(), strings.end());
- std::copy(strings.begin(), pos, std::back_inserter(retval));
- return retval;
+ EXPECT_EQUAL(AddressSpace(3, 3, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
}
void
@@ -867,7 +341,7 @@ EnumStoreTest::checkReaders(const StringEnumStore & ses,
{
(void) sesGen;
//uint32_t refCount = 1000;
- StringEnumStore::Type t = "";
+ StringEnumStore::DataType t = "";
for (uint32_t i = 0; i < readers.size(); ++i) {
const Reader & r = readers[i];
for (uint32_t j = 0; j < r._indices.size(); ++j) {
@@ -883,20 +357,11 @@ EnumStoreTest::Main()
{
TEST_INIT("enumstore_test");
- testIndex();
- testStringEntry();
- testNumericEntry();
testFloatEnumStore();
testFindFolded();
testAddEnum();
- testCompaction();
- testReset();
testHoldListAndGeneration();
- testMemoryUsage();
TEST_DO(requireThatAddressSpaceUsageIsReported());
- if (_argc > 1) {
- testBufferLimit(); // large test with 8 GB buffer
- }
TEST_DONE();
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
index 3e949384d4a..5f9ebd1bf44 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -218,6 +218,12 @@ AttributeVector::updateStatistics(uint64_t numValues, uint64_t numUniqueValue, u
_status.updateStatistics(numValues, numUniqueValue, allocated, used, dead, onHold);
}
+vespalib::MemoryUsage
+AttributeVector::getEnumStoreValuesMemoryUsage() const
+{
+ return vespalib::MemoryUsage();
+}
+
vespalib::AddressSpace
AttributeVector::getEnumStoreAddressSpaceUsage() const
{
@@ -715,7 +721,7 @@ AttributeVector::getEstimatedSaveByteSize() const
uint64_t idxFileSize = 0;
uint64_t udatFileSize = 0;
size_t fixedWidth = getFixedWidth();
- vespalib::AddressSpace enumAddressSpace(getEnumStoreAddressSpaceUsage());
+ vespalib::MemoryUsage values_mem_usage = getEnumStoreValuesMemoryUsage();
if (hasMultiValue()) {
idxFileSize = headerSize + sizeof(uint32_t) * (docIdLimit + 1);
@@ -723,13 +729,15 @@ AttributeVector::getEstimatedSaveByteSize() const
if (hasWeightedSetType()) {
weightFileSize = headerSize + sizeof(int32_t) * totalValueCount;
}
- if (hasEnum() && getEnumeratedSave()) {
- datFileSize = headerSize + 4 * totalValueCount;
+ if (hasEnum()) {
+ datFileSize = headerSize + sizeof(uint32_t) * totalValueCount;
if (fixedWidth != 0) {
udatFileSize = headerSize + fixedWidth * uniqueValueCount;
} else {
- udatFileSize = headerSize + enumAddressSpace.used()
- - 8 * uniqueValueCount;
+ size_t unique_values_bytes = values_mem_usage.usedBytes() -
+ (values_mem_usage.deadBytes() + values_mem_usage.allocatedBytesOnHold());
+ size_t ref_count_mem_usage = sizeof(uint32_t) * uniqueValueCount;
+ udatFileSize = headerSize + unique_values_bytes - ref_count_mem_usage;
}
} else {
BasicType::Type basicType(getBasicType());
@@ -744,12 +752,7 @@ AttributeVector::getEstimatedSaveByteSize() const
datFileSize = headerSize + memorySize;
break;
case BasicType::Type::STRING:
- assert(hasEnum());
- datFileSize = headerSize;
- if (uniqueValueCount > 0) {
- double avgEntrySize = (static_cast<double>(enumAddressSpace.used()) / uniqueValueCount) - 8;
- datFileSize += avgEntrySize * totalValueCount;
- }
+ abort();
break;
default:
datFileSize = headerSize + fixedWidth * totalValueCount;
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
index 52e63385c7d..b5474fda9c9 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -378,6 +378,7 @@ protected:
return value;
}
+ virtual vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const;
virtual vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const;
virtual vespalib::AddressSpace getMultiValueAddressSpaceUsage() const;
void logEnumStoreEvent(const char *reason, const char *stage);
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
index 55af5a874f9..db8952d4f71 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
@@ -78,6 +78,7 @@ protected:
void insertNewUniqueValues(EnumStoreBatchUpdater& updater);
virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0;
virtual void reEnumerate(const EnumIndexMap &) = 0;
+ vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const override;
vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const override;
public:
EnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
index a5ba60cad4d..57cb33b1b70 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
@@ -13,7 +13,7 @@ EnumAttribute<B>::
EnumAttribute(const vespalib::string &baseFileName,
const AttributeVector::Config &cfg)
: B(baseFileName, cfg),
- _enumStore(0, cfg.fastSearch())
+ _enumStore(cfg.fastSearch())
{
this->setEnum(true);
}
@@ -27,7 +27,7 @@ template <typename B>
void EnumAttribute<B>::fillEnum(LoadedVector & loaded)
{
if constexpr(!std::is_same_v<LoadedVector, NoLoadedVector>) {
- typename EnumStore::Builder builder;
+ auto builder = _enumStore.make_builder();
if (!loaded.empty()) {
auto value = loaded.read();
LoadedValueType prev = value.getValue();
@@ -36,7 +36,7 @@ void EnumAttribute<B>::fillEnum(LoadedVector & loaded)
for (size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) {
value = loaded.read();
if (EnumStore::ComparatorType::compare(prev, value.getValue()) != 0) {
- builder.updateRefCount(prevRefCount);
+ builder.set_ref_count_for_last_value(prevRefCount);
index = builder.insert(value.getValue(), value._pidx.ref());
prev = value.getValue();
prevRefCount = 1;
@@ -46,9 +46,9 @@ void EnumAttribute<B>::fillEnum(LoadedVector & loaded)
value.setEidx(index);
loaded.write(value);
}
- builder.updateRefCount(prevRefCount);
+ builder.set_ref_count_for_last_value(prevRefCount);
}
- _enumStore.reset(builder);
+ builder.build();
}
}
@@ -93,48 +93,18 @@ EnumAttribute<B>::insertNewUniqueValues(EnumStoreBatchUpdater& updater)
considerAttributeChange(data, newUniques);
}
- uint64_t extraBytesNeeded = 0;
- for (const auto & data : newUniques) {
- extraBytesNeeded += _enumStore.getEntrySize(data.raw());
- }
-
- do {
- // perform compaction on EnumStore if necessary
- if (extraBytesNeeded > this->_enumStore.getRemaining() ||
- this->_enumStore.getPendingCompact())
- {
- this->logEnumStoreEvent("enumstorecompact", "reserve");
- this->removeAllOldGenerations();
- this->_enumStore.clearPendingCompact();
- EnumIndexMap old2New(this->_enumStore.getNumUniques()*3);
- this->logEnumStoreEvent("enumstorecompact", "start");
- if (!this->_enumStore.performCompaction(extraBytesNeeded, old2New)) {
- this->logEnumStoreEvent("enumstorecompact", "failed_compact");
- // fallback to resize strategy
- this->_enumStore.fallbackResize(extraBytesNeeded);
- this->logEnumStoreEvent("enumstorecompact", "fallbackresize_complete");
- if (extraBytesNeeded > this->_enumStore.getRemaining()) {
- HDR_ABORT("Cannot fallbackResize enumStore");
- }
- break; // fallback resize performed instead of compaction.
- }
-
- // update underlying structure with new EnumIndex values.
- reEnumerate(old2New);
- // Clear scratch enumeration
- for (auto & data : this->_changes) {
- data._enumScratchPad = ChangeBase::UNSET_ENUM;
- }
- this->logEnumStoreEvent("enumstorecompact", "complete");
- }
- } while (0);
-
// insert new unique values in EnumStore
for (const auto & data : newUniques) {
updater.add(data.raw());
}
}
+template <typename B>
+vespalib::MemoryUsage
+EnumAttribute<B>::getEnumStoreValuesMemoryUsage() const
+{
+ return _enumStore.getValuesMemoryUsage();
+}
template <typename B>
vespalib::AddressSpace
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
index 4cf5ea9c766..7ce65193c40 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
@@ -10,78 +10,50 @@ LOG_SETUP(".searchlib.attribute.enum_store");
namespace search {
-template <>
-void
-EnumStoreT<StringEntryType>::
-insertEntryValue(char * dst, Type value)
-{
- strcpy(dst, value);
-}
template <>
void
-EnumStoreT<StringEntryType>::writeValues(BufferWriter &writer,
- const Index *idxs,
+EnumStoreT<StringEntryType>::writeValues(BufferWriter& writer,
+ const Index* idxs,
size_t count) const
{
- for (uint32_t i = 0; i < count; ++i) {
+ for (size_t i = 0; i < count; ++i) {
Index idx = idxs[i];
- const char *src(_store.getEntry<char>(idx) +
- EntryBase::size());
+ const char* src = _store.get(idx);
size_t sz = strlen(src) + 1;
writer.write(src, sz);
}
}
-
template <>
ssize_t
-EnumStoreT<StringEntryType>::deserialize(const void *src,
- size_t available,
- size_t &initSpace)
+EnumStoreT<StringEntryType>::deserialize(const void* src,
+ size_t available,
+ Index& idx)
{
- size_t slen = strlen(static_cast<const char *>(src));
- size_t sz(StringEntryType::fixedSize() + slen);
- if (available < sz)
+ const char* value = static_cast<const char*>(src);
+ size_t slen = strlen(value);
+ size_t sz = slen + 1;
+ if (available < sz) {
return -1;
- uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
- initSpace += entrySize;
+ }
+ Index prev_idx = idx;
+ idx = _store.get_allocator().allocate(value);
+
+ if (prev_idx.valid()) {
+ assert(ComparatorType::compare(getValue(prev_idx), value) < 0);
+ }
return sz;
}
-
-template <>
-ssize_t
-EnumStoreT<StringEntryType>::deserialize(const void *src,
- size_t available,
- Index &idx)
+std::unique_ptr<datastore::IUniqueStoreDictionary>
+make_enum_store_dictionary(IEnumStore &store, bool has_postings)
{
- size_t slen = strlen(static_cast<const char *>(src));
- size_t sz(StringEntryType::fixedSize() + slen);
- if (available < sz)
- return -1;
- uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & buffer = _store.getBufferState(activeBufferId);
- uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
- if (buffer.remaining() < entrySize) {
- LOG_ABORT("Out of enumstore bufferspace");
- }
- uint64_t offset = buffer.size();
- Index newIdx(offset, activeBufferId);
- char *dst(_store.getEntry<char>(newIdx));
- memcpy(dst, &dummy_enum_value, sizeof(uint32_t));
- uint32_t pos = sizeof(uint32_t);
- uint32_t refCount(0);
- memcpy(dst + pos, &refCount, sizeof(uint32_t));
- pos += sizeof(uint32_t);
- memcpy(dst + pos, src, sz);
- buffer.pushed_back(entrySize);
-
- if (idx.valid()) {
- assert(ComparatorType::compare(getValue(idx), Entry(dst).getValue()) < 0);
+ if (has_postings) {
+ return std::make_unique<EnumStoreDictionary<EnumPostingTree>>(store);
+ } else {
+ return std::make_unique<EnumStoreDictionary<EnumTree>>(store);
}
- idx = newIdx;
- return sz;
}
vespalib::asciistream & operator << (vespalib::asciistream & os, const IEnumStore::Index & idx) {
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index fa5e9611c55..032acfc0ee2 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -10,6 +10,8 @@
#include <vespa/vespalib/btree/btree.h>
#include <vespa/vespalib/btree/btreebuilder.h>
#include <vespa/vespalib/datastore/entryref.h>
+#include <vespa/vespalib/datastore/unique_store.h>
+#include <vespa/vespalib/datastore/unique_store_string_allocator.h>
#include <vespa/vespalib/util/buffer.h>
#include <vespa/vespalib/util/array.h>
#include <vespa/vespalib/util/stringfmt.h>
@@ -78,202 +80,123 @@ class EnumStoreT : public IEnumStore
{
friend class EnumStoreTest;
public:
- using Type = typename EntryType::Type;
+ using DataType = typename EntryType::Type;
using ComparatorType = EnumStoreComparatorT<EntryType>;
+ using AllocatorType = std::conditional_t<std::is_same_v<DataType, const char *>,
+ datastore::UniqueStoreStringAllocator<Index>,
+ datastore::UniqueStoreAllocator<DataType, Index>>;
+
+ using UniqueStoreType = datastore::UniqueStore<DataType, Index, ComparatorType, AllocatorType>;
using FoldedComparatorType = EnumStoreFoldedComparatorT<EntryType>;
using EnumStoreType = EnumStoreT<EntryType>;
- using DataStoreType = datastore::DataStoreT<Index>;
+ using EntryRef = datastore::EntryRef;
using generation_t = vespalib::GenerationHandler::generation_t;
- class EntryBase {
- protected:
- char * _data;
- public:
- EntryBase(void * data) : _data(static_cast<char *>(data)) {}
- uint32_t getRefCount() const {
- return *(reinterpret_cast<uint32_t *>(_data) + 1);
- }
- void incRefCount() {
- uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
- ++(*dst);
- }
- void decRefCount() {
- uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
- --(*dst);
- }
- void setRefCount(uint32_t refCount) {
- uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
- *dst = refCount;
- }
- static uint32_t size() { return 2*sizeof(uint32_t); }
- };
-
- class Entry : public EntryBase {
- public:
- Entry(void * data) : EntryBase(data) {}
- Type getValue() const;
- static uint32_t fixedSize() { return EntryBase::size() + EntryType::fixedSize(); }
- };
-
- class EnumBufferType : public datastore::BufferType<char> {
- private:
- size_t _minSizeNeeded; // lower cap for sizeNeeded
- size_t _deadElems; // dead elements in active buffer
- bool _pendingCompact;
- bool _wantCompact;
- public:
- EnumBufferType();
- size_t calcArraysToAlloc(uint32_t bufferId, size_t sizeNeeded, bool resizing) const override;
- void setSizeNeededAndDead(size_t sizeNeeded, size_t deadElems) {
- _minSizeNeeded = sizeNeeded;
- _deadElems = deadElems;
- }
- void onFree(size_t usedElems) override {
- datastore::BufferType<char>::onFree(usedElems);
- _pendingCompact = _wantCompact;
- _wantCompact = false;
- }
- void setWantCompact() { _wantCompact = true; }
- bool getPendingCompact() const { return _pendingCompact; }
- void clearPendingCompact() { _pendingCompact = false; }
- };
-
- static void insertEntry(char * dst, uint32_t refCount, Type value);
private:
- IEnumStoreDictionary *_enumDict;
- DataStoreType _store;
- EnumBufferType _type;
- std::vector<uint32_t> _toHoldBuffers; // used during compaction
-
- static const uint32_t TYPE_ID = 0;
+ UniqueStoreType _store;
+ IEnumStoreDictionary& _dict;
EnumStoreT(const EnumStoreT & rhs) = delete;
EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
- static void insertEntryValue(char * dst, Type value) {
- memcpy(dst, &value, sizeof(Type));
- }
+ void freeUnusedEnum(Index idx, IndexSet& unused) override;
- EntryBase getEntryBase(Index idx) const {
- return EntryBase(const_cast<DataStoreType &>(_store).getEntry<char>(idx));
+ const datastore::UniqueStoreEntryBase& get_entry_base(Index idx) const {
+ return _store.get_allocator().get_wrapped(idx);
}
- datastore::BufferState & getBuffer(uint32_t bufferIdx) {
- return _store.getBufferState(bufferIdx);
- }
- const datastore::BufferState & getBuffer(uint32_t bufferIdx) const {
- return _store.getBufferState(bufferIdx);
- }
- bool validIndex(Index idx) const {
- return (idx.valid() && idx.offset() < _store.getBufferState(idx.bufferId()).size());
- }
- uint32_t getBufferIndex(datastore::BufferState::State status);
- void postCompact();
- bool preCompact(uint64_t bytesNeeded);
-
- Entry getEntry(Index idx) const {
- return Entry(const_cast<DataStoreType &>(_store).getEntry<char>(idx));
- }
-
- void freeUnusedEnum(Index idx, IndexSet & unused) override;
public:
- EnumStoreT(uint64_t initBufferSize, bool hasPostings);
+ EnumStoreT(bool hasPostings);
virtual ~EnumStoreT();
- void reset(uint64_t initBufferSize);
-
- uint32_t getRefCount(Index idx) const { return getEntryBase(idx).getRefCount(); }
- void incRefCount(Index idx) { getEntryBase(idx).incRefCount(); }
- void decRefCount(Index idx) { getEntryBase(idx).decRefCount(); }
+ uint32_t getRefCount(Index idx) const { return get_entry_base(idx).get_ref_count(); }
+ // TODO: Remove from public API
+ void incRefCount(Index idx) { return get_entry_base(idx).inc_ref_count(); }
+ void decRefCount(Index idx) { return get_entry_base(idx).dec_ref_count(); }
// Only use when reading from enumerated attribute save files
+ // TODO: Instead create an API that is used for loading/initializing.
void fixupRefCount(Index idx, uint32_t refCount) override {
- getEntryBase(idx).setRefCount(refCount);
+ get_entry_base(idx).set_ref_count(refCount);
}
- uint32_t getNumUniques() const override { return _enumDict->getNumUniques(); }
+ uint32_t getNumUniques() const override { return _dict.getNumUniques(); }
- uint32_t getRemaining() const {
- return _store.getBufferState(_store.getActiveBufferId(TYPE_ID)).remaining();
- }
- uint32_t getCapacity() const {
- return _store.getBufferState(_store.getActiveBufferId(TYPE_ID)).capacity();
- }
- vespalib::MemoryUsage getMemoryUsage() const override { return _store.getMemoryUsage(); }
- vespalib::MemoryUsage getTreeMemoryUsage() const override { return _enumDict->get_memory_usage(); }
+ vespalib::MemoryUsage getValuesMemoryUsage() const override { return _store.get_allocator().get_data_store().getMemoryUsage(); }
+ vespalib::MemoryUsage getDictionaryMemoryUsage() const override { return _dict.get_memory_usage(); }
vespalib::AddressSpace getAddressSpaceUsage() const;
void transferHoldLists(generation_t generation);
void trimHoldLists(generation_t firstUsed);
- static void failNewSize(uint64_t minNewSize, uint64_t maxSize);
-
- // Align buffers and entries to 4 bytes boundary.
- static uint64_t alignBufferSize(uint64_t val) { return Index::align(val); }
- static uint32_t alignEntrySize(uint32_t val) { return Index::align(val); }
-
- void fallbackResize(uint64_t bytesNeeded);
- bool getPendingCompact() const { return _type.getPendingCompact(); }
- void clearPendingCompact() { _type.clearPendingCompact(); }
-
ssize_t deserialize0(const void *src, size_t available, IndexVector &idx) override;
ssize_t deserialize(const void *src, size_t available, IndexVector &idx) {
- return _enumDict->deserialize(src, available, idx);
+ return _dict.deserialize(src, available, idx);
}
- void fixupRefCounts(const EnumVector &hist) { _enumDict->fixupRefCounts(hist); }
- void freezeTree() { _enumDict->freeze(); }
+ void fixupRefCounts(const EnumVector &hist) { _dict.fixupRefCounts(hist); }
+ void freezeTree() { _store.freeze(); }
- IEnumStoreDictionary &getEnumStoreDict() override { return *_enumDict; }
- const IEnumStoreDictionary &getEnumStoreDict() const override { return *_enumDict; }
- EnumPostingTree &getPostingDictionary() { return _enumDict->getPostingDictionary(); }
+ IEnumStoreDictionary &getEnumStoreDict() override { return _dict; }
+ const IEnumStoreDictionary &getEnumStoreDict() const override { return _dict; }
+ EnumPostingTree &getPostingDictionary() { return _dict.getPostingDictionary(); }
const EnumPostingTree &getPostingDictionary() const {
- return _enumDict->getPostingDictionary();
+ return _dict.getPostingDictionary();
}
- const datastore::DataStoreBase &get_data_store_base() const override { return _store; }
+ // TODO: Add API for getting compaction count instead.
+ const datastore::DataStoreBase &get_data_store_base() const override { return _store.get_allocator().get_data_store(); }
- bool getValue(Index idx, Type & value) const;
- Type getValue(uint32_t idx) const { return getValue(Index(datastore::EntryRef(idx))); }
- Type getValue(Index idx) const { return getEntry(idx).getValue(); }
- static uint32_t getEntrySize(Type value) {
- return alignEntrySize(EntryBase::size() + EntryType::size(value));
- }
+ bool getValue(Index idx, DataType& value) const;
+ DataType getValue(uint32_t idx) const { return getValue(Index(EntryRef(idx))); }
+ DataType getValue(Index idx) const { return _store.get(idx); }
+ // TODO: Implement helper class to populate enum store when loading from enumerated save files.
+
+ /**
+ * Used when building enum store from non-enumerated save files.
+ * TODO: Find better name.
+ */
class Builder {
- public:
- struct UniqueEntry {
- UniqueEntry(const Type & val, size_t sz, uint32_t pidx = 0) : _value(val), _sz(sz), _pidx(pidx), _refCount(1) { }
- Type _value;
- size_t _sz;
- size_t _pidx;
- uint32_t _refCount;
- };
-
- typedef vespalib::Array<UniqueEntry> Uniques;
private:
- Uniques _uniques;
- uint64_t _bufferSize;
+ AllocatorType& _allocator;
+ datastore::IUniqueStoreDictionary& _dict;
+ std::vector<EntryRef> _refs;
+ std::vector<uint32_t> _payloads;
+
public:
- Builder();
+ Builder(AllocatorType& allocator, datastore::IUniqueStoreDictionary& dict)
+ : _allocator(allocator),
+ _dict(dict),
+ _refs(),
+ _payloads()
+ {
+ }
~Builder();
- Index insert(Type value, uint32_t pidx = 0) {
- uint32_t entrySize = getEntrySize(value);
- _uniques.push_back(UniqueEntry(value, entrySize, pidx));
- Index index(_bufferSize, 0); // bufferId 0 should be used when resetting with a builder
- _bufferSize += entrySize;
- return index;
+ Index insert(const DataType& value, uint32_t posting_idx = 0) {
+ EntryRef new_ref = _allocator.allocate(value);
+ _refs.emplace_back(new_ref);
+ _payloads.emplace_back(posting_idx);
+ return new_ref;
+ }
+ void set_ref_count_for_last_value(uint32_t ref_count) {
+ assert(!_refs.empty());
+ _allocator.get_wrapped(_refs.back()).set_ref_count(ref_count);
+ }
+ void build() {
+ _dict.build_with_payload(_refs, _payloads);
}
- void updateRefCount(uint32_t refCount) { _uniques.rbegin()->_refCount = refCount; }
- const Uniques & getUniques() const { return _uniques; }
- uint64_t getBufferSize() const { return _bufferSize; }
};
+ Builder make_builder() {
+ return Builder(_store.get_allocator(), _dict);
+ }
+
class BatchUpdater {
private:
EnumStoreType& _store;
@@ -284,17 +207,19 @@ public:
: _store(store),
_possibly_unused()
{}
- void add(Type value) {
- Index new_idx;
- _store.addEnum(value, new_idx);
- _possibly_unused.insert(new_idx);
+ // TODO: Rename to insert()
+ void add(DataType value) {
+ Index idx;
+ _store.addEnum(value, idx);
+ _possibly_unused.insert(idx);
}
void inc_ref_count(Index idx) {
- _store.incRefCount(idx);
+ _store.get_entry_base(idx).inc_ref_count();
}
void dec_ref_count(Index idx) {
- _store.decRefCount(idx);
- if (_store.getRefCount(idx) == 0) {
+ auto& entry = _store.get_entry_base(idx);
+ entry.dec_ref_count();
+ if (entry.get_ref_count() == 0) {
_possibly_unused.insert(idx);
}
}
@@ -307,81 +232,44 @@ public:
return BatchUpdater(*this);
}
+ // TODO: Change to sending enum indexes as const array ref.
void writeValues(BufferWriter &writer, const Index *idxs, size_t count) const override;
ssize_t deserialize(const void *src, size_t available, size_t &initSpace);
ssize_t deserialize(const void *src, size_t available, Index &idx);
bool foldedChange(const Index &idx1, const Index &idx2) override;
- virtual bool findEnum(Type value, IEnumStore::EnumHandle &e) const;
- virtual std::vector<IEnumStore::EnumHandle> findFoldedEnums(Type value) const;
- void addEnum(Type value, Index &newIdx);
- virtual bool findIndex(Type value, Index &idx) const;
+ bool findEnum(DataType value, IEnumStore::EnumHandle &e) const;
+ std::vector<IEnumStore::EnumHandle> findFoldedEnums(DataType value) const;
+ void addEnum(DataType value, Index &newIdx);
+ bool findIndex(DataType value, Index &idx) const;
void freeUnusedEnums(bool movePostingidx) override;
void freeUnusedEnums(const IndexSet& toRemove);
- void reset(Builder &builder);
- bool performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New);
private:
template <typename Dictionary>
- void reset(Builder &builder, Dictionary &dict);
-
- template <typename Dictionary>
- void addEnum(Type value, Index &newIdx, Dictionary &dict);
+ void addEnum(DataType value, Index& newIdx, Dictionary& dict);
- template <typename Dictionary>
- void performCompaction(Dictionary &dict, EnumIndexMap & old2New);
};
+std::unique_ptr<datastore::IUniqueStoreDictionary>
+make_enum_store_dictionary(IEnumStore &store, bool has_postings);
+
vespalib::asciistream & operator << (vespalib::asciistream & os, const IEnumStore::Index & idx);
extern template
class datastore::DataStoreT<IEnumStore::Index>;
-template <typename EntryType>
-inline typename EntryType::Type
-EnumStoreT<EntryType>::Entry::getValue() const // implementation for numeric
-{
- Type dst;
- const char * src = this->_data + EntryBase::size();
- memcpy(&dst, src, sizeof(Type));
- return dst;
-}
-
-template <>
-inline StringEntryType::Type
-EnumStoreT<StringEntryType>::Entry::getValue() const
-{
- return (_data + EntryBase::size());
-}
-
-
template <>
void
-EnumStoreT<StringEntryType>::writeValues(BufferWriter &writer,
- const Index *idxs,
+EnumStoreT<StringEntryType>::writeValues(BufferWriter& writer,
+ const IEnumStore::Index* idxs,
size_t count) const;
template <>
ssize_t
-EnumStoreT<StringEntryType>::deserialize(const void *src,
- size_t available,
- size_t &initSpace);
-
-template <>
-ssize_t
-EnumStoreT<StringEntryType>::deserialize(const void *src,
- size_t available,
- Index &idx);
-
-
-//-----------------------------------------------------------------------------
-// EnumStore
-//-----------------------------------------------------------------------------
-
-template <>
-void
-EnumStoreT<StringEntryType>::
-insertEntryValue(char * dst, Type value);
+EnumStoreT<StringEntryType>::deserialize(const void* src,
+ size_t available,
+ Index& idx);
extern template
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index 428875e00db..254f517ada2 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -14,174 +14,44 @@
#include <vespa/vespalib/btree/btreeroot.hpp>
#include <vespa/vespalib/btree/btreebuilder.hpp>
#include <vespa/vespalib/btree/btree.hpp>
+#include <vespa/vespalib/datastore/unique_store.hpp>
+#include <vespa/vespalib/datastore/unique_store_string_allocator.hpp>
#include <vespa/vespalib/util/array.hpp>
#include <vespa/vespalib/util/bufferwriter.h>
namespace search {
-namespace {
-
-const uint32_t dummy_enum_value = 0;
-
-}
-
-template <typename EntryType>
-EnumStoreT<EntryType>::EnumBufferType::EnumBufferType()
- : datastore::BufferType<char>(Index::align(1),
- Index::offsetSize() / Index::align(1),
- Index::offsetSize() / Index::align(1)),
- _minSizeNeeded(0),
- _deadElems(0),
- _pendingCompact(false),
- _wantCompact(false)
-{
-}
-
-template <typename EntryType>
-size_t
-EnumStoreT<EntryType>::EnumBufferType::calcArraysToAlloc(uint32_t bufferId, size_t sizeNeeded, bool resizing) const
-{
- (void) resizing;
- size_t reservedElements = getReservedElements(bufferId);
- sizeNeeded = std::max(sizeNeeded, _minSizeNeeded);
- size_t usedElems = _activeUsedElems;
- if (_lastUsedElems != nullptr) {
- usedElems += *_lastUsedElems;
- }
- assert((usedElems % _arraySize) == 0);
- double growRatio = 1.5f;
- uint64_t maxSize = static_cast<uint64_t>(_maxArrays) * _arraySize;
- uint64_t newSize = usedElems - _deadElems + sizeNeeded;
- if (usedElems != 0) {
- newSize *= growRatio;
- }
- newSize += reservedElements;
- newSize = alignBufferSize(newSize);
- assert((newSize % _arraySize) == 0);
- if (newSize <= maxSize) {
- return newSize / _arraySize;
- }
- newSize = usedElems - _deadElems + sizeNeeded + reservedElements + 1000000;
- newSize = alignBufferSize(newSize);
- assert((newSize % _arraySize) == 0);
- if (newSize <= maxSize) {
- return _maxArrays;
- }
- failNewSize(newSize, maxSize);
- return 0;
-}
-
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::
-insertEntry(char * dst, uint32_t refCount, Type value)
-{
- memcpy(dst, &dummy_enum_value, sizeof(uint32_t));
- uint32_t pos = sizeof(uint32_t);
- memcpy(dst + pos, &refCount, sizeof(uint32_t));
- pos += sizeof(uint32_t);
- insertEntryValue(dst + pos, value);
-}
-
-template <>
-void
-EnumStoreT<StringEntryType>::
-insertEntryValue(char * dst, Type value);
-
template <typename EntryType>
-uint32_t
-EnumStoreT<EntryType>::getBufferIndex(datastore::BufferState::State status)
+void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet& unused)
{
- for (uint32_t i = 0; i < _store.getNumBuffers(); ++i) {
- if (_store.getBufferState(i).getState() == status) {
- return i;
- }
+ const auto& entry = get_entry_base(idx);
+ if (entry.get_ref_count() == 0) {
+ unused.insert(idx);
+ _store.get_allocator().hold(idx);
}
- return Index::numBuffers();
}
template <typename EntryType>
-void
-EnumStoreT<EntryType>::postCompact()
+EnumStoreT<EntryType>::EnumStoreT(bool has_postings)
+ : _store(make_enum_store_dictionary(*this, has_postings)),
+ _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary()))
{
- _store.finishCompact(_toHoldBuffers);
}
template <typename EntryType>
-bool
-EnumStoreT<EntryType>::preCompact(uint64_t bytesNeeded)
-{
- if (getBufferIndex(datastore::BufferState::FREE) == Index::numBuffers()) {
- return false;
- }
- uint32_t activeBufId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & activeBuf = _store.getBufferState(activeBufId);
- _type.setSizeNeededAndDead(bytesNeeded, activeBuf.getDeadElems());
- _toHoldBuffers = _store.startCompact(TYPE_ID);
- return true;
-}
-
-template <typename EntryType>
-void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet & unused)
-{
- Entry e = getEntry(idx);
- if (e.getRefCount() == 0) {
- Type value = e.getValue();
- if (unused.insert(idx).second) {
- _store.incDead(idx, getEntrySize(value));
- }
- }
-}
-
-template <typename EntryType>
-EnumStoreT<EntryType>::EnumStoreT(uint64_t initBufferSize, bool hasPostings)
- : _enumDict(nullptr),
- _store(),
- _type(),
- _toHoldBuffers()
-{
- if (hasPostings) {
- _enumDict = new EnumStoreDictionary<EnumPostingTree>(*this);
- } else {
- _enumDict = new EnumStoreDictionary<EnumTree>(*this);
- }
- _store.addType(&_type);
- _type.setSizeNeededAndDead(initBufferSize, 0);
- _store.initActiveBuffers();
-}
-
-template <typename EntryType>
-EnumStoreT<EntryType>::~EnumStoreT()
-{
- _store.clearHoldLists();
- _store.dropBuffers();
- delete _enumDict;
-}
-
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::reset(uint64_t initBufferSize)
-{
- _store.clearHoldLists();
- _store.dropBuffers();
- _type.setSizeNeededAndDead(initBufferSize, 0);
- _store.initActiveBuffers();
- _enumDict->onReset();
-}
+EnumStoreT<EntryType>::~EnumStoreT() = default;
template <typename EntryType>
vespalib::AddressSpace
EnumStoreT<EntryType>::getAddressSpaceUsage() const
{
- const datastore::BufferState &activeState = _store.getBufferState(_store.getActiveBufferId(TYPE_ID));
- return vespalib::AddressSpace(activeState.size(), activeState.getDeadElems(), DataStoreType::RefType::offsetSize());
+ return _store.get_address_space_usage();
}
template <typename EntryType>
void
EnumStoreT<EntryType>::transferHoldLists(generation_t generation)
{
- _enumDict->transfer_hold_lists(generation);
_store.transferHoldLists(generation);
}
@@ -190,52 +60,24 @@ void
EnumStoreT<EntryType>::trimHoldLists(generation_t firstUsed)
{
// remove generations in the range [0, firstUsed>
- _enumDict->trim_hold_lists(firstUsed);
_store.trimHoldLists(firstUsed);
}
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::failNewSize(uint64_t minNewSize, uint64_t maxSize)
-{
- throw vespalib::IllegalStateException(vespalib::make_string("EnumStoreT::failNewSize: Minimum new size (%" PRIu64 ") exceeds max size (%" PRIu64 ")", minNewSize, maxSize));
-}
-
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::fallbackResize(uint64_t bytesNeeded)
-{
- uint32_t activeBufId = _store.getActiveBufferId(TYPE_ID);
- size_t reservedElements = _type.getReservedElements(activeBufId);
- _type.setSizeNeededAndDead(bytesNeeded, reservedElements);
- _type.setWantCompact();
- _store.fallbackResize(activeBufId, bytesNeeded);
-}
template <typename EntryType>
ssize_t
-EnumStoreT<EntryType>::deserialize0(const void *src,
+EnumStoreT<EntryType>::deserialize0(const void* src,
size_t available,
- IndexVector &idx)
+ IndexVector& idx)
{
size_t left = available;
- size_t initSpace = Index::align(1);
- const char * p = static_cast<const char *>(src);
- while (left > 0) {
- ssize_t sz = deserialize(p, left, initSpace);
- if (sz < 0)
- return sz;
- p += sz;
- left -= sz;
- }
- reset(initSpace);
- left = available;
- p = static_cast<const char *>(src);
+ const char* p = static_cast<const char*>(src);
Index idx1;
while (left > 0) {
ssize_t sz = deserialize(p, left, idx1);
- if (sz < 0)
+ if (sz < 0) {
return sz;
+ }
p += sz;
left -= sz;
idx.push_back(idx1);
@@ -245,81 +87,45 @@ EnumStoreT<EntryType>::deserialize0(const void *src,
template <typename EntryType>
bool
-EnumStoreT<EntryType>::getValue(Index idx, Type & value) const
+EnumStoreT<EntryType>::getValue(Index idx, DataType& value) const
{
- if (!validIndex(idx)) {
+ if (!idx.valid()) {
return false;
}
- value = getEntry(idx).getValue();
+ value = _store.get(idx);
return true;
}
template <typename EntryType>
-EnumStoreT<EntryType>::Builder::Builder()
- : _uniques(),
- _bufferSize(Index::align(1))
-{ }
-
-template <typename EntryType>
-EnumStoreT<EntryType>::Builder::~Builder() { }
+EnumStoreT<EntryType>::Builder::~Builder() = default;
template <class EntryType>
void
-EnumStoreT<EntryType>::writeValues(BufferWriter &writer, const Index *idxs, size_t count) const
+EnumStoreT<EntryType>::writeValues(BufferWriter& writer, const Index* idxs, size_t count) const
{
- size_t sz(EntryType::fixedSize());
- for (uint32_t i = 0; i < count; ++i) {
+ for (size_t i = 0; i < count; ++i) {
Index idx = idxs[i];
- const char *src(_store.getEntry<char>(idx) + EntryBase::size());
- writer.write(src, sz);
+ writer.write(&_store.get(idx), sizeof(DataType));
}
}
template <class EntryType>
ssize_t
-EnumStoreT<EntryType>::deserialize(const void *src, size_t available, size_t &initSpace)
+EnumStoreT<EntryType>::deserialize(const void* src, size_t available, Index& idx)
{
- (void) src;
- size_t sz(EntryType::fixedSize());
- if (available < sz)
+ if (available < sizeof(DataType)) {
return -1;
- uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
- initSpace += entrySize;
- return sz;
-}
-
-template <class EntryType>
-ssize_t
-EnumStoreT<EntryType>::deserialize(const void *src, size_t available, Index &idx)
-{
- size_t sz(EntryType::fixedSize());
- if (available < sz)
- return -1;
- uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & buffer = _store.getBufferState(activeBufferId);
- uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
- if (buffer.remaining() < entrySize) {
- HDR_ABORT("not enough space");
}
- uint64_t offset = buffer.size();
- Index newIdx(offset, activeBufferId);
- char *dst(_store.getEntry<char>(newIdx));
- memcpy(dst, &dummy_enum_value, sizeof(uint32_t));
- uint32_t pos = sizeof(uint32_t);
- uint32_t refCount(0);
- memcpy(dst + pos, &refCount, sizeof(uint32_t));
- pos += sizeof(uint32_t);
- memcpy(dst + pos, src, sz);
- buffer.pushed_back(entrySize);
+ const auto* value = static_cast<const DataType*>(src);
+ Index prev_idx = idx;
+ idx = _store.get_allocator().allocate(*value);
- if (idx.valid()) {
- assert(ComparatorType::compare(getValue(idx), Entry(dst).getValue()) < 0);
+ if (prev_idx.valid()) {
+ assert(ComparatorType::compare(getValue(prev_idx), *value) < 0);
}
- idx = newIdx;
- return sz;
+ return sizeof(DataType);
}
-
template <class EntryType>
bool
EnumStoreT<EntryType>::foldedChange(const Index &idx1, const Index &idx2)
@@ -329,14 +135,13 @@ EnumStoreT<EntryType>::foldedChange(const Index &idx1, const Index &idx2)
return cmpres < 0;
}
-
template <typename EntryType>
bool
-EnumStoreT<EntryType>::findEnum(Type value, IEnumStore::EnumHandle &e) const
+EnumStoreT<EntryType>::findEnum(DataType value, IEnumStore::EnumHandle &e) const
{
ComparatorType cmp(*this, value);
Index idx;
- if (_enumDict->findFrozenIndex(cmp, idx)) {
+ if (_dict.findFrozenIndex(cmp, idx)) {
e = idx.ref();
return true;
}
@@ -345,22 +150,20 @@ EnumStoreT<EntryType>::findEnum(Type value, IEnumStore::EnumHandle &e) const
template <typename EntryType>
std::vector<IEnumStore::EnumHandle>
-EnumStoreT<EntryType>::findFoldedEnums(Type value) const
+EnumStoreT<EntryType>::findFoldedEnums(DataType value) const
{
FoldedComparatorType cmp(*this, value);
- return _enumDict->findMatchingEnums(cmp);
+ return _dict.findMatchingEnums(cmp);
}
-
template <typename EntryType>
bool
-EnumStoreT<EntryType>::findIndex(Type value, Index &idx) const
+EnumStoreT<EntryType>::findIndex(DataType value, Index &idx) const
{
ComparatorType cmp(*this, value);
- return _enumDict->findIndex(cmp, idx);
+ return _dict.findIndex(cmp, idx);
}
-
template <typename EntryType>
void
EnumStoreT<EntryType>::freeUnusedEnums(bool movePostingIdx)
@@ -368,13 +171,12 @@ EnumStoreT<EntryType>::freeUnusedEnums(bool movePostingIdx)
ComparatorType cmp(*this);
if (EntryType::hasFold() && movePostingIdx) {
FoldedComparatorType fcmp(*this);
- _enumDict->freeUnusedEnums(cmp, &fcmp);
+ _dict.freeUnusedEnums(cmp, &fcmp);
} else {
- _enumDict->freeUnusedEnums(cmp, nullptr);
+ _dict.freeUnusedEnums(cmp, nullptr);
}
}
-
template <typename EntryType>
void
EnumStoreT<EntryType>::freeUnusedEnums(const IndexSet& toRemove)
@@ -382,34 +184,18 @@ EnumStoreT<EntryType>::freeUnusedEnums(const IndexSet& toRemove)
ComparatorType cmp(*this);
if (EntryType::hasFold()) {
FoldedComparatorType fcmp(*this);
- _enumDict->freeUnusedEnums(toRemove, cmp, &fcmp);
+ _dict.freeUnusedEnums(toRemove, cmp, &fcmp);
} else {
- _enumDict->freeUnusedEnums(toRemove, cmp, nullptr);
+ _dict.freeUnusedEnums(toRemove, cmp, nullptr);
}
}
-
template <typename EntryType>
template <typename Dictionary>
void
-EnumStoreT<EntryType>::addEnum(Type value, Index &newIdx, Dictionary &dict)
+EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx, Dictionary& dict)
{
typedef typename Dictionary::Iterator DictionaryIterator;
- uint32_t entrySize = this->getEntrySize(value);
- uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & buffer = _store.getBufferState(activeBufferId);
-#ifdef LOG_ENUM_STORE
- LOG(info,
- "addEnum(): buffer[%u]: capacity = %" PRIu64
- ", size = %" PRIu64 ", remaining = %" PRIu64
- ", dead = %" PRIu64 ", entrySize = %u",
- activeBufferId, buffer.capacity(),
- buffer.size(), buffer.remaining(),
- buffer._deadElems, entrySize);
-#endif
- if (buffer.remaining() < entrySize) {
- HDR_ABORT("not enough space");
- }
// check if already present
ComparatorType cmp(*this, value);
@@ -420,33 +206,31 @@ EnumStoreT<EntryType>::addEnum(Type value, Index &newIdx, Dictionary &dict)
return;
}
- uint64_t offset = buffer.size();
- newIdx = Index(offset, activeBufferId);
- char * dst = _store.template getEntry<char>(newIdx);
- this->insertEntry(dst, 0, value);
- buffer.pushed_back(entrySize);
- assert(Index::pad(offset) == 0);
+ newIdx = _store.get_allocator().allocate(value);
+ // TODO: Move this logic to "add/insert" on the dictionary
// update tree with new index
dict.insert(it, newIdx, typename Dictionary::DataType());
- // Copy posting list idx from next entry if same
- // folded value.
+ // Copy posting list idx from next entry if same folded value.
// Only for string posting list attributes, i.e. dictionary has
// data and entry type has folded compare.
if (DictionaryIterator::hasData() && EntryType::hasFold()) {
FoldedComparatorType foldCmp(*this);
++it;
- if (!it.valid() || foldCmp(newIdx, it.getKey()))
+ if (!it.valid() || foldCmp(newIdx, it.getKey())) {
return; // Next entry does not use same posting list
+ }
--it;
--it;
- if (it.valid() && !foldCmp(it.getKey(), newIdx))
+ if (it.valid() && !foldCmp(it.getKey(), newIdx)) {
return; // Previous entry uses same posting list
- if (it.valid())
+ }
+ if (it.valid()) {
++it;
- else
+ } else {
it.begin();
+ }
assert(it.valid() && it.getKey() == newIdx);
++it;
typename Dictionary::DataType pidx(it.getData());
@@ -458,148 +242,15 @@ EnumStoreT<EntryType>::addEnum(Type value, Index &newIdx, Dictionary &dict)
}
}
-
template <typename EntryType>
void
-EnumStoreT<EntryType>::addEnum(Type value, Index & newIdx)
+EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx)
{
- if (_enumDict->hasData()) {
- addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary());
+ if (_dict.hasData()) {
+ addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumPostingTree> &>(_dict).getDictionary());
} else {
- addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary());
+ addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumTree> &>(_dict).getDictionary());
}
}
-template <typename DictionaryType>
-struct TreeBuilderInserter {
- static void insert(typename DictionaryType::Builder & builder,
- IEnumStore::Index enumIdx,
- datastore::EntryRef postingIdx)
- {
- (void) postingIdx;
- builder.insert(enumIdx, typename DictionaryType::DataType());
- }
-};
-
-template <>
-struct TreeBuilderInserter<EnumPostingTree> {
- static void insert(EnumPostingTree::Builder & builder,
- IEnumStore::Index enumIdx,
- datastore::EntryRef postingIdx)
- {
- builder.insert(enumIdx, postingIdx);
- }
-};
-
-
-template <typename EntryType>
-template <typename Dictionary>
-void
-EnumStoreT<EntryType>::reset(Builder &builder, Dictionary &dict)
-{
- typedef typename Dictionary::Builder DictionaryBuilder;
- reset(builder.getBufferSize());
-
- DictionaryBuilder treeBuilder(dict.getAllocator());
- uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & state = _store.getBufferState(activeBufferId);
-
- // insert entries and update DictionaryBuilder
- const typename Builder::Uniques & uniques = builder.getUniques();
- for (typename Builder::Uniques::const_iterator iter = uniques.begin();
- iter != uniques.end(); ++iter)
- {
- uint64_t offset = state.size();
- Index idx(offset, activeBufferId);
- char * dst = _store.template getEntry<char>(idx);
- this->insertEntry(dst, iter->_refCount, iter->_value);
- state.pushed_back(iter->_sz);
-
- // update DictionaryBuilder with enum index and posting index
- TreeBuilderInserter<Dictionary>::insert(treeBuilder, idx, datastore::EntryRef(iter->_pidx));
- }
-
- // reset Dictionary
- dict.assign(treeBuilder); // destructive copy of treeBuilder
}
-
-
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::reset(Builder &builder)
-{
- if (_enumDict->hasData()) {
- reset(builder, static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary());
- } else {
- reset(builder, static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary());
- }
-}
-
-
-template <typename EntryType>
-template <typename Dictionary>
-void
-EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2New)
-{
- typedef typename Dictionary::Iterator DictionaryIterator;
- uint32_t freeBufferIdx = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & freeBuf = _store.getBufferState(freeBufferIdx);
- // copy entries from active buffer to free buffer
- for (DictionaryIterator iter = dict.begin(); iter.valid(); ++iter) {
- Index activeIdx = iter.getKey();
-
- Entry e = this->getEntry(activeIdx);
-
- // At this point the tree shall never reference any empty stuff.
- assert(e.getRefCount() > 0);
-#ifdef LOG_ENUM_STORE
- LOG(info, "performCompaction(): copy entry: enum = %u, refCount = %u, value = %s",
- e.getEnum(), e.getRefCount(), e.getValue());
-#endif
- Type value = e.getValue();
- uint32_t refCount = e.getRefCount();
- uint32_t entrySize = this->getEntrySize(value);
-
- uint64_t offset = freeBuf.size();
- Index newIdx = Index(offset, freeBufferIdx);
- char * dst = _store.template getEntry<char>(newIdx);
- // insert entry into free buffer
- this->insertEntry(dst, refCount, value);
-#ifdef LOG_ENUM_STORE
- LOG(info, "performCompaction(): new entry: refCount = %u, value = %s", 0, value);
-#endif
- freeBuf.pushed_back(entrySize);
- assert(Index::pad(offset) == 0);
-#ifdef LOG_ENUM_STORE
- LOG(info,
- "performCompaction(): new index: offset = %" PRIu64
- ", bufferIdx = %u",
- offset, freeBufferIdx);
-#endif
-
- // update tree with new index
- std::atomic_thread_fence(std::memory_order_release);
- iter.writeKey(newIdx);
-
- old2New[activeIdx] = newIdx;
- }
- this->postCompact();
-}
-
-
-template <typename EntryType>
-bool
-EnumStoreT<EntryType>::performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New)
-{
- if ( ! this->preCompact(bytesNeeded) ) {
- return false;
- }
- if (_enumDict->hasData()) {
- performCompaction(static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary(), old2New);
- } else {
- performCompaction(static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary(), old2New);
- }
- return true;
-}
-
-} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
index 0963e0ff67d..f79098a67df 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
@@ -22,7 +22,7 @@ class IEnumStoreDictionary;
*/
class IEnumStore {
public:
- using Index = datastore::AlignedEntryRefT<31, 4>;
+ using Index = datastore::EntryRefT<22>;
using IndexVector = vespalib::Array<Index>;
using EnumHandle = attribute::IAttributeVector::EnumHandle;
using EnumVector = vespalib::Array<uint32_t>;
@@ -52,8 +52,8 @@ public:
virtual const IEnumStoreDictionary& getEnumStoreDict() const = 0;
virtual const datastore::DataStoreBase& get_data_store_base() const = 0;
virtual uint32_t getNumUniques() const = 0;
- virtual vespalib::MemoryUsage getMemoryUsage() const = 0;
- virtual vespalib::MemoryUsage getTreeMemoryUsage() const = 0;
+ virtual vespalib::MemoryUsage getValuesMemoryUsage() const = 0;
+ virtual vespalib::MemoryUsage getDictionaryMemoryUsage() const = 0;
template <typename TreeT>
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index 5352dc492fd..9bdc36e805b 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -183,8 +183,8 @@ MultiValueEnumAttribute<B, M>::onUpdateStat()
{
// update statistics
vespalib::MemoryUsage total;
- total.merge(this->_enumStore.getMemoryUsage());
- total.merge(this->_enumStore.getTreeMemoryUsage());
+ total.merge(this->_enumStore.getValuesMemoryUsage());
+ total.merge(this->_enumStore.getDictionaryMemoryUsage());
total.merge(this->_mvMapping.updateStat());
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 08095b6bf13..7f4f7503eff 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -102,8 +102,8 @@ SingleValueEnumAttribute<B>::onUpdateStat()
// update statistics
vespalib::MemoryUsage total = _enumIndices.getMemoryUsage();
total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes());
- total.merge(this->_enumStore.getMemoryUsage());
- total.merge(this->_enumStore.getTreeMemoryUsage());
+ total.merge(this->_enumStore.getValuesMemoryUsage());
+ total.merge(this->_enumStore.getDictionaryMemoryUsage());
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
this->updateStatistics(_enumIndices.size(), this->_enumStore.getNumUniques(), total.allocatedBytes(),
diff --git a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h
index cda62884318..a780cb4fe98 100644
--- a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h
+++ b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h
@@ -45,6 +45,7 @@ public:
virtual uint32_t get_num_uniques() const = 0;
virtual vespalib::MemoryUsage get_memory_usage() const = 0;
virtual void build(const std::vector<EntryRef> &refs, const std::vector<uint32_t> &ref_counts, std::function<void(EntryRef)> hold) = 0;
+ virtual void build_with_payload(const std::vector<EntryRef>& refs, const std::vector<uint32_t>& payloads) = 0;
virtual std::unique_ptr<ReadSnapshot> get_read_snapshot() const = 0;
virtual EntryRef get_frozen_root() const = 0;
};
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.h b/vespalib/src/vespa/vespalib/datastore/unique_store.h
index bf7808e9325..6b85e79d3eb 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store.h
@@ -44,6 +44,7 @@ private:
public:
UniqueStore();
+ UniqueStore(std::unique_ptr<IUniqueStoreDictionary> dict);
~UniqueStore();
UniqueStoreAddResult add(EntryConstRefType value);
EntryRef find(EntryConstRefType value);
@@ -51,6 +52,12 @@ public:
void remove(EntryRef ref);
ICompactionContext::UP compactWorst();
vespalib::MemoryUsage getMemoryUsage() const;
+ vespalib::AddressSpace get_address_space_usage() const;
+
+ // TODO: Consider exposing only the needed functions from allocator
+ Allocator& get_allocator() { return _allocator; }
+ const Allocator& get_allocator() const { return _allocator; }
+ IUniqueStoreDictionary& get_dictionary() { return *_dict; }
// Pass on hold list management to underlying store
void transferHoldLists(generation_t generation);
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
index f1b60845403..ebd81010612 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
@@ -28,9 +28,15 @@ using DefaultUniqueStoreDictionary = UniqueStoreDictionary<DefaultDictionary>;
template <typename EntryT, typename RefT, typename Compare, typename Allocator>
UniqueStore<EntryT, RefT, Compare, Allocator>::UniqueStore()
+ : UniqueStore<EntryT, RefT, Compare, Allocator>(std::make_unique<uniquestore::DefaultUniqueStoreDictionary>())
+{
+}
+
+template <typename EntryT, typename RefT, typename Compare, typename Allocator>
+UniqueStore<EntryT, RefT, Compare, Allocator>::UniqueStore(std::unique_ptr<IUniqueStoreDictionary> dict)
: _allocator(),
_store(_allocator.get_data_store()),
- _dict(std::make_unique<uniquestore::DefaultUniqueStoreDictionary>())
+ _dict(std::move(dict))
{
}
@@ -178,6 +184,13 @@ UniqueStore<EntryT, RefT, Compare, Allocator>::getMemoryUsage() const
}
template <typename EntryT, typename RefT, typename Compare, typename Allocator>
+vespalib::AddressSpace
+UniqueStore<EntryT, RefT, Compare, Allocator>::get_address_space_usage() const
+{
+ return _allocator.get_data_store().getAddressSpaceUsage();
+}
+
+template <typename EntryT, typename RefT, typename Compare, typename Allocator>
const BufferState &
UniqueStore<EntryT, RefT, Compare, Allocator>::bufferState(EntryRef ref) const
{
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h b/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h
index 1981a190cc6..a4443742e33 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h
@@ -42,6 +42,7 @@ public:
return get_wrapped(ref).value();
}
DataStoreType& get_data_store() { return _store; }
+ const DataStoreType& get_data_store() const { return _store; }
};
}
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h b/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h
index a0e9f3d63a7..7f5162d97ff 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h
@@ -23,6 +23,7 @@ class UniqueStoreBuilder {
IUniqueStoreDictionary& _dict;
std::vector<EntryRef> _refs;
std::vector<uint32_t> _refCounts;
+
public:
UniqueStoreBuilder(Allocator& allocator, IUniqueStoreDictionary& dict, uint32_t uniqueValuesHint);
~UniqueStoreBuilder();
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h
index 4ae32c45dea..15b947e283b 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h
@@ -48,6 +48,7 @@ public:
uint32_t get_num_uniques() const override;
vespalib::MemoryUsage get_memory_usage() const override;
void build(const std::vector<EntryRef> &refs, const std::vector<uint32_t> &ref_counts, std::function<void(EntryRef)> hold) override;
+ void build_with_payload(const std::vector<EntryRef>& refs, const std::vector<uint32_t>& payloads) override;
std::unique_ptr<ReadSnapshot> get_read_snapshot() const override;
EntryRef get_frozen_root() const override;
};
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp
index f3087bc5610..3784b903ad6 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp
@@ -176,6 +176,23 @@ UniqueStoreDictionary<DictionaryT, ParentT>::build(const std::vector<EntryRef> &
}
template <typename DictionaryT, typename ParentT>
+void
+UniqueStoreDictionary<DictionaryT, ParentT>::build_with_payload(const std::vector<EntryRef>& refs,
+ const std::vector<uint32_t>& payloads)
+{
+ assert(refs.size() == payloads.size());
+ typename DictionaryType::Builder builder(_dict.getAllocator());
+ for (size_t i = 0; i < refs.size(); ++i) {
+ if constexpr (std::is_same_v<DataType, uint32_t>) {
+ builder.insert(refs[i], payloads[i]);
+ } else {
+ builder.insert(refs[i], DataType());
+ }
+ }
+ _dict.assign(builder);
+}
+
+template <typename DictionaryT, typename ParentT>
std::unique_ptr<typename ParentT::ReadSnapshot>
UniqueStoreDictionary<DictionaryT, ParentT>::get_read_snapshot() const
{