summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2019-08-29 15:42:27 +0000
committerGeir Storli <geirst@verizonmedia.com>2019-09-02 08:57:40 +0000
commit94ab377491f19e0b4ea80201eb0340d6e4ee55b2 (patch)
tree78e4d337a6805a958fcb5b294d4bff7ddc17c9d3
parent9fad146519a83d29a4d7e0c539f923c322600d10 (diff)
Improve memory management in all enum attributes.
The new enum store uses 1024 small data buffers instead of 2 large ones as before. This avoids the problem with memory spikes when the active buffer was full and all values had to be compacted into the other buffer. In addition, the new enum store uses free lists so that compaction is not needed as often.
-rw-r--r--searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp4
-rw-r--r--searchlib/src/tests/attribute/attribute_test.cpp8
-rw-r--r--searchlib/src/tests/attribute/comparator/comparator_test.cpp10
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp591
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.cpp25
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattribute.hpp52
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.cpp76
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.h298
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.hpp463
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_enum_store.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp4
-rw-r--r--vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store.h7
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store.hpp15
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_builder.h1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h1
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp17
21 files changed, 289 insertions, 1297 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp
index a1d5f72bc9d..5a199c529b6 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp
+++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp
@@ -77,8 +77,8 @@ void
convertEnumStoreToSlime(const IEnumStore &enumStore, Cursor &object)
{
object.setLong("numUniques", enumStore.getNumUniques());
- convertMemoryUsageToSlime(enumStore.getMemoryUsage(), object.setObject("memoryUsage"));
- convertMemoryUsageToSlime(enumStore.getTreeMemoryUsage(), object.setObject("treeMemoryUsage"));
+ convertMemoryUsageToSlime(enumStore.getValuesMemoryUsage(), object.setObject("valuesMemoryUsage"));
+ convertMemoryUsageToSlime(enumStore.getDictionaryMemoryUsage(), object.setObject("dictionaryMemoryUsage"));
}
void
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
index 4e520e86707..98caf39dace 100644
--- a/searchlib/src/tests/attribute/attribute_test.cpp
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -2036,11 +2036,11 @@ AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool
AddressSpaceUsage after = attrPtr->getAddressSpaceUsage();
if (attrPtr->hasEnum()) {
LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str());
- EXPECT_EQUAL(before.enumStoreUsage().used(), 16u);
- EXPECT_EQUAL(before.enumStoreUsage().dead(), 16u);
+ EXPECT_EQUAL(before.enumStoreUsage().used(), 1u);
+ EXPECT_EQUAL(before.enumStoreUsage().dead(), 1u);
EXPECT_GREATER(after.enumStoreUsage().used(), before.enumStoreUsage().used());
- EXPECT_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit());
- EXPECT_EQUAL(34359738368u, after.enumStoreUsage().limit()); // EnumStoreBase::DataStoreType::RefType::offsetSize()
+ EXPECT_GREATER_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit());
+ EXPECT_GREATER(after.enumStoreUsage().limit(), 4200000000u);
} else {
LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT enum", attrName.c_str());
EXPECT_EQUAL(before.enumStoreUsage().used(), 0u);
diff --git a/searchlib/src/tests/attribute/comparator/comparator_test.cpp b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
index a2000c48423..7bd6f3ca013 100644
--- a/searchlib/src/tests/attribute/comparator/comparator_test.cpp
+++ b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
@@ -47,7 +47,7 @@ public:
void
Test::requireThatNumericComparatorIsWorking()
{
- NumericEnumStore es(1024, false);
+ NumericEnumStore es(false);
EnumIndex e1, e2;
es.addEnum(10, e1);
es.addEnum(30, e2);
@@ -63,7 +63,7 @@ Test::requireThatNumericComparatorIsWorking()
void
Test::requireThatFloatComparatorIsWorking()
{
- FloatEnumStore es(1024, false);
+ FloatEnumStore es(false);
EnumIndex e1, e2, e3;
es.addEnum(10.5, e1);
es.addEnum(30.5, e2);
@@ -83,7 +83,7 @@ Test::requireThatFloatComparatorIsWorking()
void
Test::requireThatStringComparatorIsWorking()
{
- StringEnumStore es(1024, false);
+ StringEnumStore es(false);
EnumIndex e1, e2, e3;
es.addEnum("Aa", e1);
es.addEnum("aa", e2);
@@ -102,7 +102,7 @@ Test::requireThatStringComparatorIsWorking()
void
Test::requireThatComparatorWithTreeIsWorking()
{
- NumericEnumStore es(2048, false);
+ NumericEnumStore es(false);
vespalib::GenerationHandler g;
TreeType t;
NodeAllocator m;
@@ -129,7 +129,7 @@ Test::requireThatComparatorWithTreeIsWorking()
void
Test::requireThatFoldedComparatorIsWorking()
{
- StringEnumStore es(1024, false);
+ StringEnumStore es(false);
EnumIndex e1, e2, e3, e4;
es.addEnum("Aa", e1);
es.addEnum("aa", e2);
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index c4ba8eecf43..f61211283a4 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -17,7 +17,8 @@ size_t enumStoreAlign(size_t size)
// IEnumStore::Index(0,0) is reserved thus 16 bytes are reserved in buffer 0
const uint32_t RESERVED_BYTES = 16u;
-typedef EnumStoreT<NumericEntryType<uint32_t> > NumericEnumStore;
+using NumericEnumStore = EnumStoreT<NumericEntryType<uint32_t> >;
+using generation_t = vespalib::GenerationHandler::generation_t;
class EnumStoreTest : public vespalib::TestApp
{
@@ -27,15 +28,6 @@ private:
typedef EnumStoreT<NumericEntryType<double> > DoubleEnumStore;
typedef IEnumStore::Index EnumIndex;
- typedef vespalib::GenerationHandler::generation_t generation_t;
-
- void testIndex();
- void fillDataBuffer(char * data, uint32_t refCount,
- const std::string & string);
- void fillDataBuffer(char * data, uint32_t refCount,
- uint32_t value);
- void testStringEntry();
- void testNumericEntry();
template <typename EnumStoreType, typename T>
void testFloatEnumStore(EnumStoreType & es);
@@ -51,27 +43,11 @@ private:
testUniques(const EnumStoreType &ses,
const std::vector<std::string> &unique);
-
- void testCompaction();
- template <typename EnumStoreType>
- void testCompaction(bool hasPostings);
-
- void testReset();
- template <typename EnumStoreType>
- void testReset(bool hasPostings);
-
void testHoldListAndGeneration();
- void testMemoryUsage();
void requireThatAddressSpaceUsageIsReported();
- void testBufferLimit();
// helper methods
typedef std::vector<std::string> StringVector;
- template <typename T>
- T random(T low, T high);
- std::string getRandomString(uint32_t minLen, uint32_t maxLen);
- StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen);
- StringVector sortRandomStrings(StringVector & strings);
struct StringEntry {
StringEntry(uint32_t r, const std::string & s) :
@@ -107,123 +83,6 @@ EnumStoreTest::Reader::Reader(uint32_t generation, const IndexVector & indices,
{}
EnumStoreTest::Reader::~Reader() { }
-void
-EnumStoreTest::testIndex()
-{
- {
- StringEnumStore::Index idx;
- EXPECT_TRUE( ! idx.valid());
- EXPECT_EQUAL(idx.offset(), 0u);
- EXPECT_TRUE(idx.bufferId() == 0);
- }
- {
- StringEnumStore::Index idx(enumStoreAlign(1000), 0);
- EXPECT_TRUE(idx.offset() == enumStoreAlign(1000));
- EXPECT_TRUE(idx.bufferId() == 0);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 31)- RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 31) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 33) - RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 33) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 35) - RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 35) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- // Change offsets when alignment changes.
- StringEnumStore::Index idx1(48, 0);
- StringEnumStore::Index idx2(80, 0);
- StringEnumStore::Index idx3(48, 0);
- EXPECT_TRUE(!(idx1 == idx2));
- EXPECT_TRUE(idx1 == idx3);
- }
- {
- EXPECT_TRUE(StringEnumStore::Index::numBuffers() == 2);
- }
-}
-
-void
-EnumStoreTest::fillDataBuffer(char * data, uint32_t refCount,
- const std::string & string)
-{
- StringEnumStore::insertEntry(data, refCount, string.c_str());
-}
-
-void
-EnumStoreTest::fillDataBuffer(char * data, uint32_t refCount,
- uint32_t value)
-{
- NumericEnumStore::insertEntry(data, refCount, value);
-}
-
-void
-EnumStoreTest::testStringEntry()
-{
- {
- char data[9];
- fillDataBuffer(data, 0, "");
- StringEnumStore::Entry e(data);
- EXPECT_TRUE(StringEnumStore::getEntrySize("") ==
- StringEnumStore::alignEntrySize(8 + 1));
-
- EXPECT_TRUE(e.getRefCount() == 0);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 1);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 0);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
- }
- {
- char data[18];
- fillDataBuffer(data, 5, "enumstore");
- StringEnumStore::Entry e(data);
- EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") ==
- StringEnumStore::alignEntrySize(8 + 1 + 9));
-
- EXPECT_TRUE(e.getRefCount() == 5);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 6);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 5);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
- }
-}
-
-void
-EnumStoreTest::testNumericEntry()
-{
- {
- char data[12];
- fillDataBuffer(data, 20, 30);
- NumericEnumStore::Entry e(data);
- EXPECT_TRUE(NumericEnumStore::getEntrySize(30) ==
- NumericEnumStore::alignEntrySize(8 + 4));
-
- EXPECT_TRUE(e.getRefCount() == 20);
- EXPECT_TRUE(e.getValue() == 30);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 21);
- EXPECT_TRUE(e.getValue() == 30);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 20);
- EXPECT_TRUE(e.getValue() == 30);
- }
-}
-
template <typename EnumStoreType, typename T>
void
EnumStoreTest::testFloatEnumStore(EnumStoreType & es)
@@ -256,11 +115,11 @@ void
EnumStoreTest::testFloatEnumStore()
{
{
- FloatEnumStore fes(1000, false);
+ FloatEnumStore fes(false);
testFloatEnumStore<FloatEnumStore, float>(fes);
}
{
- DoubleEnumStore des(1000, false);
+ DoubleEnumStore des(false);
testFloatEnumStore<DoubleEnumStore, double>(des);
}
}
@@ -268,7 +127,7 @@ EnumStoreTest::testFloatEnumStore()
void
EnumStoreTest::testFindFolded()
{
- StringEnumStore ses(100, false);
+ StringEnumStore ses(false);
std::vector<EnumIndex> indices;
std::vector<std::string> unique({"", "one", "two", "TWO", "Two", "three"});
for (std::string &str : unique) {
@@ -308,15 +167,10 @@ template <typename EnumStoreType>
void
EnumStoreTest::testAddEnum(bool hasPostings)
{
- EnumStoreType ses(100, hasPostings);
- EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES,
- ses.getBuffer(0).capacity());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size());
- EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
+ // TODO: Rewrite test to use BatchUpdater
+ EnumStoreType ses(hasPostings);
EnumIndex idx;
- uint64_t offset = ses.getBuffer(0).size();
std::vector<EnumIndex> indices;
std::vector<std::string> unique;
unique.push_back("");
@@ -326,12 +180,9 @@ EnumStoreTest::testAddEnum(bool hasPostings)
for (uint32_t i = 0; i < unique.size(); ++i) {
ses.addEnum(unique[i].c_str(), idx);
- EXPECT_EQUAL(offset, idx.offset());
- EXPECT_EQUAL(0u, idx.bufferId());
ses.incRefCount(idx);
EXPECT_EQUAL(1u, ses.getRefCount(idx));
indices.push_back(idx);
- offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8);
EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx));
}
ses.freezeTree();
@@ -374,197 +225,11 @@ EnumStoreTest::testUniques
EXPECT_EQUAL(static_cast<uint32_t>(unique.size()), i);
}
-
-void
-EnumStoreTest::testCompaction()
-{
- testCompaction<StringEnumStore>(false);
- testCompaction<StringEnumStore>(true);
-}
-
-template <typename EnumStoreType>
-void
-EnumStoreTest::testCompaction(bool hasPostings)
-{
- // entrySize = 15 before alignment
- uint32_t entrySize = EnumStoreType::alignEntrySize(15);
- uint32_t initBufferSize = entrySize * 5;
- EnumStoreType ses(initBufferSize, hasPostings);
- // Note: Sizes of underlying data store buffers are power of 2.
- uint32_t adjustedBufferSize = vespalib::roundUp2inN(initBufferSize) - RESERVED_BYTES;
- EnumIndex idx;
- std::vector<EnumIndex> indices;
- typename EnumStoreType::Type t = "foo";
- std::vector<std::string> uniques;
- uniques.push_back("enum00");
- uniques.push_back("enum01");
- uniques.push_back("enum02");
- uniques.push_back("enum03");
- uniques.push_back("enum04");
-
- // fill with unique values
- for (uint32_t i = 0; i < 5; ++i) {
- size_t expRemaining = adjustedBufferSize - i * entrySize;
- EXPECT_EQUAL(expRemaining, ses.getRemaining());
- ses.addEnum(uniques[i].c_str(), idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- indices.push_back(idx);
- }
- EXPECT_EQUAL(32u, ses.getRemaining());
- EXPECT_EQUAL(32u, ses.getBuffer(0).remaining());
- EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
- uint32_t failEntrySize = ses.getEntrySize("enum05");
- EXPECT_EQUAL(16u, failEntrySize);
-
- // change from enum00 -> enum01
- ses.decRefCount(indices[0]);
- ses.incRefCount(indices[1]);
- indices[0] = indices[1];
-
- // check correct refcount
- for (uint32_t i = 0; i < 5; ++i) {
- EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
- uint32_t refCount = ses.getRefCount(idx);
- if (i == 0) {
- EXPECT_TRUE(refCount == 0);
- } else if (i == 1) {
- EXPECT_TRUE(refCount == 2);
- } else {
- EXPECT_TRUE(refCount == 1);
- }
- }
-
- // free unused enums
- ses.freeUnusedEnums(true);
- EXPECT_TRUE(!ses.findIndex("enum00", idx));
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
- auto &data_store_base = ses.get_data_store_base();
- auto old_compaction_count = data_store_base.get_compaction_count();
-
- // perform compaction
- IEnumStore::EnumIndexMap old2New;
- EXPECT_TRUE(ses.performCompaction(3 * entrySize, old2New));
- EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize);
- EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize);
- EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4);
- EXPECT_TRUE(ses.getBuffer(1).getDeadElems() == 0);
-
- EXPECT_NOT_EQUAL(old_compaction_count, data_store_base.get_compaction_count());
-
- // add new unique strings
- ses.addEnum("enum05", idx);
- ses.addEnum("enum06", idx);
- ses.addEnum("enum00", idx);
-
- // compare old and new indices
- for (uint32_t i = 0; i < indices.size(); ++i) {
- idx = old2New[indices[i]];
- EXPECT_TRUE(indices[i].bufferId() == 0);
- EXPECT_TRUE(idx.bufferId() == 1);
- EXPECT_TRUE(ses.getValue(indices[i], t));
- typename EnumStoreType::Type s = "bar";
- EXPECT_TRUE(ses.getValue(idx, s));
- EXPECT_TRUE(strcmp(t, s) == 0);
- }
- // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0
- idx = old2New[indices[0]];
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset());
- EXPECT_EQUAL(0u, idx.offset());
- idx = old2New[indices[1]];
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset());
- EXPECT_EQUAL(0u, idx.offset());
- idx = old2New[indices[2]];
- EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset());
- EXPECT_EQUAL(entrySize, idx.offset());
- idx = old2New[indices[3]];
- EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset());
- EXPECT_EQUAL(2 * entrySize, idx.offset());
- idx = old2New[indices[4]];
- EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset());
- EXPECT_EQUAL(3 * entrySize, idx.offset());
-}
-
-void
-EnumStoreTest::testReset()
-{
- testReset<StringEnumStore>(false);
-
- testReset<StringEnumStore>(true);
-}
-
-template <typename EnumStoreType>
-void
-EnumStoreTest::testReset(bool hasPostings)
-{
- uint32_t numUniques = 10000;
- srand(123456789);
- StringVector rndStrings = fillRandomStrings(numUniques, 10, 15);
- EXPECT_EQUAL(rndStrings.size(), size_t(numUniques));
- StringVector uniques = sortRandomStrings(rndStrings);
- EXPECT_EQUAL(uniques.size(), size_t(numUniques));
- // max entrySize = 25 before alignment
- uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16);
- EnumStoreType ses(numUniques * maxEntrySize, hasPostings);
- EnumIndex idx;
-
- uint32_t cnt = 0;
- // add new unique strings
- for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) {
- ses.addEnum(iter->c_str(), idx);
- EXPECT_EQUAL(ses.getNumUniques(), ++cnt);
- }
-
- // check for unique strings
- for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
- EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
- }
-
- EXPECT_EQUAL(ses.getNumUniques(), numUniques);
- if (hasPostings) {
- testUniques<EnumStoreType, EnumPostingTree>(ses, uniques);
- } else {
- testUniques<EnumStoreType, EnumTree>(ses, uniques);
- }
-
- rndStrings = fillRandomStrings(numUniques, 15, 20);
- StringVector newUniques = sortRandomStrings(rndStrings);
-
- typename EnumStoreType::Builder builder;
- for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
- builder.insert(iter->c_str());
- }
-
- ses.reset(builder);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(524288u, ses.getCapacity());
- EXPECT_EQUAL(204272u, ses.getRemaining());
-
- // check for old unique strings
- for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
- EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx));
- }
-
- // check for new unique strings
- for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
- EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
- }
-
- EXPECT_EQUAL(ses.getNumUniques(), numUniques);
- if (hasPostings) {
- testUniques<EnumStoreType, EnumPostingTree>(ses, newUniques);
- } else {
- testUniques<EnumStoreType, EnumTree>(ses, newUniques);
- }
-}
-
void
EnumStoreTest::testHoldListAndGeneration()
{
- uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6);
- StringEnumStore ses(100 * entrySize, false);
+ // TODO: Rewrite test to use BatchUpdater
+ StringEnumStore ses(false);
StringEnumStore::Index idx;
StringVector uniques;
generation_t sesGen = 0u;
@@ -597,11 +262,11 @@ EnumStoreTest::testHoldListAndGeneration()
for (uint32_t j = i - 9; j <= i; ++j) {
EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx));
indices.push_back(idx);
- StringEnumStore::Entry entry = ses.getEntry(idx);
- EXPECT_TRUE(entry.getRefCount() == 1);
- EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0);
- expected.push_back(StringEntry(entry.getRefCount(),
- std::string(entry.getValue())));
+ uint32_t ref_count = ses.getRefCount(idx);
+ std::string value(ses.getValue(idx));
+ EXPECT_EQUAL(1u, ref_count);
+ EXPECT_EQUAL(uniques[j], value);
+ expected.emplace_back(ref_count, value);
}
EXPECT_TRUE(indices.size() == 10);
EXPECT_TRUE(expected.size() == 10);
@@ -611,10 +276,6 @@ EnumStoreTest::testHoldListAndGeneration()
}
}
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(432u, ses.getRemaining());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
// remove all uniques
for (uint32_t i = 0; i < 100; ++i) {
EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
@@ -622,117 +283,12 @@ EnumStoreTest::testHoldListAndGeneration()
EXPECT_EQUAL(0u, ses.getRefCount(idx));
}
ses.freeUnusedEnums(true);
- EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
- // perform compaction
- uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8);
- IEnumStore::EnumIndexMap old2New;
- EXPECT_TRUE(ses.performCompaction(5 * newEntrySize, old2New));
// check readers again
checkReaders(ses, sesGen, readers);
- // fill up buffer
- uint32_t i = 0;
- while (ses.getRemaining() >= newEntrySize) {
- //LOG(info, "fill: %s", newUniques[i].c_str());
- ses.addEnum(newUniques[i++].c_str(), idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- }
- EXPECT_LESS(ses.getRemaining(), newEntrySize);
- // buffer on hold list
- old2New.clear();
- EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize, old2New));
-
- checkReaders(ses, sesGen, readers);
- ses.transferHoldLists(sesGen);
- ses.trimHoldLists(sesGen + 1);
-
- // buffer no longer on hold list
- EXPECT_LESS(ses.getRemaining(), newEntrySize);
- old2New.clear();
- EXPECT_TRUE(ses.performCompaction(5 * newEntrySize, old2New));
- EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize);
-}
-
-void
-EnumStoreTest::testMemoryUsage()
-{
- StringEnumStore ses(200, false);
- StringEnumStore::Index idx;
- uint32_t num = 8;
- std::vector<StringEnumStore::Index> indices;
- std::vector<std::string> uniques;
- for (uint32_t i = 0; i < num; ++i) {
- std::stringstream ss;
- ss << "enum" << i;
- uniques.push_back(ss.str());
- }
- generation_t sesGen = 0u;
- uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx")
-
- // usage before inserting enums
- vespalib::MemoryUsage usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0));
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- for (uint32_t i = 0; i < num; ++i) {
- ses.addEnum(uniques[i].c_str(), idx);
- indices.push_back(idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- }
-
- // usage after inserting enums
- usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- // assign new enum for num / 2 of indices
- for (uint32_t i = 0; i < num / 2; ++i) {
- ses.decRefCount(indices[i]);
- EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx));
- ses.incRefCount(idx);
- indices[i] = idx;
- }
- ses.freeUnusedEnums(true);
-
- // usage after removing enums
- usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num / 2);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- IEnumStore::EnumIndexMap old2New;
- ses.performCompaction(400, old2New);
-
- // usage after compaction
- vespalib::MemoryUsage usage2 = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num / 2);
- EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes());
- EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes());
- EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold());
-
ses.transferHoldLists(sesGen);
ses.trimHoldLists(sesGen + 1);
-
- // usage after hold list trimming
- vespalib::MemoryUsage usage3 = ses.getMemoryUsage();
- EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes());
- EXPECT_EQUAL(0u, usage3.deadBytes());
- EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold());
}
namespace {
@@ -747,10 +303,13 @@ addEnum(NumericEnumStore &store, uint32_t value)
}
void
-decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx)
+decRefCount(NumericEnumStore& store, NumericEnumStore::Index idx)
{
store.decRefCount(idx);
store.freeUnusedEnums(false);
+ generation_t gen = 5;
+ store.transferHoldLists(gen);
+ store.trimHoldLists(gen + 1);
}
}
@@ -758,106 +317,21 @@ decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx)
void
EnumStoreTest::requireThatAddressSpaceUsageIsReported()
{
- const size_t ADDRESS_LIMIT = 34359738368; // NumericEnumStore::DataStoreType::RefType::offsetSize()
- NumericEnumStore store(200, false);
+ // TODO: Rewrite test to use BatchUpdater
+ const size_t ADDRESS_LIMIT = 4290772994; // Max allocated elements in un-allocated buffers + allocated elements in allocated buffers.
+ NumericEnumStore store(false);
using vespalib::AddressSpace;
- EXPECT_EQUAL(AddressSpace(16, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(1, 1, ADDRESS_LIMIT), store.getAddressSpaceUsage());
NumericEnumStore::Index idx1 = addEnum(store, 10);
- EXPECT_EQUAL(AddressSpace(32, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(2, 1, ADDRESS_LIMIT), store.getAddressSpaceUsage());
NumericEnumStore::Index idx2 = addEnum(store, 20);
- EXPECT_EQUAL(AddressSpace(48, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ // Address limit increases because buffer is re-sized.
+ EXPECT_EQUAL(AddressSpace(3, 1, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
decRefCount(store, idx1);
- EXPECT_EQUAL(AddressSpace(48, 32, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(3, 2, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
decRefCount(store, idx2);
- EXPECT_EQUAL(AddressSpace(48, 48, ADDRESS_LIMIT), store.getAddressSpaceUsage());
-}
-
-size_t
-digits(size_t num)
-{
- size_t digits = 1;
- while (num / 10 > 0) {
- num /= 10;
- digits++;
- }
- return digits;
-}
-
-void
-EnumStoreTest::testBufferLimit()
-{
- size_t enumSize = StringEnumStore::Index::offsetSize();
- StringEnumStore es(enumSize, false);
-
- size_t strLen = 65536;
- char str[strLen + 1];
- for (size_t i = 0; i < strLen; ++i) {
- str[i] = 'X';
- }
- str[strLen] = 0;
-
- size_t entrySize = StringEnumStore::getEntrySize(str);
- size_t numUniques = enumSize / entrySize;
- size_t uniqDigits = digits(numUniques);
-
- EnumIndex idx;
- EnumIndex lastIdx;
- for (size_t i = 0; i < numUniques; ++i) {
- sprintf(str, "%0*zu", (int)uniqDigits, i);
- str[uniqDigits] = 'X';
- es.addEnum(str, idx);
- if (i % (numUniques / 32) == 1) {
- EXPECT_TRUE(idx.offset() > lastIdx.offset());
- EXPECT_EQUAL(i + 1, es.getNumUniques());
- std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
- }
- lastIdx = idx;
- }
- EXPECT_EQUAL(idx.offset(), lastIdx.offset());
- EXPECT_EQUAL(numUniques, es.getNumUniques());
- std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
-}
-
-template <typename T>
-T
-EnumStoreTest::random(T low, T high)
-{
- return (rand() % (high - low)) + low;
-}
-
-std::string
-EnumStoreTest::getRandomString(uint32_t minLen, uint32_t maxLen)
-{
- uint32_t len = random(minLen, maxLen);
- std::string retval;
- for (uint32_t i = 0; i < len; ++i) {
- char c = random('a', 'z');
- retval.push_back(c);
- }
- return retval;
-}
-
-EnumStoreTest::StringVector
-EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen)
-{
- StringVector retval;
- retval.reserve(numStrings);
- for (uint32_t i = 0; i < numStrings; ++i) {
- retval.push_back(getRandomString(minLen, maxLen));
- }
- return retval;
-}
-
-EnumStoreTest::StringVector
-EnumStoreTest::sortRandomStrings(StringVector & strings)
-{
- std::sort(strings.begin(), strings.end());
- std::vector<std::string> retval;
- retval.reserve(strings.size());
- std::vector<std::string>::iterator pos = std::unique(strings.begin(), strings.end());
- std::copy(strings.begin(), pos, std::back_inserter(retval));
- return retval;
+ EXPECT_EQUAL(AddressSpace(3, 3, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
}
void
@@ -867,7 +341,7 @@ EnumStoreTest::checkReaders(const StringEnumStore & ses,
{
(void) sesGen;
//uint32_t refCount = 1000;
- StringEnumStore::Type t = "";
+ StringEnumStore::DataType t = "";
for (uint32_t i = 0; i < readers.size(); ++i) {
const Reader & r = readers[i];
for (uint32_t j = 0; j < r._indices.size(); ++j) {
@@ -883,20 +357,11 @@ EnumStoreTest::Main()
{
TEST_INIT("enumstore_test");
- testIndex();
- testStringEntry();
- testNumericEntry();
testFloatEnumStore();
testFindFolded();
testAddEnum();
- testCompaction();
- testReset();
testHoldListAndGeneration();
- testMemoryUsage();
TEST_DO(requireThatAddressSpaceUsageIsReported());
- if (_argc > 1) {
- testBufferLimit(); // large test with 8 GB buffer
- }
TEST_DONE();
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
index 3e949384d4a..5f9ebd1bf44 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -218,6 +218,12 @@ AttributeVector::updateStatistics(uint64_t numValues, uint64_t numUniqueValue, u
_status.updateStatistics(numValues, numUniqueValue, allocated, used, dead, onHold);
}
+vespalib::MemoryUsage
+AttributeVector::getEnumStoreValuesMemoryUsage() const
+{
+ return vespalib::MemoryUsage();
+}
+
vespalib::AddressSpace
AttributeVector::getEnumStoreAddressSpaceUsage() const
{
@@ -715,7 +721,7 @@ AttributeVector::getEstimatedSaveByteSize() const
uint64_t idxFileSize = 0;
uint64_t udatFileSize = 0;
size_t fixedWidth = getFixedWidth();
- vespalib::AddressSpace enumAddressSpace(getEnumStoreAddressSpaceUsage());
+ vespalib::MemoryUsage values_mem_usage = getEnumStoreValuesMemoryUsage();
if (hasMultiValue()) {
idxFileSize = headerSize + sizeof(uint32_t) * (docIdLimit + 1);
@@ -723,13 +729,15 @@ AttributeVector::getEstimatedSaveByteSize() const
if (hasWeightedSetType()) {
weightFileSize = headerSize + sizeof(int32_t) * totalValueCount;
}
- if (hasEnum() && getEnumeratedSave()) {
- datFileSize = headerSize + 4 * totalValueCount;
+ if (hasEnum()) {
+ datFileSize = headerSize + sizeof(uint32_t) * totalValueCount;
if (fixedWidth != 0) {
udatFileSize = headerSize + fixedWidth * uniqueValueCount;
} else {
- udatFileSize = headerSize + enumAddressSpace.used()
- - 8 * uniqueValueCount;
+ size_t unique_values_bytes = values_mem_usage.usedBytes() -
+ (values_mem_usage.deadBytes() + values_mem_usage.allocatedBytesOnHold());
+ size_t ref_count_mem_usage = sizeof(uint32_t) * uniqueValueCount;
+ udatFileSize = headerSize + unique_values_bytes - ref_count_mem_usage;
}
} else {
BasicType::Type basicType(getBasicType());
@@ -744,12 +752,7 @@ AttributeVector::getEstimatedSaveByteSize() const
datFileSize = headerSize + memorySize;
break;
case BasicType::Type::STRING:
- assert(hasEnum());
- datFileSize = headerSize;
- if (uniqueValueCount > 0) {
- double avgEntrySize = (static_cast<double>(enumAddressSpace.used()) / uniqueValueCount) - 8;
- datFileSize += avgEntrySize * totalValueCount;
- }
+ abort();
break;
default:
datFileSize = headerSize + fixedWidth * totalValueCount;
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
index 52e63385c7d..b5474fda9c9 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -378,6 +378,7 @@ protected:
return value;
}
+ virtual vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const;
virtual vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const;
virtual vespalib::AddressSpace getMultiValueAddressSpaceUsage() const;
void logEnumStoreEvent(const char *reason, const char *stage);
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
index 55af5a874f9..db8952d4f71 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
@@ -78,6 +78,7 @@ protected:
void insertNewUniqueValues(EnumStoreBatchUpdater& updater);
virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0;
virtual void reEnumerate(const EnumIndexMap &) = 0;
+ vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const override;
vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const override;
public:
EnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
index a5ba60cad4d..57cb33b1b70 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp
@@ -13,7 +13,7 @@ EnumAttribute<B>::
EnumAttribute(const vespalib::string &baseFileName,
const AttributeVector::Config &cfg)
: B(baseFileName, cfg),
- _enumStore(0, cfg.fastSearch())
+ _enumStore(cfg.fastSearch())
{
this->setEnum(true);
}
@@ -27,7 +27,7 @@ template <typename B>
void EnumAttribute<B>::fillEnum(LoadedVector & loaded)
{
if constexpr(!std::is_same_v<LoadedVector, NoLoadedVector>) {
- typename EnumStore::Builder builder;
+ auto builder = _enumStore.make_builder();
if (!loaded.empty()) {
auto value = loaded.read();
LoadedValueType prev = value.getValue();
@@ -36,7 +36,7 @@ void EnumAttribute<B>::fillEnum(LoadedVector & loaded)
for (size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) {
value = loaded.read();
if (EnumStore::ComparatorType::compare(prev, value.getValue()) != 0) {
- builder.updateRefCount(prevRefCount);
+ builder.set_ref_count_for_last_value(prevRefCount);
index = builder.insert(value.getValue(), value._pidx.ref());
prev = value.getValue();
prevRefCount = 1;
@@ -46,9 +46,9 @@ void EnumAttribute<B>::fillEnum(LoadedVector & loaded)
value.setEidx(index);
loaded.write(value);
}
- builder.updateRefCount(prevRefCount);
+ builder.set_ref_count_for_last_value(prevRefCount);
}
- _enumStore.reset(builder);
+ builder.build();
}
}
@@ -93,48 +93,18 @@ EnumAttribute<B>::insertNewUniqueValues(EnumStoreBatchUpdater& updater)
considerAttributeChange(data, newUniques);
}
- uint64_t extraBytesNeeded = 0;
- for (const auto & data : newUniques) {
- extraBytesNeeded += _enumStore.getEntrySize(data.raw());
- }
-
- do {
- // perform compaction on EnumStore if necessary
- if (extraBytesNeeded > this->_enumStore.getRemaining() ||
- this->_enumStore.getPendingCompact())
- {
- this->logEnumStoreEvent("enumstorecompact", "reserve");
- this->removeAllOldGenerations();
- this->_enumStore.clearPendingCompact();
- EnumIndexMap old2New(this->_enumStore.getNumUniques()*3);
- this->logEnumStoreEvent("enumstorecompact", "start");
- if (!this->_enumStore.performCompaction(extraBytesNeeded, old2New)) {
- this->logEnumStoreEvent("enumstorecompact", "failed_compact");
- // fallback to resize strategy
- this->_enumStore.fallbackResize(extraBytesNeeded);
- this->logEnumStoreEvent("enumstorecompact", "fallbackresize_complete");
- if (extraBytesNeeded > this->_enumStore.getRemaining()) {
- HDR_ABORT("Cannot fallbackResize enumStore");
- }
- break; // fallback resize performed instead of compaction.
- }
-
- // update underlying structure with new EnumIndex values.
- reEnumerate(old2New);
- // Clear scratch enumeration
- for (auto & data : this->_changes) {
- data._enumScratchPad = ChangeBase::UNSET_ENUM;
- }
- this->logEnumStoreEvent("enumstorecompact", "complete");
- }
- } while (0);
-
// insert new unique values in EnumStore
for (const auto & data : newUniques) {
updater.add(data.raw());
}
}
+template <typename B>
+vespalib::MemoryUsage
+EnumAttribute<B>::getEnumStoreValuesMemoryUsage() const // Override: reports memory usage of the values held by this attribute's enum store.
+{
+ return _enumStore.getValuesMemoryUsage(); // Plain forward to the enum store member.
+}
template <typename B>
vespalib::AddressSpace
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
index 4cf5ea9c766..7ce65193c40 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
@@ -10,78 +10,50 @@ LOG_SETUP(".searchlib.attribute.enum_store");
namespace search {
-template <>
-void
-EnumStoreT<StringEntryType>::
-insertEntryValue(char * dst, Type value)
-{
- strcpy(dst, value);
-}
template <>
void
-EnumStoreT<StringEntryType>::writeValues(BufferWriter &writer,
- const Index *idxs,
+EnumStoreT<StringEntryType>::writeValues(BufferWriter& writer, // String specialization: writes the value behind each index to writer.
+ const Index* idxs, // Array of enum indexes; the first `count` entries are read.
size_t count) const
{
- for (uint32_t i = 0; i < count; ++i) {
+ for (size_t i = 0; i < count; ++i) { // Loop counter has the same type as count.
Index idx = idxs[i];
- const char *src(_store.getEntry<char>(idx) +
- EntryBase::size());
+ const char* src = _store.get(idx); // Treated as NUL-terminated below; the terminator is written as well.
size_t sz = strlen(src) + 1;
writer.write(src, sz);
}
}
-
template <>
ssize_t
-EnumStoreT<StringEntryType>::deserialize(const void *src,
- size_t available,
- size_t &initSpace)
+EnumStoreT<StringEntryType>::deserialize(const void* src, // String specialization: reads one NUL-terminated value at src and allocates it in the store.
+ size_t available, // Byte budget the value, including its terminator, must fit in.
+ Index& idx) // In: index of the previously deserialized value (may be invalid). Out: index of the new value.
{
- size_t slen = strlen(static_cast<const char *>(src));
- size_t sz(StringEntryType::fixedSize() + slen);
- if (available < sz)
+ const char* value = static_cast<const char*>(src);
+ size_t slen = strlen(value); // NOTE(review): runs before the `available` check, so it relies on a terminator being present — confirm callers guarantee this.
+ size_t sz = slen + 1; // Bytes consumed: characters plus terminator.
+ if (available < sz) { // Value does not fit in the remaining bytes: report failure with -1.
return -1;
- uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
- initSpace += entrySize;
+ }
+ Index prev_idx = idx; // Keep the previous index for the ordering check below.
+ idx = _store.get_allocator().allocate(value);
+
+ if (prev_idx.valid()) { // Nothing to compare against for the first value.
+ assert(ComparatorType::compare(getValue(prev_idx), value) < 0); // Values must arrive in strictly increasing order (checked only when asserts are enabled).
+ }
return sz;
}
-
-template <>
-ssize_t
-EnumStoreT<StringEntryType>::deserialize(const void *src,
- size_t available,
- Index &idx)
+std::unique_ptr<datastore::IUniqueStoreDictionary> // Factory for the dictionary implementation used by an enum store.
+make_enum_store_dictionary(IEnumStore &store, bool has_postings) // store is forwarded to the dictionary constructor; has_postings selects the tree type.
{
- size_t slen = strlen(static_cast<const char *>(src));
- size_t sz(StringEntryType::fixedSize() + slen);
- if (available < sz)
- return -1;
- uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & buffer = _store.getBufferState(activeBufferId);
- uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
- if (buffer.remaining() < entrySize) {
- LOG_ABORT("Out of enumstore bufferspace");
- }
- uint64_t offset = buffer.size();
- Index newIdx(offset, activeBufferId);
- char *dst(_store.getEntry<char>(newIdx));
- memcpy(dst, &dummy_enum_value, sizeof(uint32_t));
- uint32_t pos = sizeof(uint32_t);
- uint32_t refCount(0);
- memcpy(dst + pos, &refCount, sizeof(uint32_t));
- pos += sizeof(uint32_t);
- memcpy(dst + pos, src, sz);
- buffer.pushed_back(entrySize);
-
- if (idx.valid()) {
- assert(ComparatorType::compare(getValue(idx), Entry(dst).getValue()) < 0);
+ if (has_postings) { // With postings: dictionary over EnumPostingTree.
+ return std::make_unique<EnumStoreDictionary<EnumPostingTree>>(store);
+ } else { // Without postings: dictionary over EnumTree.
+ return std::make_unique<EnumStoreDictionary<EnumTree>>(store);
}
- idx = newIdx;
- return sz;
}
vespalib::asciistream & operator << (vespalib::asciistream & os, const IEnumStore::Index & idx) {
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index fa5e9611c55..032acfc0ee2 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -10,6 +10,8 @@
#include <vespa/vespalib/btree/btree.h>
#include <vespa/vespalib/btree/btreebuilder.h>
#include <vespa/vespalib/datastore/entryref.h>
+#include <vespa/vespalib/datastore/unique_store.h>
+#include <vespa/vespalib/datastore/unique_store_string_allocator.h>
#include <vespa/vespalib/util/buffer.h>
#include <vespa/vespalib/util/array.h>
#include <vespa/vespalib/util/stringfmt.h>
@@ -78,202 +80,123 @@ class EnumStoreT : public IEnumStore
{
friend class EnumStoreTest;
public:
- using Type = typename EntryType::Type;
+ using DataType = typename EntryType::Type;
using ComparatorType = EnumStoreComparatorT<EntryType>;
+ using AllocatorType = std::conditional_t<std::is_same_v<DataType, const char *>,
+ datastore::UniqueStoreStringAllocator<Index>,
+ datastore::UniqueStoreAllocator<DataType, Index>>;
+
+ using UniqueStoreType = datastore::UniqueStore<DataType, Index, ComparatorType, AllocatorType>;
using FoldedComparatorType = EnumStoreFoldedComparatorT<EntryType>;
using EnumStoreType = EnumStoreT<EntryType>;
- using DataStoreType = datastore::DataStoreT<Index>;
+ using EntryRef = datastore::EntryRef;
using generation_t = vespalib::GenerationHandler::generation_t;
- class EntryBase {
- protected:
- char * _data;
- public:
- EntryBase(void * data) : _data(static_cast<char *>(data)) {}
- uint32_t getRefCount() const {
- return *(reinterpret_cast<uint32_t *>(_data) + 1);
- }
- void incRefCount() {
- uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
- ++(*dst);
- }
- void decRefCount() {
- uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
- --(*dst);
- }
- void setRefCount(uint32_t refCount) {
- uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1;
- *dst = refCount;
- }
- static uint32_t size() { return 2*sizeof(uint32_t); }
- };
-
- class Entry : public EntryBase {
- public:
- Entry(void * data) : EntryBase(data) {}
- Type getValue() const;
- static uint32_t fixedSize() { return EntryBase::size() + EntryType::fixedSize(); }
- };
-
- class EnumBufferType : public datastore::BufferType<char> {
- private:
- size_t _minSizeNeeded; // lower cap for sizeNeeded
- size_t _deadElems; // dead elements in active buffer
- bool _pendingCompact;
- bool _wantCompact;
- public:
- EnumBufferType();
- size_t calcArraysToAlloc(uint32_t bufferId, size_t sizeNeeded, bool resizing) const override;
- void setSizeNeededAndDead(size_t sizeNeeded, size_t deadElems) {
- _minSizeNeeded = sizeNeeded;
- _deadElems = deadElems;
- }
- void onFree(size_t usedElems) override {
- datastore::BufferType<char>::onFree(usedElems);
- _pendingCompact = _wantCompact;
- _wantCompact = false;
- }
- void setWantCompact() { _wantCompact = true; }
- bool getPendingCompact() const { return _pendingCompact; }
- void clearPendingCompact() { _pendingCompact = false; }
- };
-
- static void insertEntry(char * dst, uint32_t refCount, Type value);
private:
- IEnumStoreDictionary *_enumDict;
- DataStoreType _store;
- EnumBufferType _type;
- std::vector<uint32_t> _toHoldBuffers; // used during compaction
-
- static const uint32_t TYPE_ID = 0;
+ UniqueStoreType _store;
+ IEnumStoreDictionary& _dict;
EnumStoreT(const EnumStoreT & rhs) = delete;
EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
- static void insertEntryValue(char * dst, Type value) {
- memcpy(dst, &value, sizeof(Type));
- }
+ void freeUnusedEnum(Index idx, IndexSet& unused) override;
- EntryBase getEntryBase(Index idx) const {
- return EntryBase(const_cast<DataStoreType &>(_store).getEntry<char>(idx));
+ const datastore::UniqueStoreEntryBase& get_entry_base(Index idx) const { // Entry wrapper for idx; the ref-count accessors of this class go through it.
+ return _store.get_allocator().get_wrapped(idx); // Lookup is delegated to the store's allocator.
}
- datastore::BufferState & getBuffer(uint32_t bufferIdx) {
- return _store.getBufferState(bufferIdx);
- }
- const datastore::BufferState & getBuffer(uint32_t bufferIdx) const {
- return _store.getBufferState(bufferIdx);
- }
- bool validIndex(Index idx) const {
- return (idx.valid() && idx.offset() < _store.getBufferState(idx.bufferId()).size());
- }
- uint32_t getBufferIndex(datastore::BufferState::State status);
- void postCompact();
- bool preCompact(uint64_t bytesNeeded);
-
- Entry getEntry(Index idx) const {
- return Entry(const_cast<DataStoreType &>(_store).getEntry<char>(idx));
- }
-
- void freeUnusedEnum(Index idx, IndexSet & unused) override;
public:
- EnumStoreT(uint64_t initBufferSize, bool hasPostings);
+ EnumStoreT(bool hasPostings);
virtual ~EnumStoreT();
- void reset(uint64_t initBufferSize);
-
- uint32_t getRefCount(Index idx) const { return getEntryBase(idx).getRefCount(); }
- void incRefCount(Index idx) { getEntryBase(idx).incRefCount(); }
- void decRefCount(Index idx) { getEntryBase(idx).decRefCount(); }
+ uint32_t getRefCount(Index idx) const { return get_entry_base(idx).get_ref_count(); }
+ // TODO: Remove from public API
+ void incRefCount(Index idx) { return get_entry_base(idx).inc_ref_count(); }
+ void decRefCount(Index idx) { return get_entry_base(idx).dec_ref_count(); }
// Only use when reading from enumerated attribute save files
+ // TODO: Instead create an API that is used for loading/initializing.
void fixupRefCount(Index idx, uint32_t refCount) override {
- getEntryBase(idx).setRefCount(refCount);
+ get_entry_base(idx).set_ref_count(refCount);
}
- uint32_t getNumUniques() const override { return _enumDict->getNumUniques(); }
+ uint32_t getNumUniques() const override { return _dict.getNumUniques(); }
- uint32_t getRemaining() const {
- return _store.getBufferState(_store.getActiveBufferId(TYPE_ID)).remaining();
- }
- uint32_t getCapacity() const {
- return _store.getBufferState(_store.getActiveBufferId(TYPE_ID)).capacity();
- }
- vespalib::MemoryUsage getMemoryUsage() const override { return _store.getMemoryUsage(); }
- vespalib::MemoryUsage getTreeMemoryUsage() const override { return _enumDict->get_memory_usage(); }
+ vespalib::MemoryUsage getValuesMemoryUsage() const override { return _store.get_allocator().get_data_store().getMemoryUsage(); }
+ vespalib::MemoryUsage getDictionaryMemoryUsage() const override { return _dict.get_memory_usage(); }
vespalib::AddressSpace getAddressSpaceUsage() const;
void transferHoldLists(generation_t generation);
void trimHoldLists(generation_t firstUsed);
- static void failNewSize(uint64_t minNewSize, uint64_t maxSize);
-
- // Align buffers and entries to 4 bytes boundary.
- static uint64_t alignBufferSize(uint64_t val) { return Index::align(val); }
- static uint32_t alignEntrySize(uint32_t val) { return Index::align(val); }
-
- void fallbackResize(uint64_t bytesNeeded);
- bool getPendingCompact() const { return _type.getPendingCompact(); }
- void clearPendingCompact() { _type.clearPendingCompact(); }
-
ssize_t deserialize0(const void *src, size_t available, IndexVector &idx) override;
ssize_t deserialize(const void *src, size_t available, IndexVector &idx) {
- return _enumDict->deserialize(src, available, idx);
+ return _dict.deserialize(src, available, idx);
}
- void fixupRefCounts(const EnumVector &hist) { _enumDict->fixupRefCounts(hist); }
- void freezeTree() { _enumDict->freeze(); }
+ void fixupRefCounts(const EnumVector &hist) { _dict.fixupRefCounts(hist); }
+ void freezeTree() { _store.freeze(); }
- IEnumStoreDictionary &getEnumStoreDict() override { return *_enumDict; }
- const IEnumStoreDictionary &getEnumStoreDict() const override { return *_enumDict; }
- EnumPostingTree &getPostingDictionary() { return _enumDict->getPostingDictionary(); }
+ IEnumStoreDictionary &getEnumStoreDict() override { return _dict; }
+ const IEnumStoreDictionary &getEnumStoreDict() const override { return _dict; }
+ EnumPostingTree &getPostingDictionary() { return _dict.getPostingDictionary(); }
const EnumPostingTree &getPostingDictionary() const {
- return _enumDict->getPostingDictionary();
+ return _dict.getPostingDictionary();
}
- const datastore::DataStoreBase &get_data_store_base() const override { return _store; }
+ // TODO: Add API for getting compaction count instead.
+ const datastore::DataStoreBase &get_data_store_base() const override { return _store.get_allocator().get_data_store(); }
- bool getValue(Index idx, Type & value) const;
- Type getValue(uint32_t idx) const { return getValue(Index(datastore::EntryRef(idx))); }
- Type getValue(Index idx) const { return getEntry(idx).getValue(); }
- static uint32_t getEntrySize(Type value) {
- return alignEntrySize(EntryBase::size() + EntryType::size(value));
- }
+ bool getValue(Index idx, DataType& value) const;
+ DataType getValue(uint32_t idx) const { return getValue(Index(EntryRef(idx))); }
+ DataType getValue(Index idx) const { return _store.get(idx); }
+ // TODO: Implement helper class to populate enum store when loading from enumerated save files.
+
+ /**
+ * Used when building enum store from non-enumerated save files.
+ * TODO: Find better name.
+ */
class Builder {
- public:
- struct UniqueEntry {
- UniqueEntry(const Type & val, size_t sz, uint32_t pidx = 0) : _value(val), _sz(sz), _pidx(pidx), _refCount(1) { }
- Type _value;
- size_t _sz;
- size_t _pidx;
- uint32_t _refCount;
- };
-
- typedef vespalib::Array<UniqueEntry> Uniques;
private:
- Uniques _uniques;
- uint64_t _bufferSize;
+ AllocatorType& _allocator; // Allocator the values are inserted into; held by reference.
+ datastore::IUniqueStoreDictionary& _dict; // Dictionary populated by build(); held by reference.
+ std::vector<EntryRef> _refs; // One ref per inserted value, in insertion order.
+ std::vector<uint32_t> _payloads; // Posting index per inserted value, parallel to _refs.
+
public:
- Builder();
+ Builder(AllocatorType& allocator, datastore::IUniqueStoreDictionary& dict) // Binds the builder to an existing allocator and dictionary.
+ : _allocator(allocator),
+ _dict(dict),
+ _refs(),
+ _payloads()
+ {
+ }
~Builder();
- Index insert(Type value, uint32_t pidx = 0) {
- uint32_t entrySize = getEntrySize(value);
- _uniques.push_back(UniqueEntry(value, entrySize, pidx));
- Index index(_bufferSize, 0); // bufferId 0 should be used when resetting with a builder
- _bufferSize += entrySize;
- return index;
+ Index insert(const DataType& value, uint32_t posting_idx = 0) { // Allocates value right away and returns its index.
+ EntryRef new_ref = _allocator.allocate(value);
+ _refs.emplace_back(new_ref); // Recorded so build() can hand all refs to the dictionary.
+ _payloads.emplace_back(posting_idx); // Kept at the same position as the ref above.
+ return new_ref;
+ }
+ void set_ref_count_for_last_value(uint32_t ref_count) { // Sets the ref count of the most recently inserted value.
+ assert(!_refs.empty()); // Requires at least one prior insert().
+ _allocator.get_wrapped(_refs.back()).set_ref_count(ref_count);
+ }
+ void build() { // Hands the collected refs and payloads to the dictionary.
+ _dict.build_with_payload(_refs, _payloads);
}
- void updateRefCount(uint32_t refCount) { _uniques.rbegin()->_refCount = refCount; }
- const Uniques & getUniques() const { return _uniques; }
- uint64_t getBufferSize() const { return _bufferSize; }
};
+ Builder make_builder() { // Creates a Builder bound to this store's allocator and dictionary.
+ return Builder(_store.get_allocator(), _dict); // Builder keeps references to both, so it must not outlive this enum store.
+ }
+
class BatchUpdater {
private:
EnumStoreType& _store;
@@ -284,17 +207,19 @@ public:
: _store(store),
_possibly_unused()
{}
- void add(Type value) {
- Index new_idx;
- _store.addEnum(value, new_idx);
- _possibly_unused.insert(new_idx);
+ // TODO: Rename to insert()
+ void add(DataType value) {
+ Index idx;
+ _store.addEnum(value, idx);
+ _possibly_unused.insert(idx);
}
void inc_ref_count(Index idx) {
- _store.incRefCount(idx);
+ _store.get_entry_base(idx).inc_ref_count();
}
void dec_ref_count(Index idx) {
- _store.decRefCount(idx);
- if (_store.getRefCount(idx) == 0) {
+ auto& entry = _store.get_entry_base(idx);
+ entry.dec_ref_count();
+ if (entry.get_ref_count() == 0) {
_possibly_unused.insert(idx);
}
}
@@ -307,81 +232,44 @@ public:
return BatchUpdater(*this);
}
+ // TODO: Change to sending enum indexes as const array ref.
void writeValues(BufferWriter &writer, const Index *idxs, size_t count) const override;
ssize_t deserialize(const void *src, size_t available, size_t &initSpace);
ssize_t deserialize(const void *src, size_t available, Index &idx);
bool foldedChange(const Index &idx1, const Index &idx2) override;
- virtual bool findEnum(Type value, IEnumStore::EnumHandle &e) const;
- virtual std::vector<IEnumStore::EnumHandle> findFoldedEnums(Type value) const;
- void addEnum(Type value, Index &newIdx);
- virtual bool findIndex(Type value, Index &idx) const;
+ bool findEnum(DataType value, IEnumStore::EnumHandle &e) const;
+ std::vector<IEnumStore::EnumHandle> findFoldedEnums(DataType value) const;
+ void addEnum(DataType value, Index &newIdx);
+ bool findIndex(DataType value, Index &idx) const;
void freeUnusedEnums(bool movePostingidx) override;
void freeUnusedEnums(const IndexSet& toRemove);
- void reset(Builder &builder);
- bool performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New);
private:
template <typename Dictionary>
- void reset(Builder &builder, Dictionary &dict);
-
- template <typename Dictionary>
- void addEnum(Type value, Index &newIdx, Dictionary &dict);
+ void addEnum(DataType value, Index& newIdx, Dictionary& dict);
- template <typename Dictionary>
- void performCompaction(Dictionary &dict, EnumIndexMap & old2New);
};
+std::unique_ptr<datastore::IUniqueStoreDictionary>
+make_enum_store_dictionary(IEnumStore &store, bool has_postings);
+
vespalib::asciistream & operator << (vespalib::asciistream & os, const IEnumStore::Index & idx);
extern template
class datastore::DataStoreT<IEnumStore::Index>;
-template <typename EntryType>
-inline typename EntryType::Type
-EnumStoreT<EntryType>::Entry::getValue() const // implementation for numeric
-{
- Type dst;
- const char * src = this->_data + EntryBase::size();
- memcpy(&dst, src, sizeof(Type));
- return dst;
-}
-
-template <>
-inline StringEntryType::Type
-EnumStoreT<StringEntryType>::Entry::getValue() const
-{
- return (_data + EntryBase::size());
-}
-
-
template <>
void
-EnumStoreT<StringEntryType>::writeValues(BufferWriter &writer,
- const Index *idxs,
+EnumStoreT<StringEntryType>::writeValues(BufferWriter& writer,
+ const IEnumStore::Index* idxs,
size_t count) const;
template <>
ssize_t
-EnumStoreT<StringEntryType>::deserialize(const void *src,
- size_t available,
- size_t &initSpace);
-
-template <>
-ssize_t
-EnumStoreT<StringEntryType>::deserialize(const void *src,
- size_t available,
- Index &idx);
-
-
-//-----------------------------------------------------------------------------
-// EnumStore
-//-----------------------------------------------------------------------------
-
-template <>
-void
-EnumStoreT<StringEntryType>::
-insertEntryValue(char * dst, Type value);
+EnumStoreT<StringEntryType>::deserialize(const void* src,
+ size_t available,
+ Index& idx);
extern template
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index 428875e00db..254f517ada2 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -14,174 +14,44 @@
#include <vespa/vespalib/btree/btreeroot.hpp>
#include <vespa/vespalib/btree/btreebuilder.hpp>
#include <vespa/vespalib/btree/btree.hpp>
+#include <vespa/vespalib/datastore/unique_store.hpp>
+#include <vespa/vespalib/datastore/unique_store_string_allocator.hpp>
#include <vespa/vespalib/util/array.hpp>
#include <vespa/vespalib/util/bufferwriter.h>
namespace search {
-namespace {
-
-const uint32_t dummy_enum_value = 0;
-
-}
-
-template <typename EntryType>
-EnumStoreT<EntryType>::EnumBufferType::EnumBufferType()
- : datastore::BufferType<char>(Index::align(1),
- Index::offsetSize() / Index::align(1),
- Index::offsetSize() / Index::align(1)),
- _minSizeNeeded(0),
- _deadElems(0),
- _pendingCompact(false),
- _wantCompact(false)
-{
-}
-
-template <typename EntryType>
-size_t
-EnumStoreT<EntryType>::EnumBufferType::calcArraysToAlloc(uint32_t bufferId, size_t sizeNeeded, bool resizing) const
-{
- (void) resizing;
- size_t reservedElements = getReservedElements(bufferId);
- sizeNeeded = std::max(sizeNeeded, _minSizeNeeded);
- size_t usedElems = _activeUsedElems;
- if (_lastUsedElems != nullptr) {
- usedElems += *_lastUsedElems;
- }
- assert((usedElems % _arraySize) == 0);
- double growRatio = 1.5f;
- uint64_t maxSize = static_cast<uint64_t>(_maxArrays) * _arraySize;
- uint64_t newSize = usedElems - _deadElems + sizeNeeded;
- if (usedElems != 0) {
- newSize *= growRatio;
- }
- newSize += reservedElements;
- newSize = alignBufferSize(newSize);
- assert((newSize % _arraySize) == 0);
- if (newSize <= maxSize) {
- return newSize / _arraySize;
- }
- newSize = usedElems - _deadElems + sizeNeeded + reservedElements + 1000000;
- newSize = alignBufferSize(newSize);
- assert((newSize % _arraySize) == 0);
- if (newSize <= maxSize) {
- return _maxArrays;
- }
- failNewSize(newSize, maxSize);
- return 0;
-}
-
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::
-insertEntry(char * dst, uint32_t refCount, Type value)
-{
- memcpy(dst, &dummy_enum_value, sizeof(uint32_t));
- uint32_t pos = sizeof(uint32_t);
- memcpy(dst + pos, &refCount, sizeof(uint32_t));
- pos += sizeof(uint32_t);
- insertEntryValue(dst + pos, value);
-}
-
-template <>
-void
-EnumStoreT<StringEntryType>::
-insertEntryValue(char * dst, Type value);
-
template <typename EntryType>
-uint32_t
-EnumStoreT<EntryType>::getBufferIndex(datastore::BufferState::State status)
+void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet& unused) // If idx has ref count zero: records it in `unused` and puts its entry on hold.
{
- for (uint32_t i = 0; i < _store.getNumBuffers(); ++i) {
- if (_store.getBufferState(i).getState() == status) {
- return i;
- }
+ const auto& entry = get_entry_base(idx);
+ // Put the entry on hold only the first time idx is added to `unused`, so it cannot be held twice.
+ if ((entry.get_ref_count() == 0) && unused.insert(idx).second) {
+ _store.get_allocator().hold(idx);
}
- return Index::numBuffers();
}
template <typename EntryType>
-void
-EnumStoreT<EntryType>::postCompact()
+EnumStoreT<EntryType>::EnumStoreT(bool has_postings) // has_postings selects the posting-capable dictionary variant.
+ : _store(make_enum_store_dictionary(*this, has_postings)), // NOTE(review): *this is passed while still under construction — presumably only stored as a reference; confirm.
+ _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary())) // Typed reference to the dictionary held by _store, which is declared (and so initialized) before _dict.
{
- _store.finishCompact(_toHoldBuffers);
}
template <typename EntryType>
-bool
-EnumStoreT<EntryType>::preCompact(uint64_t bytesNeeded)
-{
- if (getBufferIndex(datastore::BufferState::FREE) == Index::numBuffers()) {
- return false;
- }
- uint32_t activeBufId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & activeBuf = _store.getBufferState(activeBufId);
- _type.setSizeNeededAndDead(bytesNeeded, activeBuf.getDeadElems());
- _toHoldBuffers = _store.startCompact(TYPE_ID);
- return true;
-}
-
-template <typename EntryType>
-void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet & unused)
-{
- Entry e = getEntry(idx);
- if (e.getRefCount() == 0) {
- Type value = e.getValue();
- if (unused.insert(idx).second) {
- _store.incDead(idx, getEntrySize(value));
- }
- }
-}
-
-template <typename EntryType>
-EnumStoreT<EntryType>::EnumStoreT(uint64_t initBufferSize, bool hasPostings)
- : _enumDict(nullptr),
- _store(),
- _type(),
- _toHoldBuffers()
-{
- if (hasPostings) {
- _enumDict = new EnumStoreDictionary<EnumPostingTree>(*this);
- } else {
- _enumDict = new EnumStoreDictionary<EnumTree>(*this);
- }
- _store.addType(&_type);
- _type.setSizeNeededAndDead(initBufferSize, 0);
- _store.initActiveBuffers();
-}
-
-template <typename EntryType>
-EnumStoreT<EntryType>::~EnumStoreT()
-{
- _store.clearHoldLists();
- _store.dropBuffers();
- delete _enumDict;
-}
-
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::reset(uint64_t initBufferSize)
-{
- _store.clearHoldLists();
- _store.dropBuffers();
- _type.setSizeNeededAndDead(initBufferSize, 0);
- _store.initActiveBuffers();
- _enumDict->onReset();
-}
+EnumStoreT<EntryType>::~EnumStoreT() = default;
template <typename EntryType>
vespalib::AddressSpace
EnumStoreT<EntryType>::getAddressSpaceUsage() const
{
- const datastore::BufferState &activeState = _store.getBufferState(_store.getActiveBufferId(TYPE_ID));
- return vespalib::AddressSpace(activeState.size(), activeState.getDeadElems(), DataStoreType::RefType::offsetSize());
+ return _store.get_address_space_usage(); // Address space accounting is delegated to the unique store.
}
template <typename EntryType>
void
EnumStoreT<EntryType>::transferHoldLists(generation_t generation)
{
- _enumDict->transfer_hold_lists(generation);
_store.transferHoldLists(generation);
}
@@ -190,52 +60,24 @@ void
EnumStoreT<EntryType>::trimHoldLists(generation_t firstUsed)
{
// remove generations in the range [0, firstUsed>
- _enumDict->trim_hold_lists(firstUsed);
_store.trimHoldLists(firstUsed);
}
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::failNewSize(uint64_t minNewSize, uint64_t maxSize)
-{
- throw vespalib::IllegalStateException(vespalib::make_string("EnumStoreT::failNewSize: Minimum new size (%" PRIu64 ") exceeds max size (%" PRIu64 ")", minNewSize, maxSize));
-}
-
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::fallbackResize(uint64_t bytesNeeded)
-{
- uint32_t activeBufId = _store.getActiveBufferId(TYPE_ID);
- size_t reservedElements = _type.getReservedElements(activeBufId);
- _type.setSizeNeededAndDead(bytesNeeded, reservedElements);
- _type.setWantCompact();
- _store.fallbackResize(activeBufId, bytesNeeded);
-}
template <typename EntryType>
ssize_t
-EnumStoreT<EntryType>::deserialize0(const void *src,
+EnumStoreT<EntryType>::deserialize0(const void* src,
size_t available,
- IndexVector &idx)
+ IndexVector& idx)
{
size_t left = available;
- size_t initSpace = Index::align(1);
- const char * p = static_cast<const char *>(src);
- while (left > 0) {
- ssize_t sz = deserialize(p, left, initSpace);
- if (sz < 0)
- return sz;
- p += sz;
- left -= sz;
- }
- reset(initSpace);
- left = available;
- p = static_cast<const char *>(src);
+ const char* p = static_cast<const char*>(src);
Index idx1;
while (left > 0) {
ssize_t sz = deserialize(p, left, idx1);
- if (sz < 0)
+ if (sz < 0) {
return sz;
+ }
p += sz;
left -= sz;
idx.push_back(idx1);
@@ -245,81 +87,45 @@ EnumStoreT<EntryType>::deserialize0(const void *src,
template <typename EntryType>
bool
-EnumStoreT<EntryType>::getValue(Index idx, Type & value) const
+EnumStoreT<EntryType>::getValue(Index idx, DataType& value) const // Copies the value for idx into `value`; returns false (leaving `value` untouched) when idx is not valid.
{
- if (!validIndex(idx)) {
+ if (!idx.valid()) { // NOTE(review): the removed validIndex() also compared the offset against the buffer size; only ref validity is checked now.
return false;
}
- value = getEntry(idx).getValue();
+ value = _store.get(idx); // Value lookup is delegated to the unique store.
return true;
}
template <typename EntryType>
-EnumStoreT<EntryType>::Builder::Builder()
- : _uniques(),
- _bufferSize(Index::align(1))
-{ }
-
-template <typename EntryType>
-EnumStoreT<EntryType>::Builder::~Builder() { }
+EnumStoreT<EntryType>::Builder::~Builder() = default; // Defaulted out of line; Builder's members are two references and two std::vectors.
template <class EntryType>
void
-EnumStoreT<EntryType>::writeValues(BufferWriter &writer, const Index *idxs, size_t count) const
+EnumStoreT<EntryType>::writeValues(BufferWriter& writer, const Index* idxs, size_t count) const // Generic version: writes sizeof(DataType) raw bytes per index, for the first `count` entries of idxs.
{
- size_t sz(EntryType::fixedSize());
- for (uint32_t i = 0; i < count; ++i) {
+ for (size_t i = 0; i < count; ++i) { // Loop counter has the same type as count.
Index idx = idxs[i];
- const char *src(_store.getEntry<char>(idx) + EntryBase::size());
- writer.write(src, sz);
+ writer.write(&_store.get(idx), sizeof(DataType)); // Takes the address of what _store.get() returns — presumably a reference into the store; confirm.
}
}
template <class EntryType>
ssize_t
-EnumStoreT<EntryType>::deserialize(const void *src, size_t available, size_t &initSpace)
+EnumStoreT<EntryType>::deserialize(const void* src, size_t available, Index& idx) // Generic version: reads one DataType at src, allocates it, and returns bytes consumed or -1. idx is in/out (previous index in, new index out).
{
- (void) src;
- size_t sz(EntryType::fixedSize());
- if (available < sz)
+ if (available < sizeof(DataType)) { // Not enough bytes left for one value: report failure.
return -1;
- uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
- initSpace += entrySize;
- return sz;
-}
-
-template <class EntryType>
-ssize_t
-EnumStoreT<EntryType>::deserialize(const void *src, size_t available, Index &idx)
-{
- size_t sz(EntryType::fixedSize());
- if (available < sz)
- return -1;
- uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & buffer = _store.getBufferState(activeBufferId);
- uint32_t entrySize(alignEntrySize(EntryBase::size() + sz));
- if (buffer.remaining() < entrySize) {
- HDR_ABORT("not enough space");
}
- uint64_t offset = buffer.size();
- Index newIdx(offset, activeBufferId);
- char *dst(_store.getEntry<char>(newIdx));
- memcpy(dst, &dummy_enum_value, sizeof(uint32_t));
- uint32_t pos = sizeof(uint32_t);
- uint32_t refCount(0);
- memcpy(dst + pos, &refCount, sizeof(uint32_t));
- pos += sizeof(uint32_t);
- memcpy(dst + pos, src, sz);
- buffer.pushed_back(entrySize);
+ const auto* value = static_cast<const DataType*>(src); // NOTE(review): typed read straight from src assumes suitable alignment for DataType — confirm (the removed code copied with memcpy).
+ Index prev_idx = idx; // Keep the previous index for the ordering check below.
+ idx = _store.get_allocator().allocate(*value);
- if (idx.valid()) {
- assert(ComparatorType::compare(getValue(idx), Entry(dst).getValue()) < 0);
+ if (prev_idx.valid()) { // Nothing to compare against for the first value.
+ assert(ComparatorType::compare(getValue(prev_idx), *value) < 0); // Values must arrive in strictly increasing order (checked only when asserts are enabled).
}
- idx = newIdx;
- return sz;
+ return sizeof(DataType); // Bytes consumed from src.
}
-
template <class EntryType>
bool
EnumStoreT<EntryType>::foldedChange(const Index &idx1, const Index &idx2)
@@ -329,14 +135,13 @@ EnumStoreT<EntryType>::foldedChange(const Index &idx1, const Index &idx2)
return cmpres < 0;
}
-
template <typename EntryType>
bool
-EnumStoreT<EntryType>::findEnum(Type value, IEnumStore::EnumHandle &e) const
+EnumStoreT<EntryType>::findEnum(DataType value, IEnumStore::EnumHandle &e) const
{
ComparatorType cmp(*this, value);
Index idx;
- if (_enumDict->findFrozenIndex(cmp, idx)) {
+ if (_dict.findFrozenIndex(cmp, idx)) {
e = idx.ref();
return true;
}
@@ -345,22 +150,20 @@ EnumStoreT<EntryType>::findEnum(Type value, IEnumStore::EnumHandle &e) const
template <typename EntryType>
std::vector<IEnumStore::EnumHandle>
-EnumStoreT<EntryType>::findFoldedEnums(Type value) const
+EnumStoreT<EntryType>::findFoldedEnums(DataType value) const
{
FoldedComparatorType cmp(*this, value);
- return _enumDict->findMatchingEnums(cmp);
+ return _dict.findMatchingEnums(cmp);
}
-
template <typename EntryType>
bool
-EnumStoreT<EntryType>::findIndex(Type value, Index &idx) const
+EnumStoreT<EntryType>::findIndex(DataType value, Index &idx) const
{
ComparatorType cmp(*this, value);
- return _enumDict->findIndex(cmp, idx);
+ return _dict.findIndex(cmp, idx);
}
-
template <typename EntryType>
void
EnumStoreT<EntryType>::freeUnusedEnums(bool movePostingIdx)
@@ -368,13 +171,12 @@ EnumStoreT<EntryType>::freeUnusedEnums(bool movePostingIdx)
ComparatorType cmp(*this);
if (EntryType::hasFold() && movePostingIdx) {
FoldedComparatorType fcmp(*this);
- _enumDict->freeUnusedEnums(cmp, &fcmp);
+ _dict.freeUnusedEnums(cmp, &fcmp);
} else {
- _enumDict->freeUnusedEnums(cmp, nullptr);
+ _dict.freeUnusedEnums(cmp, nullptr);
}
}
-
template <typename EntryType>
void
EnumStoreT<EntryType>::freeUnusedEnums(const IndexSet& toRemove)
@@ -382,34 +184,18 @@ EnumStoreT<EntryType>::freeUnusedEnums(const IndexSet& toRemove)
ComparatorType cmp(*this);
if (EntryType::hasFold()) {
FoldedComparatorType fcmp(*this);
- _enumDict->freeUnusedEnums(toRemove, cmp, &fcmp);
+ _dict.freeUnusedEnums(toRemove, cmp, &fcmp);
} else {
- _enumDict->freeUnusedEnums(toRemove, cmp, nullptr);
+ _dict.freeUnusedEnums(toRemove, cmp, nullptr);
}
}
-
template <typename EntryType>
template <typename Dictionary>
void
-EnumStoreT<EntryType>::addEnum(Type value, Index &newIdx, Dictionary &dict)
+EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx, Dictionary& dict)
{
typedef typename Dictionary::Iterator DictionaryIterator;
- uint32_t entrySize = this->getEntrySize(value);
- uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & buffer = _store.getBufferState(activeBufferId);
-#ifdef LOG_ENUM_STORE
- LOG(info,
- "addEnum(): buffer[%u]: capacity = %" PRIu64
- ", size = %" PRIu64 ", remaining = %" PRIu64
- ", dead = %" PRIu64 ", entrySize = %u",
- activeBufferId, buffer.capacity(),
- buffer.size(), buffer.remaining(),
- buffer._deadElems, entrySize);
-#endif
- if (buffer.remaining() < entrySize) {
- HDR_ABORT("not enough space");
- }
// check if already present
ComparatorType cmp(*this, value);
@@ -420,33 +206,31 @@ EnumStoreT<EntryType>::addEnum(Type value, Index &newIdx, Dictionary &dict)
return;
}
- uint64_t offset = buffer.size();
- newIdx = Index(offset, activeBufferId);
- char * dst = _store.template getEntry<char>(newIdx);
- this->insertEntry(dst, 0, value);
- buffer.pushed_back(entrySize);
- assert(Index::pad(offset) == 0);
+ newIdx = _store.get_allocator().allocate(value);
+ // TODO: Move this logic to "add/insert" on the dictionary
// update tree with new index
dict.insert(it, newIdx, typename Dictionary::DataType());
- // Copy posting list idx from next entry if same
- // folded value.
+ // Copy posting list idx from next entry if same folded value.
// Only for string posting list attributes, i.e. dictionary has
// data and entry type has folded compare.
if (DictionaryIterator::hasData() && EntryType::hasFold()) {
FoldedComparatorType foldCmp(*this);
++it;
- if (!it.valid() || foldCmp(newIdx, it.getKey()))
+ if (!it.valid() || foldCmp(newIdx, it.getKey())) {
return; // Next entry does not use same posting list
+ }
--it;
--it;
- if (it.valid() && !foldCmp(it.getKey(), newIdx))
+ if (it.valid() && !foldCmp(it.getKey(), newIdx)) {
return; // Previous entry uses same posting list
- if (it.valid())
+ }
+ if (it.valid()) {
++it;
- else
+ } else {
it.begin();
+ }
assert(it.valid() && it.getKey() == newIdx);
++it;
typename Dictionary::DataType pidx(it.getData());
@@ -458,148 +242,15 @@ EnumStoreT<EntryType>::addEnum(Type value, Index &newIdx, Dictionary &dict)
}
}
-
template <typename EntryType>
void
-EnumStoreT<EntryType>::addEnum(Type value, Index & newIdx)
+EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx)
{
- if (_enumDict->hasData()) {
- addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary());
+ if (_dict.hasData()) {
+ addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumPostingTree> &>(_dict).getDictionary());
} else {
- addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary());
+ addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumTree> &>(_dict).getDictionary());
}
}
-template <typename DictionaryType>
-struct TreeBuilderInserter {
- static void insert(typename DictionaryType::Builder & builder,
- IEnumStore::Index enumIdx,
- datastore::EntryRef postingIdx)
- {
- (void) postingIdx;
- builder.insert(enumIdx, typename DictionaryType::DataType());
- }
-};
-
-template <>
-struct TreeBuilderInserter<EnumPostingTree> {
- static void insert(EnumPostingTree::Builder & builder,
- IEnumStore::Index enumIdx,
- datastore::EntryRef postingIdx)
- {
- builder.insert(enumIdx, postingIdx);
- }
-};
-
-
-template <typename EntryType>
-template <typename Dictionary>
-void
-EnumStoreT<EntryType>::reset(Builder &builder, Dictionary &dict)
-{
- typedef typename Dictionary::Builder DictionaryBuilder;
- reset(builder.getBufferSize());
-
- DictionaryBuilder treeBuilder(dict.getAllocator());
- uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & state = _store.getBufferState(activeBufferId);
-
- // insert entries and update DictionaryBuilder
- const typename Builder::Uniques & uniques = builder.getUniques();
- for (typename Builder::Uniques::const_iterator iter = uniques.begin();
- iter != uniques.end(); ++iter)
- {
- uint64_t offset = state.size();
- Index idx(offset, activeBufferId);
- char * dst = _store.template getEntry<char>(idx);
- this->insertEntry(dst, iter->_refCount, iter->_value);
- state.pushed_back(iter->_sz);
-
- // update DictionaryBuilder with enum index and posting index
- TreeBuilderInserter<Dictionary>::insert(treeBuilder, idx, datastore::EntryRef(iter->_pidx));
- }
-
- // reset Dictionary
- dict.assign(treeBuilder); // destructive copy of treeBuilder
}
-
-
-template <typename EntryType>
-void
-EnumStoreT<EntryType>::reset(Builder &builder)
-{
- if (_enumDict->hasData()) {
- reset(builder, static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary());
- } else {
- reset(builder, static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary());
- }
-}
-
-
-template <typename EntryType>
-template <typename Dictionary>
-void
-EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2New)
-{
- typedef typename Dictionary::Iterator DictionaryIterator;
- uint32_t freeBufferIdx = _store.getActiveBufferId(TYPE_ID);
- datastore::BufferState & freeBuf = _store.getBufferState(freeBufferIdx);
- // copy entries from active buffer to free buffer
- for (DictionaryIterator iter = dict.begin(); iter.valid(); ++iter) {
- Index activeIdx = iter.getKey();
-
- Entry e = this->getEntry(activeIdx);
-
- // At this point the tree shall never reference any empty stuff.
- assert(e.getRefCount() > 0);
-#ifdef LOG_ENUM_STORE
- LOG(info, "performCompaction(): copy entry: enum = %u, refCount = %u, value = %s",
- e.getEnum(), e.getRefCount(), e.getValue());
-#endif
- Type value = e.getValue();
- uint32_t refCount = e.getRefCount();
- uint32_t entrySize = this->getEntrySize(value);
-
- uint64_t offset = freeBuf.size();
- Index newIdx = Index(offset, freeBufferIdx);
- char * dst = _store.template getEntry<char>(newIdx);
- // insert entry into free buffer
- this->insertEntry(dst, refCount, value);
-#ifdef LOG_ENUM_STORE
- LOG(info, "performCompaction(): new entry: refCount = %u, value = %s", 0, value);
-#endif
- freeBuf.pushed_back(entrySize);
- assert(Index::pad(offset) == 0);
-#ifdef LOG_ENUM_STORE
- LOG(info,
- "performCompaction(): new index: offset = %" PRIu64
- ", bufferIdx = %u",
- offset, freeBufferIdx);
-#endif
-
- // update tree with new index
- std::atomic_thread_fence(std::memory_order_release);
- iter.writeKey(newIdx);
-
- old2New[activeIdx] = newIdx;
- }
- this->postCompact();
-}
-
-
-template <typename EntryType>
-bool
-EnumStoreT<EntryType>::performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New)
-{
- if ( ! this->preCompact(bytesNeeded) ) {
- return false;
- }
- if (_enumDict->hasData()) {
- performCompaction(static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary(), old2New);
- } else {
- performCompaction(static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary(), old2New);
- }
- return true;
-}
-
-} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
index 0963e0ff67d..f79098a67df 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
@@ -22,7 +22,7 @@ class IEnumStoreDictionary;
*/
class IEnumStore {
public:
- using Index = datastore::AlignedEntryRefT<31, 4>;
+ using Index = datastore::EntryRefT<22>;
using IndexVector = vespalib::Array<Index>;
using EnumHandle = attribute::IAttributeVector::EnumHandle;
using EnumVector = vespalib::Array<uint32_t>;
@@ -52,8 +52,8 @@ public:
virtual const IEnumStoreDictionary& getEnumStoreDict() const = 0;
virtual const datastore::DataStoreBase& get_data_store_base() const = 0;
virtual uint32_t getNumUniques() const = 0;
- virtual vespalib::MemoryUsage getMemoryUsage() const = 0;
- virtual vespalib::MemoryUsage getTreeMemoryUsage() const = 0;
+ virtual vespalib::MemoryUsage getValuesMemoryUsage() const = 0;
+ virtual vespalib::MemoryUsage getDictionaryMemoryUsage() const = 0;
template <typename TreeT>
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index 5352dc492fd..9bdc36e805b 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -183,8 +183,8 @@ MultiValueEnumAttribute<B, M>::onUpdateStat()
{
// update statistics
vespalib::MemoryUsage total;
- total.merge(this->_enumStore.getMemoryUsage());
- total.merge(this->_enumStore.getTreeMemoryUsage());
+ total.merge(this->_enumStore.getValuesMemoryUsage());
+ total.merge(this->_enumStore.getDictionaryMemoryUsage());
total.merge(this->_mvMapping.updateStat());
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 08095b6bf13..7f4f7503eff 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -102,8 +102,8 @@ SingleValueEnumAttribute<B>::onUpdateStat()
// update statistics
vespalib::MemoryUsage total = _enumIndices.getMemoryUsage();
total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes());
- total.merge(this->_enumStore.getMemoryUsage());
- total.merge(this->_enumStore.getTreeMemoryUsage());
+ total.merge(this->_enumStore.getValuesMemoryUsage());
+ total.merge(this->_enumStore.getDictionaryMemoryUsage());
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
this->updateStatistics(_enumIndices.size(), this->_enumStore.getNumUniques(), total.allocatedBytes(),
diff --git a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h
index cda62884318..a780cb4fe98 100644
--- a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h
+++ b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h
@@ -45,6 +45,7 @@ public:
virtual uint32_t get_num_uniques() const = 0;
virtual vespalib::MemoryUsage get_memory_usage() const = 0;
virtual void build(const std::vector<EntryRef> &refs, const std::vector<uint32_t> &ref_counts, std::function<void(EntryRef)> hold) = 0;
+ virtual void build_with_payload(const std::vector<EntryRef>& refs, const std::vector<uint32_t>& payloads) = 0;
virtual std::unique_ptr<ReadSnapshot> get_read_snapshot() const = 0;
virtual EntryRef get_frozen_root() const = 0;
};
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.h b/vespalib/src/vespa/vespalib/datastore/unique_store.h
index bf7808e9325..6b85e79d3eb 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store.h
@@ -44,6 +44,7 @@ private:
public:
UniqueStore();
+ UniqueStore(std::unique_ptr<IUniqueStoreDictionary> dict);
~UniqueStore();
UniqueStoreAddResult add(EntryConstRefType value);
EntryRef find(EntryConstRefType value);
@@ -51,6 +52,12 @@ public:
void remove(EntryRef ref);
ICompactionContext::UP compactWorst();
vespalib::MemoryUsage getMemoryUsage() const;
+ vespalib::AddressSpace get_address_space_usage() const;
+
+ // TODO: Consider exposing only the needed functions from allocator
+ Allocator& get_allocator() { return _allocator; }
+ const Allocator& get_allocator() const { return _allocator; }
+ IUniqueStoreDictionary& get_dictionary() { return *_dict; }
// Pass on hold list management to underlying store
void transferHoldLists(generation_t generation);
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
index f1b60845403..ebd81010612 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp
@@ -28,9 +28,15 @@ using DefaultUniqueStoreDictionary = UniqueStoreDictionary<DefaultDictionary>;
template <typename EntryT, typename RefT, typename Compare, typename Allocator>
UniqueStore<EntryT, RefT, Compare, Allocator>::UniqueStore()
+ : UniqueStore<EntryT, RefT, Compare, Allocator>(std::make_unique<uniquestore::DefaultUniqueStoreDictionary>())
+{
+}
+
+template <typename EntryT, typename RefT, typename Compare, typename Allocator>
+UniqueStore<EntryT, RefT, Compare, Allocator>::UniqueStore(std::unique_ptr<IUniqueStoreDictionary> dict)
: _allocator(),
_store(_allocator.get_data_store()),
- _dict(std::make_unique<uniquestore::DefaultUniqueStoreDictionary>())
+ _dict(std::move(dict))
{
}
@@ -178,6 +184,13 @@ UniqueStore<EntryT, RefT, Compare, Allocator>::getMemoryUsage() const
}
template <typename EntryT, typename RefT, typename Compare, typename Allocator>
+vespalib::AddressSpace
+UniqueStore<EntryT, RefT, Compare, Allocator>::get_address_space_usage() const
+{
+ return _allocator.get_data_store().getAddressSpaceUsage();
+}
+
+template <typename EntryT, typename RefT, typename Compare, typename Allocator>
const BufferState &
UniqueStore<EntryT, RefT, Compare, Allocator>::bufferState(EntryRef ref) const
{
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h b/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h
index 1981a190cc6..a4443742e33 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.h
@@ -42,6 +42,7 @@ public:
return get_wrapped(ref).value();
}
DataStoreType& get_data_store() { return _store; }
+ const DataStoreType& get_data_store() const { return _store; }
};
}
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h b/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h
index a0e9f3d63a7..7f5162d97ff 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h
@@ -23,6 +23,7 @@ class UniqueStoreBuilder {
IUniqueStoreDictionary& _dict;
std::vector<EntryRef> _refs;
std::vector<uint32_t> _refCounts;
+
public:
UniqueStoreBuilder(Allocator& allocator, IUniqueStoreDictionary& dict, uint32_t uniqueValuesHint);
~UniqueStoreBuilder();
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h
index 4ae32c45dea..15b947e283b 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h
@@ -48,6 +48,7 @@ public:
uint32_t get_num_uniques() const override;
vespalib::MemoryUsage get_memory_usage() const override;
void build(const std::vector<EntryRef> &refs, const std::vector<uint32_t> &ref_counts, std::function<void(EntryRef)> hold) override;
+ void build_with_payload(const std::vector<EntryRef>& refs, const std::vector<uint32_t>& payloads) override;
std::unique_ptr<ReadSnapshot> get_read_snapshot() const override;
EntryRef get_frozen_root() const override;
};
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp
index f3087bc5610..3784b903ad6 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp
@@ -176,6 +176,23 @@ UniqueStoreDictionary<DictionaryT, ParentT>::build(const std::vector<EntryRef> &
}
template <typename DictionaryT, typename ParentT>
+void
+UniqueStoreDictionary<DictionaryT, ParentT>::build_with_payload(const std::vector<EntryRef>& refs,
+ const std::vector<uint32_t>& payloads)
+{
+ assert(refs.size() == payloads.size());
+ typename DictionaryType::Builder builder(_dict.getAllocator());
+ for (size_t i = 0; i < refs.size(); ++i) {
+ if constexpr (std::is_same_v<DataType, uint32_t>) {
+ builder.insert(refs[i], payloads[i]);
+ } else {
+ builder.insert(refs[i], DataType());
+ }
+ }
+ _dict.assign(builder);
+}
+
+template <typename DictionaryT, typename ParentT>
std::unique_ptr<typename ParentT::ReadSnapshot>
UniqueStoreDictionary<DictionaryT, ParentT>::get_read_snapshot() const
{