summaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests
diff options
context:
space:
mode:
author: Geir Storli <geirst@verizonmedia.com> 2019-08-29 15:42:27 +0000
committer: Geir Storli <geirst@verizonmedia.com> 2019-09-02 08:57:40 +0000
commit: 94ab377491f19e0b4ea80201eb0340d6e4ee55b2 (patch)
tree: 78e4d337a6805a958fcb5b294d4bff7ddc17c9d3 /searchlib/src/tests
parent: 9fad146519a83d29a4d7e0c539f923c322600d10 (diff)
Improve memory management in all enum attributes.
The new enum store uses 1024 small data buffers instead of the 2 large buffers used before. This avoids the memory spikes that occurred when the active buffer was full and all values had to be compacted into the other buffer. In addition, the new enum store uses free lists, so compaction is needed less often.
Diffstat (limited to 'searchlib/src/tests')
-rw-r--r-- searchlib/src/tests/attribute/attribute_test.cpp 8
-rw-r--r-- searchlib/src/tests/attribute/comparator/comparator_test.cpp 10
-rw-r--r-- searchlib/src/tests/attribute/enumstore/enumstore_test.cpp 591
3 files changed, 37 insertions, 572 deletions
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
index 4e520e86707..98caf39dace 100644
--- a/searchlib/src/tests/attribute/attribute_test.cpp
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -2036,11 +2036,11 @@ AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool
AddressSpaceUsage after = attrPtr->getAddressSpaceUsage();
if (attrPtr->hasEnum()) {
LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str());
- EXPECT_EQUAL(before.enumStoreUsage().used(), 16u);
- EXPECT_EQUAL(before.enumStoreUsage().dead(), 16u);
+ EXPECT_EQUAL(before.enumStoreUsage().used(), 1u);
+ EXPECT_EQUAL(before.enumStoreUsage().dead(), 1u);
EXPECT_GREATER(after.enumStoreUsage().used(), before.enumStoreUsage().used());
- EXPECT_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit());
- EXPECT_EQUAL(34359738368u, after.enumStoreUsage().limit()); // EnumStoreBase::DataStoreType::RefType::offsetSize()
+ EXPECT_GREATER_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit());
+ EXPECT_GREATER(after.enumStoreUsage().limit(), 4200000000u);
} else {
LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT enum", attrName.c_str());
EXPECT_EQUAL(before.enumStoreUsage().used(), 0u);
diff --git a/searchlib/src/tests/attribute/comparator/comparator_test.cpp b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
index a2000c48423..7bd6f3ca013 100644
--- a/searchlib/src/tests/attribute/comparator/comparator_test.cpp
+++ b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
@@ -47,7 +47,7 @@ public:
void
Test::requireThatNumericComparatorIsWorking()
{
- NumericEnumStore es(1024, false);
+ NumericEnumStore es(false);
EnumIndex e1, e2;
es.addEnum(10, e1);
es.addEnum(30, e2);
@@ -63,7 +63,7 @@ Test::requireThatNumericComparatorIsWorking()
void
Test::requireThatFloatComparatorIsWorking()
{
- FloatEnumStore es(1024, false);
+ FloatEnumStore es(false);
EnumIndex e1, e2, e3;
es.addEnum(10.5, e1);
es.addEnum(30.5, e2);
@@ -83,7 +83,7 @@ Test::requireThatFloatComparatorIsWorking()
void
Test::requireThatStringComparatorIsWorking()
{
- StringEnumStore es(1024, false);
+ StringEnumStore es(false);
EnumIndex e1, e2, e3;
es.addEnum("Aa", e1);
es.addEnum("aa", e2);
@@ -102,7 +102,7 @@ Test::requireThatStringComparatorIsWorking()
void
Test::requireThatComparatorWithTreeIsWorking()
{
- NumericEnumStore es(2048, false);
+ NumericEnumStore es(false);
vespalib::GenerationHandler g;
TreeType t;
NodeAllocator m;
@@ -129,7 +129,7 @@ Test::requireThatComparatorWithTreeIsWorking()
void
Test::requireThatFoldedComparatorIsWorking()
{
- StringEnumStore es(1024, false);
+ StringEnumStore es(false);
EnumIndex e1, e2, e3, e4;
es.addEnum("Aa", e1);
es.addEnum("aa", e2);
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index c4ba8eecf43..f61211283a4 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -17,7 +17,8 @@ size_t enumStoreAlign(size_t size)
// IEnumStore::Index(0,0) is reserved thus 16 bytes are reserved in buffer 0
const uint32_t RESERVED_BYTES = 16u;
-typedef EnumStoreT<NumericEntryType<uint32_t> > NumericEnumStore;
+using NumericEnumStore = EnumStoreT<NumericEntryType<uint32_t> >;
+using generation_t = vespalib::GenerationHandler::generation_t;
class EnumStoreTest : public vespalib::TestApp
{
@@ -27,15 +28,6 @@ private:
typedef EnumStoreT<NumericEntryType<double> > DoubleEnumStore;
typedef IEnumStore::Index EnumIndex;
- typedef vespalib::GenerationHandler::generation_t generation_t;
-
- void testIndex();
- void fillDataBuffer(char * data, uint32_t refCount,
- const std::string & string);
- void fillDataBuffer(char * data, uint32_t refCount,
- uint32_t value);
- void testStringEntry();
- void testNumericEntry();
template <typename EnumStoreType, typename T>
void testFloatEnumStore(EnumStoreType & es);
@@ -51,27 +43,11 @@ private:
testUniques(const EnumStoreType &ses,
const std::vector<std::string> &unique);
-
- void testCompaction();
- template <typename EnumStoreType>
- void testCompaction(bool hasPostings);
-
- void testReset();
- template <typename EnumStoreType>
- void testReset(bool hasPostings);
-
void testHoldListAndGeneration();
- void testMemoryUsage();
void requireThatAddressSpaceUsageIsReported();
- void testBufferLimit();
// helper methods
typedef std::vector<std::string> StringVector;
- template <typename T>
- T random(T low, T high);
- std::string getRandomString(uint32_t minLen, uint32_t maxLen);
- StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen);
- StringVector sortRandomStrings(StringVector & strings);
struct StringEntry {
StringEntry(uint32_t r, const std::string & s) :
@@ -107,123 +83,6 @@ EnumStoreTest::Reader::Reader(uint32_t generation, const IndexVector & indices,
{}
EnumStoreTest::Reader::~Reader() { }
-void
-EnumStoreTest::testIndex()
-{
- {
- StringEnumStore::Index idx;
- EXPECT_TRUE( ! idx.valid());
- EXPECT_EQUAL(idx.offset(), 0u);
- EXPECT_TRUE(idx.bufferId() == 0);
- }
- {
- StringEnumStore::Index idx(enumStoreAlign(1000), 0);
- EXPECT_TRUE(idx.offset() == enumStoreAlign(1000));
- EXPECT_TRUE(idx.bufferId() == 0);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 31)- RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 31) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 33) - RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 33) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- StringEnumStore::Index idx((UINT64_C(1) << 35) - RESERVED_BYTES, 1);
- EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 35) - RESERVED_BYTES);
- EXPECT_TRUE(idx.bufferId() == 1);
- }
- {
- // Change offsets when alignment changes.
- StringEnumStore::Index idx1(48, 0);
- StringEnumStore::Index idx2(80, 0);
- StringEnumStore::Index idx3(48, 0);
- EXPECT_TRUE(!(idx1 == idx2));
- EXPECT_TRUE(idx1 == idx3);
- }
- {
- EXPECT_TRUE(StringEnumStore::Index::numBuffers() == 2);
- }
-}
-
-void
-EnumStoreTest::fillDataBuffer(char * data, uint32_t refCount,
- const std::string & string)
-{
- StringEnumStore::insertEntry(data, refCount, string.c_str());
-}
-
-void
-EnumStoreTest::fillDataBuffer(char * data, uint32_t refCount,
- uint32_t value)
-{
- NumericEnumStore::insertEntry(data, refCount, value);
-}
-
-void
-EnumStoreTest::testStringEntry()
-{
- {
- char data[9];
- fillDataBuffer(data, 0, "");
- StringEnumStore::Entry e(data);
- EXPECT_TRUE(StringEnumStore::getEntrySize("") ==
- StringEnumStore::alignEntrySize(8 + 1));
-
- EXPECT_TRUE(e.getRefCount() == 0);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 1);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 0);
- EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
- }
- {
- char data[18];
- fillDataBuffer(data, 5, "enumstore");
- StringEnumStore::Entry e(data);
- EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") ==
- StringEnumStore::alignEntrySize(8 + 1 + 9));
-
- EXPECT_TRUE(e.getRefCount() == 5);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 6);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 5);
- EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
- }
-}
-
-void
-EnumStoreTest::testNumericEntry()
-{
- {
- char data[12];
- fillDataBuffer(data, 20, 30);
- NumericEnumStore::Entry e(data);
- EXPECT_TRUE(NumericEnumStore::getEntrySize(30) ==
- NumericEnumStore::alignEntrySize(8 + 4));
-
- EXPECT_TRUE(e.getRefCount() == 20);
- EXPECT_TRUE(e.getValue() == 30);
-
- e.incRefCount();
- EXPECT_TRUE(e.getRefCount() == 21);
- EXPECT_TRUE(e.getValue() == 30);
- e.decRefCount();
- EXPECT_TRUE(e.getRefCount() == 20);
- EXPECT_TRUE(e.getValue() == 30);
- }
-}
-
template <typename EnumStoreType, typename T>
void
EnumStoreTest::testFloatEnumStore(EnumStoreType & es)
@@ -256,11 +115,11 @@ void
EnumStoreTest::testFloatEnumStore()
{
{
- FloatEnumStore fes(1000, false);
+ FloatEnumStore fes(false);
testFloatEnumStore<FloatEnumStore, float>(fes);
}
{
- DoubleEnumStore des(1000, false);
+ DoubleEnumStore des(false);
testFloatEnumStore<DoubleEnumStore, double>(des);
}
}
@@ -268,7 +127,7 @@ EnumStoreTest::testFloatEnumStore()
void
EnumStoreTest::testFindFolded()
{
- StringEnumStore ses(100, false);
+ StringEnumStore ses(false);
std::vector<EnumIndex> indices;
std::vector<std::string> unique({"", "one", "two", "TWO", "Two", "three"});
for (std::string &str : unique) {
@@ -308,15 +167,10 @@ template <typename EnumStoreType>
void
EnumStoreTest::testAddEnum(bool hasPostings)
{
- EnumStoreType ses(100, hasPostings);
- EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES,
- ses.getBuffer(0).capacity());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size());
- EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
+ // TODO: Rewrite test to use BatchUpdater
+ EnumStoreType ses(hasPostings);
EnumIndex idx;
- uint64_t offset = ses.getBuffer(0).size();
std::vector<EnumIndex> indices;
std::vector<std::string> unique;
unique.push_back("");
@@ -326,12 +180,9 @@ EnumStoreTest::testAddEnum(bool hasPostings)
for (uint32_t i = 0; i < unique.size(); ++i) {
ses.addEnum(unique[i].c_str(), idx);
- EXPECT_EQUAL(offset, idx.offset());
- EXPECT_EQUAL(0u, idx.bufferId());
ses.incRefCount(idx);
EXPECT_EQUAL(1u, ses.getRefCount(idx));
indices.push_back(idx);
- offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8);
EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx));
}
ses.freezeTree();
@@ -374,197 +225,11 @@ EnumStoreTest::testUniques
EXPECT_EQUAL(static_cast<uint32_t>(unique.size()), i);
}
-
-void
-EnumStoreTest::testCompaction()
-{
- testCompaction<StringEnumStore>(false);
- testCompaction<StringEnumStore>(true);
-}
-
-template <typename EnumStoreType>
-void
-EnumStoreTest::testCompaction(bool hasPostings)
-{
- // entrySize = 15 before alignment
- uint32_t entrySize = EnumStoreType::alignEntrySize(15);
- uint32_t initBufferSize = entrySize * 5;
- EnumStoreType ses(initBufferSize, hasPostings);
- // Note: Sizes of underlying data store buffers are power of 2.
- uint32_t adjustedBufferSize = vespalib::roundUp2inN(initBufferSize) - RESERVED_BYTES;
- EnumIndex idx;
- std::vector<EnumIndex> indices;
- typename EnumStoreType::Type t = "foo";
- std::vector<std::string> uniques;
- uniques.push_back("enum00");
- uniques.push_back("enum01");
- uniques.push_back("enum02");
- uniques.push_back("enum03");
- uniques.push_back("enum04");
-
- // fill with unique values
- for (uint32_t i = 0; i < 5; ++i) {
- size_t expRemaining = adjustedBufferSize - i * entrySize;
- EXPECT_EQUAL(expRemaining, ses.getRemaining());
- ses.addEnum(uniques[i].c_str(), idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- indices.push_back(idx);
- }
- EXPECT_EQUAL(32u, ses.getRemaining());
- EXPECT_EQUAL(32u, ses.getBuffer(0).remaining());
- EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
- uint32_t failEntrySize = ses.getEntrySize("enum05");
- EXPECT_EQUAL(16u, failEntrySize);
-
- // change from enum00 -> enum01
- ses.decRefCount(indices[0]);
- ses.incRefCount(indices[1]);
- indices[0] = indices[1];
-
- // check correct refcount
- for (uint32_t i = 0; i < 5; ++i) {
- EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
- uint32_t refCount = ses.getRefCount(idx);
- if (i == 0) {
- EXPECT_TRUE(refCount == 0);
- } else if (i == 1) {
- EXPECT_TRUE(refCount == 2);
- } else {
- EXPECT_TRUE(refCount == 1);
- }
- }
-
- // free unused enums
- ses.freeUnusedEnums(true);
- EXPECT_TRUE(!ses.findIndex("enum00", idx));
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
- auto &data_store_base = ses.get_data_store_base();
- auto old_compaction_count = data_store_base.get_compaction_count();
-
- // perform compaction
- IEnumStore::EnumIndexMap old2New;
- EXPECT_TRUE(ses.performCompaction(3 * entrySize, old2New));
- EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize);
- EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize);
- EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4);
- EXPECT_TRUE(ses.getBuffer(1).getDeadElems() == 0);
-
- EXPECT_NOT_EQUAL(old_compaction_count, data_store_base.get_compaction_count());
-
- // add new unique strings
- ses.addEnum("enum05", idx);
- ses.addEnum("enum06", idx);
- ses.addEnum("enum00", idx);
-
- // compare old and new indices
- for (uint32_t i = 0; i < indices.size(); ++i) {
- idx = old2New[indices[i]];
- EXPECT_TRUE(indices[i].bufferId() == 0);
- EXPECT_TRUE(idx.bufferId() == 1);
- EXPECT_TRUE(ses.getValue(indices[i], t));
- typename EnumStoreType::Type s = "bar";
- EXPECT_TRUE(ses.getValue(idx, s));
- EXPECT_TRUE(strcmp(t, s) == 0);
- }
- // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0
- idx = old2New[indices[0]];
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset());
- EXPECT_EQUAL(0u, idx.offset());
- idx = old2New[indices[1]];
- EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset());
- EXPECT_EQUAL(0u, idx.offset());
- idx = old2New[indices[2]];
- EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset());
- EXPECT_EQUAL(entrySize, idx.offset());
- idx = old2New[indices[3]];
- EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset());
- EXPECT_EQUAL(2 * entrySize, idx.offset());
- idx = old2New[indices[4]];
- EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset());
- EXPECT_EQUAL(3 * entrySize, idx.offset());
-}
-
-void
-EnumStoreTest::testReset()
-{
- testReset<StringEnumStore>(false);
-
- testReset<StringEnumStore>(true);
-}
-
-template <typename EnumStoreType>
-void
-EnumStoreTest::testReset(bool hasPostings)
-{
- uint32_t numUniques = 10000;
- srand(123456789);
- StringVector rndStrings = fillRandomStrings(numUniques, 10, 15);
- EXPECT_EQUAL(rndStrings.size(), size_t(numUniques));
- StringVector uniques = sortRandomStrings(rndStrings);
- EXPECT_EQUAL(uniques.size(), size_t(numUniques));
- // max entrySize = 25 before alignment
- uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16);
- EnumStoreType ses(numUniques * maxEntrySize, hasPostings);
- EnumIndex idx;
-
- uint32_t cnt = 0;
- // add new unique strings
- for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) {
- ses.addEnum(iter->c_str(), idx);
- EXPECT_EQUAL(ses.getNumUniques(), ++cnt);
- }
-
- // check for unique strings
- for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
- EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
- }
-
- EXPECT_EQUAL(ses.getNumUniques(), numUniques);
- if (hasPostings) {
- testUniques<EnumStoreType, EnumPostingTree>(ses, uniques);
- } else {
- testUniques<EnumStoreType, EnumTree>(ses, uniques);
- }
-
- rndStrings = fillRandomStrings(numUniques, 15, 20);
- StringVector newUniques = sortRandomStrings(rndStrings);
-
- typename EnumStoreType::Builder builder;
- for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
- builder.insert(iter->c_str());
- }
-
- ses.reset(builder);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(524288u, ses.getCapacity());
- EXPECT_EQUAL(204272u, ses.getRemaining());
-
- // check for old unique strings
- for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
- EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx));
- }
-
- // check for new unique strings
- for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
- EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
- }
-
- EXPECT_EQUAL(ses.getNumUniques(), numUniques);
- if (hasPostings) {
- testUniques<EnumStoreType, EnumPostingTree>(ses, newUniques);
- } else {
- testUniques<EnumStoreType, EnumTree>(ses, newUniques);
- }
-}
-
void
EnumStoreTest::testHoldListAndGeneration()
{
- uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6);
- StringEnumStore ses(100 * entrySize, false);
+ // TODO: Rewrite test to use BatchUpdater
+ StringEnumStore ses(false);
StringEnumStore::Index idx;
StringVector uniques;
generation_t sesGen = 0u;
@@ -597,11 +262,11 @@ EnumStoreTest::testHoldListAndGeneration()
for (uint32_t j = i - 9; j <= i; ++j) {
EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx));
indices.push_back(idx);
- StringEnumStore::Entry entry = ses.getEntry(idx);
- EXPECT_TRUE(entry.getRefCount() == 1);
- EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0);
- expected.push_back(StringEntry(entry.getRefCount(),
- std::string(entry.getValue())));
+ uint32_t ref_count = ses.getRefCount(idx);
+ std::string value(ses.getValue(idx));
+ EXPECT_EQUAL(1u, ref_count);
+ EXPECT_EQUAL(uniques[j], value);
+ expected.emplace_back(ref_count, value);
}
EXPECT_TRUE(indices.size() == 10);
EXPECT_TRUE(expected.size() == 10);
@@ -611,10 +276,6 @@ EnumStoreTest::testHoldListAndGeneration()
}
}
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(432u, ses.getRemaining());
- EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
// remove all uniques
for (uint32_t i = 0; i < 100; ++i) {
EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
@@ -622,117 +283,12 @@ EnumStoreTest::testHoldListAndGeneration()
EXPECT_EQUAL(0u, ses.getRefCount(idx));
}
ses.freeUnusedEnums(true);
- EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
-
- // perform compaction
- uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8);
- IEnumStore::EnumIndexMap old2New;
- EXPECT_TRUE(ses.performCompaction(5 * newEntrySize, old2New));
// check readers again
checkReaders(ses, sesGen, readers);
- // fill up buffer
- uint32_t i = 0;
- while (ses.getRemaining() >= newEntrySize) {
- //LOG(info, "fill: %s", newUniques[i].c_str());
- ses.addEnum(newUniques[i++].c_str(), idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- }
- EXPECT_LESS(ses.getRemaining(), newEntrySize);
- // buffer on hold list
- old2New.clear();
- EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize, old2New));
-
- checkReaders(ses, sesGen, readers);
- ses.transferHoldLists(sesGen);
- ses.trimHoldLists(sesGen + 1);
-
- // buffer no longer on hold list
- EXPECT_LESS(ses.getRemaining(), newEntrySize);
- old2New.clear();
- EXPECT_TRUE(ses.performCompaction(5 * newEntrySize, old2New));
- EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize);
-}
-
-void
-EnumStoreTest::testMemoryUsage()
-{
- StringEnumStore ses(200, false);
- StringEnumStore::Index idx;
- uint32_t num = 8;
- std::vector<StringEnumStore::Index> indices;
- std::vector<std::string> uniques;
- for (uint32_t i = 0; i < num; ++i) {
- std::stringstream ss;
- ss << "enum" << i;
- uniques.push_back(ss.str());
- }
- generation_t sesGen = 0u;
- uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx")
-
- // usage before inserting enums
- vespalib::MemoryUsage usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0));
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- for (uint32_t i = 0; i < num; ++i) {
- ses.addEnum(uniques[i].c_str(), idx);
- indices.push_back(idx);
- ses.incRefCount(idx);
- EXPECT_TRUE(ses.getRefCount(idx));
- }
-
- // usage after inserting enums
- usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- // assign new enum for num / 2 of indices
- for (uint32_t i = 0; i < num / 2; ++i) {
- ses.decRefCount(indices[i]);
- EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx));
- ses.incRefCount(idx);
- indices[i] = idx;
- }
- ses.freeUnusedEnums(true);
-
- // usage after removing enums
- usage = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num / 2);
- // Note: Sizes of underlying data store buffers are power of 2.
- EXPECT_EQUAL(vespalib::roundUp2inN(enumStoreAlign(200u) + RESERVED_BYTES), usage.allocatedBytes());
- EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
- EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes());
- EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
-
- IEnumStore::EnumIndexMap old2New;
- ses.performCompaction(400, old2New);
-
- // usage after compaction
- vespalib::MemoryUsage usage2 = ses.getMemoryUsage();
- EXPECT_EQUAL(ses.getNumUniques(), num / 2);
- EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes());
- EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes());
- EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold());
-
ses.transferHoldLists(sesGen);
ses.trimHoldLists(sesGen + 1);
-
- // usage after hold list trimming
- vespalib::MemoryUsage usage3 = ses.getMemoryUsage();
- EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes());
- EXPECT_EQUAL(0u, usage3.deadBytes());
- EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold());
}
namespace {
@@ -747,10 +303,13 @@ addEnum(NumericEnumStore &store, uint32_t value)
}
void
-decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx)
+decRefCount(NumericEnumStore& store, NumericEnumStore::Index idx)
{
store.decRefCount(idx);
store.freeUnusedEnums(false);
+ generation_t gen = 5;
+ store.transferHoldLists(gen);
+ store.trimHoldLists(gen + 1);
}
}
@@ -758,106 +317,21 @@ decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx)
void
EnumStoreTest::requireThatAddressSpaceUsageIsReported()
{
- const size_t ADDRESS_LIMIT = 34359738368; // NumericEnumStore::DataStoreType::RefType::offsetSize()
- NumericEnumStore store(200, false);
+ // TODO: Rewrite test to use BatchUpdater
+ const size_t ADDRESS_LIMIT = 4290772994; // Max allocated elements in un-allocated buffers + allocated elements in allocated buffers.
+ NumericEnumStore store(false);
using vespalib::AddressSpace;
- EXPECT_EQUAL(AddressSpace(16, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(1, 1, ADDRESS_LIMIT), store.getAddressSpaceUsage());
NumericEnumStore::Index idx1 = addEnum(store, 10);
- EXPECT_EQUAL(AddressSpace(32, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(2, 1, ADDRESS_LIMIT), store.getAddressSpaceUsage());
NumericEnumStore::Index idx2 = addEnum(store, 20);
- EXPECT_EQUAL(AddressSpace(48, 16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ // Address limit increases because buffer is re-sized.
+ EXPECT_EQUAL(AddressSpace(3, 1, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
decRefCount(store, idx1);
- EXPECT_EQUAL(AddressSpace(48, 32, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+ EXPECT_EQUAL(AddressSpace(3, 2, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
decRefCount(store, idx2);
- EXPECT_EQUAL(AddressSpace(48, 48, ADDRESS_LIMIT), store.getAddressSpaceUsage());
-}
-
-size_t
-digits(size_t num)
-{
- size_t digits = 1;
- while (num / 10 > 0) {
- num /= 10;
- digits++;
- }
- return digits;
-}
-
-void
-EnumStoreTest::testBufferLimit()
-{
- size_t enumSize = StringEnumStore::Index::offsetSize();
- StringEnumStore es(enumSize, false);
-
- size_t strLen = 65536;
- char str[strLen + 1];
- for (size_t i = 0; i < strLen; ++i) {
- str[i] = 'X';
- }
- str[strLen] = 0;
-
- size_t entrySize = StringEnumStore::getEntrySize(str);
- size_t numUniques = enumSize / entrySize;
- size_t uniqDigits = digits(numUniques);
-
- EnumIndex idx;
- EnumIndex lastIdx;
- for (size_t i = 0; i < numUniques; ++i) {
- sprintf(str, "%0*zu", (int)uniqDigits, i);
- str[uniqDigits] = 'X';
- es.addEnum(str, idx);
- if (i % (numUniques / 32) == 1) {
- EXPECT_TRUE(idx.offset() > lastIdx.offset());
- EXPECT_EQUAL(i + 1, es.getNumUniques());
- std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
- }
- lastIdx = idx;
- }
- EXPECT_EQUAL(idx.offset(), lastIdx.offset());
- EXPECT_EQUAL(numUniques, es.getNumUniques());
- std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
-}
-
-template <typename T>
-T
-EnumStoreTest::random(T low, T high)
-{
- return (rand() % (high - low)) + low;
-}
-
-std::string
-EnumStoreTest::getRandomString(uint32_t minLen, uint32_t maxLen)
-{
- uint32_t len = random(minLen, maxLen);
- std::string retval;
- for (uint32_t i = 0; i < len; ++i) {
- char c = random('a', 'z');
- retval.push_back(c);
- }
- return retval;
-}
-
-EnumStoreTest::StringVector
-EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen)
-{
- StringVector retval;
- retval.reserve(numStrings);
- for (uint32_t i = 0; i < numStrings; ++i) {
- retval.push_back(getRandomString(minLen, maxLen));
- }
- return retval;
-}
-
-EnumStoreTest::StringVector
-EnumStoreTest::sortRandomStrings(StringVector & strings)
-{
- std::sort(strings.begin(), strings.end());
- std::vector<std::string> retval;
- retval.reserve(strings.size());
- std::vector<std::string>::iterator pos = std::unique(strings.begin(), strings.end());
- std::copy(strings.begin(), pos, std::back_inserter(retval));
- return retval;
+ EXPECT_EQUAL(AddressSpace(3, 3, ADDRESS_LIMIT + 2), store.getAddressSpaceUsage());
}
void
@@ -867,7 +341,7 @@ EnumStoreTest::checkReaders(const StringEnumStore & ses,
{
(void) sesGen;
//uint32_t refCount = 1000;
- StringEnumStore::Type t = "";
+ StringEnumStore::DataType t = "";
for (uint32_t i = 0; i < readers.size(); ++i) {
const Reader & r = readers[i];
for (uint32_t j = 0; j < r._indices.size(); ++j) {
@@ -883,20 +357,11 @@ EnumStoreTest::Main()
{
TEST_INIT("enumstore_test");
- testIndex();
- testStringEntry();
- testNumericEntry();
testFloatEnumStore();
testFindFolded();
testAddEnum();
- testCompaction();
- testReset();
testHoldListAndGeneration();
- testMemoryUsage();
TEST_DO(requireThatAddressSpaceUsageIsReported());
- if (_argc > 1) {
- testBufferLimit(); // large test with 8 GB buffer
- }
TEST_DONE();
}