diff options
Diffstat (limited to 'searchlib/src/tests/attribute/enumstore/enumstore_test.cpp')
-rw-r--r-- | searchlib/src/tests/attribute/enumstore/enumstore_test.cpp | 879 |
1 files changed, 879 insertions, 0 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp new file mode 100644 index 00000000000..e63889bbeb8 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -0,0 +1,879 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("enumstore_test"); +#include <vespa/vespalib/testkit/testapp.h> +//#define LOG_ENUM_STORE +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <limits> +#include <string> +#include <iostream> + +namespace search { + +size_t enumStoreAlign(size_t size) +{ + return (size + 15) & -UINT64_C(16); +} + +// EnumStoreBase::Index(0,0) is reserved thus 16 bytes are reserved in buffer 0 +const uint32_t RESERVED_BYTES = 16u; +typedef EnumStoreT<NumericEntryType<uint32_t> > NumericEnumStore; + +class EnumStoreTest : public vespalib::TestApp +{ +private: + typedef EnumStoreT<StringEntryType> StringEnumStore; + typedef EnumStoreT<NumericEntryType<float> > FloatEnumStore; + typedef EnumStoreT<NumericEntryType<double> > DoubleEnumStore; + + typedef EnumStoreBase::Index EnumIndex; + typedef vespalib::GenerationHandler::generation_t generation_t; + + void testIndex(); + void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + const std::string & string); + void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + uint32_t value); + void testStringEntry(); + void testNumericEntry(); + + template <typename EnumStoreType, typename T> + void testFloatEnumStore(EnumStoreType & es); + void testFloatEnumStore(); + + void testAddEnum(); + template <typename EnumStoreType> + void testAddEnum(bool hasPostings); + + template <typename EnumStoreType, typename Dictionary> + void + testUniques(const EnumStoreType &ses, + const std::vector<std::string> &unique); + + + void testCompaction(); + template <typename EnumStoreType> + void testCompaction(bool hasPostings, bool disableReEnumerate); + + void testReset(); + template <typename EnumStoreType> + void testReset(bool hasPostings); + + void testHoldListAndGeneration(); + void testMemoryUsage(); + void requireThatAddressSpaceUsageIsReported(); + void testBufferLimit(); + + // helper methods + typedef std::vector<std::string> StringVector; + template <typename T> + T random(T low, T high); + std::string getRandomString(uint32_t minLen, uint32_t maxLen); + StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen); + StringVector sortRandomStrings(StringVector & strings); + + struct StringEntry { + StringEntry(uint32_t e, uint32_t r, const std::string & s) : + _enum(e), _refCount(r), _string(s) {} + uint32_t _enum; + uint32_t _refCount; + std::string _string; + }; + + struct Reader { + typedef StringEnumStore::Index Index; + typedef std::vector<Index> IndexVector; + typedef std::vector<StringEntry> ExpectedVector; + uint32_t _generation; + IndexVector _indices; + ExpectedVector _expected; + Reader(uint32_t generation, const IndexVector & indices, + const ExpectedVector & expected) : + _generation(generation), _indices(indices), _expected(expected) {} + }; + + void + checkReaders(const StringEnumStore &ses, + generation_t sesGen, + const std::vector<Reader> &readers); + +public: + EnumStoreTest() {} + int Main(); +}; + +void +EnumStoreTest::testIndex() +{ + { + StringEnumStore::Index idx; + EXPECT_TRUE( ! idx.valid()); + EXPECT_EQUAL(idx.offset(), 0u); + EXPECT_TRUE(idx.bufferId() == 0); + } + { + StringEnumStore::Index idx(enumStoreAlign(1000), 0); + EXPECT_TRUE(idx.offset() == enumStoreAlign(1000)); + EXPECT_TRUE(idx.bufferId() == 0); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 31)- RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 31) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 33) - RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 33) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 35) - RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 35) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + // Change offsets when alignment changes. + StringEnumStore::Index idx1(48, 0); + StringEnumStore::Index idx2(80, 0); + StringEnumStore::Index idx3(48, 0); + EXPECT_TRUE(!(idx1 == idx2)); + EXPECT_TRUE(idx1 == idx3); + } + { + EXPECT_TRUE(StringEnumStore::Index::numBuffers() == 2); + } +} + +void +EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + const std::string & string) +{ + StringEnumStore::insertEntry(data, enumValue, refCount, string.c_str()); +} + +void +EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + uint32_t value) +{ + NumericEnumStore::insertEntry(data, enumValue, refCount, value); +} + +void +EnumStoreTest::testStringEntry() +{ + { + char data[9]; + fillDataBuffer(data, 0, 0, ""); + StringEnumStore::Entry e(data); + EXPECT_TRUE(StringEnumStore::getEntrySize("") == + StringEnumStore::alignEntrySize(8 + 1)); + + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 0); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 1); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 0); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + } + { + char data[18]; + fillDataBuffer(data, 10, 5, "enumstore"); + StringEnumStore::Entry e(data); + EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") == + StringEnumStore::alignEntrySize(8 + 1 + 9)); + + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 5); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 6); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 5); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + } +} + +void +EnumStoreTest::testNumericEntry() +{ + { + char data[12]; + fillDataBuffer(data, 10, 20, 30); + NumericEnumStore::Entry e(data); + EXPECT_TRUE(NumericEnumStore::getEntrySize(30) == + NumericEnumStore::alignEntrySize(8 + 4)); + + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 20); + EXPECT_TRUE(e.getValue() == 30); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 21); + EXPECT_TRUE(e.getValue() == 30); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 20); + EXPECT_TRUE(e.getValue() == 30); + } +} + +template <typename EnumStoreType, typename T> +void +EnumStoreTest::testFloatEnumStore(EnumStoreType & es) +{ + EnumIndex idx; + + T a[5] = {-20.5f, -10.5f, -0.5f, 9.5f, 19.5f}; + T b[5] = {-25.5f, -15.5f, -5.5f, 4.5f, 14.5f}; + + for (uint32_t i = 0; i < 5; ++i) { + es.addEnum(a[i], idx); + } + + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(es.findIndex(a[i], idx)); + EXPECT_TRUE(!es.findIndex(b[i], idx)); + } + + es.addEnum(std::numeric_limits<T>::quiet_NaN(), idx); + EXPECT_TRUE(es.findIndex(std::numeric_limits<T>::quiet_NaN(), idx)); + EXPECT_TRUE(es.findIndex(std::numeric_limits<T>::quiet_NaN(), idx)); + + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(es.findIndex(a[i], idx)); + EXPECT_TRUE(!es.findIndex(b[i], idx)); + } +} + +void +EnumStoreTest::testFloatEnumStore() +{ + { + FloatEnumStore fes(1000, false); + testFloatEnumStore<FloatEnumStore, float>(fes); + } + { + DoubleEnumStore des(1000, false); + testFloatEnumStore<DoubleEnumStore, double>(des); + } +} + +void +EnumStoreTest::testAddEnum() +{ + testAddEnum<StringEnumStore>(false); + + testAddEnum<StringEnumStore>(true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testAddEnum(bool hasPostings) +{ + EnumStoreType ses(100, hasPostings); + EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES, + ses.getBuffer(0).capacity()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size()); + EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + EnumIndex idx; + uint64_t offset = ses.getBuffer(0).size(); + std::vector<EnumIndex> indices; + std::vector<std::string> unique; + unique.push_back(""); + unique.push_back("add"); + unique.push_back("enumstore"); + unique.push_back("unique"); + + for (uint32_t i = 0; i < unique.size(); ++i) { + ses.addEnum(unique[i].c_str(), idx); + EXPECT_EQUAL(offset, idx.offset()); + EXPECT_EQUAL(0u, idx.bufferId()); + ses.incRefCount(idx); + EXPECT_EQUAL(1u, ses.getRefCount(idx)); + indices.push_back(idx); + offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8); + EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); + EXPECT_TRUE(ses.getLastEnum() == i); + } + ses.freezeTree(); + + for (uint32_t i = 0; i < indices.size(); ++i) { + uint32_t e = ses.getEnum(indices[i]); + EXPECT_EQUAL(i, e); + EXPECT_TRUE(ses.findEnum(unique[i].c_str(), e)); + EXPECT_TRUE(ses.getEnum(btree::EntryRef(e)) == i); + EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); + EXPECT_TRUE(idx == indices[i]); + EXPECT_EQUAL(1u, ses.getRefCount(indices[i])); + StringEntryType::Type value = 0; + EXPECT_TRUE(ses.getValue(indices[i], value)); + EXPECT_TRUE(strcmp(unique[i].c_str(), value) == 0); + } + + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, unique); + } else { + testUniques<EnumStoreType, EnumTree>(ses, unique); + } +} + +template <typename EnumStoreType, typename Dictionary> +void +EnumStoreTest::testUniques +(const EnumStoreType &ses, const std::vector<std::string> &unique) +{ + const EnumStoreDict<Dictionary> *enumDict = + dynamic_cast<const EnumStoreDict<Dictionary> *> + (&ses.getEnumStoreDict()); + assert(enumDict != NULL); + const Dictionary &dict = enumDict->getDictionary(); + uint32_t i = 0; + EnumIndex idx; + for (typename Dictionary::Iterator iter = dict.begin(); + iter.valid(); ++iter, ++i) { + idx = iter.getKey(); + EXPECT_TRUE(strcmp(unique[i].c_str(), ses.getValue(idx)) == 0); + } + EXPECT_EQUAL(static_cast<uint32_t>(unique.size()), i); +} + + +void +EnumStoreTest::testCompaction() +{ + testCompaction<StringEnumStore>(false, false); + testCompaction<StringEnumStore>(true, false); + testCompaction<StringEnumStore>(false, true); + testCompaction<StringEnumStore>(true, true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) +{ + // entrySize = 15 before alignment + uint32_t entrySize = EnumStoreType::alignEntrySize(15); + uint32_t bufferSize = entrySize * 5; + EnumStoreType ses(bufferSize, hasPostings); + EnumIndex idx; + std::vector<EnumIndex> indices; + typename EnumStoreType::Type t = "foo"; + std::vector<std::string> uniques; + uniques.push_back("enum00"); + uniques.push_back("enum01"); + uniques.push_back("enum02"); + uniques.push_back("enum03"); + uniques.push_back("enum04"); + + // fill with unique values + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(ses.getRemaining() == bufferSize - i * entrySize); + ses.addEnum(uniques[i].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + indices.push_back(idx); + } + EXPECT_EQUAL(0u, ses.getRemaining()); + EXPECT_EQUAL(0u, ses.getBuffer(0).remaining()); + EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + uint32_t failEntrySize = ses.getEntrySize("enum05"); + EXPECT_TRUE(failEntrySize > ses.getRemaining()); + + // change from enum00 -> enum01 + ses.decRefCount(indices[0]); + ses.incRefCount(indices[1]); + indices[0] = indices[1]; + + // check correct refcount + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx)); + uint32_t refCount = ses.getRefCount(idx); + if (i == 0) { + EXPECT_TRUE(refCount == 0); + } else if (i == 1) { + EXPECT_TRUE(refCount == 2); + } else { + EXPECT_TRUE(refCount == 1); + } + } + + // free unused enums + ses.freeUnusedEnums(true); + EXPECT_TRUE(!ses.findIndex("enum00", idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // perform compaction + if (disableReEnumerate) { + ses.disableReEnumerate(); + } + EXPECT_TRUE(ses.performCompaction(3 * entrySize)); + if (disableReEnumerate) { + ses.enableReEnumerate(); + } + EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize); + EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize); + EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4); + EXPECT_TRUE(ses.getBuffer(1)._deadElems == 0); + + EXPECT_EQUAL((disableReEnumerate ? 4u : 3u), ses.getLastEnum()); + + // add new unique strings + ses.addEnum("enum05", idx); + EXPECT_EQUAL((disableReEnumerate ? 5u : 4u), ses.getEnum(idx)); + ses.addEnum("enum06", idx); + EXPECT_EQUAL((disableReEnumerate ? 6u : 5u), ses.getEnum(idx)); + ses.addEnum("enum00", idx); + EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getEnum(idx)); + + EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getLastEnum()); + + // compare old and new indices + for (uint32_t i = 0; i < indices.size(); ++i) { + EXPECT_TRUE(ses.getCurrentIndex(indices[i], idx)); + EXPECT_TRUE(indices[i].bufferId() == 0); + EXPECT_TRUE(idx.bufferId() == 1); + EXPECT_TRUE(ses.getValue(indices[i], t)); + typename EnumStoreType::Type s = "bar"; + EXPECT_TRUE(ses.getValue(idx, s)); + EXPECT_TRUE(strcmp(t, s) == 0); + } + // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0 + EXPECT_TRUE(ses.getCurrentIndex(indices[0], idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset()); + EXPECT_EQUAL(0u, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[1], idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset()); + EXPECT_EQUAL(0u, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[2], idx)); + EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset()); + EXPECT_EQUAL(entrySize, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[3], idx)); + EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset()); + EXPECT_EQUAL(2 * entrySize, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[4], idx)); + EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset()); + EXPECT_EQUAL(3 * entrySize, idx.offset()); +} + +void +EnumStoreTest::testReset() +{ + testReset<StringEnumStore>(false); + + testReset<StringEnumStore>(true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testReset(bool hasPostings) +{ + uint32_t numUniques = 10000; + srand(123456789); + StringVector rndStrings = fillRandomStrings(numUniques, 10, 15); + EXPECT_EQUAL(rndStrings.size(), size_t(numUniques)); + StringVector uniques = sortRandomStrings(rndStrings); + EXPECT_EQUAL(uniques.size(), size_t(numUniques)); + // max entrySize = 25 before alignment + uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16); + EnumStoreType ses(numUniques * maxEntrySize, hasPostings); + EnumIndex idx; + + uint32_t cnt = 0; + // add new unique strings + for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) { + ses.addEnum(iter->c_str(), idx); + EXPECT_EQUAL(ses.getNumUniques(), ++cnt); + } + + // check for unique strings + for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) { + EXPECT_TRUE(ses.findIndex(iter->c_str(), idx)); + } + + EXPECT_EQUAL(ses.getNumUniques(), numUniques); + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, uniques); + } else { + testUniques<EnumStoreType, EnumTree>(ses, uniques); + } + + rndStrings = fillRandomStrings(numUniques, 15, 20); + StringVector newUniques = sortRandomStrings(rndStrings); + + typename EnumStoreType::Builder builder; + for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) { + builder.insert(iter->c_str()); + } + + ses.reset(builder); + EXPECT_EQUAL(RESERVED_BYTES, ses.getRemaining()); + + // check for old unique strings + for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) { + EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx)); + } + + // check for new unique strings + for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) { + EXPECT_TRUE(ses.findIndex(iter->c_str(), idx)); + } + + EXPECT_EQUAL(ses.getNumUniques(), numUniques); + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, newUniques); + } else { + testUniques<EnumStoreType, EnumTree>(ses, newUniques); + } +} + +void +EnumStoreTest::testHoldListAndGeneration() +{ + uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6); + StringEnumStore ses(100 * entrySize, false); + StringEnumStore::Index idx; + StringVector uniques; + generation_t sesGen = 0u; + uniques.reserve(100); + for (uint32_t i = 0; i < 100; ++i) { + char tmp[16]; + sprintf(tmp, i < 10 ? "enum0%u" : "enum%u", i); + uniques.push_back(tmp); + } + StringVector newUniques; + newUniques.reserve(100); + for (uint32_t i = 0; i < 100; ++i) { + char tmp[16]; + sprintf(tmp, i < 10 ? "unique0%u" : "unique%u", i); + newUniques.push_back(tmp); + } + uint32_t generation = 0; + std::vector<Reader> readers; + + // insert first batch of unique strings + for (uint32_t i = 0; i < 100; ++i) { + ses.addEnum(uniques[i].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + + // associate readers + if (i % 10 == 9) { + Reader::IndexVector indices; + Reader::ExpectedVector expected; + for (uint32_t j = i - 9; j <= i; ++j) { + EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx)); + indices.push_back(idx); + StringEnumStore::Entry entry = ses.getEntry(idx); + EXPECT_TRUE(entry.getEnum() == j); + EXPECT_TRUE(entry.getRefCount() == 1); + EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0); + expected.push_back(StringEntry(entry.getEnum(), entry.getRefCount(), + std::string(entry.getValue()))); + } + EXPECT_TRUE(indices.size() == 10); + EXPECT_TRUE(expected.size() == 10); + sesGen = generation++; + readers.push_back(Reader(sesGen, indices, expected)); + checkReaders(ses, sesGen, readers); + } + } + + EXPECT_EQUAL(0u, ses.getRemaining()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // remove all uniques + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx)); + ses.decRefCount(idx); + EXPECT_EQUAL(0u, ses.getRefCount(idx)); + } + ses.freeUnusedEnums(true); + EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // perform compaction + uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + + // check readers again + checkReaders(ses, sesGen, readers); + + // fill up buffer + uint32_t i = 0; + while (ses.getRemaining() >= newEntrySize) { + //LOG(info, "fill: %s", newUniques[i].c_str()); + ses.addEnum(newUniques[i++].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + } + EXPECT_LESS(ses.getRemaining(), newEntrySize); + // buffer on hold list + EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize)); + + checkReaders(ses, sesGen, readers); + ses.transferHoldLists(sesGen); + ses.trimHoldLists(sesGen + 1); + + // buffer no longer on hold list + EXPECT_LESS(ses.getRemaining(), newEntrySize); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize); +} + +void +EnumStoreTest::testMemoryUsage() +{ + StringEnumStore ses(200, false); + StringEnumStore::Index idx; + uint32_t num = 8; + std::vector<StringEnumStore::Index> indices; + std::vector<std::string> uniques; + for (uint32_t i = 0; i < num; ++i) { + std::stringstream ss; + ss << "enum" << i; + uniques.push_back(ss.str()); + } + generation_t sesGen = 0u; + uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx") + + // usage before inserting enums + MemoryUsage usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0)); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + for (uint32_t i = 0; i < num; ++i) { + ses.addEnum(uniques[i].c_str(), idx); + indices.push_back(idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + } + + // usage after inserting enums + usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + // assign new enum for num / 2 of indices + for (uint32_t i = 0; i < num / 2; ++i) { + ses.decRefCount(indices[i]); + EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx)); + ses.incRefCount(idx); + indices[i] = idx; + } + ses.freeUnusedEnums(true); + + // usage after removing enums + usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num / 2); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + ses.performCompaction(400); + + // usage after compaction + MemoryUsage usage2 = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num / 2); + EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes()); + EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold()); + + ses.transferHoldLists(sesGen); + ses.trimHoldLists(sesGen + 1); + + // usage after hold list trimming + MemoryUsage usage3 = ses.getMemoryUsage(); + EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes()); + EXPECT_EQUAL(0u, usage3.deadBytes()); + EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold()); +} + +namespace { + +NumericEnumStore::Index +addEnum(NumericEnumStore &store, uint32_t value) +{ + NumericEnumStore::Index result; + store.addEnum(value, result); + store.incRefCount(result); + return result; +} + +void +decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx) +{ + store.decRefCount(idx); + store.freeUnusedEnums(false); +} + +} + +void +EnumStoreTest::requireThatAddressSpaceUsageIsReported() +{ + const size_t ADDRESS_LIMIT = 34359738368; // NumericEnumStore::DataStoreType::RefType::offsetSize() + NumericEnumStore store(200, false); + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + NumericEnumStore::Index idx1 = addEnum(store, 10); + EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + NumericEnumStore::Index idx2 = addEnum(store, 20); + EXPECT_EQUAL(AddressSpace(32, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + decRefCount(store, idx1); + EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + decRefCount(store, idx2); + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage()); +} + +size_t +digits(size_t num) +{ + size_t digits = 1; + while (num / 10 > 0) { + num /= 10; + digits++; + } + return digits; +} + +void +EnumStoreTest::testBufferLimit() +{ + size_t enumSize = StringEnumStore::Index::offsetSize(); + StringEnumStore es(enumSize, false); + + size_t strLen = 65536; + char str[strLen + 1]; + for (size_t i = 0; i < strLen; ++i) { + str[i] = 'X'; + } + str[strLen] = 0; + + size_t entrySize = StringEnumStore::getEntrySize(str); + size_t numUniques = enumSize / entrySize; + size_t uniqDigits = digits(numUniques); + + EnumIndex idx; + EnumIndex lastIdx; + for (size_t i = 0; i < numUniques; ++i) { + sprintf(str, "%0*zu", (int)uniqDigits, i); + str[uniqDigits] = 'X'; + es.addEnum(str, idx); + if (i % (numUniques / 32) == 1) { + EXPECT_TRUE(idx.offset() > lastIdx.offset()); + EXPECT_EQUAL(i + 1, es.getNumUniques()); + std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl; + } + lastIdx = idx; + } + EXPECT_EQUAL(idx.offset(), lastIdx.offset()); + EXPECT_EQUAL(numUniques, es.getNumUniques()); + std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl; +} + +template <typename T> +T +EnumStoreTest::random(T low, T high) +{ + return (rand() % (high - low)) + low; +} + +std::string +EnumStoreTest::getRandomString(uint32_t minLen, uint32_t maxLen) +{ + uint32_t len = random(minLen, maxLen); + std::string retval; + for (uint32_t i = 0; i < len; ++i) { + char c = random('a', 'z'); + retval.push_back(c); + } + return retval; +} + +EnumStoreTest::StringVector +EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen) +{ + StringVector retval; + retval.reserve(numStrings); + for (uint32_t i = 0; i < numStrings; ++i) { + retval.push_back(getRandomString(minLen, maxLen)); + } + return retval; +} + +EnumStoreTest::StringVector +EnumStoreTest::sortRandomStrings(StringVector & strings) +{ + std::sort(strings.begin(), strings.end()); + std::vector<std::string> retval; + retval.reserve(strings.size()); + std::vector<std::string>::iterator pos = std::unique(strings.begin(), strings.end()); + std::copy(strings.begin(), pos, std::back_inserter(retval)); + return retval; +} + +void +EnumStoreTest::checkReaders(const StringEnumStore & ses, + generation_t sesGen, + const std::vector<Reader> & readers) +{ + (void) sesGen; + //uint32_t refCount = 1000; + StringEnumStore::Type t = ""; + for (uint32_t i = 0; i < readers.size(); ++i) { + const Reader & r = readers[i]; + for (uint32_t j = 0; j < r._indices.size(); ++j) { + EXPECT_EQUAL(r._expected[j]._enum, ses.getEnum(r._indices[j])); + EXPECT_TRUE(ses.getValue(r._indices[j], t)); + EXPECT_TRUE(r._expected[j]._string == std::string(t)); + } + } +} + + +int +EnumStoreTest::Main() +{ + TEST_INIT("enumstore_test"); + + testIndex(); + testStringEntry(); + testNumericEntry(); + testFloatEnumStore(); + testAddEnum(); + testCompaction(); + testReset(); + testHoldListAndGeneration(); + testMemoryUsage(); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + if (_argc > 1) { + testBufferLimit(); // large test with 8 GB buffer + } + + TEST_DONE(); +} +} + + +TEST_APPHOOK(search::EnumStoreTest); |