summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author	Geir Storli <geirst@yahoo-inc.com>	2017-05-10 14:30:29 +0000
committer	Geir Storli <geirst@yahoo-inc.com>	2017-05-12 14:01:24 +0000
commit	6d253c5644128fa500dfb27ceb48a79a58522db8 (patch)
tree	3104173b278121d2b0c4b5934c98604ba623562f /searchlib
parent	bc5e77f6f49107a3005ef03fcfb8131c2261ecc9 (diff)
Support that lid space can be compacted and entries from old files skipped during load.
Diffstat (limited to 'searchlib')
-rw-r--r--	searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp	78
-rw-r--r--	searchlib/src/vespa/searchlib/docstore/logdatastore.cpp	30
-rw-r--r--	searchlib/src/vespa/searchlib/docstore/logdatastore.h	3
3 files changed, 91 insertions, 20 deletions
diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
index 4f4ebab9687..f2a41e10819 100644
--- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
+++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
@@ -789,8 +789,8 @@ struct Fixture {
store.initFlush(serialNum);
store.flush(serialNum);
}
- Fixture &write(uint32_t lid) {
- vespalib::string data = genData(lid, 256);
+ Fixture &write(uint32_t lid, size_t numBytes = 1024) {
+ vespalib::string data = genData(lid, numBytes);
store.write(nextSerialNum(), lid, data.c_str(), data.size());
return *this;
}
@@ -806,6 +806,9 @@ struct Fixture {
void assertDocIdLimit(uint32_t expDocIdLimit) {
EXPECT_EQUAL(expDocIdLimit, store.getDocIdLimit());
}
+ void assertNumChunks(size_t numChunks) {
+ EXPECT_EQUAL(numChunks, store.getFileChunkStats().size());
+ }
void assertDocIdLimitInFileChunks(const std::vector<uint32_t> expLimits) {
std::vector<uint32_t> actLimits;
for (const auto &stat : store.getFileChunkStats()) {
@@ -813,22 +816,45 @@ struct Fixture {
}
EXPECT_EQUAL(expLimits, actLimits);
}
+ void assertContent(const std::set<uint32_t> &lids, uint32_t docIdLimit, size_t numBytesPerEntry = 1024) {
+ for (uint32_t lid = 0; lid < docIdLimit; ++lid) {
+ vespalib::DataBuffer buffer;
+ size_t size = store.read(lid, buffer);
+ if (lids.find(lid) != lids.end()) {
+ vespalib::string expData = genData(lid, numBytesPerEntry);
+ EXPECT_EQUAL(expData, vespalib::string(buffer.getData(), buffer.getDataLen()));
+ EXPECT_GREATER(size, 0u);
+ } else {
+ EXPECT_EQUAL("", vespalib::string(buffer.getData(), buffer.getDataLen()));
+ EXPECT_EQUAL(0u, size);
+ }
+ }
+ }
};
-TEST_F("require that docIdLimit is updated when inserting entries", Fixture("tmp"))
+TEST("require that docIdLimit is updated when inserting entries")
{
- f.assertDocIdLimit(0);
- f.write(10);
- f.assertDocIdLimit(11);
- f.write(9);
- f.assertDocIdLimit(11);
- f.write(11);
- f.assertDocIdLimit(12);
+ {
+ Fixture f("tmp", false);
+ f.assertDocIdLimit(0);
+ f.write(10);
+ f.assertDocIdLimit(11);
+ f.write(9);
+ f.assertDocIdLimit(11);
+ f.write(11);
+ f.assertDocIdLimit(12);
+ f.assertNumChunks(1);
+ f.flush();
+ }
+ {
+ Fixture f("tmp");
+ f.assertDocIdLimit(12);
+ }
}
-TEST("require that docIdLimit at idx file creation time is written to file header")
+TEST("require that docIdLimit at idx file creation time is written to idx file header")
{
- std::vector<uint32_t> expLimits = {std::numeric_limits<uint32_t>::max(),24,114,214};
+ std::vector<uint32_t> expLimits = {std::numeric_limits<uint32_t>::max(),14,104,204};
{
Fixture f("tmp", false);
f.writeUntilNewChunk(10);
@@ -843,6 +869,34 @@ TEST("require that docIdLimit at idx file creation time is written to file heade
}
}
+TEST("require that lid space can be compacted and entries from old files skipped during load")
+{
+ {
+ Fixture f("tmp", false);
+ f.write(10);
+ f.writeUntilNewChunk(100);
+ f.write(20);
+ f.writeUntilNewChunk(200);
+ f.write(30);
+ TEST_DO(f.assertContent({10,100,101,102,20,200,201,202,30}, 203));
+
+ f.assertDocIdLimit(203);
+ f.store.compactLidSpace(100);
+ f.assertDocIdLimit(100);
+ TEST_DO(f.assertContent({10,20,30}, 203));
+
+ f.writeUntilNewChunk(31);
+ f.write(99);
+ f.write(300);
+ TEST_DO(f.assertContent({10,20,30,31,32,33,99,300}, 301));
+ f.assertDocIdLimitInFileChunks({std::numeric_limits<uint32_t>::max(),103,203,100});
+ f.flush();
+ }
+ {
+ Fixture f("tmp");
+ TEST_DO(f.assertContent({10,20,30,31,32,33,99,300}, 301));
+ }
+}
TEST_MAIN() {
DummyFileHeaderContext::setCreator("logdatastore_test");
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
index 4daa1417ea7..478606f437c 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
@@ -53,14 +53,15 @@ LogDataStore::LogDataStore(vespalib::ThreadExecutor &executor,
_executor(executor),
_initFlushSyncToken(0),
_tlSyncer(tlSyncer),
- _bucketizer(bucketizer)
+ _bucketizer(bucketizer),
+ _currentlyCompacting()
{
// Reserve space for 1TB summary in order to avoid locking.
_fileChunks.reserve(LidInfo::getFileIdLimit());
_holdFileChunks.resize(LidInfo::getFileIdLimit());
preload();
- updateLidMap();
+ updateLidMap(getLastFileChunkDocIdLimit());
updateSerialNum();
}
@@ -86,13 +87,16 @@ LogDataStore::~LogDataStore()
}
void
-LogDataStore::updateLidMap()
+LogDataStore::updateLidMap(uint32_t lastFileChunkDocIdLimit)
{
uint64_t lastSerialNum(0);
LockGuard guard(_updateLock);
- for (FileChunk::UP & fc : _fileChunks) {
- fc->updateLidMap(guard, *this, lastSerialNum, std::numeric_limits<uint32_t>::max());
- lastSerialNum = fc->getLastPersistedSerialNum();
+ for (size_t i = 0; i < _fileChunks.size(); ++i) {
+ FileChunk::UP &chunk = _fileChunks[i];
+ bool lastChunk = ((i + 1) == _fileChunks.size());
+ uint32_t docIdLimit = lastChunk ? std::numeric_limits<uint32_t>::max() : lastFileChunkDocIdLimit;
+ chunk->updateLidMap(guard, *this, lastSerialNum, docIdLimit);
+ lastSerialNum = chunk->getLastPersistedSerialNum();
}
}
@@ -770,6 +774,15 @@ LogDataStore::preload()
_prevActive = _active.prev();
}
+uint32_t
+LogDataStore::getLastFileChunkDocIdLimit()
+{
+ if (!_fileChunks.empty()) {
+ return _fileChunks.back()->getDocIdLimit();
+ }
+ return std::numeric_limits<uint32_t>::max();
+}
+
LogDataStore::NameIdSet
LogDataStore::eraseEmptyIdxFiles(const NameIdSet &partList)
{
@@ -1105,7 +1118,10 @@ LogDataStore::getFileChunkStats() const
void
LogDataStore::compactLidSpace(uint32_t wantedDocLidLimit)
{
- (void) wantedDocLidLimit;
+ for (size_t i = wantedDocLidLimit; i < _lidInfo.size(); ++i) {
+ _lidInfo[i] = LidInfo();
+ }
+ setDocIdLimit(wantedDocLidLimit);
}
bool
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
index b9d0bea5eed..426d1c4b8ff 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
@@ -217,8 +217,9 @@ private:
typedef attribute::RcuVector<uint64_t> LidInfoVector;
typedef std::vector<FileChunk::UP> FileChunkVector;
- void updateLidMap();
+ void updateLidMap(uint32_t lastFileChunkDocIdLimit);
void preload();
+ uint32_t getLastFileChunkDocIdLimit();
void verifyModificationTime(const NameIdSet & partList);
void eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList);