diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-01-20 21:45:41 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-01-20 21:45:41 +0000 |
commit | f26c811f275bfadcf36933d475e2d76e652cff1a (patch) | |
tree | 091abcecf72c9b7c80c2829b8a603913f7b008f2 | |
parent | 3a456107e830d82ba5895f348016c84c9de5b727 (diff) |
There is now no need to have the 10k maxdocs limit to avoid iterating too long.
Now we iterate in the most efficient way by scanning a bit vector and maxtime wil be so small that it can be ignored.
8 files changed, 27 insertions, 72 deletions
diff --git a/searchcore/src/tests/proton/documentdb/document_scan_iterator/document_scan_iterator_test.cpp b/searchcore/src/tests/proton/documentdb/document_scan_iterator/document_scan_iterator_test.cpp index a1e713802c1..7f7c0302926 100644 --- a/searchcore/src/tests/proton/documentdb/document_scan_iterator/document_scan_iterator_test.cpp +++ b/searchcore/src/tests/proton/documentdb/document_scan_iterator/document_scan_iterator_test.cpp @@ -42,25 +42,25 @@ struct Fixture _metaStore.put(gid, gid.convertToBucketId(), Timestamp(lid), docSize, lid, 0u); return *this; } - LidSet scan(uint32_t count, uint32_t compactLidLimit, uint32_t maxDocsToScan = 10) { + LidSet scan(uint32_t count, uint32_t compactLidLimit) { if (!_itr) { _itr = std::make_unique<DocumentScanIterator>(_metaStore); } LidSet retval; for (uint32_t i = 0; i < count; ++i) { - uint32_t lid = next(compactLidLimit, maxDocsToScan, false); + uint32_t lid = next(compactLidLimit, false); retval.insert(lid); EXPECT_TRUE(_itr->valid() || lid <= compactLidLimit); } - EXPECT_EQUAL(0u, next(compactLidLimit, maxDocsToScan, false)); + EXPECT_EQUAL(0u, next(compactLidLimit, false)); EXPECT_FALSE(_itr->valid()); return retval; } - uint32_t next(uint32_t compactLidLimit, uint32_t maxDocsToScan = 10, bool retry = false) { + uint32_t next(uint32_t compactLidLimit, bool retry = false) { if (!_itr) { _itr = std::make_unique<DocumentScanIterator>(_metaStore); } - return _itr->next(compactLidLimit, maxDocsToScan, retry).lid; + return _itr->next(compactLidLimit, retry).lid; } }; @@ -81,24 +81,11 @@ TEST_F("require that only lids > lid limit are returned", Fixture) assertLidSet({5,6,7,8}, f.scan(4, 4)); } -TEST_F("require that max docs to scan (1) are taken into consideration", Fixture) -{ - f.add({1,2,3,4,5,6,7,8}); - assertLidSet({0,5,6,7,8}, f.scan(8, 4, 1)); -} - -TEST_F("require that max docs to scan (2) are taken into consideration", Fixture) -{ - f.add({1,2,3,4,5,6,7,8}); - // scan order is: 8, {2,4}, 7, {5,3}, {1,6} (5 scans total) - assertLidSet({0,7,8}, f.scan(5, 6, 2)); -} - TEST_F("require that we start scan at previous doc if retry is set", Fixture) { f.add({1,2,3,4,5,6,7,8}); - uint32_t lid1 = f.next(4, 10, false); - uint32_t lid2 = f.next(4, 10, true); + uint32_t lid1 = f.next(4, false); + uint32_t lid2 = f.next(4, true); EXPECT_EQUAL(lid1, lid2); } diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp index 4f36608aace..c31141a943c 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_compaction_test.cpp @@ -31,7 +31,6 @@ constexpr uint32_t ALLOWED_LID_BLOAT = 1; constexpr double ALLOWED_LID_BLOAT_FACTOR = 0.3; constexpr double REMOVE_BATCH_BLOCK_RATE = 1.0 / 21.0; constexpr double REMOVE_BLOCK_RATE = 1.0 / 20.0; -constexpr uint32_t MAX_DOCS_TO_SCAN = 100; constexpr double RESOURCE_LIMIT_FACTOR = 1.0; constexpr uint32_t MAX_OUTSTANDING_MOVE_OPS = 10; const vespalib::string DOC_ID = "id:test:searchdocument::0"; @@ -52,11 +51,11 @@ struct MyScanIterator : public IDocumentScanIterator { bool valid() const override { return _validItr; } - search::DocumentMetaData next(uint32_t compactLidLimit, uint32_t maxDocsToScan, bool retry) override { + search::DocumentMetaData next(uint32_t compactLidLimit, bool retry) override { if (!retry && _itr != _lids.begin()) { ++_itr; } - for (uint32_t i = 0; i < maxDocsToScan && _itr != _lids.end() && (*_itr) <= compactLidLimit; ++i, ++_itr) {} + for (; _itr != _lids.end() && (*_itr) <= compactLidLimit; ++_itr) {} if (_itr != _lids.end()) { uint32_t lid = *_itr; if (lid > compactLidLimit) { @@ -75,7 +74,6 @@ struct MyHandler : public ILidSpaceCompactionHandler { mutable uint32_t _moveFromLid; mutable uint32_t _moveToLid; uint32_t _handleMoveCnt; - uint32_t _wantedSubDbId; uint32_t _wantedLidLimit; mutable uint32_t _iteratorCnt; bool _storeMoveDoneContexts; @@ -137,7 +135,6 @@ struct MyHandler : public ILidSpaceCompactionHandler { } } void handleCompactLidSpace(const CompactLidSpaceOperation &op, std::shared_ptr<IDestructorCallback>) override { - _wantedSubDbId = op.getSubDbId(); _wantedLidLimit = op.getLidLimit(); } }; @@ -147,7 +144,6 @@ MyHandler::MyHandler(bool storeMoveDoneContexts) _moveFromLid(0), _moveToLid(0), _handleMoveCnt(0), - _wantedSubDbId(0), _wantedLidLimit(0), _iteratorCnt(0), _storeMoveDoneContexts(storeMoveDoneContexts), @@ -281,7 +277,6 @@ struct JobTestBase : public ::testing::Test { } void init(uint32_t allowedLidBloat = ALLOWED_LID_BLOAT, double allowedLidBloatFactor = ALLOWED_LID_BLOAT_FACTOR, - uint32_t maxDocsToScan = MAX_DOCS_TO_SCAN, double resourceLimitFactor = RESOURCE_LIMIT_FACTOR, vespalib::duration interval = JOB_DELAY, bool nodeRetired = false, @@ -292,7 +287,7 @@ struct JobTestBase : public ::testing::Test { allowedLidBloatFactor, REMOVE_BATCH_BLOCK_RATE, REMOVE_BLOCK_RATE, - false, maxDocsToScan), + false), *_handler, _storer, _frozenHandler, _diskMemUsageNotifier, BlockableMaintenanceJobConfig(resourceLimitFactor, maxOutstandingMoveOps), _clusterStateHandler, nodeRetired); @@ -386,19 +381,18 @@ struct JobTest : public JobTestBase { {} void init(uint32_t allowedLidBloat = ALLOWED_LID_BLOAT, double allowedLidBloatFactor = ALLOWED_LID_BLOAT_FACTOR, - uint32_t maxDocsToScan = MAX_DOCS_TO_SCAN, double resourceLimitFactor = RESOURCE_LIMIT_FACTOR, vespalib::duration interval = JOB_DELAY, bool nodeRetired = false, uint32_t maxOutstandingMoveOps = MAX_OUTSTANDING_MOVE_OPS) { - JobTestBase::init(allowedLidBloat, allowedLidBloatFactor, maxDocsToScan, resourceLimitFactor, interval, nodeRetired, maxOutstandingMoveOps); + JobTestBase::init(allowedLidBloat, allowedLidBloatFactor, resourceLimitFactor, interval, nodeRetired, maxOutstandingMoveOps); _jobRunner = std::make_unique<MyDirectJobRunner>(*_job); } void init_with_interval(vespalib::duration interval) { - init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, MAX_DOCS_TO_SCAN, RESOURCE_LIMIT_FACTOR, interval); + init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, RESOURCE_LIMIT_FACTOR, interval); } void init_with_node_retired(bool retired) { - init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, MAX_DOCS_TO_SCAN, RESOURCE_LIMIT_FACTOR, JOB_DELAY, retired); + init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, RESOURCE_LIMIT_FACTOR, JOB_DELAY, retired); } }; @@ -490,16 +484,6 @@ TEST_F(JobTest, job_is_blocked_if_trying_to_move_document_for_frozen_bucket) EXPECT_FALSE(_job->isBlocked()); } -TEST_F(JobTest, job_handles_invalid_document_meta_data_when_max_docs_are_scanned) -{ - init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, 3); - setupOneDocumentToCompact(); - EXPECT_FALSE(run()); // does not find 9 in first scan - assertNoWorkDone(); - EXPECT_FALSE(run()); // move 9 -> 2 - assertOneDocumentCompacted(); -} - TEST_F(JobTest, job_can_restart_documents_scan_if_lid_bloat_is_still_to_large) { init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, 3); @@ -513,11 +497,8 @@ TEST_F(JobTest, job_can_restart_documents_scan_if_lid_bloat_is_still_to_large) // We simulate that the set of used docs have changed between these 2 runs EXPECT_FALSE(run()); // move 9 -> 2 endScan(); - assertJobContext(2, 9, 1, 0, 0); - EXPECT_EQ(2u, _handler->_iteratorCnt); - EXPECT_FALSE(run()); // does not find 8 in first scan - EXPECT_FALSE(run()); // move 8 -> 3 assertJobContext(3, 8, 2, 0, 0); + EXPECT_EQ(2u, _handler->_iteratorCnt); endScan().compact(); assertJobContext(3, 8, 2, 7, 1); } @@ -602,7 +583,7 @@ TEST_F(JobTest, ending_resource_starvation_resumes_lid_space_compaction) TEST_F(JobTest, resource_limit_factor_adjusts_limit) { - init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, MAX_DOCS_TO_SCAN, 1.05); + init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, 1.05); setupOneDocumentToCompact(); _diskMemUsageNotifier.notify({{100, 0}, {100, 101}}); EXPECT_FALSE(run()); // scan @@ -729,7 +710,7 @@ struct MaxOutstandingJobTest : public JobTest { runner() {} void init(uint32_t maxOutstandingMoveOps) { - JobTest::init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, MAX_DOCS_TO_SCAN, + JobTest::init(ALLOWED_LID_BLOAT, ALLOWED_LID_BLOAT_FACTOR, RESOURCE_LIMIT_FACTOR, JOB_DELAY, false, maxOutstandingMoveOps); runner = std::make_unique<MyCountJobRunner>(*_job); } diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp index 24965db5919..f365b6ae02d 100644 --- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.cpp @@ -55,8 +55,7 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig() _allowedLidBloatFactor(1.0), _remove_batch_block_rate(0.5), _remove_block_rate(100), - _disabled(false), - _maxDocsToScan(10000) + _disabled(false) { } @@ -65,16 +64,14 @@ DocumentDBLidSpaceCompactionConfig::DocumentDBLidSpaceCompactionConfig(vespalib: double allowedLidBloatFactor, double remove_batch_block_rate, double remove_block_rate, - bool disabled, - uint32_t maxDocsToScan) + bool disabled) : _delay(std::min(MAX_DELAY_SEC, interval)), _interval(interval), _allowedLidBloat(allowedLidBloat), _allowedLidBloatFactor(allowedLidBloatFactor), _remove_batch_block_rate(remove_batch_block_rate), _remove_block_rate(remove_block_rate), - _disabled(disabled), - _maxDocsToScan(maxDocsToScan) + _disabled(disabled) { } diff --git a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h index 80064b7b5d7..731add1f62c 100644 --- a/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h +++ b/searchcore/src/vespa/searchcore/proton/server/document_db_maintenance_config.h @@ -50,7 +50,6 @@ private: double _remove_batch_block_rate; double _remove_block_rate; bool _disabled; - uint32_t _maxDocsToScan; public: DocumentDBLidSpaceCompactionConfig(); @@ -59,8 +58,7 @@ public: double allowwedLidBloatFactor, double remove_batch_block_rate, double remove_block_rate, - bool disabled, - uint32_t maxDocsToScan = 10000); + bool disabled); static DocumentDBLidSpaceCompactionConfig createDisabled(); bool operator==(const DocumentDBLidSpaceCompactionConfig &rhs) const; @@ -71,7 +69,6 @@ public: double get_remove_batch_block_rate() const { return _remove_batch_block_rate; } double get_remove_block_rate() const { return _remove_block_rate; } bool isDisabled() const { return _disabled; } - uint32_t getMaxDocsToScan() const { return _maxDocsToScan; } }; class BlockableMaintenanceJobConfig { diff --git a/searchcore/src/vespa/searchcore/proton/server/document_scan_iterator.cpp b/searchcore/src/vespa/searchcore/proton/server/document_scan_iterator.cpp index 5f25767368a..290fbc79951 100644 --- a/searchcore/src/vespa/searchcore/proton/server/document_scan_iterator.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/document_scan_iterator.cpp @@ -23,12 +23,12 @@ DocumentScanIterator::valid() const } DocumentMetaData -DocumentScanIterator::next(uint32_t compactLidLimit, uint32_t maxDocsToScan, bool retry) +DocumentScanIterator::next(uint32_t compactLidLimit, bool retry) { if (!retry) { --_lastLid; } - for (uint32_t i(0); (_lastLid > compactLidLimit) && (i < maxDocsToScan); ++i, --_lastLid) { + for (uint32_t i(0); _lastLid > compactLidLimit; ++i, --_lastLid) { if (_metaStore.validLid(_lastLid)) { const RawDocumentMetaData &metaData = _metaStore.getRawMetaData(_lastLid); return DocumentMetaData(_lastLid, metaData.getTimestamp(), diff --git a/searchcore/src/vespa/searchcore/proton/server/document_scan_iterator.h b/searchcore/src/vespa/searchcore/proton/server/document_scan_iterator.h index 93d9106ae87..62dff0a7d85 100644 --- a/searchcore/src/vespa/searchcore/proton/server/document_scan_iterator.h +++ b/searchcore/src/vespa/searchcore/proton/server/document_scan_iterator.h @@ -20,9 +20,8 @@ private: public: DocumentScanIterator(const IDocumentMetaStore &_metaStore); - bool valid() const override; - search::DocumentMetaData next(uint32_t compactLidLimit, uint32_t maxDocsToScan, bool retry) override; + search::DocumentMetaData next(uint32_t compactLidLimit, bool retry) override; }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/i_document_scan_iterator.h b/searchcore/src/vespa/searchcore/proton/server/i_document_scan_iterator.h index 263d1e2945a..bcc52ffb475 100644 --- a/searchcore/src/vespa/searchcore/proton/server/i_document_scan_iterator.h +++ b/searchcore/src/vespa/searchcore/proton/server/i_document_scan_iterator.h @@ -14,7 +14,7 @@ struct IDocumentScanIterator { typedef std::unique_ptr<IDocumentScanIterator> UP; - virtual ~IDocumentScanIterator() {} + virtual ~IDocumentScanIterator() = default; /** * Returns false if we are certain there are no more documents to scan, true otherwise. @@ -24,16 +24,12 @@ struct IDocumentScanIterator /** * Returns the next document that has lid > compactLidLimit to be moved. - * Returns an invalid document if no documents satisfy the limit or - * if max documents are scanned. + * Returns an invalid document if no documents satisfy the limit. * * @param compactLidLimit The returned document must have lid larger than this limit. - * @param maxDocsToScan The maximum documents to scan before returning. * @param retry Whether we should start the scan with the previous returned document. */ - virtual search::DocumentMetaData next(uint32_t compactLidLimit, - uint32_t maxDocsToScan, - bool retry) = 0; + virtual search::DocumentMetaData next(uint32_t compactLidLimit, bool retry) = 0; }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp index 43ba91e2ef1..1165c8e345e 100644 --- a/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/lid_space_compaction_job.cpp @@ -35,9 +35,7 @@ LidSpaceCompactionJob::shouldRestartScanDocuments(const LidUsageStats &stats) co DocumentMetaData LidSpaceCompactionJob::getNextDocument(const LidUsageStats &stats) { - DocumentMetaData document = - _scanItr->next(std::max(stats.getLowestFreeLid(), stats.getUsedLids()), - _cfg.getMaxDocsToScan(), _retryFrozenDocument); + DocumentMetaData document = _scanItr->next(std::max(stats.getLowestFreeLid(), stats.getUsedLids()), _retryFrozenDocument); _retryFrozenDocument = false; return document; } |