diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-10-30 16:16:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-30 16:16:39 +0200 |
commit | d8970d73872e5386f36aaa73e2dc6b0e881069bf (patch) | |
tree | 629867ff5eef670fa008ad0312bccaf1a2ba422b | |
parent | 18a747185627747b0d137d8d4f06da3024587114 (diff) | |
parent | c732b01d45595ad831325cb8a7e032c38c1de4c3 (diff) |
Merge pull request #19797 from vespa-engine/balder/use-coarser-tasks-for-delete-bucket
Remove the lids in one task instead of 1k tasks for full buckets on d…
7 files changed, 37 insertions, 17 deletions
```diff
diff --git a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
index ddbc0ffa882..e4eaad538cb 100644
--- a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
+++ b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
@@ -80,9 +80,7 @@ public:
         _index.insertDocument(lid, doc);
     }
     void removeDocuments(LidVector lids) override {
-        for (uint32_t lid : lids) {
-            _index.removeDocument(lid);
-        }
+        _index.removeDocuments(std::move(lids));
     }
     uint64_t getStaticMemoryFootprint() const override {
         return _index.getStaticMemoryFootprint();
diff --git a/searchlib/src/apps/tests/memoryindexstress_test.cpp b/searchlib/src/apps/tests/memoryindexstress_test.cpp
index 9ee47cc6849..dd445745f18 100644
--- a/searchlib/src/apps/tests/memoryindexstress_test.cpp
+++ b/searchlib/src/apps/tests/memoryindexstress_test.cpp
@@ -227,7 +227,9 @@ struct Fixture {
         index.insertDocument(id, *doc);
     }
     void remove(uint32_t id) {
-        index.removeDocument(id);
+        std::vector<uint32_t> lids;
+        lids.push_back(id);
+        index.removeDocuments(std::move(lids));
     }
 
     void readWork(uint32_t cnt);
diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
index 59ade118990..20cfb045081 100644
--- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
@@ -112,7 +112,9 @@ struct Index {
         return d;
     }
     Index &remove(uint32_t id) {
-        index.removeDocument(id);
+        std::vector<uint32_t> lids;
+        lids.push_back(id);
+        index.removeDocuments(std::move(lids));
         internalSyncCommit();
         return *this;
     }
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
index 4f03a5cb95f..127ff1d52c3 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
@@ -116,6 +116,7 @@ DocumentInverter::buildFieldPath(const document::DocumentType &docType,
 void
 DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
 {
+    // Might want to batch inverters as we do for attributes
     const document::DataType *dataType(doc.getDataType());
     if (_indexedFieldPaths.empty() || _dataType != dataType) {
         buildFieldPath(doc.getType(), dataType);
@@ -152,20 +153,31 @@ DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
 }
 
 void
-DocumentInverter::removeDocument(uint32_t docId)
+DocumentInverter::removeDocument(uint32_t docId) {
+    LidVector lids;
+    lids.push_back(docId);
+    removeDocuments(std::move(lids));
+}
+void
+DocumentInverter::removeDocuments(LidVector lids)
 {
+    // Might want to batch inverters as we do for attributes
     for (uint32_t fieldId : _schemaIndexFields._textFields) {
         FieldInverter *inverter = _inverters[fieldId].get();
-        _invertThreads.execute(fieldId, [inverter, docId]() {
-            inverter->removeDocument(docId);
+        _invertThreads.execute(fieldId, [inverter, lids]() {
+            for (uint32_t lid : lids) {
+                inverter->removeDocument(lid);
+            }
         });
     }
     uint32_t urlId = 0;
     for (const auto & fi : _schemaIndexFields._uriFields) {
         uint32_t fieldId = fi._all;
         UrlFieldInverter *inverter = _urlInverters[urlId].get();
-        _invertThreads.execute(fieldId,[inverter, docId]() {
-            inverter->removeDocument(docId);
+        _invertThreads.execute(fieldId, [inverter, lids]() {
+            for (uint32_t lid : lids) {
+                inverter->removeDocument(lid);
+            }
         });
         ++urlId;
     }
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
index 7ab80e1a2e5..ed06a0b39cc 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
@@ -40,6 +40,7 @@ private:
     void addFieldPath(const document::DocumentType &docType, uint32_t fieldId);
     void buildFieldPath(const document::DocumentType & docType, const document::DataType *dataType);
 
+    using LidVector = std::vector<uint32_t>;
     using FieldPath = document::Field;
     using IndexedFieldPaths = std::vector<std::unique_ptr<FieldPath>>;
     IndexedFieldPaths _indexedFieldPaths;
@@ -100,6 +101,7 @@ public:
      * (using a field inverter) is added to the 'invert threads' executor', then this function returns.
      */
     void removeDocument(uint32_t docId);
+    void removeDocuments(LidVector lids);
 
     FieldInverter *getInverter(uint32_t fieldId) const {
         return _inverters[fieldId].get();
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
index 177d8e612bd..cb198f4d33a 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
@@ -93,17 +93,20 @@ MemoryIndex::insertDocument(uint32_t docId, const document::Document &doc)
 }
 
 void
-MemoryIndex::removeDocument(uint32_t docId)
+MemoryIndex::removeDocuments(LidVector lids)
 {
     if (_frozen) {
-        LOG(warning, "Memory index frozen: ignoring remove of document (%u)", docId);
+        LOG(warning, "Memory index frozen: ignoring remove of %lu documents", lids.size());
         return;
     }
-    _inverter->removeDocument(docId);
-    if (_indexedDocs.find(docId) != _indexedDocs.end()) {
-        _indexedDocs.erase(docId);
-        decNumDocs();
+    for (uint32_t lid : lids) {
+
+        if (_indexedDocs.find(lid) != _indexedDocs.end()) {
+            _indexedDocs.erase(lid);
+            decNumDocs();
+        }
     }
+    _inverter->removeDocuments(std::move(lids));
 }
 
 void
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
index 83270aaf2ce..c02e66f790a 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
@@ -41,6 +41,7 @@ class FieldIndexCollection;
 class MemoryIndex : public queryeval::Searchable {
 private:
     using ISequencedTaskExecutor = vespalib::ISequencedTaskExecutor;
+    using LidVector = std::vector<uint32_t>;
     index::Schema _schema;
     ISequencedTaskExecutor &_invertThreads;
     ISequencedTaskExecutor &_pushThreads;
@@ -115,7 +116,7 @@ public:
      *
      * This function is async. commit() must be called for changes to take effect.
      */
-    void removeDocument(uint32_t docId);
+    void removeDocuments(LidVector lids);
 
     /**
      * Commits the inserts and removes since the last commit, making them searchable.
```