summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2021-10-30 16:16:39 +0200
committer: GitHub <noreply@github.com> 2021-10-30 16:16:39 +0200
commit: d8970d73872e5386f36aaa73e2dc6b0e881069bf (patch)
tree: 629867ff5eef670fa008ad0312bccaf1a2ba422b
parent: 18a747185627747b0d137d8d4f06da3024587114 (diff)
parent: c732b01d45595ad831325cb8a7e032c38c1de4c3 (diff)
Merge pull request #19797 from vespa-engine/balder/use-coarser-tasks-for-delete-bucket
Remove the lids in one task instead of 1k tasks for full buckets on d…
-rw-r--r-- searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h 4
-rw-r--r-- searchlib/src/apps/tests/memoryindexstress_test.cpp 4
-rw-r--r-- searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp 22
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp 15
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/memory_index.h 3
7 files changed, 37 insertions, 17 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
index ddbc0ffa882..e4eaad538cb 100644
--- a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
+++ b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
@@ -80,9 +80,7 @@ public:
_index.insertDocument(lid, doc);
}
void removeDocuments(LidVector lids) override {
- for (uint32_t lid : lids) {
- _index.removeDocument(lid);
- }
+ _index.removeDocuments(std::move(lids));
}
uint64_t getStaticMemoryFootprint() const override {
return _index.getStaticMemoryFootprint();
diff --git a/searchlib/src/apps/tests/memoryindexstress_test.cpp b/searchlib/src/apps/tests/memoryindexstress_test.cpp
index 9ee47cc6849..dd445745f18 100644
--- a/searchlib/src/apps/tests/memoryindexstress_test.cpp
+++ b/searchlib/src/apps/tests/memoryindexstress_test.cpp
@@ -227,7 +227,9 @@ struct Fixture {
index.insertDocument(id, *doc);
}
void remove(uint32_t id) {
- index.removeDocument(id);
+ std::vector<uint32_t> lids;
+ lids.push_back(id);
+ index.removeDocuments(std::move(lids));
}
void readWork(uint32_t cnt);
diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
index 59ade118990..20cfb045081 100644
--- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
@@ -112,7 +112,9 @@ struct Index {
return d;
}
Index &remove(uint32_t id) {
- index.removeDocument(id);
+ std::vector<uint32_t> lids;
+ lids.push_back(id);
+ index.removeDocuments(std::move(lids));
internalSyncCommit();
return *this;
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
index 4f03a5cb95f..127ff1d52c3 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
@@ -116,6 +116,7 @@ DocumentInverter::buildFieldPath(const document::DocumentType &docType,
void
DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
{
+ // Might want to batch inverters as we do for attributes
const document::DataType *dataType(doc.getDataType());
if (_indexedFieldPaths.empty() || _dataType != dataType) {
buildFieldPath(doc.getType(), dataType);
@@ -152,20 +153,31 @@ DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
}
void
-DocumentInverter::removeDocument(uint32_t docId)
+DocumentInverter::removeDocument(uint32_t docId) {
+ LidVector lids;
+ lids.push_back(docId);
+ removeDocuments(std::move(lids));
+}
+void
+DocumentInverter::removeDocuments(LidVector lids)
{
+ // Might want to batch inverters as we do for attributes
for (uint32_t fieldId : _schemaIndexFields._textFields) {
FieldInverter *inverter = _inverters[fieldId].get();
- _invertThreads.execute(fieldId, [inverter, docId]() {
- inverter->removeDocument(docId);
+ _invertThreads.execute(fieldId, [inverter, lids]() {
+ for (uint32_t lid : lids) {
+ inverter->removeDocument(lid);
+ }
});
}
uint32_t urlId = 0;
for (const auto & fi : _schemaIndexFields._uriFields) {
uint32_t fieldId = fi._all;
UrlFieldInverter *inverter = _urlInverters[urlId].get();
- _invertThreads.execute(fieldId,[inverter, docId]() {
- inverter->removeDocument(docId);
+ _invertThreads.execute(fieldId, [inverter, lids]() {
+ for (uint32_t lid : lids) {
+ inverter->removeDocument(lid);
+ }
});
++urlId;
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
index 7ab80e1a2e5..ed06a0b39cc 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
@@ -40,6 +40,7 @@ private:
void addFieldPath(const document::DocumentType &docType, uint32_t fieldId);
void buildFieldPath(const document::DocumentType & docType, const document::DataType *dataType);
+ using LidVector = std::vector<uint32_t>;
using FieldPath = document::Field;
using IndexedFieldPaths = std::vector<std::unique_ptr<FieldPath>>;
IndexedFieldPaths _indexedFieldPaths;
@@ -100,6 +101,7 @@ public:
* (using a field inverter) is added to the 'invert threads' executor', then this function returns.
*/
void removeDocument(uint32_t docId);
+ void removeDocuments(LidVector lids);
FieldInverter *getInverter(uint32_t fieldId) const {
return _inverters[fieldId].get();
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
index 177d8e612bd..cb198f4d33a 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
@@ -93,17 +93,20 @@ MemoryIndex::insertDocument(uint32_t docId, const document::Document &doc)
}
void
-MemoryIndex::removeDocument(uint32_t docId)
+MemoryIndex::removeDocuments(LidVector lids)
{
if (_frozen) {
- LOG(warning, "Memory index frozen: ignoring remove of document (%u)", docId);
+ LOG(warning, "Memory index frozen: ignoring remove of %lu documents", lids.size());
return;
}
- _inverter->removeDocument(docId);
- if (_indexedDocs.find(docId) != _indexedDocs.end()) {
- _indexedDocs.erase(docId);
- decNumDocs();
+ for (uint32_t lid : lids) {
+
+ if (_indexedDocs.find(lid) != _indexedDocs.end()) {
+ _indexedDocs.erase(lid);
+ decNumDocs();
+ }
}
+ _inverter->removeDocuments(std::move(lids));
}
void
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
index 83270aaf2ce..c02e66f790a 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
@@ -41,6 +41,7 @@ class FieldIndexCollection;
class MemoryIndex : public queryeval::Searchable {
private:
using ISequencedTaskExecutor = vespalib::ISequencedTaskExecutor;
+ using LidVector = std::vector<uint32_t>;
index::Schema _schema;
ISequencedTaskExecutor &_invertThreads;
ISequencedTaskExecutor &_pushThreads;
@@ -115,7 +116,7 @@ public:
*
* This function is async. commit() must be called for changes to take effect.
*/
- void removeDocument(uint32_t docId);
+ void removeDocuments(LidVector lids);
/**
* Commits the inserts and removes since the last commit, making them searchable.