summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2021-06-24 08:08:53 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2021-06-24 08:08:53 +0000
commit: 732ba0300a2eba3f9cbe821d8535ed3152b3f46f (patch)
tree: 34725e649f4a71e4b8107e1b02f341b0c71126e7
parent: 0beb995a14e08f737d23740ad0939fee11e4d029 (diff)
Various cleanup after reading indexing threading code
-rw-r--r-- searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp 7
-rw-r--r-- searchcorespi/src/vespa/searchcorespi/index/ithreadingservice.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp 39
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.h 9
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/memory_index.h 1
5 files changed, 25 insertions, 34 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
index 59e943e25f4..302574704f0 100644
--- a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
+++ b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
@@ -32,16 +32,13 @@ MemoryIndexWrapper::MemoryIndexWrapper(const search::index::Schema& schema,
}
void
-MemoryIndexWrapper::flushToDisk(const vespalib::string &flushDir,
- uint32_t docIdLimit,
- SerialNum serialNum)
+MemoryIndexWrapper::flushToDisk(const vespalib::string &flushDir, uint32_t docIdLimit, SerialNum serialNum)
{
const uint64_t numWords = _index.getNumWords();
_index.freeze(); // TODO(geirst): is this needed anymore?
IndexBuilder indexBuilder(_index.getSchema());
indexBuilder.setPrefix(flushDir);
- SerialNumFileHeaderContext fileHeaderContext(_fileHeaderContext,
- serialNum);
+ SerialNumFileHeaderContext fileHeaderContext(_fileHeaderContext, serialNum);
indexBuilder.open(docIdLimit, numWords, *this, _tuneFileIndexing, fileHeaderContext);
_index.dump(indexBuilder);
indexBuilder.close();
diff --git a/searchcorespi/src/vespa/searchcorespi/index/ithreadingservice.h b/searchcorespi/src/vespa/searchcorespi/index/ithreadingservice.h
index 5b16c6f9235..addce510e63 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/ithreadingservice.h
+++ b/searchcorespi/src/vespa/searchcorespi/index/ithreadingservice.h
@@ -53,6 +53,9 @@ namespace searchcorespi::index {
* tasks to the index field writer executor, so draining logic needs
* to sync index field inverter executor before syncing index field
* writer executor.
+ *
+ * TODO: indexFieldInverter and indexFieldWriter can be collapsed into one. Both need sequencing,
+ * but they sequence on different things, so efficiency will be the same and depends only on the number of threads.
*/
struct IThreadingService : public vespalib::Syncable
{
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
index 03a8759c1db..a51b18e3640 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
@@ -9,10 +9,7 @@
#include <vespa/document/datatype/urldatatype.h>
#include <vespa/document/repo/fixedtyperepo.h>
#include <vespa/vespalib/util/isequencedtaskexecutor.h>
-#include <vespa/searchlib/common/sort.h>
#include <vespa/searchlib/util/url.h>
-#include <vespa/vespalib/text/lowercase.h>
-#include <vespa/vespalib/text/utf8.h>
#include <stdexcept>
#include <vespa/log/log.h>
@@ -85,8 +82,7 @@ DocumentInverter::~DocumentInverter()
}
void
-DocumentInverter::addFieldPath(const document::DocumentType &docType,
- uint32_t fieldId)
+DocumentInverter::addFieldPath(const document::DocumentType &docType, uint32_t fieldId)
{
assert(fieldId < _indexedFieldPaths.size());
std::unique_ptr<FieldPath> fp;
@@ -133,9 +129,9 @@ DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
fv = doc.getValue(*fieldPath);
}
FieldInverter *inverter = _inverters[fieldId].get();
- _invertThreads.execute(fieldId,
- [inverter, docId, fv(std::move(fv))]()
- { inverter->invertField(docId, fv); });
+ _invertThreads.execute(fieldId,[inverter, docId, fv(std::move(fv))]() {
+ inverter->invertField(docId, fv);
+ });
}
uint32_t urlId = 0;
for (const auto & fi : _schemaIndexFields._uriFields) {
@@ -148,9 +144,9 @@ DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
fv = doc.getValue(*fieldPath);
}
UrlFieldInverter *inverter = _urlInverters[urlId].get();
- _invertThreads.execute(fieldId,
- [inverter, docId, fv(std::move(fv))]()
- { inverter->invertField(docId, fv); });
+ _invertThreads.execute(fieldId,[inverter, docId, fv(std::move(fv))]() {
+ inverter->invertField(docId, fv);
+ });
++urlId;
}
}
@@ -160,17 +156,17 @@ DocumentInverter::removeDocument(uint32_t docId)
{
for (uint32_t fieldId : _schemaIndexFields._textFields) {
FieldInverter *inverter = _inverters[fieldId].get();
- _invertThreads.execute(fieldId,
- [inverter, docId]()
- { inverter->removeDocument(docId); });
+ _invertThreads.execute(fieldId, [inverter, docId]() {
+ inverter->removeDocument(docId);
+ });
}
uint32_t urlId = 0;
for (const auto & fi : _schemaIndexFields._uriFields) {
uint32_t fieldId = fi._all;
UrlFieldInverter *inverter = _urlInverters[urlId].get();
- _invertThreads.execute(fieldId,
- [inverter, docId]()
- { inverter->removeDocument(docId); });
+ _invertThreads.execute(fieldId,[inverter, docId]() {
+ inverter->removeDocument(docId);
+ });
++urlId;
}
}
@@ -180,11 +176,10 @@ DocumentInverter::pushDocuments(const std::shared_ptr<vespalib::IDestructorCallb
{
uint32_t fieldId = 0;
for (auto &inverter : _inverters) {
- _pushThreads.execute(fieldId,
- [inverter(inverter.get()),
- onWriteDone]()
- { inverter->applyRemoves();
- inverter->pushDocuments(); });
+ _pushThreads.execute(fieldId,[inverter(inverter.get()), onWriteDone]() {
+ inverter->applyRemoves();
+ inverter->pushDocuments();
+ });
++fieldId;
}
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
index cab833704e6..a1325b7d51d 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
@@ -39,15 +39,12 @@ private:
void addFieldPath(const document::DocumentType &docType, uint32_t fieldId);
void buildFieldPath(const document::DocumentType & docType, const document::DataType *dataType);
- void invertNormalDocTextField(size_t fieldId, const document::FieldValue &field);
- void invertNormalDocUriField(const index::UriField &handle, const document::FieldValue &field);
using FieldPath = document::Field;
using IndexedFieldPaths = std::vector<std::unique_ptr<FieldPath>>;
- IndexedFieldPaths _indexedFieldPaths;
- const document::DataType * _dataType;
-
- index::SchemaIndexFields _schemaIndexFields;
+ IndexedFieldPaths _indexedFieldPaths;
+ const document::DataType * _dataType;
+ index::SchemaIndexFields _schemaIndexFields;
std::vector<std::unique_ptr<FieldInverter>> _inverters;
std::vector<std::unique_ptr<UrlFieldInverter>> _urlInverters;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
index 90d1cd7bb37..47275043bee 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
@@ -62,7 +62,6 @@ private:
MemoryIndex &operator=(const MemoryIndex &) = delete;
MemoryIndex &operator=(MemoryIndex &&) = delete;
- void removeDocumentHelper(uint32_t docId, const document::Document &doc);
void updateMaxDocId(uint32_t docId) {
if (docId > _maxDocId) {
_maxDocId = docId;