diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-08-16 14:50:47 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-16 14:50:47 +0200 |
commit | ec3113e27c5f25931cba8608cd94dba57eccd8bb (patch) | |
tree | f79093750ce779af0372c43c36bf20cc833c4f96 | |
parent | 5b6505916ac261127efcf717923f8ea34b7b60c0 (diff) | |
parent | 8c09cc6af89251fda2e9f568a6d85e1442a330bd (diff) |
Merge pull request #23679 from vespa-engine/balder/balder/use-faster-hash_set_of_int-take-3
Balder/balder/use faster hash set of int take 3
6 files changed, 83 insertions, 69 deletions
diff --git a/searchcore/src/tests/proton/documentdb/feedview/feedview_test.cpp b/searchcore/src/tests/proton/documentdb/feedview/feedview_test.cpp index 2957a2a015d..9d9bb64e6b1 100644 --- a/searchcore/src/tests/proton/documentdb/feedview/feedview_test.cpp +++ b/searchcore/src/tests/proton/documentdb/feedview/feedview_test.cpp @@ -367,7 +367,7 @@ struct MyAttributeWriter : public IAttributeWriter _updateLid = lid; for (const auto & fieldUpdate : upd.getUpdates()) { search::AttributeVector * attr = getWritableAttribute(fieldUpdate.getField().getName()); - onUpdate.onUpdateField(fieldUpdate.getField().getName(), attr); + onUpdate.onUpdateField(fieldUpdate.getField(), attr); } } void update(SerialNum serialNum, const document::Document &doc, DocumentIdT lid, OnWriteDoneType) override { diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp index bb25b3da7be..021fc4717af 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp @@ -764,9 +764,9 @@ AttributeWriter::update(SerialNum serialNum, const DocumentUpdate &upd, Document for (const auto &fupd : upd.getUpdates()) { LOG(debug, "Retrieving guard for attribute vector '%s'.", fupd.getField().getName().data()); - auto itr = _attrMap.find(fupd.getField().getName()); - AttributeVector * attrp = (itr != _attrMap.end()) ? itr->second.attribute : nullptr; - onUpdate.onUpdateField(fupd.getField().getName(), attrp); + auto found = _attrMap.find(fupd.getField().getName()); + AttributeVector * attrp = (found != _attrMap.end()) ? 
found->second.attribute : nullptr; + onUpdate.onUpdateField(fupd.getField(), attrp); if (__builtin_expect(attrp == nullptr, false)) { LOG(spam, "Failed to find attribute vector %s", fupd.getField().getName().data()); continue; @@ -776,16 +776,15 @@ AttributeWriter::update(SerialNum serialNum, const DocumentUpdate &upd, Document if (__builtin_expect(attrp->getStatus().getLastSyncToken() >= serialNum, false)) { continue; } - if (itr->second.use_two_phase_put_for_assign_updates && - is_single_assign_update(fupd)) { + if (found->second.use_two_phase_put_for_assign_updates && is_single_assign_update(fupd)) { auto prepare_task = std::make_unique<PreparePutTask>(serialNum, lid, *attrp, get_single_assign_update_field_value(fupd)); auto complete_task = std::make_unique<CompletePutTask>(*prepare_task, onWriteDone); LOG(debug, "About to handle assign update as two phase put for docid %u in attribute vector '%s'", lid, attrp->getName().c_str()); _shared_executor.execute(CpuUsage::wrap(std::move(prepare_task), CpuUsage::Category::WRITE)); - _attributeFieldWriter.executeTask(itr->second.executor_id, std::move(complete_task)); + _attributeFieldWriter.executeTask(found->second.executor_id, std::move(complete_task)); } else { - args[itr->second.executor_id.getId()]->_updates.emplace_back(attrp, &fupd); + args[found->second.executor_id.getId()]->_updates.emplace_back(attrp, &fupd); LOG(debug, "About to apply update for docId %u in attribute vector '%s'.", lid, attrp->getName().c_str()); } } diff --git a/searchcore/src/vespa/searchcore/proton/attribute/ifieldupdatecallback.h b/searchcore/src/vespa/searchcore/proton/attribute/ifieldupdatecallback.h index d8872607b44..d3ab970fb39 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/ifieldupdatecallback.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/ifieldupdatecallback.h @@ -5,16 +5,17 @@ #include <vespa/vespalib/stllike/string.h> namespace search { class AttributeVector; } +namespace document { class Field; } 
namespace proton { struct IFieldUpdateCallback { - virtual ~IFieldUpdateCallback() { } - virtual void onUpdateField(vespalib::stringref fieldName, const search::AttributeVector * attr) = 0; + virtual ~IFieldUpdateCallback() = default; + virtual void onUpdateField(const document::Field & field, const search::AttributeVector * attr) = 0; }; struct DummyFieldUpdateCallback : IFieldUpdateCallback { - void onUpdateField(vespalib::stringref, const search::AttributeVector *) override {} + void onUpdateField(const document::Field & , const search::AttributeVector *) override {} }; } diff --git a/searchcore/src/vespa/searchcore/proton/server/searchable_feed_view.cpp b/searchcore/src/vespa/searchcore/proton/server/searchable_feed_view.cpp index 207f1d813d8..7a78b4ba82a 100644 --- a/searchcore/src/vespa/searchcore/proton/server/searchable_feed_view.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/searchable_feed_view.cpp @@ -34,7 +34,7 @@ SearchableFeedView::SearchableFeedView(StoreOnlyFeedView::Context storeOnlyCtx, const FastAccessFeedView::Context &fastUpdateCtx, Context ctx) : Parent(std::move(storeOnlyCtx), params, fastUpdateCtx), _indexWriter(ctx._indexWriter), - _hasIndexedFields(_schema->getNumIndexFields() > 0) + _hasIndexedFields(getSchema()->getNumIndexFields() > 0) { } SearchableFeedView::~SearchableFeedView() = default; diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.cpp b/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.cpp index a537742b79b..a9850b5c2b7 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.cpp @@ -7,14 +7,15 @@ #include "putdonecontext.h" #include "removedonecontext.h" #include "updatedonecontext.h" -#include <vespa/document/datatype/documenttype.h> -#include <vespa/document/fieldvalue/document.h> -#include <vespa/document/repo/documenttyperepo.h> #include 
<vespa/searchcore/proton/attribute/ifieldupdatecallback.h> #include <vespa/searchcore/proton/common/feedtoken.h> #include <vespa/searchcore/proton/feedoperation/operations.h> #include <vespa/searchcore/proton/reference/i_gid_to_lid_change_handler.h> #include <vespa/searchcore/proton/reference/i_pending_gid_to_lid_changes.h> +#include <vespa/searchlib/index/uri_field.h> +#include <vespa/document/repo/documenttyperepo.h> +#include <vespa/document/datatype/documenttype.h> +#include <vespa/document/fieldvalue/document.h> #include <vespa/vespalib/util/cpu_usage.h> #include <vespa/vespalib/util/destructor_callbacks.h> #include <vespa/vespalib/util/exceptions.h> @@ -38,7 +39,7 @@ using vespalib::CpuUsage; using vespalib::IDestructorCallback; using vespalib::IllegalStateException; using vespalib::makeLambdaTask; -using vespalib::make_string; +using vespalib::make_string_short::fmt; namespace proton { @@ -88,8 +89,8 @@ SummaryPutDoneContext::SummaryPutDoneContext(FeedToken token, IPendingLidTracker SummaryPutDoneContext::~SummaryPutDoneContext() = default; -std::vector<document::GlobalId> getGidsToRemove(const IDocumentMetaStore &metaStore, - const LidVectorContext::LidVector &lidsToRemove) +std::vector<document::GlobalId> +getGidsToRemove(const IDocumentMetaStore &metaStore, const LidVectorContext::LidVector &lidsToRemove) { std::vector<document::GlobalId> gids; gids.reserve(lidsToRemove.size()); @@ -102,23 +103,24 @@ std::vector<document::GlobalId> getGidsToRemove(const IDocumentMetaStore &metaSt return gids; } -void putMetaData(documentmetastore::IStore &meta_store, const DocumentId & doc_id, - const DocumentOperation &op, bool is_removed_doc) +void +putMetaData(documentmetastore::IStore &meta_store, const DocumentId & doc_id, + const DocumentOperation &op, bool is_removed_doc) { documentmetastore::IStore::Result putRes( meta_store.put(doc_id.getGlobalId(), op.getBucketId(), op.getTimestamp(), op.getSerializedDocSize(), op.getLid(), op.get_prepare_serial_num())); if 
(!putRes.ok()) { - throw IllegalStateException( - make_string("Could not put <lid, gid> pair for %sdocument with id '%s' and gid '%s'", - is_removed_doc ? "removed " : "", doc_id.toString().c_str(), - doc_id.getGlobalId().toString().c_str())); + throw IllegalStateException(fmt("Could not put <lid, gid> pair for %sdocument with id '%s' and gid '%s'", + is_removed_doc ? "removed " : "", doc_id.toString().c_str(), + doc_id.getGlobalId().toString().c_str())); } assert(op.getLid() == putRes._lid); } -void removeMetaData(documentmetastore::IStore &meta_store, const GlobalId & gid, const DocumentId &doc_id, - const DocumentOperation &op, bool is_removed_doc) +void +removeMetaData(documentmetastore::IStore &meta_store, const GlobalId & gid, const DocumentId &doc_id, + const DocumentOperation &op, bool is_removed_doc) { assert(meta_store.validLid(op.getPrevLid())); assert(is_removed_doc == op.getPrevMarkedAsRemoved()); @@ -127,9 +129,8 @@ void removeMetaData(documentmetastore::IStore &meta_store, const GlobalId & gid, (void) meta; if (!meta_store.remove(op.getPrevLid(), op.get_prepare_serial_num())) { throw IllegalStateException( - make_string("Could not remove <lid, gid> pair for %sdocument with id '%s' and gid '%s'", - is_removed_doc ? "removed " : "", doc_id.toString().c_str(), - gid.toString().c_str())); + fmt("Could not remove <lid, gid> pair for %sdocument with id '%s' and gid '%s'", + is_removed_doc ? 
"removed " : "", doc_id.toString().c_str(), gid.toString().c_str())); } } @@ -147,6 +148,37 @@ moveMetaData(documentmetastore::IStore &meta_store, const DocumentId & doc_id, c meta_store.move(op.getPrevLid(), op.getLid(), op.get_prepare_serial_num()); } +class UpdateScope final : public IFieldUpdateCallback +{ +private: + const vespalib::hash_set<int32_t> & _indexedFields; + bool _nonAttributeFields; +public: + bool _hasIndexedFields; + + UpdateScope(const vespalib::hash_set<int32_t> & indexedFields, const DocumentUpdate & upd); + bool hasIndexOrNonAttributeFields() const { + return _hasIndexedFields || _nonAttributeFields; + } + void onUpdateField(const document::Field & field, const search::AttributeVector * attr) override; +}; + +UpdateScope::UpdateScope(const vespalib::hash_set<int32_t> & indexedFields, const DocumentUpdate & upd) + : _indexedFields(indexedFields), + _nonAttributeFields(!upd.getFieldPathUpdates().empty()), + _hasIndexedFields(false) +{} + +void +UpdateScope::onUpdateField(const document::Field & field, const search::AttributeVector * attr) { + if (!_nonAttributeFields && (attr == nullptr || !attr->isUpdateableInMemoryOnly())) { + _nonAttributeFields = true; + } + if (!_hasIndexedFields && (_indexedFields.find(field.getId()) != _indexedFields.end())) { + _hasIndexedFields = true; + } +} + } // namespace StoreOnlyFeedView::StoreOnlyFeedView(Context ctx, const PersistentParams &params) @@ -160,12 +192,26 @@ StoreOnlyFeedView::StoreOnlyFeedView(Context ctx, const PersistentParams &params _pendingLidsForDocStore(), _pendingLidsForCommit(std::move(ctx._pendingLidsForCommit)), _schema(std::move(ctx._schema)), + _indexedFields(), _writeService(ctx._writeService), _params(params), _metaStore(_documentMetaStoreContext->get()), _gidToLidChangeHandler(ctx._gidToLidChangeHandler) { _docType = _repo->getDocumentType(_params._docTypeName.getName()); + if (_schema && _docType) { + for (const auto &indexField : _schema->getIndexFields()) { + size_t dotPos = 
indexField.getName().find('.'); + if ((dotPos == vespalib::string::npos) || search::index::UriField::mightBePartofUri(indexField.getName())) { + document::FieldPath fieldPath; + _docType->buildFieldPath(fieldPath, indexField.getName().substr(0, dotPos)); + _indexedFields.insert(fieldPath.back().getFieldRef().getId()); + } else { + throw IllegalStateException("Field '%s' is not a valid index name", indexField.getName().c_str()); + } + + } + } } StoreOnlyFeedView::~StoreOnlyFeedView() = default; @@ -207,7 +253,7 @@ void StoreOnlyFeedView::putAttributes(SerialNum, Lid, const Document &, OnPutDoneType) {} void -StoreOnlyFeedView::putIndexedFields(SerialNum, Lid, const Document::SP &, OnOperationDoneType) {} +StoreOnlyFeedView::putIndexedFields(SerialNum, Lid, const std::shared_ptr<Document> &, OnOperationDoneType) {} void StoreOnlyFeedView::preparePut(PutOperation &putOp) @@ -285,7 +331,7 @@ StoreOnlyFeedView::updateAttributes(SerialNum, Lid, const DocumentUpdate & upd, OnOperationDoneType, IFieldUpdateCallback & onUpdate) { for (const auto & fieldUpdate : upd.getUpdates()) { - onUpdate.onUpdateField(fieldUpdate.getField().getName(), nullptr); + onUpdate.onUpdateField(fieldUpdate.getField(), nullptr); } } @@ -387,22 +433,6 @@ StoreOnlyFeedView::heartBeatSummary(SerialNum serialNum, DoneCallback onDone) { })); } -StoreOnlyFeedView::UpdateScope::UpdateScope(const search::index::Schema & schema, const DocumentUpdate & upd) - : _schema(&schema), - _indexedFields(false), - _nonAttributeFields(!upd.getFieldPathUpdates().empty()) -{} - -void -StoreOnlyFeedView::UpdateScope::onUpdateField(vespalib::stringref fieldName, const search::AttributeVector * attr) { - if (!_nonAttributeFields && (attr == nullptr || !attr->isUpdateableInMemoryOnly())) { - _nonAttributeFields = true; - } - if (!_indexedFields && _schema->isIndexField(fieldName)) { - _indexedFields = true; - } -} - void StoreOnlyFeedView::internalUpdate(FeedToken token, const UpdateOperation &updOp) { if ( ! 
updOp.getUpdate()) { @@ -432,7 +462,7 @@ StoreOnlyFeedView::internalUpdate(FeedToken token, const UpdateOperation &updOp) } auto onWriteDone = createUpdateDoneContext(std::move(token), get_pending_lid_token(updOp), updOp.getUpdate()); - UpdateScope updateScope(*_schema, upd); + UpdateScope updateScope(_indexedFields, upd); updateAttributes(serialNum, lid, upd, onWriteDone, updateScope); if (updateScope.hasIndexOrNonAttributeFields()) { @@ -440,7 +470,7 @@ StoreOnlyFeedView::internalUpdate(FeedToken token, const UpdateOperation &updOp) FutureDoc futureDoc = promisedDoc.get_future().share(); onWriteDone->setDocument(futureDoc); _pendingLidsForDocStore.waitComplete(lid); - if (updateScope._indexedFields) { + if (updateScope._hasIndexedFields) { updateIndexedFields(serialNum, lid, futureDoc, onWriteDone); } PromisedStream promisedStream; diff --git a/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.h b/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.h index 2822aa70525..25a98da7ce7 100644 --- a/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.h +++ b/searchcore/src/vespa/searchcore/proton/server/storeonlyfeedview.h @@ -9,7 +9,6 @@ #include "searchcontext.h" #include <vespa/searchcore/proton/common/pendinglidtracker.h> #include <vespa/searchcore/proton/common/doctypename.h> -#include <vespa/searchcore/proton/attribute/ifieldupdatecallback.h> #include <vespa/searchcore/proton/common/feeddebugger.h> #include <vespa/searchcore/proton/documentmetastore/documentmetastore.h> #include <vespa/searchcore/proton/documentmetastore/documentmetastorecontext.h> @@ -18,9 +17,10 @@ #include <vespa/searchcore/proton/persistenceengine/resulthandler.h> #include <vespa/searchcorespi/index/ithreadingservice.h> #include <vespa/searchlib/query/base.h> +#include <vespa/searchcore/proton/feedoperation/operations.h> #include <vespa/vespalib/util/threadstackexecutorbase.h> +#include <vespa/vespalib/stllike/hash_set.h> #include <future> -#include 
<vespa/searchcore/proton/feedoperation/operations.h> namespace vespalib { class IDestructorCallback; } @@ -118,22 +118,6 @@ public: {} }; -protected: - class UpdateScope : public IFieldUpdateCallback - { - private: - const search::index::Schema *_schema; - public: - bool _indexedFields; - bool _nonAttributeFields; - - UpdateScope(const search::index::Schema & schema, const DocumentUpdate & upd); - bool hasIndexOrNonAttributeFields() const { - return _indexedFields || _nonAttributeFields; - } - void onUpdateField(vespalib::stringref fieldName, const search::AttributeVector * attr) override; - }; - private: const ISummaryAdapter::SP _summaryAdapter; const IDocumentMetaStoreContext::SP _documentMetaStoreContext; @@ -142,9 +126,9 @@ private: LidReuseDelayer _lidReuseDelayer; PendingLidTracker _pendingLidsForDocStore; std::shared_ptr<PendingLidTrackerBase> _pendingLidsForCommit; - + const search::index::Schema::SP _schema; + vespalib::hash_set<int32_t> _indexedFields; protected: - const search::index::Schema::SP _schema; searchcorespi::index::IThreadingService &_writeService; PersistentParams _params; IDocumentMetaStore &_metaStore; |