diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-10-31 13:55:57 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-10-31 13:55:57 +0100 |
commit | 5bd5966f9f376452d223d02250e08f1258dfc372 (patch) | |
tree | a39a2325edebfa36dbdb60cbfd1f4be253268946 | |
parent | f6ee8d8c5214169b22d99a74f4c3229b89080f1b (diff) |
Consolidate tensor attribute savers.
15 files changed, 137 insertions, 194 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attributesaver.h b/searchlib/src/vespa/searchlib/attribute/attributesaver.h index ce0069e2397..ca0ae582666 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributesaver.h +++ b/searchlib/src/vespa/searchlib/attribute/attributesaver.h @@ -27,6 +27,7 @@ protected: virtual bool onSave(IAttributeSaveTarget &saveTarget) = 0; + uint32_t get_header_version() const { return _header.getVersion(); } public: virtual ~AttributeSaver(); diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index d1b88a7e2ab..1a412b77270 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -6,7 +6,6 @@ vespa_add_library(searchlib_tensor OBJECT blob_sequence_reader.cpp default_nearest_neighbor_index_factory.cpp dense_tensor_attribute.cpp - dense_tensor_attribute_saver.cpp dense_tensor_store.cpp direct_tensor_attribute.cpp direct_tensor_store.cpp @@ -29,11 +28,11 @@ vespa_add_library(searchlib_tensor OBJECT serialized_fast_value_attribute.cpp small_subspaces_buffer_type.cpp tensor_attribute.cpp + tensor_attribute_saver.cpp tensor_buffer_operations.cpp tensor_buffer_store.cpp tensor_buffer_type_mapper.cpp tensor_deserialize.cpp tensor_store.cpp - tensor_store_saver.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index e2b516154fd..12d5b2864a0 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -1,11 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "dense_tensor_attribute.h" -#include "dense_tensor_attribute_saver.h" #include "nearest_neighbor_index.h" #include "nearest_neighbor_index_loader.h" -#include "nearest_neighbor_index_saver.h" #include "tensor_attribute_constants.h" +#include "tensor_attribute_saver.h" #include <vespa/eval/eval/value.h> #include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchcommon/attribute/config.h> @@ -68,7 +67,7 @@ can_use_index_save_file(const search::attribute::Config &config, const search::a bool has_index_file(AttributeVector& attr) { - return LoadUtils::file_exists(attr, DenseTensorAttributeSaver::index_file_suffix()); + return LoadUtils::file_exists(attr, TensorAttributeSaver::index_file_suffix()); } BlobSequenceReader::BlobSequenceReader(AttributeVector& attr, bool has_index) @@ -77,7 +76,7 @@ BlobSequenceReader::BlobSequenceReader(AttributeVector& attr, bool has_index) can_use_index_save_file(attr.getConfig(), search::attribute::AttributeHeader::extractTags(getDatHeader(), attr.getBaseFileName()))), _index_file(_use_index_file ? - attribute::LoadUtils::openFile(attr, DenseTensorAttributeSaver::index_file_suffix()) : + attribute::LoadUtils::openFile(attr, TensorAttributeSaver::index_file_suffix()) : std::unique_ptr<Fast_BufferedFile>()) { } @@ -155,7 +154,6 @@ DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, con const NearestNeighborIndexFactory& index_factory) : TensorAttribute(baseFileName, cfg, _denseTensorStore), _denseTensorStore(cfg.tensorType(), get_memory_allocator()), - _index(), _comp(cfg.tensorType()) { if (cfg.hnsw_index_params().has_value()) { @@ -415,26 +413,6 @@ DenseTensorAttribute::onLoad(vespalib::Executor *executor) return true; } - -std::unique_ptr<AttributeSaver> -DenseTensorAttribute::onInitSave(vespalib::stringref fileName) -{ - vespalib::GenerationHandler::Guard guard(getGenerationHandler().takeGuard()); - auto index_saver = (_index ? _index->make_saver() : std::unique_ptr<NearestNeighborIndexSaver>()); - return std::make_unique<DenseTensorAttributeSaver> - (std::move(guard), - this->createAttributeHeader(fileName), - getRefCopy(), - _denseTensorStore, - std::move(index_saver)); -} - -uint32_t -DenseTensorAttribute::getVersion() const -{ - return DENSE_TENSOR_ATTRIBUTE_VERSION; -} - void DenseTensorAttribute::onCommit() { diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index 3aa52fe622a..c75c9288dea 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -20,7 +20,6 @@ class NearestNeighborIndex; class DenseTensorAttribute : public TensorAttribute, public DocVectorAccess { private: DenseTensorStore _denseTensorStore; - std::unique_ptr<NearestNeighborIndex> _index; TypedCellsComparator _comp; bool tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const; @@ -44,8 +43,6 @@ public: vespalib::eval::TypedCells extract_cells_ref(DocId docId) const override; bool supports_extract_cells_ref() const override { return true; } bool onLoad(vespalib::Executor *executor) override; - std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override; - uint32_t getVersion() const override; void onCommit() override; void before_inc_generation(generation_t current_gen) override; void reclaim_memory(generation_t oldest_used_gen) override; diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.cpp deleted file mode 100644 index 5a47addd4ab..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "dense_tensor_attribute_saver.h" -#include "dense_tensor_store.h" -#include "nearest_neighbor_index_saver.h" -#include "tensor_attribute_constants.h" -#include <vespa/searchlib/util/bufferwriter.h> -#include <vespa/searchlib/attribute/iattributesavetarget.h> - -using vespalib::GenerationHandler; - -namespace search::tensor { - -DenseTensorAttributeSaver:: -DenseTensorAttributeSaver(GenerationHandler::Guard &&guard, - const attribute::AttributeHeader &header, - RefCopyVector &&refs, - const DenseTensorStore &tensorStore, - IndexSaverUP index_saver) - : AttributeSaver(std::move(guard), header), - _refs(std::move(refs)), - _tensorStore(tensorStore), - _index_saver(std::move(index_saver)) -{ -} - -DenseTensorAttributeSaver::~DenseTensorAttributeSaver() = default; - -vespalib::string -DenseTensorAttributeSaver::index_file_suffix() -{ - return "nnidx"; -} - -bool -DenseTensorAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) -{ - if (_index_saver) { - if (!saveTarget.setup_writer(index_file_suffix(), "Binary data file for nearest neighbor index")) { - return false; - } - } - - auto dat_writer = saveTarget.datWriter().allocBufferWriter(); - save_tensor_store(*dat_writer); - - if (_index_saver) { - auto index_writer = saveTarget.get_writer(index_file_suffix()).allocBufferWriter(); - // Note: Implementation of save() is responsible to call BufferWriter::flush(). - _index_saver->save(*index_writer); - } - return true; -} - -void -DenseTensorAttributeSaver::save_tensor_store(BufferWriter& writer) const -{ - const uint32_t docIdLimit(_refs.size()); - for (uint32_t lid = 0; lid < docIdLimit; ++lid) { - if (_refs[lid].valid()) { - auto raw = _tensorStore.getRawBuffer(_refs[lid]); - writer.write(&tensorIsPresent, sizeof(tensorIsPresent)); - size_t rawLen = _tensorStore.getBufSize(); - writer.write(static_cast<const char *>(raw), rawLen); - } else { - writer.write(&tensorIsNotPresent, sizeof(tensorIsNotPresent)); - } - } - writer.flush(); -} - -} diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp index 60a3546578a..6e4f2d8ddd9 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp @@ -184,4 +184,10 @@ DenseTensorStore::encode_stored_tensor(EntryRef ref, vespalib::nbostream& target abort(); } +const DenseTensorStore* +DenseTensorStore::as_dense() const +{ + return this; +} + } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h index 298e58ee410..2e8e280cf11 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h @@ -70,6 +70,7 @@ public: EntryRef store_encoded_tensor(vespalib::nbostream &encoded) override; std::unique_ptr<vespalib::eval::Value> get_tensor(EntryRef ref) const override; bool encode_stored_tensor(EntryRef ref, vespalib::nbostream &target) const override; + const DenseTensorStore* as_dense() const override; vespalib::eval::TypedCells get_typed_cells(EntryRef ref) const { return vespalib::eval::TypedCells(ref.valid() ? getRawBuffer(ref) : &_emptySpace[0], diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp index 84944ea685e..c3dfe477e00 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp @@ -2,8 +2,10 @@ #include "tensor_attribute.h" #include "blob_sequence_reader.h" +#include "nearest_neighbor_index.h" +#include "nearest_neighbor_index_saver.h" #include "tensor_attribute_constants.h" -#include "tensor_store_saver.h" +#include "tensor_attribute_saver.h" #include <vespa/document/base/exceptions.h> #include <vespa/document/datatype/tensor_data_type.h> #include <vespa/searchlib/attribute/address_space_components.h> @@ -53,6 +55,7 @@ TensorAttribute::TensorAttribute(vespalib::stringref name, const Config &cfg, Te : NotImplementedAttribute(name, cfg), _refVector(cfg.getGrowStrategy(), getGenerationHolder()), _tensorStore(tensorStore), + _index(), _is_dense(cfg.tensorType().is_dense()), _emptyTensor(createEmptyTensor(cfg.tensorType())), _compactGeneration(0) @@ -261,7 +264,7 @@ TensorAttribute::onShrinkLidSpace() uint32_t TensorAttribute::getVersion() const { - return TENSOR_ATTRIBUTE_VERSION; + return (_tensorStore.as_dense() != nullptr) ? DENSE_TENSOR_ATTRIBUTE_VERSION : TENSOR_ATTRIBUTE_VERSION; } TensorAttribute::RefCopyVector @@ -316,11 +319,13 @@ TensorAttribute::onInitSave(vespalib::stringref fileName) { vespalib::GenerationHandler::Guard guard(getGenerationHandler(). takeGuard()); - return std::make_unique<TensorStoreSaver> + auto index_saver = (_index ? _index->make_saver() : std::unique_ptr<NearestNeighborIndexSaver>()); + return std::make_unique<TensorAttributeSaver> (std::move(guard), this->createAttributeHeader(fileName), getRefCopy(), - _tensorStore); + _tensorStore, + std::move(index_saver)); } void diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h index b7bac35d1b7..76bce7076ac 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h @@ -25,6 +25,7 @@ protected: RefVector _refVector; // docId -> ref in data store for serialized tensor TensorStore &_tensorStore; // data store for serialized tensors + std::unique_ptr<NearestNeighborIndex> _index; bool _is_dense; std::unique_ptr<vespalib::eval::Value> _emptyTensor; uint64_t _compactGeneration; // Generation when last compact occurred diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_saver.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_saver.cpp new file mode 100644 index 00000000000..2d51536963a --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_saver.cpp @@ -0,0 +1,98 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "tensor_attribute_saver.h" +#include "dense_tensor_store.h" +#include "nearest_neighbor_index_saver.h" +#include "tensor_attribute_constants.h" +#include <vespa/searchlib/util/bufferwriter.h> +#include <vespa/searchlib/attribute/iattributesavetarget.h> +#include <cassert> + +using vespalib::GenerationHandler; + +namespace search::tensor { + +TensorAttributeSaver:: +TensorAttributeSaver(GenerationHandler::Guard &&guard, + const attribute::AttributeHeader &header, + RefCopyVector &&refs, + const TensorStore &tensor_store, + IndexSaverUP index_saver) + : AttributeSaver(std::move(guard), header), + _refs(std::move(refs)), + _tensor_store(tensor_store), + _index_saver(std::move(index_saver)) +{ +} + +TensorAttributeSaver::~TensorAttributeSaver() = default; + +vespalib::string +TensorAttributeSaver::index_file_suffix() +{ + return "nnidx"; +} + +bool +TensorAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) +{ + if (_index_saver) { + if (!saveTarget.setup_writer(index_file_suffix(), "Binary data file for nearest neighbor index")) { + return false; + } + } + + auto dat_writer = saveTarget.datWriter().allocBufferWriter(); + auto dense_tensor_store = _tensor_store.as_dense(); + if (dense_tensor_store != nullptr) { + save_dense_tensor_store(*dat_writer, *dense_tensor_store); + } else { + save_tensor_store(*dat_writer); + } + if (_index_saver) { + auto index_writer = saveTarget.get_writer(index_file_suffix()).allocBufferWriter(); + // Note: Implementation of save() is responsible to call BufferWriter::flush(). + _index_saver->save(*index_writer); + } + return true; +} + +void +TensorAttributeSaver::save_tensor_store(BufferWriter& writer) const +{ + assert(get_header_version() == TENSOR_ATTRIBUTE_VERSION); + const uint32_t docid_limit(_refs.size()); + vespalib::nbostream stream; + for (uint32_t lid = 0; lid < docid_limit; ++lid) { + if (_tensor_store.encode_stored_tensor(_refs[lid], stream)) { + uint32_t sz = stream.size(); + writer.write(&sz, sizeof(sz)); + writer.write(stream.peek(), stream.size()); + stream.clear(); + } else { + uint32_t sz = 0; + writer.write(&sz, sizeof(sz)); + } + } + writer.flush(); +} + +void +TensorAttributeSaver::save_dense_tensor_store(BufferWriter& writer, const DenseTensorStore& dense_tensor_store) const +{ + assert(get_header_version() == DENSE_TENSOR_ATTRIBUTE_VERSION); + auto raw_size = dense_tensor_store.getBufSize(); + const uint32_t docid_limit(_refs.size()); + for (uint32_t lid = 0; lid < docid_limit; ++lid) { + if (_refs[lid].valid()) { + auto raw = dense_tensor_store.getRawBuffer(_refs[lid]); + writer.write(&tensorIsPresent, sizeof(tensorIsPresent)); + writer.write(static_cast<const char *>(raw), raw_size); + } else { + writer.write(&tensorIsNotPresent, sizeof(tensorIsNotPresent)); + } + } + writer.flush(); +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_saver.h index 731602f6a56..690cb3ac041 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute_saver.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_saver.h @@ -9,14 +9,14 @@ namespace search { class BufferWriter; } namespace search::tensor { -class DenseTensorStore; +class TensorStore; class NearestNeighborIndexSaver; /** - * Class for saving a dense tensor attribute. + * Class for saving a tensor attribute. * Will also save the nearest neighbor index if existing. */ -class DenseTensorAttributeSaver : public AttributeSaver { +class TensorAttributeSaver : public AttributeSaver { public: using RefCopyVector = TensorAttribute::RefCopyVector; private: @@ -24,20 +24,21 @@ private: using IndexSaverUP = std::unique_ptr<NearestNeighborIndexSaver>; RefCopyVector _refs; - const DenseTensorStore &_tensorStore; + const TensorStore& _tensor_store; IndexSaverUP _index_saver; bool onSave(IAttributeSaveTarget &saveTarget) override; + void save_dense_tensor_store(BufferWriter& writer, const DenseTensorStore& dense_tensor_store) const; void save_tensor_store(BufferWriter& writer) const; public: - DenseTensorAttributeSaver(GenerationHandler::Guard &&guard, + TensorAttributeSaver(GenerationHandler::Guard &&guard, const attribute::AttributeHeader &header, RefCopyVector &&refs, - const DenseTensorStore &tensorStore, + const TensorStore &tensor_store, IndexSaverUP index_saver); - ~DenseTensorAttributeSaver() override; + ~TensorAttributeSaver() override; static vespalib::string index_file_suffix(); }; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_store.cpp index fc8f3175f99..5f07f378465 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_store.cpp @@ -11,4 +11,10 @@ TensorStore::TensorStore(vespalib::datastore::DataStoreBase &store) TensorStore::~TensorStore() = default; +const DenseTensorStore* +TensorStore::as_dense() const +{ + return nullptr; +} + } diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_store.h index 53551bc48fa..11ab4158e8f 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_store.h @@ -14,6 +14,8 @@ namespace vespalib::eval { struct Value; } namespace search::tensor { +class DenseTensorStore; + /** * Class for storing serialized tensors in memory, used by TensorAttribute. * @@ -46,6 +48,7 @@ public: virtual EntryRef store_encoded_tensor(vespalib::nbostream& encoded) = 0; virtual std::unique_ptr<vespalib::eval::Value> get_tensor(EntryRef ref) const = 0; virtual bool encode_stored_tensor(EntryRef ref, vespalib::nbostream& target) const = 0; + virtual const DenseTensorStore* as_dense() const; // Inherit doc from DataStoreBase void reclaim_memory(generation_t oldest_used_gen) { diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_store_saver.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_store_saver.cpp deleted file mode 100644 index 0963e79b0dd..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/tensor_store_saver.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "tensor_store_saver.h" -#include "tensor_store.h" - -#include <vespa/searchlib/attribute/iattributesavetarget.h> -#include <vespa/searchlib/util/bufferwriter.h> -#include <vespa/vespalib/objects/nbostream.h> - -using vespalib::GenerationHandler; - -namespace search::tensor { - -TensorStoreSaver:: -TensorStoreSaver(GenerationHandler::Guard &&guard, - const attribute::AttributeHeader &header, - RefCopyVector &&refs, - const TensorStore &tensorStore) - : AttributeSaver(std::move(guard), header), - _refs(std::move(refs)), - _tensorStore(tensorStore) -{ -} - -TensorStoreSaver::~TensorStoreSaver() = default; - -bool -TensorStoreSaver::onSave(IAttributeSaveTarget &saveTarget) -{ - auto datWriter = saveTarget.datWriter().allocBufferWriter(); - const uint32_t docIdLimit(_refs.size()); - vespalib::nbostream stream; - for (uint32_t lid = 0; lid < docIdLimit; ++lid) { - if (_tensorStore.encode_stored_tensor(_refs[lid], stream)) { - uint32_t sz = stream.size(); - datWriter->write(&sz, sizeof(sz)); - datWriter->write(stream.peek(), stream.size()); - stream.clear(); - } else { - uint32_t sz = 0; - datWriter->write(&sz, sizeof(sz)); - } - } - datWriter->flush(); - return true; -} - -} // namespace search::tensor diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_store_saver.h b/searchlib/src/vespa/searchlib/tensor/tensor_store_saver.h deleted file mode 100644 index a4bf6e07519..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/tensor_store_saver.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/searchlib/attribute/attributesaver.h> -#include "tensor_attribute.h" - -namespace search::tensor { - -/* - * Class for saving a tensor attribute. - */ -class TensorStoreSaver : public AttributeSaver -{ -public: - using RefCopyVector = TensorAttribute::RefCopyVector; -private: - using GenerationHandler = vespalib::GenerationHandler; - - RefCopyVector _refs; - const TensorStore& _tensorStore; - - bool onSave(IAttributeSaveTarget &saveTarget) override; -public: - TensorStoreSaver(GenerationHandler::Guard &&guard, - const attribute::AttributeHeader &header, - RefCopyVector &&refs, - const TensorStore &tensorStore); - - virtual ~TensorStoreSaver(); -}; - -} // namespace search::tensor |