From 669af6406b4e37ead6bd515a2fca4c76504c57f6 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Fri, 4 Sep 2020 07:45:06 +0000 Subject: Rename GenericTensorStore -> SerializedTensorStore. --- .../src/vespa/searchlib/tensor/CMakeLists.txt | 2 +- .../searchlib/tensor/generic_tensor_attribute.cpp | 12 +-- .../searchlib/tensor/generic_tensor_attribute.h | 13 +-- .../tensor/generic_tensor_attribute_saver.cpp | 13 +-- .../tensor/generic_tensor_attribute_saver.h | 6 +- .../searchlib/tensor/generic_tensor_store.cpp | 110 --------------------- .../vespa/searchlib/tensor/generic_tensor_store.h | 47 --------- .../searchlib/tensor/serialized_tensor_store.cpp | 110 +++++++++++++++++++++ .../searchlib/tensor/serialized_tensor_store.h | 41 ++++++++ 9 files changed, 169 insertions(+), 185 deletions(-) delete mode 100644 searchlib/src/vespa/searchlib/tensor/generic_tensor_store.cpp delete mode 100644 searchlib/src/vespa/searchlib/tensor/generic_tensor_store.h create mode 100644 searchlib/src/vespa/searchlib/tensor/serialized_tensor_store.cpp create mode 100644 searchlib/src/vespa/searchlib/tensor/serialized_tensor_store.h (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 55e83fc6147..3b8b4d4ad93 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -12,7 +12,6 @@ vespa_add_library(searchlib_tensor OBJECT distance_functions.cpp generic_tensor_attribute.cpp generic_tensor_attribute_saver.cpp - generic_tensor_store.cpp hnsw_graph.cpp hnsw_index.cpp hnsw_index_loader.cpp @@ -22,6 +21,7 @@ vespa_add_library(searchlib_tensor OBJECT inv_log_level_generator.cpp nearest_neighbor_index.cpp nearest_neighbor_index_saver.cpp + serialized_tensor_store.cpp tensor_attribute.cpp tensor_deserialize.cpp tensor_store.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.cpp index 6864fb52120..eb40d70cf24 100644 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.cpp @@ -22,7 +22,7 @@ constexpr uint32_t TENSOR_ATTRIBUTE_VERSION = 0; } GenericTensorAttribute::GenericTensorAttribute(stringref name, const Config &cfg) - : TensorAttribute(name, cfg, _genericTensorStore) + : TensorAttribute(name, cfg, _serializedTensorStore) { } @@ -37,7 +37,7 @@ void GenericTensorAttribute::setTensor(DocId docId, const Tensor &tensor) { checkTensorType(tensor); - EntryRef ref = _genericTensorStore.setTensor(tensor); + EntryRef ref = _serializedTensorStore.setTensor(tensor); setTensorRef(docId, ref); } @@ -52,7 +52,7 @@ GenericTensorAttribute::getTensor(DocId docId) const if (!ref.valid()) { return std::unique_ptr(); } - return _genericTensorStore.getTensor(ref); + return _serializedTensorStore.getTensor(ref); } void @@ -75,7 +75,7 @@ GenericTensorAttribute::onLoad() _refVector.unsafe_reserve(numDocs); for (uint32_t lid = 0; lid < numDocs; ++lid) { uint32_t tensorSize = tensorReader.getNextSize(); - auto raw = _genericTensorStore.allocRawBuffer(tensorSize); + auto raw = _serializedTensorStore.allocRawBuffer(tensorSize); if (tensorSize != 0) { tensorReader.readBlob(raw.data, tensorSize); } @@ -96,13 +96,13 @@ GenericTensorAttribute::onInitSave(vespalib::stringref fileName) (std::move(guard), this->createAttributeHeader(fileName), getRefCopy(), - _genericTensorStore); + _serializedTensorStore); } void GenericTensorAttribute::compactWorst() { - doCompactWorst(); + doCompactWorst(); } } diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.h index 9dd3788511e..e5e98557947 100644 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.h @@ -2,19 +2,17 @@ #pragma once +#include "serialized_tensor_store.h" #include "tensor_attribute.h" -#include "generic_tensor_store.h" -namespace search { - -namespace tensor { +namespace search::tensor { /** * Attribute vector class used to store tensors for all documents in memory. */ class GenericTensorAttribute : public TensorAttribute { - GenericTensorStore _genericTensorStore; // data store for serialized tensors + SerializedTensorStore _serializedTensorStore; // data store for serialized tensors public: GenericTensorAttribute(vespalib::stringref baseFileName, const Config &cfg); virtual ~GenericTensorAttribute(); @@ -26,7 +24,4 @@ public: virtual void compactWorst() override; }; - -} // namespace search::tensor - -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.cpp b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.cpp index 81ec3a5218e..ceeb94a7153 100644 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.cpp +++ b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.cpp @@ -1,21 +1,19 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "generic_tensor_attribute_saver.h" -#include "generic_tensor_store.h" +#include "serialized_tensor_store.h" #include #include using vespalib::GenerationHandler; -namespace search { - -namespace tensor { +namespace search::tensor { GenericTensorAttributeSaver:: GenericTensorAttributeSaver(GenerationHandler::Guard &&guard, const attribute::AttributeHeader &header, RefCopyVector &&refs, - const GenericTensorStore &tensorStore) + const SerializedTensorStore &tensorStore) : AttributeSaver(std::move(guard), header), _refs(std::move(refs)), _tensorStore(tensorStore) @@ -45,7 +43,4 @@ GenericTensorAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) return true; } - -} // namespace search::tensor - -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.h b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.h index 92beef49136..1cd65a7735d 100644 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.h +++ b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.h @@ -9,7 +9,7 @@ namespace search { namespace tensor { -class GenericTensorStore; +class SerializedTensorStore; /* * Class for saving a tensor attribute. @@ -20,7 +20,7 @@ public: using RefCopyVector = TensorAttribute::RefCopyVector; private: RefCopyVector _refs; - const GenericTensorStore &_tensorStore; + const SerializedTensorStore &_tensorStore; using GenerationHandler = vespalib::GenerationHandler; virtual bool onSave(IAttributeSaveTarget &saveTarget) override; @@ -28,7 +28,7 @@ public: GenericTensorAttributeSaver(GenerationHandler::Guard &&guard, const attribute::AttributeHeader &header, RefCopyVector &&refs, - const GenericTensorStore &tensorStore); + const SerializedTensorStore &tensorStore); virtual ~GenericTensorAttributeSaver(); }; diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/generic_tensor_store.cpp deleted file mode 100644 index 8c695c32719..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_store.cpp +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "generic_tensor_store.h" -#include "tensor_deserialize.h" -#include -#include -#include -#include -#include -#include - -using vespalib::datastore::Handle; -using vespalib::tensor::Tensor; -using vespalib::tensor::TypedBinaryFormat; - -namespace search::tensor { - -constexpr size_t MIN_BUFFER_ARRAYS = 1024; - -GenericTensorStore::GenericTensorStore() - : TensorStore(_concreteStore), - _concreteStore(), - _bufferType(RefType::align(1), - MIN_BUFFER_ARRAYS, - RefType::offsetSize() / RefType::align(1)) -{ - _store.addType(&_bufferType); - _store.initActiveBuffers(); -} - -GenericTensorStore::~GenericTensorStore() -{ - _store.dropBuffers(); -} - -std::pair -GenericTensorStore::getRawBuffer(RefType ref) const -{ - if (!ref.valid()) { - return std::make_pair(nullptr, 0u); - } - const char *buf = _store.getEntry(ref); - uint32_t len = *reinterpret_cast(buf); - return std::make_pair(buf + sizeof(uint32_t), len); -} - -Handle -GenericTensorStore::allocRawBuffer(uint32_t size) -{ - if (size == 0) { - return Handle(); - } - size_t extSize = size + sizeof(uint32_t); - size_t bufSize = RefType::align(extSize); - auto result = _concreteStore.rawAllocator(_typeId).alloc(bufSize); - *reinterpret_cast(result.data) = size; - char *padWritePtr = result.data + extSize; - for (size_t i = extSize; i < bufSize; ++i) { - *padWritePtr++ = 0; - } - // Hide length of buffer (first 4 bytes) from users of the buffer. - return Handle(result.ref, result.data + sizeof(uint32_t)); -} - -void -GenericTensorStore::holdTensor(EntryRef ref) -{ - if (!ref.valid()) { - return; - } - RefType iRef(ref); - const char *buf = _store.getEntry(iRef); - uint32_t len = *reinterpret_cast(buf); - _concreteStore.holdElem(ref, len + sizeof(uint32_t)); -} - -TensorStore::EntryRef -GenericTensorStore::move(EntryRef ref) -{ - if (!ref.valid()) { - return RefType(); - } - auto oldraw = getRawBuffer(ref); - auto newraw = allocRawBuffer(oldraw.second); - memcpy(newraw.data, oldraw.first, oldraw.second); - _concreteStore.holdElem(ref, oldraw.second + sizeof(uint32_t)); - return newraw.ref; -} - -std::unique_ptr -GenericTensorStore::getTensor(EntryRef ref) const -{ - auto raw = getRawBuffer(ref); - if (raw.second == 0u) { - return std::unique_ptr(); - } - return deserialize_tensor(raw.first, raw.second); -} - -TensorStore::EntryRef -GenericTensorStore::setTensor(const Tensor &tensor) -{ - vespalib::nbostream stream; - TypedBinaryFormat::serialize(stream, tensor); - auto raw = allocRawBuffer(stream.size()); - memcpy(raw.data, stream.peek(), stream.size()); - return raw.ref; -} - -} diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/generic_tensor_store.h deleted file mode 100644 index 14b65e1ec5a..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_store.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "tensor_store.h" - -namespace search { - -namespace tensor { - -/** - * Class for storing serialized tensors in memory, used by TensorAttribute. - * - * Serialization format is subject to change. Changes to serialization format - * might also require corresponding changes to implemented optimized tensor - * operations that use the serialized tensor as argument. - */ -class GenericTensorStore : public TensorStore -{ -public: - using RefType = vespalib::datastore::AlignedEntryRefT<22, 2>; - using DataStoreType = vespalib::datastore::DataStoreT; -private: - DataStoreType _concreteStore; - vespalib::datastore::BufferType _bufferType; -public: - GenericTensorStore(); - - virtual ~GenericTensorStore(); - - std::pair getRawBuffer(RefType ref) const; - - vespalib::datastore::Handle allocRawBuffer(uint32_t size); - - virtual void holdTensor(EntryRef ref) override; - - virtual EntryRef move(EntryRef ref) override; - - std::unique_ptr getTensor(EntryRef ref) const; - - EntryRef setTensor(const Tensor &tensor); -}; - - -} // namespace search::tensor - -} // namespace search diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_store.cpp new file mode 100644 index 00000000000..77903291e13 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_store.cpp @@ -0,0 +1,110 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "serialized_tensor_store.h" +#include "tensor_deserialize.h" +#include +#include +#include +#include +#include +#include + +using vespalib::datastore::Handle; +using vespalib::tensor::Tensor; +using vespalib::tensor::TypedBinaryFormat; + +namespace search::tensor { + +constexpr size_t MIN_BUFFER_ARRAYS = 1024; + +SerializedTensorStore::SerializedTensorStore() + : TensorStore(_concreteStore), + _concreteStore(), + _bufferType(RefType::align(1), + MIN_BUFFER_ARRAYS, + RefType::offsetSize() / RefType::align(1)) +{ + _store.addType(&_bufferType); + _store.initActiveBuffers(); +} + +SerializedTensorStore::~SerializedTensorStore() +{ + _store.dropBuffers(); +} + +std::pair +SerializedTensorStore::getRawBuffer(RefType ref) const +{ + if (!ref.valid()) { + return std::make_pair(nullptr, 0u); + } + const char *buf = _store.getEntry(ref); + uint32_t len = *reinterpret_cast(buf); + return std::make_pair(buf + sizeof(uint32_t), len); +} + +Handle +SerializedTensorStore::allocRawBuffer(uint32_t size) +{ + if (size == 0) { + return Handle(); + } + size_t extSize = size + sizeof(uint32_t); + size_t bufSize = RefType::align(extSize); + auto result = _concreteStore.rawAllocator(_typeId).alloc(bufSize); + *reinterpret_cast(result.data) = size; + char *padWritePtr = result.data + extSize; + for (size_t i = extSize; i < bufSize; ++i) { + *padWritePtr++ = 0; + } + // Hide length of buffer (first 4 bytes) from users of the buffer. + return Handle(result.ref, result.data + sizeof(uint32_t)); +} + +void +SerializedTensorStore::holdTensor(EntryRef ref) +{ + if (!ref.valid()) { + return; + } + RefType iRef(ref); + const char *buf = _store.getEntry(iRef); + uint32_t len = *reinterpret_cast(buf); + _concreteStore.holdElem(ref, len + sizeof(uint32_t)); +} + +TensorStore::EntryRef +SerializedTensorStore::move(EntryRef ref) +{ + if (!ref.valid()) { + return RefType(); + } + auto oldraw = getRawBuffer(ref); + auto newraw = allocRawBuffer(oldraw.second); + memcpy(newraw.data, oldraw.first, oldraw.second); + _concreteStore.holdElem(ref, oldraw.second + sizeof(uint32_t)); + return newraw.ref; +} + +std::unique_ptr +SerializedTensorStore::getTensor(EntryRef ref) const +{ + auto raw = getRawBuffer(ref); + if (raw.second == 0u) { + return std::unique_ptr(); + } + return deserialize_tensor(raw.first, raw.second); +} + +TensorStore::EntryRef +SerializedTensorStore::setTensor(const Tensor &tensor) +{ + vespalib::nbostream stream; + TypedBinaryFormat::serialize(stream, tensor); + auto raw = allocRawBuffer(stream.size()); + memcpy(raw.data, stream.peek(), stream.size()); + return raw.ref; +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_store.h new file mode 100644 index 00000000000..7c0a8e5ed16 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_store.h @@ -0,0 +1,41 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "tensor_store.h" + +namespace search::tensor { + +/** + * Class for storing serialized tensors in memory, used by TensorAttribute. + * + * Serialization format is subject to change. Changes to serialization format + * might also require corresponding changes to implemented optimized tensor + * operations that use the serialized tensor as argument. + */ +class SerializedTensorStore : public TensorStore { +public: + using RefType = vespalib::datastore::AlignedEntryRefT<22, 2>; + using DataStoreType = vespalib::datastore::DataStoreT; +private: + DataStoreType _concreteStore; + vespalib::datastore::BufferType _bufferType; +public: + SerializedTensorStore(); + + virtual ~SerializedTensorStore(); + + std::pair getRawBuffer(RefType ref) const; + + vespalib::datastore::Handle allocRawBuffer(uint32_t size); + + virtual void holdTensor(EntryRef ref) override; + + virtual EntryRef move(EntryRef ref) override; + + std::unique_ptr getTensor(EntryRef ref) const; + + EntryRef setTensor(const Tensor &tensor); +}; + +} -- cgit v1.2.3 From a5a8c9c425dfa9d9e310bb36223eccc90b69b219 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Fri, 4 Sep 2020 07:58:45 +0000 Subject: Rename GenericTensorAttribute -> SerializedTensorAttribute. --- .../attribute_updater/attribute_updater_test.cpp | 10 +- .../tensorattribute/tensorattribute_test.cpp | 6 +- .../vespa/searchlib/attribute/createsinglestd.cpp | 4 +- .../src/vespa/searchlib/tensor/CMakeLists.txt | 4 +- .../searchlib/tensor/generic_tensor_attribute.cpp | 108 --------------------- .../searchlib/tensor/generic_tensor_attribute.h | 27 ------ .../tensor/generic_tensor_attribute_saver.cpp | 46 --------- .../tensor/generic_tensor_attribute_saver.h | 38 -------- .../tensor/serialized_tensor_attribute.cpp | 108 +++++++++++++++++++++ .../searchlib/tensor/serialized_tensor_attribute.h | 26 +++++ .../tensor/serialized_tensor_attribute_saver.cpp | 46 +++++++++ .../tensor/serialized_tensor_attribute_saver.h | 33 +++++++ 12 files changed, 225 insertions(+), 231 deletions(-) delete mode 100644 searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.cpp delete mode 100644 searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.h delete mode 100644 searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.cpp delete mode 100644 searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.h create mode 100644 searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute.cpp create mode 100644 searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute.h create mode 100644 searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute_saver.cpp create mode 100644 searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute_saver.h (limited to 'searchlib') diff --git a/searchcore/src/tests/proton/common/attribute_updater/attribute_updater_test.cpp b/searchcore/src/tests/proton/common/attribute_updater/attribute_updater_test.cpp index a1dc619b3e6..34a2d139498 100644 --- a/searchcore/src/tests/proton/common/attribute_updater/attribute_updater_test.cpp +++ b/searchcore/src/tests/proton/common/attribute_updater/attribute_updater_test.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -48,7 +48,7 @@ using search::attribute::Reference; using search::attribute::ReferenceAttribute; using search::tensor::ITensorAttribute; using search::tensor::DenseTensorAttribute; -using search::tensor::GenericTensorAttribute; +using search::tensor::SerializedTensorAttribute; using search::tensor::TensorAttribute; using vespalib::eval::ValueType; using vespalib::eval::TensorSpec; @@ -457,7 +457,7 @@ TEST_F("require that tensor modify update is applied", } TEST_F("require that tensor add update is applied", - TensorFixture("tensor(x{})", "sparse_tensor")) + TensorFixture("tensor(x{})", "sparse_tensor")) { f.setTensor(TensorSpec(f.type).add({{"x", "a"}}, 2)); f.applyValueUpdate(*f.attribute, 1, @@ -466,7 +466,7 @@ TEST_F("require that tensor add update is applied", } TEST_F("require that tensor add update to non-existing tensor creates empty tensor first", - TensorFixture("tensor(x{})", "sparse_tensor")) + TensorFixture("tensor(x{})", "sparse_tensor")) { f.applyValueUpdate(*f.attribute, 1, TensorAddUpdate(makeTensorFieldValue(TensorSpec(f.type).add({{"x", "a"}}, 3)))); @@ -474,7 +474,7 @@ TEST_F("require that tensor add update to non-existing tensor creates empty tens } TEST_F("require that tensor remove update is applied", - TensorFixture("tensor(x{})", "sparse_tensor")) + TensorFixture("tensor(x{})", "sparse_tensor")) { f.setTensor(TensorSpec(f.type).add({{"x", "a"}}, 2).add({{"x", "b"}}, 3)); f.applyValueUpdate(*f.attribute, 1, diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index efd17f773f3..1a342a92b3d 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -12,11 +12,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -40,7 +40,7 @@ using search::tensor::DefaultNearestNeighborIndexFactory; using search::tensor::DenseTensorAttribute; using search::tensor::DirectTensorAttribute; using search::tensor::DocVectorAccess; -using search::tensor::GenericTensorAttribute; +using search::tensor::SerializedTensorAttribute; using search::tensor::HnswIndex; using search::tensor::HnswNode; using search::tensor::NearestNeighborIndex; @@ -361,7 +361,7 @@ struct Fixture { } else if (_traits.use_direct_tensor_attribute) { return std::make_shared(_name, _cfg); } else { - return std::make_shared(_name, _cfg); + return std::make_shared(_name, _cfg); } } diff --git a/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp index a0cf47f64e0..148d18f79ff 100644 --- a/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp +++ b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp @@ -7,8 +7,8 @@ #include "singlenumericattribute.hpp" #include "singlestringattribute.h" #include "singleboolattribute.h" -#include #include +#include namespace search { @@ -46,7 +46,7 @@ AttributeFactory::createSingleStd(stringref name, const Config & info) if (info.tensorType().is_dense()) { return std::make_shared(name, info); } else { - return std::make_shared(name, info); + return std::make_shared(name, info); } case BasicType::REFERENCE: return std::make_shared(name, info); diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 3b8b4d4ad93..fac6d015a5f 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -10,8 +10,6 @@ vespa_add_library(searchlib_tensor OBJECT direct_tensor_saver.cpp distance_function_factory.cpp distance_functions.cpp - generic_tensor_attribute.cpp - generic_tensor_attribute_saver.cpp hnsw_graph.cpp hnsw_index.cpp hnsw_index_loader.cpp @@ -21,6 +19,8 @@ vespa_add_library(searchlib_tensor OBJECT inv_log_level_generator.cpp nearest_neighbor_index.cpp nearest_neighbor_index_saver.cpp + serialized_tensor_attribute.cpp + serialized_tensor_attribute_saver.cpp serialized_tensor_store.cpp tensor_attribute.cpp tensor_deserialize.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.cpp deleted file mode 100644 index eb40d70cf24..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.cpp +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "generic_tensor_attribute.h" -#include "generic_tensor_attribute_saver.h" -#include "tensor_attribute.hpp" -#include "blob_sequence_reader.h" -#include -#include -#include -#include -#include - -using vespalib::eval::ValueType; -using vespalib::tensor::Tensor; - -namespace search::tensor { - -namespace { - -constexpr uint32_t TENSOR_ATTRIBUTE_VERSION = 0; - -} - -GenericTensorAttribute::GenericTensorAttribute(stringref name, const Config &cfg) - : TensorAttribute(name, cfg, _serializedTensorStore) -{ -} - - -GenericTensorAttribute::~GenericTensorAttribute() -{ - getGenerationHolder().clearHoldLists(); - _tensorStore.clearHoldLists(); -} - -void -GenericTensorAttribute::setTensor(DocId docId, const Tensor &tensor) -{ - checkTensorType(tensor); - EntryRef ref = _serializedTensorStore.setTensor(tensor); - setTensorRef(docId, ref); -} - - -std::unique_ptr -GenericTensorAttribute::getTensor(DocId docId) const -{ - EntryRef ref; - if (docId < getCommittedDocIdLimit()) { - ref = _refVector[docId]; - } - if (!ref.valid()) { - return std::unique_ptr(); - } - return _serializedTensorStore.getTensor(ref); -} - -void -GenericTensorAttribute::getTensor(DocId, vespalib::tensor::MutableDenseTensorView &) const -{ - notImplemented(); -} - -bool -GenericTensorAttribute::onLoad() -{ - BlobSequenceReader tensorReader(*this); - if (!tensorReader.hasData()) { - return false; - } - setCreateSerialNum(tensorReader.getCreateSerialNum()); - assert(tensorReader.getVersion() == TENSOR_ATTRIBUTE_VERSION); - uint32_t numDocs(tensorReader.getDocIdLimit()); - _refVector.reset(); - _refVector.unsafe_reserve(numDocs); - for (uint32_t lid = 0; lid < numDocs; ++lid) { - uint32_t tensorSize = tensorReader.getNextSize(); - auto raw = _serializedTensorStore.allocRawBuffer(tensorSize); - if (tensorSize != 0) { - tensorReader.readBlob(raw.data, tensorSize); - } - _refVector.push_back(raw.ref); - } - setNumDocs(numDocs); - setCommittedDocIdLimit(numDocs); - return true; -} - - -std::unique_ptr -GenericTensorAttribute::onInitSave(vespalib::stringref fileName) -{ - vespalib::GenerationHandler::Guard guard(getGenerationHandler(). - takeGuard()); - return std::make_unique - (std::move(guard), - this->createAttributeHeader(fileName), - getRefCopy(), - _serializedTensorStore); -} - -void -GenericTensorAttribute::compactWorst() -{ - doCompactWorst(); -} - -} diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.h deleted file mode 100644 index e5e98557947..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "serialized_tensor_store.h" -#include "tensor_attribute.h" - -namespace search::tensor { - -/** - * Attribute vector class used to store tensors for all documents in memory. - */ -class GenericTensorAttribute : public TensorAttribute -{ - SerializedTensorStore _serializedTensorStore; // data store for serialized tensors -public: - GenericTensorAttribute(vespalib::stringref baseFileName, const Config &cfg); - virtual ~GenericTensorAttribute(); - virtual void setTensor(DocId docId, const Tensor &tensor) override; - virtual std::unique_ptr getTensor(DocId docId) const override; - virtual void getTensor(DocId docId, vespalib::tensor::MutableDenseTensorView &tensor) const override; - virtual bool onLoad() override; - virtual std::unique_ptr onInitSave(vespalib::stringref fileName) override; - virtual void compactWorst() override; -}; - -} diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.cpp b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.cpp deleted file mode 100644 index ceeb94a7153..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "generic_tensor_attribute_saver.h" -#include "serialized_tensor_store.h" -#include -#include - -using vespalib::GenerationHandler; - -namespace search::tensor { - -GenericTensorAttributeSaver:: -GenericTensorAttributeSaver(GenerationHandler::Guard &&guard, - const attribute::AttributeHeader &header, - RefCopyVector &&refs, - const SerializedTensorStore &tensorStore) - : AttributeSaver(std::move(guard), header), - _refs(std::move(refs)), - _tensorStore(tensorStore) -{ -} - - -GenericTensorAttributeSaver::~GenericTensorAttributeSaver() -{ -} - - -bool -GenericTensorAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) -{ - std::unique_ptr - datWriter(saveTarget.datWriter().allocBufferWriter()); - const uint32_t docIdLimit(_refs.size()); - for (uint32_t lid = 0; lid < docIdLimit; ++lid) { - auto raw = _tensorStore.getRawBuffer(_refs[lid]); - datWriter->write(&raw.second, sizeof(raw.second)); - if (raw.second != 0) { - datWriter->write(raw.first, raw.second); - } - } - datWriter->flush(); - return true; -} - -} diff --git a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.h b/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.h deleted file mode 100644 index 1cd65a7735d..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/generic_tensor_attribute_saver.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include -#include "tensor_attribute.h" - -namespace search { - -namespace tensor { - -class SerializedTensorStore; - -/* - * Class for saving a tensor attribute. - */ -class GenericTensorAttributeSaver : public AttributeSaver -{ -public: - using RefCopyVector = TensorAttribute::RefCopyVector; -private: - RefCopyVector _refs; - const SerializedTensorStore &_tensorStore; - using GenerationHandler = vespalib::GenerationHandler; - - virtual bool onSave(IAttributeSaveTarget &saveTarget) override; -public: - GenericTensorAttributeSaver(GenerationHandler::Guard &&guard, - const attribute::AttributeHeader &header, - RefCopyVector &&refs, - const SerializedTensorStore &tensorStore); - - virtual ~GenericTensorAttributeSaver(); -}; - -} // namespace search::tensor - -} // namespace search diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute.cpp new file mode 100644 index 00000000000..d4a20abf2fd --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute.cpp @@ -0,0 +1,108 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "blob_sequence_reader.h" +#include "serialized_tensor_attribute.h" +#include "serialized_tensor_attribute_saver.h" +#include "tensor_attribute.hpp" +#include +#include +#include +#include +#include + +using vespalib::eval::ValueType; +using vespalib::tensor::Tensor; + +namespace search::tensor { + +namespace { + +constexpr uint32_t TENSOR_ATTRIBUTE_VERSION = 0; + +} + +SerializedTensorAttribute::SerializedTensorAttribute(stringref name, const Config &cfg) + : TensorAttribute(name, cfg, _serializedTensorStore) +{ +} + + +SerializedTensorAttribute::~SerializedTensorAttribute() +{ + getGenerationHolder().clearHoldLists(); + _tensorStore.clearHoldLists(); +} + +void +SerializedTensorAttribute::setTensor(DocId docId, const Tensor &tensor) +{ + checkTensorType(tensor); + EntryRef ref = _serializedTensorStore.setTensor(tensor); + setTensorRef(docId, ref); +} + + +std::unique_ptr +SerializedTensorAttribute::getTensor(DocId docId) const +{ + EntryRef ref; + if (docId < getCommittedDocIdLimit()) { + ref = _refVector[docId]; + } + if (!ref.valid()) { + return std::unique_ptr(); + } + return _serializedTensorStore.getTensor(ref); +} + +void +SerializedTensorAttribute::getTensor(DocId, vespalib::tensor::MutableDenseTensorView &) const +{ + notImplemented(); +} + +bool +SerializedTensorAttribute::onLoad() +{ + BlobSequenceReader tensorReader(*this); + if (!tensorReader.hasData()) { + return false; + } + setCreateSerialNum(tensorReader.getCreateSerialNum()); + assert(tensorReader.getVersion() == TENSOR_ATTRIBUTE_VERSION); + uint32_t numDocs(tensorReader.getDocIdLimit()); + _refVector.reset(); + _refVector.unsafe_reserve(numDocs); + for (uint32_t lid = 0; lid < numDocs; ++lid) { + uint32_t tensorSize = tensorReader.getNextSize(); + auto raw = _serializedTensorStore.allocRawBuffer(tensorSize); + if (tensorSize != 0) { + tensorReader.readBlob(raw.data, tensorSize); + } + _refVector.push_back(raw.ref); + } + setNumDocs(numDocs); + setCommittedDocIdLimit(numDocs); + return true; +} + + +std::unique_ptr +SerializedTensorAttribute::onInitSave(vespalib::stringref fileName) +{ + vespalib::GenerationHandler::Guard guard(getGenerationHandler(). + takeGuard()); + return std::make_unique + (std::move(guard), + this->createAttributeHeader(fileName), + getRefCopy(), + _serializedTensorStore); +} + +void +SerializedTensorAttribute::compactWorst() +{ + doCompactWorst(); +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute.h new file mode 100644 index 00000000000..5596341a5b7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute.h @@ -0,0 +1,26 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "serialized_tensor_store.h" +#include "tensor_attribute.h" + +namespace search::tensor { + +/** + * Attribute vector class used to store tensors for all documents in memory. + */ +class SerializedTensorAttribute : public TensorAttribute { + SerializedTensorStore _serializedTensorStore; // data store for serialized tensors +public: + SerializedTensorAttribute(vespalib::stringref baseFileName, const Config &cfg); + virtual ~SerializedTensorAttribute(); + virtual void setTensor(DocId docId, const Tensor &tensor) override; + virtual std::unique_ptr getTensor(DocId docId) const override; + virtual void getTensor(DocId docId, vespalib::tensor::MutableDenseTensorView &tensor) const override; + virtual bool onLoad() override; + virtual std::unique_ptr onInitSave(vespalib::stringref fileName) override; + virtual void compactWorst() override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute_saver.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute_saver.cpp new file mode 100644 index 00000000000..4c41c3a449e --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute_saver.cpp @@ -0,0 +1,46 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "serialized_tensor_attribute_saver.h" +#include "serialized_tensor_store.h" +#include +#include + +using vespalib::GenerationHandler; + +namespace search::tensor { + +SerializedTensorAttributeSaver:: +SerializedTensorAttributeSaver(GenerationHandler::Guard &&guard, + const attribute::AttributeHeader &header, + RefCopyVector &&refs, + const SerializedTensorStore &tensorStore) + : AttributeSaver(std::move(guard), header), + _refs(std::move(refs)), + _tensorStore(tensorStore) +{ +} + + +SerializedTensorAttributeSaver::~SerializedTensorAttributeSaver() +{ +} + + +bool +SerializedTensorAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) +{ + std::unique_ptr + datWriter(saveTarget.datWriter().allocBufferWriter()); + const uint32_t docIdLimit(_refs.size()); + for (uint32_t lid = 0; lid < docIdLimit; ++lid) { + auto raw = _tensorStore.getRawBuffer(_refs[lid]); + datWriter->write(&raw.second, sizeof(raw.second)); + if (raw.second != 0) { + datWriter->write(raw.first, raw.second); + } + } + datWriter->flush(); + return true; +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute_saver.h b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute_saver.h new file mode 100644 index 00000000000..1ae2279b893 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_attribute_saver.h @@ -0,0 +1,33 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "tensor_attribute.h" + +namespace search::tensor { + +class SerializedTensorStore; + +/* + * Class for saving a tensor attribute. + */ +class SerializedTensorAttributeSaver : public AttributeSaver { +public: + using RefCopyVector = TensorAttribute::RefCopyVector; +private: + RefCopyVector _refs; + const SerializedTensorStore &_tensorStore; + using GenerationHandler = vespalib::GenerationHandler; + + virtual bool onSave(IAttributeSaveTarget &saveTarget) override; +public: + SerializedTensorAttributeSaver(GenerationHandler::Guard &&guard, + const attribute::AttributeHeader &header, + RefCopyVector &&refs, + const SerializedTensorStore &tensorStore); + + virtual ~SerializedTensorAttributeSaver(); +}; + +} -- cgit v1.2.3