From 0fc29c7dd07af1ad471f7597a3c651c7f2581996 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 31 Mar 2023 16:26:20 +0200 Subject: Add TensorExtAttribute. --- .../attribute/extendattributes/extendattribute.cpp | 124 ++++++++++++++ .../vespa/searchlib/attribute/attributevector.h | 3 + .../src/vespa/searchlib/tensor/CMakeLists.txt | 1 + .../searchlib/tensor/tensor_ext_attribute.cpp | 181 +++++++++++++++++++++ .../vespa/searchlib/tensor/tensor_ext_attribute.h | 54 ++++++ 5 files changed, 363 insertions(+) create mode 100644 searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp create mode 100644 searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp index 8f056323733..ae6a0d18a48 100644 --- a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp @@ -1,9 +1,25 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include +#include +#include +#include +#include +#include #include #include +#include +#include +#include +using search::attribute::Config; +using search::attribute::BasicType; +using search::attribute::CollectionType; using search::attribute::SingleRawExtAttribute; +using search::tensor::TensorExtAttribute; +using vespalib::eval::FastValueBuilderFactory; +using vespalib::eval::TensorSpec; +using vespalib::eval::Value; +using vespalib::eval::ValueType; namespace search { @@ -15,8 +31,37 @@ std::vector as_vector(vespalib::ConstArrayRef value) { return {value.data(), value.data() + value.size()}; } +std::vector as_vector(vespalib::ConstArrayRef value) { + return {value.data(), value.data() + value.size()}; +} + +vespalib::string vec_2d_spec("tensor(x[2])"); +vespalib::string vec_mixed_2d_spec("tensor(a{},x[2])"); + +TensorSpec +vec_2d(double x0, double x1) +{ + return TensorSpec(vec_2d_spec).add({{"x", 0}}, x0).add({{"x", 1}}, x1); +} + +TensorSpec +vec_mixed_2d(std::vector> val) +{ + TensorSpec spec(vec_mixed_2d_spec); + for (uint32_t a = 0; a < val.size(); ++a) { + vespalib::asciistream a_stream; + a_stream << a; + vespalib::string a_as_string = a_stream.str(); + for (uint32_t x = 0; x < val[a].size(); ++x) { + spec.add({{"a", a_as_string.c_str()},{"x", x}}, val[a][x]); + } + } + return spec; +} + class ExtendAttributeTest : public ::testing::Test { + std::vector> _tensors; protected: ExtendAttributeTest() = default; ~ExtendAttributeTest() override = default; @@ -27,8 +72,18 @@ protected: template void testExtendString(Attribute & attr); void testExtendRaw(AttributeVector& attr); + void testExtendTensor(AttributeVector& attr); + const Value& create_tensor(const TensorSpec &spec); }; +const Value& +ExtendAttributeTest::create_tensor(const TensorSpec &spec) +{ + auto value = value_from_spec(spec, FastValueBuilderFactory::get()); + _tensors.emplace_back(std::move(value)); + return *_tensors.back(); +} + template void ExtendAttributeTest::testExtendInteger(Attribute & attr) { @@ -185,6 +240,57 @@ void ExtendAttributeTest::testExtendRaw(AttributeVector& attr) EXPECT_EQ(empty, as_vector(buf)); } +void ExtendAttributeTest::testExtendTensor(AttributeVector& attr) +{ + std::vector empty_cells{0.0, 0.0}; + std::vector spec0_dense_cells{1.0, 2.0}; + std::vector spec0_mixed_cells0{3.0, 4.0}; + std::vector spec0_mixed_cells1{5.0, 6.0}; + bool dense = attr.getConfig().tensorType().is_dense(); + auto* ext_attr = attr.getExtendInterface(); + EXPECT_NE(nullptr, ext_attr); + auto* tensor_attr = attr.asTensorAttribute(); + EXPECT_NE(nullptr, tensor_attr); + uint32_t docId(0); + EXPECT_EQ(0u, attr.getNumDocs()); + attr.addDoc(docId); + EXPECT_EQ(0u, docId); + EXPECT_EQ(1u, attr.getNumDocs()); + TensorSpec spec0 = dense ? vec_2d(1.0, 2.0) : vec_mixed_2d({{3.0, 4.0}, {5.0, 6.0}}); + EXPECT_TRUE(ext_attr->add(create_tensor(spec0))); + auto tensor = tensor_attr->getTensor(0); + EXPECT_NE(nullptr, tensor.get()); + EXPECT_EQ(spec0, TensorSpec::from_value(*tensor)); + EXPECT_EQ(dense, tensor_attr->supports_extract_cells_ref()); + if (dense) { + EXPECT_EQ(spec0_dense_cells, as_vector(tensor_attr->extract_cells_ref(0).typify())); + } + EXPECT_TRUE(tensor_attr->supports_get_tensor_ref()); + EXPECT_EQ(spec0, TensorSpec::from_value(tensor_attr->get_tensor_ref(0))); + EXPECT_FALSE(tensor_attr->supports_get_serialized_tensor_ref()); + auto vectors = tensor_attr->get_vectors(0); + if (dense) { + EXPECT_EQ(1, vectors.subspaces()); + EXPECT_EQ(spec0_dense_cells, as_vector(vectors.cells(0).typify())); + EXPECT_EQ(spec0_dense_cells, as_vector(tensor_attr->get_vector(0, 0).typify())); + EXPECT_EQ(empty_cells, as_vector(tensor_attr->get_vector(0, 1).typify())); + } else { + EXPECT_EQ(2, vectors.subspaces()); + EXPECT_EQ(spec0_mixed_cells0, as_vector(vectors.cells(0).typify())); + EXPECT_EQ(spec0_mixed_cells1, as_vector(vectors.cells(1).typify())); + EXPECT_EQ(spec0_mixed_cells0, as_vector(tensor_attr->get_vector(0, 0).typify())); + EXPECT_EQ(spec0_mixed_cells1, as_vector(tensor_attr->get_vector(0, 1).typify())); + EXPECT_EQ(empty_cells, as_vector(tensor_attr->get_vector(0, 2).typify())); + } + attr.addDoc(docId); + EXPECT_EQ(1u, docId); + EXPECT_EQ(2u, attr.getNumDocs()); + vectors = tensor_attr->get_vectors(1); + EXPECT_EQ(0, vectors.subspaces()); + EXPECT_EQ(empty_cells, as_vector(tensor_attr->get_vector(1, 0).typify())); + EXPECT_EQ(nullptr, tensor_attr->getTensor(1).get()); +} + TEST_F(ExtendAttributeTest, single_integer_ext_attribute) { SingleIntegerExtAttribute siattr("si1"); @@ -255,6 +361,24 @@ TEST_F(ExtendAttributeTest, single_raw_ext_attribute) testExtendRaw(srattr); } +TEST_F(ExtendAttributeTest, tensor_ext_attribute_dense) +{ + Config cfg(BasicType::TENSOR, CollectionType::SINGLE); + cfg.setTensorType(ValueType::from_spec(vec_2d_spec)); + TensorExtAttribute tattr("td1", cfg); + EXPECT_TRUE(! tattr.hasMultiValue()); + testExtendTensor(tattr); +} + +TEST_F(ExtendAttributeTest, tensor_ext_attribute_mixed) +{ + Config cfg(BasicType::TENSOR, CollectionType::SINGLE); + cfg.setTensorType(ValueType::from_spec(vec_mixed_2d_spec)); + TensorExtAttribute tattr("tm1", cfg); + EXPECT_TRUE(! tattr.hasMultiValue()); + testExtendTensor(tattr); +} + } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index 3d14622ca02..e40785911ea 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -38,6 +38,8 @@ namespace vespalib::alloc { class Alloc; } +namespace vespalib::eval { struct Value; } + namespace search { template class ComponentGuard; @@ -86,6 +88,7 @@ public: virtual bool add(double, int32_t = 1) { return false; } virtual bool add(const char *, int32_t = 1) { return false; } virtual bool add(vespalib::ConstArrayRef, int32_t = 1) { return false; } + virtual bool add(const vespalib::eval::Value&, int32_t = 1) { return false; } virtual ~IExtendAttribute() = default; }; diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index c8c5d4d4257..313863d8dcb 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -40,6 +40,7 @@ vespa_add_library(searchlib_tensor OBJECT tensor_buffer_store.cpp tensor_buffer_type_mapper.cpp tensor_deserialize.cpp + tensor_ext_attribute.cpp tensor_store.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp new file mode 100644 index 00000000000..19c8cf6053b --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp @@ -0,0 +1,181 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "tensor_ext_attribute.h" +#include "serialized_tensor_ref.h" +#include "vector_bundle.h" +#include +#include +#include +#include +#include + +#include +LOG_SETUP(".searchlib.tensor.tensor_ext_attribute"); + +using vespalib::eval::FastValueBuilderFactory; +using vespalib::eval::TensorSpec; +using vespalib::eval::TypedCells; +using vespalib::eval::Value; +using vespalib::eval::ValueType; + +namespace search::tensor { + +namespace { + +std::unique_ptr +create_empty_tensor(const ValueType& type) +{ + const auto &factory = FastValueBuilderFactory::get(); + TensorSpec empty_spec(type.to_spec()); + return vespalib::eval::value_from_spec(empty_spec, factory); +} + +} + +TensorExtAttribute::TensorExtAttribute(const vespalib::string& name, const Config& cfg) + : NotImplementedAttribute(name, cfg), + ITensorAttribute(), + IExtendAttribute(), + _subspace_type(cfg.tensorType()), + _empty(_subspace_type), + _empty_tensor(create_empty_tensor(cfg.tensorType())) +{ +} + +TensorExtAttribute::~TensorExtAttribute() = default; + +const ITensorAttribute* +TensorExtAttribute::asTensorAttribute() const +{ + return this; +} + +void +TensorExtAttribute::onCommit() +{ + LOG_ABORT("should not be reached"); +} + +void +TensorExtAttribute::onUpdateStat() +{ +} + +bool +TensorExtAttribute::addDoc(DocId& docId) +{ + docId = _data.size(); + _data.emplace_back(nullptr); + incNumDocs(); + setCommittedDocIdLimit(getNumDocs()); + return true; +} + +bool +TensorExtAttribute::add(const vespalib::eval::Value& v, int32_t) +{ + _data.back() = &v; + return true; +} + +IExtendAttribute* +TensorExtAttribute::getExtendInterface() +{ + return this; +} + +TypedCells +TensorExtAttribute::get_vector(uint32_t docid, uint32_t subspace) const +{ + auto vectors = get_vectors(docid); + return (subspace < vectors.subspaces()) ? vectors.cells(subspace) : _empty.cells(); +} + +VectorBundle +TensorExtAttribute::get_vectors(uint32_t docid) const +{ + auto tensor = _data[docid]; + if (tensor == nullptr) { + return VectorBundle(); + } + return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type); +} + +std::unique_ptr +TensorExtAttribute::getTensor(uint32_t docid) const +{ + auto tensor = _data[docid]; + if (tensor == nullptr) { + return {}; + } + return FastValueBuilderFactory::get().copy(*tensor); +} + +std::unique_ptr +TensorExtAttribute::getEmptyTensor() const +{ + return FastValueBuilderFactory::get().copy(*_empty_tensor); +} + +TypedCells +TensorExtAttribute::extract_cells_ref(uint32_t docid) const +{ + return get_vector(docid, 0); +} + +const vespalib::eval::Value& +TensorExtAttribute::get_tensor_ref(uint32_t docid) const +{ + auto tensor = _data[docid]; + return (tensor == nullptr) ? *_empty_tensor : *tensor; +} + +SerializedTensorRef +TensorExtAttribute::get_serialized_tensor_ref(uint32_t) const +{ + notImplemented(); +} + +bool +TensorExtAttribute::supports_extract_cells_ref() const +{ + return getConfig().tensorType().is_dense(); +} + +bool +TensorExtAttribute::supports_get_tensor_ref() const +{ + return true; +} + +bool +TensorExtAttribute::supports_get_serialized_tensor_ref() const +{ + return false; +} + +const ValueType& +TensorExtAttribute::getTensorType() const +{ + return getConfig().tensorType(); +} + +TensorExtAttribute::DistanceMetric +TensorExtAttribute::distance_metric() const +{ + return getConfig().distance_metric(); +} + +uint32_t +TensorExtAttribute::get_num_docs() const +{ + return _data.size(); +} + +void +TensorExtAttribute::get_state(const vespalib::slime::Inserter& inserter) const +{ + (void) inserter; +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h new file mode 100644 index 00000000000..a58426cd146 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h @@ -0,0 +1,54 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_tensor_attribute.h" +#include "empty_subspace.h" +#include "subspace_type.h" +#include +#include + +namespace search::tensor { + +/** + * Attribute vector storing a pointer to single tensor value per + * document in streaming search. The tensor is not owned by this + * attribute vector. + */ +class TensorExtAttribute : public NotImplementedAttribute, + public ITensorAttribute, + public IExtendAttribute +{ + std::vector _data; + SubspaceType _subspace_type; + EmptySubspace _empty; + std::unique_ptr _empty_tensor; +public: + TensorExtAttribute(const vespalib::string& name, const Config& cfg); + ~TensorExtAttribute() override; + const ITensorAttribute* asTensorAttribute() const override; + void onCommit() override; + void onUpdateStat() override; + bool addDoc(DocId& docId) override; + bool add(const vespalib::eval::Value& v, int32_t) override; + IExtendAttribute* getExtendInterface() override; + // DocVectorAccess API + vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; + VectorBundle get_vectors(uint32_t docid) const override; + + // ITensorAttribute API + std::unique_ptr getTensor(uint32_t docid) const override; + std::unique_ptr getEmptyTensor() const override; + vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const override; + const vespalib::eval::Value& get_tensor_ref(uint32_t docid) const override; + SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const override; + bool supports_extract_cells_ref() const override; + bool supports_get_tensor_ref() const override; + bool supports_get_serialized_tensor_ref() const override; + const vespalib::eval::ValueType & getTensorType() const override; + search::attribute::DistanceMetric distance_metric() const override; + uint32_t get_num_docs() const override; + void get_state(const vespalib::slime::Inserter& inserter) const override; +}; + +} -- cgit v1.2.3 From fe96fab936a7f6f920aafca24397febbb556219a Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 31 Mar 2023 17:00:18 +0200 Subject: Add add_doc helper function. --- .../attribute/extendattributes/extendattribute.cpp | 68 +++++++--------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp index ae6a0d18a48..3f775e99891 100644 --- a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp @@ -59,6 +59,15 @@ vec_mixed_2d(std::vector> val) return spec; } +void add_doc(AttributeVector& attr, uint32_t exp_docid) +{ + uint32_t docid(0); + EXPECT_EQ(exp_docid, attr.getNumDocs()); + attr.addDoc(docid); + EXPECT_EQ(exp_docid, docid); + EXPECT_EQ(exp_docid + 1, attr.getNumDocs()); +} + class ExtendAttributeTest : public ::testing::Test { std::vector> _tensors; @@ -87,11 +96,7 @@ ExtendAttributeTest::create_tensor(const TensorSpec &spec) template void ExtendAttributeTest::testExtendInteger(Attribute & attr) { - uint32_t docId(0); - EXPECT_EQ(attr.getNumDocs(), 0u); - attr.addDoc(docId); - EXPECT_EQ(docId, 0u); - EXPECT_EQ(attr.getNumDocs(), 1u); + add_doc(attr, 0); attr.add(1, 10); EXPECT_EQ(attr.getInt(0), 1); attr.add(2, 20); @@ -106,9 +111,7 @@ void ExtendAttributeTest::testExtendInteger(Attribute & attr) EXPECT_EQ(v[1].getWeight(), 20); } } - attr.addDoc(docId); - EXPECT_EQ(docId, 1u); - EXPECT_EQ(attr.getNumDocs(), 2u); + add_doc(attr, 1); attr.add(3, 30); EXPECT_EQ(attr.getInt(1), 3); if (attr.hasMultiValue()) { @@ -124,11 +127,7 @@ void ExtendAttributeTest::testExtendInteger(Attribute & attr) template void ExtendAttributeTest::testExtendFloat(Attribute & attr) { - uint32_t docId(0); - EXPECT_EQ(attr.getNumDocs(), 0u); - attr.addDoc(docId); - EXPECT_EQ(docId, 0u); - EXPECT_EQ(attr.getNumDocs(), 1u); + add_doc(attr, 0); attr.add(1.7, 10); EXPECT_EQ(attr.getInt(0), 1); EXPECT_EQ(attr.getFloat(0), 1.7); @@ -144,9 +143,7 @@ void ExtendAttributeTest::testExtendFloat(Attribute & attr) EXPECT_EQ(v[1].getWeight(), 20); } } - attr.addDoc(docId); - EXPECT_EQ(docId, 1u); - EXPECT_EQ(attr.getNumDocs(), 2u); + add_doc(attr, 1); attr.add(3.6, 30); EXPECT_EQ(attr.getFloat(1), 3.6); if (attr.hasMultiValue()) { @@ -162,11 +159,7 @@ void ExtendAttributeTest::testExtendFloat(Attribute & attr) template void ExtendAttributeTest::testExtendString(Attribute & attr) { - uint32_t docId(0); - EXPECT_EQ(attr.getNumDocs(), 0u); - attr.addDoc(docId); - EXPECT_EQ(docId, 0u); - EXPECT_EQ(attr.getNumDocs(), 1u); + add_doc(attr, 0); attr.add("1.7", 10); auto buf = attr.get_raw(0); EXPECT_EQ(std::string(buf.data(), buf.size()), "1.7"); @@ -183,9 +176,7 @@ void ExtendAttributeTest::testExtendString(Attribute & attr) EXPECT_EQ(v[1].getWeight(), 20); } } - attr.addDoc(docId); - EXPECT_EQ(docId, 1u); - EXPECT_EQ(attr.getNumDocs(), 2u); + add_doc(attr, 1); attr.add("3.6", 30); buf = attr.get_raw(1); EXPECT_EQ(std::string(buf.data(), buf.size()), "3.6"); @@ -205,36 +196,27 @@ void ExtendAttributeTest::testExtendRaw(AttributeVector& attr) std::vector zeros{10, 0, 0, 11}; auto* ext_attr = attr.getExtendInterface(); EXPECT_NE(nullptr, ext_attr); - uint32_t docId(0); - EXPECT_EQ(0u, attr.getNumDocs()); - attr.addDoc(docId); - EXPECT_EQ(0u, docId); - EXPECT_EQ(1u, attr.getNumDocs()); + add_doc(attr, 0); ext_attr->add(as_vector("1.7")); auto buf = attr.get_raw(0); EXPECT_EQ(as_vector("1.7"), as_vector(buf)); ext_attr->add(vespalib::ConstArrayRef(as_vector("2.3"))); buf = attr.get_raw(0); EXPECT_EQ(as_vector("2.3"), as_vector(buf)); - attr.addDoc(docId); - EXPECT_EQ(1u, docId); - EXPECT_EQ(attr.getNumDocs(), 2u); + add_doc(attr, 1); ext_attr->add(as_vector("3.6")); buf = attr.get_raw(1); EXPECT_EQ(as_vector("3.6"), as_vector(buf)); buf = attr.get_raw(0); EXPECT_EQ(as_vector("2.3"), as_vector(buf)); - attr.addDoc(docId); - EXPECT_EQ(2u, docId); + add_doc(attr, 2); ext_attr->add(zeros); buf = attr.get_raw(2); EXPECT_EQ(zeros, as_vector(buf)); - attr.addDoc(docId); - EXPECT_EQ(3u, docId); + add_doc(attr, 3); buf = attr.get_raw(3); EXPECT_EQ(empty, as_vector(buf)); - attr.addDoc(docId); - EXPECT_EQ(4u, docId); + add_doc(attr, 4); ext_attr->add(empty); buf = attr.get_raw(4); EXPECT_EQ(empty, as_vector(buf)); @@ -251,11 +233,7 @@ void ExtendAttributeTest::testExtendTensor(AttributeVector& attr) EXPECT_NE(nullptr, ext_attr); auto* tensor_attr = attr.asTensorAttribute(); EXPECT_NE(nullptr, tensor_attr); - uint32_t docId(0); - EXPECT_EQ(0u, attr.getNumDocs()); - attr.addDoc(docId); - EXPECT_EQ(0u, docId); - EXPECT_EQ(1u, attr.getNumDocs()); + add_doc(attr, 0); TensorSpec spec0 = dense ? vec_2d(1.0, 2.0) : vec_mixed_2d({{3.0, 4.0}, {5.0, 6.0}}); EXPECT_TRUE(ext_attr->add(create_tensor(spec0))); auto tensor = tensor_attr->getTensor(0); @@ -282,9 +260,7 @@ void ExtendAttributeTest::testExtendTensor(AttributeVector& attr) EXPECT_EQ(spec0_mixed_cells1, as_vector(tensor_attr->get_vector(0, 1).typify())); EXPECT_EQ(empty_cells, as_vector(tensor_attr->get_vector(0, 2).typify())); } - attr.addDoc(docId); - EXPECT_EQ(1u, docId); - EXPECT_EQ(2u, attr.getNumDocs()); + add_doc(attr, 1); vectors = tensor_attr->get_vectors(1); EXPECT_EQ(0, vectors.subspaces()); EXPECT_EQ(empty_cells, as_vector(tensor_attr->get_vector(1, 0).typify())); -- cgit v1.2.3