about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@yahooinc.com> 2023-03-31 19:34:50 +0200
committer GitHub <noreply@github.com> 2023-03-31 19:34:50 +0200
commit 9d93baa2b9d23258f8e760e3d804ee9065cf9a58 (patch)
tree 594da70ff2a65a76bd96735c55291b261e4fa9a5
parent e0db5db519c291dc9ea9ec994b51fb9499f1e246 (diff)
parent fe96fab936a7f6f920aafca24397febbb556219a (diff)
Merge pull request #26665 from vespa-engine/toregge/add-tensor-ext-attribute v8.149.36
Add TensorExtAttribute.
-rw-r--r-- searchlib/src/tests/attribute/extendattributes/extendattribute.cpp 176
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributevector.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp 181
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h 54
5 files changed, 377 insertions, 38 deletions
diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp
index 8f056323733..3f775e99891 100644
--- a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp
+++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp
@@ -1,9 +1,25 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/attribute/extendableattributes.h>
#include <vespa/searchlib/attribute/single_raw_ext_attribute.h>
+#include <vespa/searchlib/tensor/tensor_ext_attribute.h>
+#include <vespa/searchlib/tensor/vector_bundle.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+using search::attribute::Config;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
using search::attribute::SingleRawExtAttribute;
+using search::tensor::TensorExtAttribute;
+using vespalib::eval::FastValueBuilderFactory;
+using vespalib::eval::TensorSpec;
+using vespalib::eval::Value;
+using vespalib::eval::ValueType;
namespace search {
@@ -15,8 +31,46 @@ std::vector<char> as_vector(vespalib::ConstArrayRef<char> value) {
return {value.data(), value.data() + value.size()};
}
+std::vector<double> as_vector(vespalib::ConstArrayRef<double> value) {
+ return {value.data(), value.data() + value.size()};
+}
+
+vespalib::string vec_2d_spec("tensor(x[2])");
+vespalib::string vec_mixed_2d_spec("tensor(a{},x[2])");
+
+TensorSpec
+vec_2d(double x0, double x1)
+{
+ return TensorSpec(vec_2d_spec).add({{"x", 0}}, x0).add({{"x", 1}}, x1);
+}
+
+TensorSpec
+vec_mixed_2d(std::vector<std::vector<double>> val)
+{
+ TensorSpec spec(vec_mixed_2d_spec);
+ for (uint32_t a = 0; a < val.size(); ++a) {
+ vespalib::asciistream a_stream;
+ a_stream << a;
+ vespalib::string a_as_string = a_stream.str();
+ for (uint32_t x = 0; x < val[a].size(); ++x) {
+ spec.add({{"a", a_as_string.c_str()},{"x", x}}, val[a][x]);
+ }
+ }
+ return spec;
+}
+
+void add_doc(AttributeVector& attr, uint32_t exp_docid)
+{
+ uint32_t docid(0);
+ EXPECT_EQ(exp_docid, attr.getNumDocs());
+ attr.addDoc(docid);
+ EXPECT_EQ(exp_docid, docid);
+ EXPECT_EQ(exp_docid + 1, attr.getNumDocs());
+}
+
class ExtendAttributeTest : public ::testing::Test
{
+ std::vector<std::unique_ptr<Value>> _tensors;
protected:
ExtendAttributeTest() = default;
~ExtendAttributeTest() override = default;
@@ -27,16 +81,22 @@ protected:
template <typename Attribute>
void testExtendString(Attribute & attr);
void testExtendRaw(AttributeVector& attr);
+ void testExtendTensor(AttributeVector& attr);
+ const Value& create_tensor(const TensorSpec &spec);
};
+const Value&
+ExtendAttributeTest::create_tensor(const TensorSpec &spec)
+{
+ auto value = value_from_spec(spec, FastValueBuilderFactory::get());
+ _tensors.emplace_back(std::move(value));
+ return *_tensors.back();
+}
+
template <typename Attribute>
void ExtendAttributeTest::testExtendInteger(Attribute & attr)
{
- uint32_t docId(0);
- EXPECT_EQ(attr.getNumDocs(), 0u);
- attr.addDoc(docId);
- EXPECT_EQ(docId, 0u);
- EXPECT_EQ(attr.getNumDocs(), 1u);
+ add_doc(attr, 0);
attr.add(1, 10);
EXPECT_EQ(attr.getInt(0), 1);
attr.add(2, 20);
@@ -51,9 +111,7 @@ void ExtendAttributeTest::testExtendInteger(Attribute & attr)
EXPECT_EQ(v[1].getWeight(), 20);
}
}
- attr.addDoc(docId);
- EXPECT_EQ(docId, 1u);
- EXPECT_EQ(attr.getNumDocs(), 2u);
+ add_doc(attr, 1);
attr.add(3, 30);
EXPECT_EQ(attr.getInt(1), 3);
if (attr.hasMultiValue()) {
@@ -69,11 +127,7 @@ void ExtendAttributeTest::testExtendInteger(Attribute & attr)
template <typename Attribute>
void ExtendAttributeTest::testExtendFloat(Attribute & attr)
{
- uint32_t docId(0);
- EXPECT_EQ(attr.getNumDocs(), 0u);
- attr.addDoc(docId);
- EXPECT_EQ(docId, 0u);
- EXPECT_EQ(attr.getNumDocs(), 1u);
+ add_doc(attr, 0);
attr.add(1.7, 10);
EXPECT_EQ(attr.getInt(0), 1);
EXPECT_EQ(attr.getFloat(0), 1.7);
@@ -89,9 +143,7 @@ void ExtendAttributeTest::testExtendFloat(Attribute & attr)
EXPECT_EQ(v[1].getWeight(), 20);
}
}
- attr.addDoc(docId);
- EXPECT_EQ(docId, 1u);
- EXPECT_EQ(attr.getNumDocs(), 2u);
+ add_doc(attr, 1);
attr.add(3.6, 30);
EXPECT_EQ(attr.getFloat(1), 3.6);
if (attr.hasMultiValue()) {
@@ -107,11 +159,7 @@ void ExtendAttributeTest::testExtendFloat(Attribute & attr)
template <typename Attribute>
void ExtendAttributeTest::testExtendString(Attribute & attr)
{
- uint32_t docId(0);
- EXPECT_EQ(attr.getNumDocs(), 0u);
- attr.addDoc(docId);
- EXPECT_EQ(docId, 0u);
- EXPECT_EQ(attr.getNumDocs(), 1u);
+ add_doc(attr, 0);
attr.add("1.7", 10);
auto buf = attr.get_raw(0);
EXPECT_EQ(std::string(buf.data(), buf.size()), "1.7");
@@ -128,9 +176,7 @@ void ExtendAttributeTest::testExtendString(Attribute & attr)
EXPECT_EQ(v[1].getWeight(), 20);
}
}
- attr.addDoc(docId);
- EXPECT_EQ(docId, 1u);
- EXPECT_EQ(attr.getNumDocs(), 2u);
+ add_doc(attr, 1);
attr.add("3.6", 30);
buf = attr.get_raw(1);
EXPECT_EQ(std::string(buf.data(), buf.size()), "3.6");
@@ -150,41 +196,77 @@ void ExtendAttributeTest::testExtendRaw(AttributeVector& attr)
std::vector<char> zeros{10, 0, 0, 11};
auto* ext_attr = attr.getExtendInterface();
EXPECT_NE(nullptr, ext_attr);
- uint32_t docId(0);
- EXPECT_EQ(0u, attr.getNumDocs());
- attr.addDoc(docId);
- EXPECT_EQ(0u, docId);
- EXPECT_EQ(1u, attr.getNumDocs());
+ add_doc(attr, 0);
ext_attr->add(as_vector("1.7"));
auto buf = attr.get_raw(0);
EXPECT_EQ(as_vector("1.7"), as_vector(buf));
ext_attr->add(vespalib::ConstArrayRef<char>(as_vector("2.3")));
buf = attr.get_raw(0);
EXPECT_EQ(as_vector("2.3"), as_vector(buf));
- attr.addDoc(docId);
- EXPECT_EQ(1u, docId);
- EXPECT_EQ(attr.getNumDocs(), 2u);
+ add_doc(attr, 1);
ext_attr->add(as_vector("3.6"));
buf = attr.get_raw(1);
EXPECT_EQ(as_vector("3.6"), as_vector(buf));
buf = attr.get_raw(0);
EXPECT_EQ(as_vector("2.3"), as_vector(buf));
- attr.addDoc(docId);
- EXPECT_EQ(2u, docId);
+ add_doc(attr, 2);
ext_attr->add(zeros);
buf = attr.get_raw(2);
EXPECT_EQ(zeros, as_vector(buf));
- attr.addDoc(docId);
- EXPECT_EQ(3u, docId);
+ add_doc(attr, 3);
buf = attr.get_raw(3);
EXPECT_EQ(empty, as_vector(buf));
- attr.addDoc(docId);
- EXPECT_EQ(4u, docId);
+ add_doc(attr, 4);
ext_attr->add(empty);
buf = attr.get_raw(4);
EXPECT_EQ(empty, as_vector(buf));
}
+void ExtendAttributeTest::testExtendTensor(AttributeVector& attr)
+{
+ std::vector<double> empty_cells{0.0, 0.0};
+ std::vector<double> spec0_dense_cells{1.0, 2.0};
+ std::vector<double> spec0_mixed_cells0{3.0, 4.0};
+ std::vector<double> spec0_mixed_cells1{5.0, 6.0};
+ bool dense = attr.getConfig().tensorType().is_dense();
+ auto* ext_attr = attr.getExtendInterface();
+ EXPECT_NE(nullptr, ext_attr);
+ auto* tensor_attr = attr.asTensorAttribute();
+ EXPECT_NE(nullptr, tensor_attr);
+ add_doc(attr, 0);
+ TensorSpec spec0 = dense ? vec_2d(1.0, 2.0) : vec_mixed_2d({{3.0, 4.0}, {5.0, 6.0}});
+ EXPECT_TRUE(ext_attr->add(create_tensor(spec0)));
+ auto tensor = tensor_attr->getTensor(0);
+ EXPECT_NE(nullptr, tensor.get());
+ EXPECT_EQ(spec0, TensorSpec::from_value(*tensor));
+ EXPECT_EQ(dense, tensor_attr->supports_extract_cells_ref());
+ if (dense) {
+ EXPECT_EQ(spec0_dense_cells, as_vector(tensor_attr->extract_cells_ref(0).typify<double>()));
+ }
+ EXPECT_TRUE(tensor_attr->supports_get_tensor_ref());
+ EXPECT_EQ(spec0, TensorSpec::from_value(tensor_attr->get_tensor_ref(0)));
+ EXPECT_FALSE(tensor_attr->supports_get_serialized_tensor_ref());
+ auto vectors = tensor_attr->get_vectors(0);
+ if (dense) {
+ EXPECT_EQ(1, vectors.subspaces());
+ EXPECT_EQ(spec0_dense_cells, as_vector(vectors.cells(0).typify<double>()));
+ EXPECT_EQ(spec0_dense_cells, as_vector(tensor_attr->get_vector(0, 0).typify<double>()));
+ EXPECT_EQ(empty_cells, as_vector(tensor_attr->get_vector(0, 1).typify<double>()));
+ } else {
+ EXPECT_EQ(2, vectors.subspaces());
+ EXPECT_EQ(spec0_mixed_cells0, as_vector(vectors.cells(0).typify<double>()));
+ EXPECT_EQ(spec0_mixed_cells1, as_vector(vectors.cells(1).typify<double>()));
+ EXPECT_EQ(spec0_mixed_cells0, as_vector(tensor_attr->get_vector(0, 0).typify<double>()));
+ EXPECT_EQ(spec0_mixed_cells1, as_vector(tensor_attr->get_vector(0, 1).typify<double>()));
+ EXPECT_EQ(empty_cells, as_vector(tensor_attr->get_vector(0, 2).typify<double>()));
+ }
+ add_doc(attr, 1);
+ vectors = tensor_attr->get_vectors(1);
+ EXPECT_EQ(0, vectors.subspaces());
+ EXPECT_EQ(empty_cells, as_vector(tensor_attr->get_vector(1, 0).typify<double>()));
+ EXPECT_EQ(nullptr, tensor_attr->getTensor(1).get());
+}
+
TEST_F(ExtendAttributeTest, single_integer_ext_attribute)
{
SingleIntegerExtAttribute siattr("si1");
@@ -255,6 +337,24 @@ TEST_F(ExtendAttributeTest, single_raw_ext_attribute)
testExtendRaw(srattr);
}
+TEST_F(ExtendAttributeTest, tensor_ext_attribute_dense)
+{
+ Config cfg(BasicType::TENSOR, CollectionType::SINGLE);
+ cfg.setTensorType(ValueType::from_spec(vec_2d_spec));
+ TensorExtAttribute tattr("td1", cfg);
+ EXPECT_TRUE(! tattr.hasMultiValue());
+ testExtendTensor(tattr);
+}
+
+TEST_F(ExtendAttributeTest, tensor_ext_attribute_mixed)
+{
+ Config cfg(BasicType::TENSOR, CollectionType::SINGLE);
+ cfg.setTensorType(ValueType::from_spec(vec_mixed_2d_spec));
+ TensorExtAttribute tattr("tm1", cfg);
+ EXPECT_TRUE(! tattr.hasMultiValue());
+ testExtendTensor(tattr);
+}
+
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
index 3d14622ca02..e40785911ea 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -38,6 +38,8 @@ namespace vespalib::alloc {
class Alloc;
}
+namespace vespalib::eval { struct Value; }
+
namespace search {
template <typename T> class ComponentGuard;
@@ -86,6 +88,7 @@ public:
virtual bool add(double, int32_t = 1) { return false; }
virtual bool add(const char *, int32_t = 1) { return false; }
virtual bool add(vespalib::ConstArrayRef<char>, int32_t = 1) { return false; }
+ virtual bool add(const vespalib::eval::Value&, int32_t = 1) { return false; }
virtual ~IExtendAttribute() = default;
};
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index c8c5d4d4257..313863d8dcb 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -40,6 +40,7 @@ vespa_add_library(searchlib_tensor OBJECT
tensor_buffer_store.cpp
tensor_buffer_type_mapper.cpp
tensor_deserialize.cpp
+ tensor_ext_attribute.cpp
tensor_store.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp
new file mode 100644
index 00000000000..19c8cf6053b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.cpp
@@ -0,0 +1,181 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "tensor_ext_attribute.h"
+#include "serialized_tensor_ref.h"
+#include "vector_bundle.h"
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/searchcommon/attribute/config.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.tensor.tensor_ext_attribute");
+
+using vespalib::eval::FastValueBuilderFactory;
+using vespalib::eval::TensorSpec;
+using vespalib::eval::TypedCells;
+using vespalib::eval::Value;
+using vespalib::eval::ValueType;
+
+namespace search::tensor {
+
+namespace {
+
+std::unique_ptr<Value>
+create_empty_tensor(const ValueType& type)
+{
+ const auto &factory = FastValueBuilderFactory::get();
+ TensorSpec empty_spec(type.to_spec());
+ return vespalib::eval::value_from_spec(empty_spec, factory);
+}
+
+}
+
+TensorExtAttribute::TensorExtAttribute(const vespalib::string& name, const Config& cfg)
+ : NotImplementedAttribute(name, cfg),
+ ITensorAttribute(),
+ IExtendAttribute(),
+ _subspace_type(cfg.tensorType()),
+ _empty(_subspace_type),
+ _empty_tensor(create_empty_tensor(cfg.tensorType()))
+{
+}
+
+TensorExtAttribute::~TensorExtAttribute() = default;
+
+const ITensorAttribute*
+TensorExtAttribute::asTensorAttribute() const
+{
+ return this;
+}
+
+void
+TensorExtAttribute::onCommit()
+{
+ LOG_ABORT("should not be reached");
+}
+
+void
+TensorExtAttribute::onUpdateStat()
+{
+}
+
+bool
+TensorExtAttribute::addDoc(DocId& docId)
+{
+ docId = _data.size();
+ _data.emplace_back(nullptr);
+ incNumDocs();
+ setCommittedDocIdLimit(getNumDocs());
+ return true;
+}
+
+bool
+TensorExtAttribute::add(const vespalib::eval::Value& v, int32_t)
+{
+ _data.back() = &v;
+ return true;
+}
+
+IExtendAttribute*
+TensorExtAttribute::getExtendInterface()
+{
+ return this;
+}
+
+TypedCells
+TensorExtAttribute::get_vector(uint32_t docid, uint32_t subspace) const
+{
+ auto vectors = get_vectors(docid);
+ return (subspace < vectors.subspaces()) ? vectors.cells(subspace) : _empty.cells();
+}
+
+VectorBundle
+TensorExtAttribute::get_vectors(uint32_t docid) const
+{
+ auto tensor = _data[docid];
+ if (tensor == nullptr) {
+ return VectorBundle();
+ }
+ return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type);
+}
+
+std::unique_ptr<Value>
+TensorExtAttribute::getTensor(uint32_t docid) const
+{
+ auto tensor = _data[docid];
+ if (tensor == nullptr) {
+ return {};
+ }
+ return FastValueBuilderFactory::get().copy(*tensor);
+}
+
+std::unique_ptr<Value>
+TensorExtAttribute::getEmptyTensor() const
+{
+ return FastValueBuilderFactory::get().copy(*_empty_tensor);
+}
+
+TypedCells
+TensorExtAttribute::extract_cells_ref(uint32_t docid) const
+{
+ return get_vector(docid, 0);
+}
+
+const vespalib::eval::Value&
+TensorExtAttribute::get_tensor_ref(uint32_t docid) const
+{
+ auto tensor = _data[docid];
+ return (tensor == nullptr) ? *_empty_tensor : *tensor;
+}
+
+SerializedTensorRef
+TensorExtAttribute::get_serialized_tensor_ref(uint32_t) const
+{
+ notImplemented();
+}
+
+bool
+TensorExtAttribute::supports_extract_cells_ref() const
+{
+ return getConfig().tensorType().is_dense();
+}
+
+bool
+TensorExtAttribute::supports_get_tensor_ref() const
+{
+ return true;
+}
+
+bool
+TensorExtAttribute::supports_get_serialized_tensor_ref() const
+{
+ return false;
+}
+
+const ValueType&
+TensorExtAttribute::getTensorType() const
+{
+ return getConfig().tensorType();
+}
+
+TensorExtAttribute::DistanceMetric
+TensorExtAttribute::distance_metric() const
+{
+ return getConfig().distance_metric();
+}
+
+uint32_t
+TensorExtAttribute::get_num_docs() const
+{
+ return _data.size();
+}
+
+void
+TensorExtAttribute::get_state(const vespalib::slime::Inserter& inserter) const
+{
+ (void) inserter;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h
new file mode 100644
index 00000000000..a58426cd146
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_ext_attribute.h
@@ -0,0 +1,54 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "i_tensor_attribute.h"
+#include "empty_subspace.h"
+#include "subspace_type.h"
+#include <vespa/searchlib/attribute/not_implemented_attribute.h>
+#include <vespa/vespalib/stllike/allocator.h>
+
+namespace search::tensor {
+
+/**
+ * Attribute vector storing a pointer to single tensor value per
+ * document in streaming search. The tensor is not owned by this
+ * attribute vector.
+ */
+class TensorExtAttribute : public NotImplementedAttribute,
+ public ITensorAttribute,
+ public IExtendAttribute
+{
+ std::vector<const vespalib::eval::Value*> _data;
+ SubspaceType _subspace_type;
+ EmptySubspace _empty;
+ std::unique_ptr<vespalib::eval::Value> _empty_tensor;
+public:
+ TensorExtAttribute(const vespalib::string& name, const Config& cfg);
+ ~TensorExtAttribute() override;
+ const ITensorAttribute* asTensorAttribute() const override;
+ void onCommit() override;
+ void onUpdateStat() override;
+ bool addDoc(DocId& docId) override;
+ bool add(const vespalib::eval::Value& v, int32_t) override;
+ IExtendAttribute* getExtendInterface() override;
+ // DocVectorAccess API
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
+ VectorBundle get_vectors(uint32_t docid) const override;
+
+ // ITensorAttribute API
+ std::unique_ptr<vespalib::eval::Value> getTensor(uint32_t docid) const override;
+ std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const override;
+ vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const override;
+ const vespalib::eval::Value& get_tensor_ref(uint32_t docid) const override;
+ SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const override;
+ bool supports_extract_cells_ref() const override;
+ bool supports_get_tensor_ref() const override;
+ bool supports_get_serialized_tensor_ref() const override;
+ const vespalib::eval::ValueType & getTensorType() const override;
+ search::attribute::DistanceMetric distance_metric() const override;
+ uint32_t get_num_docs() const override;
+ void get_state(const vespalib::slime::Inserter& inserter) const override;
+};
+
+}