summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2022-11-24 11:09:43 +0000
committerGeir Storli <geirst@yahooinc.com>2022-11-24 11:09:43 +0000
commit93767a816d8f449482012f0c065d3b926db4f811 (patch)
treeb2c91f3e59f76c12bd6786aa8d643115a7adf5c4 /searchlib
parentba11a8a87877dd3a97586255839b02782b70de87 (diff)
Add support for multiple vectors per document in exact distance calculator.
This also fixes a problem where the distance between the query tensor and a non-set (dense) attribute tensor was previously calculated against the origin. With this change the distance is std::numeric_limits<double>::max() in these cases.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/tensor/distance_calculator/CMakeLists.txt11
-rw-r--r--searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp86
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_calculator.h19
-rw-r--r--searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp13
-rw-r--r--searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute.h3
-rw-r--r--searchlib/src/vespa/searchlib/test/attribute_builder.cpp25
-rw-r--r--searchlib/src/vespa/searchlib/test/attribute_builder.h10
10 files changed, 169 insertions, 8 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index ae6469ae07a..6c5d8f4074f 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -219,6 +219,7 @@ vespa_define_module(
src/tests/sortspec
src/tests/tensor/dense_tensor_store
src/tests/tensor/direct_tensor_store
+ src/tests/tensor/distance_calculator
src/tests/tensor/distance_functions
src/tests/tensor/hnsw_index
src/tests/tensor/hnsw_nodeid_mapping
diff --git a/searchlib/src/tests/tensor/distance_calculator/CMakeLists.txt b/searchlib/src/tests/tensor/distance_calculator/CMakeLists.txt
new file mode 100644
index 00000000000..f4698ce355e
--- /dev/null
+++ b/searchlib/src/tests/tensor/distance_calculator/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_distance_calculator_test_app TEST # test executable built from distance_calculator_test.cpp
+ SOURCES
+ distance_calculator_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_distance_calculator_test_app COMMAND searchlib_distance_calculator_test_app) # test name and command are both the executable name
+
diff --git a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp
new file mode 100644
index 00000000000..11f767b546d
--- /dev/null
+++ b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp
@@ -0,0 +1,86 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/tensor/distance_calculator.h>
+#include <vespa/searchlib/tensor/distance_function_factory.h>
+#include <vespa/searchlib/test/attribute_builder.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <iostream>
+
+using namespace search::attribute::test;
+using namespace search::attribute;
+using namespace search::tensor;
+using namespace vespalib::eval;
+
+using search::AttributeVector;
+
+class DistanceCalculatorTest : public testing::Test { // Fixture: builds a tensor attribute and runs DistanceCalculator against it.
+public:
+ std::shared_ptr<AttributeVector> attr; // attribute created by build_attribute(); owns the object attr_tensor points to
+ const ITensorAttribute* attr_tensor; // attr viewed through ITensorAttribute; null until build_attribute() has run
+ std::unique_ptr<DistanceFunction> func; // Euclidean distance function created for double cells
+
+ DistanceCalculatorTest()
+ : attr(),
+ attr_tensor(),
+ func(make_distance_function(DistanceMetric::Euclidean, CellType::DOUBLE))
+ {
+ }
+
+ void build_attribute(const vespalib::string& tensor_type, // tensor type spec, e.g. "tensor(y[2])"
+ const std::vector<vespalib::string>& tensor_values) { // one entry per document; "" means no tensor is set
+ Config cfg(BasicType::TENSOR);
+ cfg.setTensorType(ValueType::from_spec(tensor_type));
+ attr = AttributeBuilder("doc_tensor", cfg).fill_tensor(tensor_values).get(); // tensor_values[i] is stored for docid i + 1
+ attr_tensor = dynamic_cast<const ITensorAttribute*>(attr.get());
+ ASSERT_TRUE(attr_tensor != nullptr); // fatal test failure if the built attribute is not an ITensorAttribute
+ }
+ double calc_distance(uint32_t docid, const vespalib::string& query_tensor) { // distance from query_tensor (a full tensor expression) to docid
+ auto qv = SimpleValue::from_spec(TensorSpec::from_expr(query_tensor));
+ DistanceCalculator calc(*attr_tensor, *qv, *func); // dereferences attr_tensor, so build_attribute() must have run first
+ return calc.calc_with_limit(docid, std::numeric_limits<double>::max()); // NOTE(review): largest double as limit presumably disables any early cut-off; confirm in DistanceFunction
+ }
+ double calc_rawscore(uint32_t docid, const vespalib::string& query_tensor) { // raw score of docid for query_tensor (a full tensor expression)
+ auto qv = SimpleValue::from_spec(TensorSpec::from_expr(query_tensor));
+ DistanceCalculator calc(*attr_tensor, *qv, *func); // dereferences attr_tensor, so build_attribute() must have run first
+ return calc.calc_raw_score(docid);
+ }
+};
+
+constexpr double max_distance = std::numeric_limits<double>::max(); // distance the tests expect for documents without any vector
+
+TEST_F(DistanceCalculatorTest, calculation_over_dense_tensor_attribute) // one vector per document; docid 2 has no tensor set
+{
+ build_attribute("tensor(y[2])", {"[3,10]", ""}); // docid 1 = [3,10], docid 2 = not set
+ vespalib::string qt = "tensor(y[2]):[7,10]";
+ EXPECT_DOUBLE_EQ(16, calc_distance(1, qt)); // (7-3)^2 + (10-10)^2 = 16
+ EXPECT_DOUBLE_EQ(max_distance, calc_distance(2, qt)); // non-set tensor: max distance
+
+ EXPECT_DOUBLE_EQ(1.0/(1.0 + 4.0), calc_rawscore(1, qt)); // 4.0 is the square root of the distance 16 asserted above
+ EXPECT_DOUBLE_EQ(0.0, calc_rawscore(2, qt)); // non-set tensor: raw score 0.0
+}
+
+TEST_F(DistanceCalculatorTest, calculation_over_mixed_tensor_attribute) // several vectors per document: the closest one decides the result
+{
+ build_attribute("tensor(x{},y[2])",
+ {"{{x:\"a\",y:0}:3,{x:\"a\",y:1}:10,{x:\"b\",y:0}:5,{x:\"b\",y:1}:10}", // docid 1: a = [3,10], b = [5,10]
+ "{}", ""}); // docid 2: empty tensor, docid 3: not set
+ vespalib::string qt_1 = "tensor(y[2]):[9,10]";
+ vespalib::string qt_2 = "tensor(y[2]):[1,10]";
+ EXPECT_DOUBLE_EQ(16, calc_distance(1, qt_1)); // b is closest: (9-5)^2 = 16, versus (9-3)^2 = 36 for a
+ EXPECT_DOUBLE_EQ(4, calc_distance(1, qt_2)); // a is closest: (1-3)^2 = 4, versus (1-5)^2 = 16 for b
+ EXPECT_DOUBLE_EQ(max_distance, calc_distance(2, qt_1)); // empty tensor: max distance
+ EXPECT_DOUBLE_EQ(max_distance, calc_distance(3, qt_1)); // non-set tensor: max distance
+
+ EXPECT_DOUBLE_EQ(1.0/(1.0 + 4.0), calc_rawscore(1, qt_1)); // 4.0 is the square root of the distance 16 asserted above
+ EXPECT_DOUBLE_EQ(1.0/(1.0 + 2.0), calc_rawscore(1, qt_2)); // 2.0 is the square root of the distance 4 asserted above
+ EXPECT_DOUBLE_EQ(0.0, calc_rawscore(2, qt_1)); // empty tensor: raw score 0.0
+ EXPECT_DOUBLE_EQ(0.0, calc_rawscore(3, qt_1)); // non-set tensor: raw score 0.0
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
+
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
index 3ef41906b92..320f071cbbb 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
@@ -3,6 +3,7 @@
#include "distance_function.h"
#include "i_tensor_attribute.h"
+#include "vector_bundle.h"
namespace vespalib::eval { struct Value; }
@@ -43,12 +44,24 @@ public:
const DistanceFunction& function() const { return *_dist_fun; }
double calc_raw_score(uint32_t docid) const {
- double distance = _dist_fun->calc(_query_tensor_cells, _attr_tensor.extract_cells_ref(docid));
- return _dist_fun->to_rawscore(distance);
+ auto vectors = _attr_tensor.get_vectors(docid); // the vectors stored for docid
+ double result = 0.0; // returned as-is when the loop below never runs (subspaces() == 0)
+ for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
+ double distance = _dist_fun->calc(_query_tensor_cells, vectors.cells(i));
+ double score = _dist_fun->to_rawscore(distance);
+ result = std::max(result, score); // keep the highest raw score seen so far
+ }
+ return result;
}
double calc_with_limit(uint32_t docid, double limit) const {
- return _dist_fun->calc_with_limit(_query_tensor_cells, _attr_tensor.extract_cells_ref(docid), limit);
+ auto vectors = _attr_tensor.get_vectors(docid); // the vectors stored for docid
+ double result = std::numeric_limits<double>::max(); // returned as-is when the loop below never runs (subspaces() == 0)
+ for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
+ double distance = _dist_fun->calc_with_limit(_query_tensor_cells, vectors.cells(i), limit); // the caller's limit is passed unchanged for every vector
+ result = std::min(result, distance); // keep the smallest distance seen so far
+ }
+ return result;
}
/**
diff --git a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
index 3bcb55f2c2a..9b5f80b2ece 100644
--- a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
@@ -2,9 +2,10 @@
#pragma once
-#include <memory>
+#include "doc_vector_access.h"
#include <vespa/eval/eval/typed_cells.h>
#include <vespa/searchcommon/attribute/distance_metric.h>
+#include <memory>
namespace vespalib::eval { class ValueType; struct Value; }
namespace vespalib::slime { struct Inserter; }
@@ -16,8 +17,7 @@ class NearestNeighborIndex;
/**
* Interface for tensor attribute used by feature executors to get information.
*/
-class ITensorAttribute
-{
+class ITensorAttribute : public DocVectorAccess {
public:
virtual ~ITensorAttribute() {}
virtual std::unique_ptr<vespalib::eval::Value> getTensor(uint32_t docId) const = 0;
diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
index a9bc69c36c9..f9459823ce4 100644
--- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "imported_tensor_attribute_vector_read_guard.h"
+#include "vector_bundle.h"
#include <vespa/searchlib/attribute/attributevector.h>
#include <vespa/eval/eval/value.h>
@@ -60,6 +61,18 @@ ImportedTensorAttributeVectorReadGuard::get_tensor_ref(uint32_t docid) const
return _target_tensor_attribute.get_tensor_ref(getTargetLid(docid));
}
+vespalib::eval::TypedCells
+ImportedTensorAttributeVectorReadGuard::get_vector(uint32_t docid, uint32_t subspace) const
+{
+ return _target_tensor_attribute.get_vector(getTargetLid(docid), subspace); // docid is mapped via getTargetLid() and the call is forwarded to the target tensor attribute; subspace is passed through unchanged
+}
+
+search::tensor::VectorBundle
+ImportedTensorAttributeVectorReadGuard::get_vectors(uint32_t docid) const
+{
+ return _target_tensor_attribute.get_vectors(getTargetLid(docid)); // docid is mapped via getTargetLid() and the call is forwarded to the target tensor attribute
+}
+
const vespalib::eval::ValueType &
ImportedTensorAttributeVectorReadGuard::getTensorType() const
{
diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
index acfd1821e1f..f277d39e97d 100644
--- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
+++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
@@ -40,6 +40,9 @@ public:
DistanceMetric distance_metric() const override { return _target_tensor_attribute.distance_metric(); }
uint32_t get_num_docs() const override { return getNumDocs(); }
+ vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
+ VectorBundle get_vectors(uint32_t docid) const override;
+
const vespalib::eval::ValueType &getTensorType() const override;
void get_state(const vespalib::slime::Inserter& inserter) const override;
};
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
index 15a2db2b861..a4c30a574e5 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
@@ -3,7 +3,6 @@
#pragma once
#include "i_tensor_attribute.h"
-#include "doc_vector_access.h"
#include "prepare_result.h"
#include "subspace_type.h"
#include "tensor_store.h"
@@ -21,7 +20,7 @@ class NearestNeighborIndexFactory;
/**
* Attribute vector class used to store tensors for all documents in memory.
*/
-class TensorAttribute : public NotImplementedAttribute, public ITensorAttribute, public DocVectorAccess {
+class TensorAttribute : public NotImplementedAttribute, public ITensorAttribute {
protected:
using AtomicEntryRef = vespalib::datastore::AtomicEntryRef;
using EntryRef = TensorStore::EntryRef;
diff --git a/searchlib/src/vespa/searchlib/test/attribute_builder.cpp b/searchlib/src/vespa/searchlib/test/attribute_builder.cpp
index 04d06cd4c66..cc84355385d 100644
--- a/searchlib/src/vespa/searchlib/test/attribute_builder.cpp
+++ b/searchlib/src/vespa/searchlib/test/attribute_builder.cpp
@@ -1,13 +1,19 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "attribute_builder.h"
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributevector.h>
#include <vespa/searchlib/attribute/floatbase.h>
#include <vespa/searchlib/attribute/integerbase.h>
#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/tensor/tensor_attribute.h>
#include <cassert>
+using vespalib::eval::SimpleValue;
+using vespalib::eval::TensorSpec;
+
namespace search::attribute::test {
AttributeBuilder::AttributeBuilder(const vespalib::string& name, const Config& cfg)
@@ -151,5 +157,24 @@ AttributeBuilder::fill_wset(std::initializer_list<WeightedStringList> values)
return *this;
}
+AttributeBuilder&
+AttributeBuilder::fill_tensor(const std::vector<vespalib::string>& values) // sets one tensor per entry in values and returns *this for chaining
+{
+ add_docs(_attr, values.size()); // one document per entry in values
+ auto& real = dynamic_cast<search::tensor::TensorAttribute&>(_attr); // reference cast: throws std::bad_cast if _attr is not a TensorAttribute
+ vespalib::string tensor_type = real.getConfig().tensorType().to_spec(); // type prefix taken from the attribute's own config
+ uint32_t docid = 1; // values[0] is assigned to docid 1
+ for (const auto& value : values) {
+ if (!value.empty()) { // an empty string leaves this document without a tensor
+ auto spec = TensorSpec::from_expr(tensor_type + ":" + value); // e.g. "tensor(x[2])" + ":" + "[1, 2]"
+ auto tensor = SimpleValue::from_spec(spec);
+ real.setTensor(docid, *tensor);
+ }
+ ++docid; // advanced for empty entries too, so list positions and docids stay aligned
+ }
+ _attr.commit(true); // single commit after all tensors have been set
+ return *this;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/test/attribute_builder.h b/searchlib/src/vespa/searchlib/test/attribute_builder.h
index d5c2c3aa90e..339af4e22f5 100644
--- a/searchlib/src/vespa/searchlib/test/attribute_builder.h
+++ b/searchlib/src/vespa/searchlib/test/attribute_builder.h
@@ -52,6 +52,16 @@ public:
AttributeBuilder& fill_array(std::initializer_list<StringList> values);
AttributeBuilder& fill_wset(std::initializer_list<WeightedStringList> values);
+ /**
+ * Fill this tensor attribute with the given tensor values.
+ *
+ * Each string value represents the last part of a vespalib::eval::TensorSpec,
+ * without the tensor type as this is known from the tensor attribute.
+ * E.g "[1, 2]" is expanded to "tensor(x[2]):[1, 2]".
+ * If the string value is empty no tensor is set for that document. The value at index i is assigned to docid i + 1.
+ */
+ AttributeBuilder& fill_tensor(const std::vector<vespalib::string>& values);
+
std::shared_ptr<AttributeVector> get() const { return _attr_ptr; }
};