summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-07-11 12:22:34 +0000
committer Geir Storli <geirst@yahooinc.com> 2022-07-11 12:46:38 +0000
commit 059b45a1d6d0ec813f9875827741d0157840601f (patch)
tree 0bb6abbd263e42a05975fbc5ad7dd4ba7b05a9c9 /searchlib
parent f97130213c9a2546788a5e5a6299cdbbd7fee94e (diff)
Test lazy distance calculation in distance and closeness features.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp 29
-rw-r--r-- searchlib/src/tests/features/nns_distance/nns_distance_test.cpp 34
-rw-r--r-- searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp 77
-rw-r--r-- searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h 19
4 files changed, 149 insertions, 10 deletions
diff --git a/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp b/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp
index 661ee884e46..e67370d48f6 100644
--- a/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp
+++ b/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp
@@ -1,5 +1,6 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/eval/eval/tensor_spec.h>
#include <vespa/searchlib/features/closenessfeature.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
@@ -15,6 +16,8 @@ using namespace search::features;
using namespace search::fef::test;
using namespace search::fef;
+using vespalib::eval::TensorSpec;
+
const vespalib::string labelFeatureName("closeness(label,nns)");
const vespalib::string fieldFeatureName("closeness(bar)");
@@ -76,4 +79,30 @@ TEST_FF("require that stale data is ignored", SingleLabel("nns", 2), RankFixture
EXPECT_EQUAL(0, f2.getScore(10));
}
+void
+expect_raw_score_calculated_on_the_fly(RankFixture& f)
+{
+ f.setBarScore(0, 8, 13.0);
+ f.set_attribute_tensor(9, TensorSpec::from_expr("tensor(x[2]):[5,11]"));
+ f.set_attribute_tensor(10, TensorSpec::from_expr("tensor(x[2]):[7,11]"));
+
+ // For docids 9 and 10 the raw score is calculated on the fly
+ // using a distance calculator over the attribute and query tensors.
+ EXPECT_EQUAL(1/(1+13.0), f.getScore(8));
+ EXPECT_EQUAL(1/(1+(5.0-3.0)), f.getScore(9));
+ EXPECT_EQUAL(1/(1+(7.0-3.0)), f.getScore(10));
+}
+
+TEST_FF("raw score is calculated on the fly (using field setup)",
+ NoLabel(), RankFixture(0, 1, f1, fieldFeatureName, "tensor(x[2]):[3,11]"))
+{
+ expect_raw_score_calculated_on_the_fly(f2);
+}
+
+TEST_FF("raw score is calculated on the fly (using label setup)",
+ SingleLabel("nns", 1), RankFixture(0, 1, f1, labelFeatureName, "tensor(x[2]):[3,11]"))
+{
+ expect_raw_score_calculated_on_the_fly(f2);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/nns_distance/nns_distance_test.cpp b/searchlib/src/tests/features/nns_distance/nns_distance_test.cpp
index 6b2669367ad..fff4c9f1c0e 100644
--- a/searchlib/src/tests/features/nns_distance/nns_distance_test.cpp
+++ b/searchlib/src/tests/features/nns_distance/nns_distance_test.cpp
@@ -1,12 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
-#include <vespa/searchlib/features/setup.h>
-#include <vespa/searchlib/fef/test/labels.h>
+#include <vespa/eval/eval/tensor_spec.h>
#include <vespa/searchlib/features/distancefeature.h>
#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/fef/test/labels.h>
#include <vespa/searchlib/test/features/distance_closeness_fixture.h>
#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/vespalib/util/stringfmt.h>
using search::feature_t;
@@ -15,6 +15,8 @@ using namespace search::features;
using namespace search::fef::test;
using namespace search::fef;
+using vespalib::eval::TensorSpec;
+
const vespalib::string labelFeatureName("distance(label,nns)");
const vespalib::string fieldFeatureName("distance(bar)");
@@ -82,4 +84,30 @@ TEST_FF("require that stale data is ignored", SingleLabel("nns", 2), RankFixture
EXPECT_EQUAL(std::numeric_limits<feature_t>::max(), f2.getScore(10));
}
+void
+expect_raw_score_calculated_on_the_fly(RankFixture& f)
+{
+ f.setBarScore(0, 8, 13.0);
+ f.set_attribute_tensor(9, TensorSpec::from_expr("tensor(x[2]):[5,11]"));
+ f.set_attribute_tensor(10, TensorSpec::from_expr("tensor(x[2]):[7,11]"));
+
+ // For docids 9 and 10 the raw score is calculated on the fly
+ // using a distance calculator over the attribute and query tensors.
+ EXPECT_EQUAL(13.0, f.getScore(8));
+ EXPECT_EQUAL((5-3), f.getScore(9));
+ EXPECT_EQUAL((7-3), f.getScore(10));
+}
+
+TEST_FF("raw score is calculated on the fly (using field setup)",
+ NoLabel(), RankFixture(0, 1, f1, fieldFeatureName, "tensor(x[2]):[3,11]"))
+{
+ expect_raw_score_calculated_on_the_fly(f2);
+}
+
+TEST_FF("raw score is calculated on the fly (using label setup)",
+ SingleLabel("nns", 1), RankFixture(0, 1, f1, labelFeatureName, "tensor(x[2]):[3,11]"))
+{
+ expect_raw_score_calculated_on_the_fly(f2);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
index 76d40e14f48..e161a4e9839 100644
--- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
+++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
@@ -1,14 +1,56 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "distance_closeness_fixture.h"
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
+
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using search::attribute::Config;
+using search::attribute::DistanceMetric;
+using search::fef::test::IndexEnvironment;
+using search::fef::test::QueryEnvironment;
+using search::tensor::DenseTensorAttribute;
+using vespalib::eval::SimpleValue;
+using vespalib::eval::TensorSpec;
+using vespalib::eval::Value;
+using vespalib::eval::ValueType;
namespace search::features::test {
+namespace {
+
+std::shared_ptr<DenseTensorAttribute>
+create_tensor_attribute(const vespalib::string& attr_name,
+ const vespalib::string& tensor_type,
+ uint32_t docid_limit)
+{
+ Config cfg(BasicType::TENSOR, CollectionType::SINGLE);
+ cfg.setTensorType(ValueType::from_spec(tensor_type));
+ cfg.set_distance_metric(DistanceMetric::Euclidean);
+ auto result = std::make_shared<DenseTensorAttribute>(attr_name, cfg);
+ result->addReservedDoc();
+ result->addDocs(docid_limit-1);
+ result->commit();
+ return result;
+}
+
+}
+
FeatureDumpFixture::~FeatureDumpFixture() = default;
-DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt, const Labels &labels, const vespalib::string &featureName)
+DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt,
+ const Labels& labels,
+ const vespalib::string& featureName,
+ const vespalib::string& query_tensor)
: queryEnv(&indexEnv), rankSetup(factory, indexEnv),
- mdl(), match_data(), rankProgram(), fooHandles(), barHandles()
+ mdl(), match_data(), rankProgram(), fooHandles(), barHandles(),
+ tensor_attr(),
+ docid_limit(11)
{
for (size_t i = 0; i < fooCnt; ++i) {
uint32_t fieldId = indexEnv.getFieldByName("foo")->id();
@@ -24,16 +66,47 @@ DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt,
SimpleTermData term;
term.setUniqueId(fooCnt + i + 1);
term.addField(fieldId).setHandle(barHandles.back());
+ if (!query_tensor.empty()) {
+ term.set_query_tensor_name("qbar");
+ }
queryEnv.getTerms().push_back(term);
}
+ if (!query_tensor.empty()) {
+ tensor_attr = create_tensor_attribute("bar", "tensor(x[2])", docid_limit);
+ indexEnv.getAttributeMap().add(tensor_attr);
+ set_query_tensor("qbar", "tensor(x[2])", TensorSpec::from_expr(query_tensor));
+ }
labels.inject(queryEnv.getProperties());
rankSetup.setFirstPhaseRank(featureName);
rankSetup.setIgnoreDefaultRankFeatures(true);
ASSERT_TRUE(rankSetup.compile());
+ rankSetup.prepareSharedState(queryEnv, queryEnv.getObjectStore());
match_data = mdl.createMatchData();
rankProgram = rankSetup.create_first_phase_program();
rankProgram->setup(*match_data, queryEnv);
}
+DistanceClosenessFixture::~DistanceClosenessFixture() = default;
+
+void
+DistanceClosenessFixture::set_attribute_tensor(uint32_t docid, const vespalib::eval::TensorSpec& spec)
+{
+ auto tensor = SimpleValue::from_spec(spec);
+ tensor_attr->setTensor(docid, *tensor);
+ tensor_attr->commit();
+}
+
+void
+DistanceClosenessFixture::set_query_tensor(const vespalib::string& query_tensor_name,
+ const vespalib::string& tensor_type,
+ const TensorSpec& spec)
+{
+ search::fef::indexproperties::type::QueryFeature::set(indexEnv.getProperties(), query_tensor_name, tensor_type);
+ auto tensor = SimpleValue::from_spec(spec);
+ vespalib::nbostream stream;
+ vespalib::eval::encode_value(*tensor, stream);
+ queryEnv.getProperties().add(query_tensor_name, vespalib::stringref(stream.peek(), stream.size()));
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
index cdb1379659e..cc1c0a6fb15 100644
--- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
+++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
@@ -13,8 +13,8 @@
using namespace search::fef;
using namespace search::fef::test;
-using CollectionType = FieldInfo::CollectionType;
-using DataType = FieldInfo::DataType;
+namespace search::tensor { class DenseTensorAttribute; }
+namespace vespalib::eval { class TensorSpec; }
namespace search::features::test {
@@ -31,8 +31,8 @@ struct IndexEnvironmentFixture {
IndexEnvironmentFixture() : indexEnv()
{
IndexEnvironmentBuilder builder(indexEnv);
- builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "foo");
- builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "bar");
+ builder.addField(FieldType::ATTRIBUTE, FieldInfo::CollectionType::SINGLE, FieldInfo::DataType::INT64, "foo");
+ builder.addField(FieldType::ATTRIBUTE, FieldInfo::CollectionType::SINGLE, FieldInfo::DataType::TENSOR, "bar");
}
};
@@ -55,7 +55,16 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu
RankProgram::UP rankProgram;
std::vector<TermFieldHandle> fooHandles;
std::vector<TermFieldHandle> barHandles;
- DistanceClosenessFixture(size_t fooCnt, size_t barCnt, const Labels &labels, const vespalib::string &featureName);
+ std::shared_ptr<search::tensor::DenseTensorAttribute> tensor_attr;
+ uint32_t docid_limit;
+ DistanceClosenessFixture(size_t fooCnt, size_t barCnt,
+ const Labels &labels, const vespalib::string &featureName,
+ const vespalib::string& query_tensor = "");
+ ~DistanceClosenessFixture();
+ void set_attribute_tensor(uint32_t docid, const vespalib::eval::TensorSpec& spec);
+ void set_query_tensor(const vespalib::string& query_tensor_name,
+ const vespalib::string& tensor_type,
+ const vespalib::eval::TensorSpec& spec);
feature_t getScore(uint32_t docId) {
return Utils::getScoreFeature(*rankProgram, docId);
}