diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-03-13 11:22:44 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-03-13 11:30:42 +0000 |
commit | 707e76eceb19459afb0750d970a4947974db1211 (patch) | |
tree | 6d307e8438152fdb1c277477a5f3a0c7c215ea7f | |
parent | 076924e0d6048f6474c994f75dc6481ad06b1ea7 (diff) |
add unit test for extended distance feature
4 files changed, 187 insertions, 6 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index d754fd78394..30f7fc4f54c 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -134,6 +134,7 @@ vespa_define_module( src/tests/features/item_raw_score src/tests/features/max_reduce_prod_join_replacer src/tests/features/native_dot_product + src/tests/features/nns_distance src/tests/features/ranking_expression src/tests/features/raw_score src/tests/features/subqueries diff --git a/searchlib/src/tests/features/nns_distance/CMakeLists.txt b/searchlib/src/tests/features/nns_distance/CMakeLists.txt new file mode 100644 index 00000000000..74f90d56198 --- /dev/null +++ b/searchlib/src/tests/features/nns_distance/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_nns_distance_test_app TEST + SOURCES + nns_distance_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_nns_distance_test_app COMMAND searchlib_nns_distance_test_app) diff --git a/searchlib/src/tests/features/nns_distance/nns_distance_test.cpp b/searchlib/src/tests/features/nns_distance/nns_distance_test.cpp new file mode 100644 index 00000000000..6188aa8617c --- /dev/null +++ b/searchlib/src/tests/features/nns_distance/nns_distance_test.cpp @@ -0,0 +1,177 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/features/distancefeature.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/util/stringfmt.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using CollectionType = FieldInfo::CollectionType; +using DataType = FieldInfo::DataType; + +const vespalib::string labelFeatureName("distance(label)"); +const vespalib::string fieldFeatureName("distance(bar)"); + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "bar"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + virtual void visitDumpFeature(const vespalib::string &) override { + TEST_ERROR("no features should be dumped"); + } + FeatureDumpFixture() : IDumpFeatureVisitor() {} +}; + +struct Labels { + virtual void inject(Properties &p) const = 0; + virtual ~Labels() {} +}; +struct NoLabel : public Labels { + virtual void inject(Properties &) const override {} +}; +struct SingleLabel : public Labels { + vespalib::string label; + uint32_t uid; + SingleLabel(const vespalib::string &l, uint32_t x) : label(l), uid(x) {} + virtual void inject(Properties &p) const override { + vespalib::asciistream key; + key << "vespa.label." << label << ".id"; + vespalib::asciistream value; + value << uid; + p.add(key.str(), value.str()); + } +}; + +struct RankFixture : BlueprintFactoryFixture, IndexFixture { + QueryEnvironment queryEnv; + RankSetup rankSetup; + MatchDataLayout mdl; + MatchData::UP match_data; + RankProgram::UP rankProgram; + std::vector<TermFieldHandle> fooHandles; + std::vector<TermFieldHandle> barHandles; + RankFixture(size_t fooCnt, size_t barCnt, const Labels &labels, const vespalib::string &featureName) + : queryEnv(&indexEnv), rankSetup(factory, indexEnv), + mdl(), match_data(), rankProgram(), fooHandles(), barHandles() + { + for (size_t i = 0; i < fooCnt; ++i) { + uint32_t fieldId = indexEnv.getFieldByName("foo")->id(); + fooHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.setUniqueId(i + 1); + term.addField(fieldId).setHandle(fooHandles.back()); + queryEnv.getTerms().push_back(term); + } + for (size_t i = 0; i < barCnt; ++i) { + uint32_t fieldId = indexEnv.getFieldByName("bar")->id(); + barHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.setUniqueId(fooCnt + i + 1); + term.addField(fieldId).setHandle(barHandles.back()); + queryEnv.getTerms().push_back(term); + } + labels.inject(queryEnv.getProperties()); + rankSetup.setFirstPhaseRank(featureName); + rankSetup.setIgnoreDefaultRankFeatures(true); + ASSERT_TRUE(rankSetup.compile()); + match_data = mdl.createMatchData(); + rankProgram = rankSetup.create_first_phase_program(); + rankProgram->setup(*match_data, queryEnv); + } + feature_t getScore(uint32_t docId) { + return Utils::getScoreFeature(*rankProgram, docId); + } + void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) { + match_data->resolveTermField(handle)->setRawScore(docId, score); + } + void setFooScore(uint32_t i, uint32_t docId, feature_t score) { + ASSERT_LESS(i, fooHandles.size()); + setScore(fooHandles[i], docId, score); + } + void setBarScore(uint32_t i, uint32_t docId, feature_t score) { + ASSERT_LESS(i, barHandles.size()); + setScore(barHandles[i], docId, score); + } +}; + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("distance"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<DistanceBlueprint*>(bp.get()) != 0); +} + +TEST_FFF("require that no features are dumped", DistanceBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); +} + +TEST_FF("require that setup can be done on random label", DistanceBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(random_label)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "random_label"))); +} + +TEST_FF("require that no label gives max-double distance", NoLabel(), RankFixture(2, 2, f1, labelFeatureName)) { + EXPECT_EQUAL(std::numeric_limits<feature_t>::max(), f2.getScore(10)); +} + +TEST_FF("require that unrelated label gives max-double distance", SingleLabel("unrelated", 1), RankFixture(2, 2, f1, labelFeatureName)) { + EXPECT_EQUAL(std::numeric_limits<feature_t>::max(), f2.getScore(10)); +} + +TEST_FF("require that labeled item raw score can be obtained", SingleLabel("label", 1), RankFixture(2, 2, f1, labelFeatureName)) { + f2.setFooScore(0, 10, 5.0); + EXPECT_EQUAL(5.0, f2.getScore(10)); +} + +TEST_FF("require that field raw score can be obtained", NoLabel(), RankFixture(2, 2, f1, fieldFeatureName)) { + f2.setBarScore(0, 10, 5.0); + EXPECT_EQUAL(5.0, f2.getScore(10)); +} + +TEST_FF("require that other raw scores are ignored", SingleLabel("label", 2), RankFixture(2, 2, f1, labelFeatureName)) { + f2.setFooScore(0, 10, 1.0); + f2.setFooScore(1, 10, 2.0); + f2.setBarScore(0, 10, 5.0); + f2.setBarScore(1, 10, 6.0); + EXPECT_EQUAL(2.0, f2.getScore(10)); +} + +TEST_FF("require that the correct raw score is used", NoLabel(), RankFixture(2, 2, f1, fieldFeatureName)) { + f2.setFooScore(0, 10, 3.0); + f2.setFooScore(1, 10, 4.0); + f2.setBarScore(0, 10, 8.0); + f2.setBarScore(1, 10, 7.0); + EXPECT_EQUAL(7.0, f2.getScore(10)); +} + +TEST_FF("require that stale data is ignored", SingleLabel("label", 2), RankFixture(2, 2, f1, labelFeatureName)) { + f2.setFooScore(0, 10, 1.0); + f2.setFooScore(1, 5, 2.0); + EXPECT_EQUAL(std::numeric_limits<feature_t>::max(), f2.getScore(10)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp index c33fe12fc0c..588fdcf17b7 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -221,12 +221,7 @@ FeatureExecutor & DistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const { if (_use_nns_tensor) { - const search::attribute::IAttributeVector * attr = env.getAttributeContext().getAttribute(_arg_string); - if (attr != nullptr) { - return stash.create<ConvertRawscoreExecutor>(env, _attr_id); - } else { - LOG(warning, "unexpected missing attribute '%s'\n", _arg_string.c_str()); - } + return stash.create<ConvertRawscoreExecutor>(env, _attr_id); } if (_use_item_label) { return stash.create<ConvertRawscoreExecutor>(env, _arg_string); |