diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-01-19 06:28:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-19 06:28:07 +0100 |
commit | 69df7c8ea2aed132ffee975005a42b3d252d02ad (patch) | |
tree | 3ea957419836282baffe58542ddb5b548258a7ec /searchlib | |
parent | 9d99303eaada5669bd159af47edcc30602fc5222 (diff) |
Revert "Test exact nearest neighbor search using mixed tensor."
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp | 176 |
1 files changed, 81 insertions, 95 deletions
diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp index 1062441d455..f02681908d6 100644 --- a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp +++ b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp @@ -6,14 +6,13 @@ #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/feature.h> #include <vespa/searchlib/fef/matchdata.h> -#include <vespa/searchlib/queryeval/global_filter.h> #include <vespa/searchlib/queryeval/nearest_neighbor_iterator.h> #include <vespa/searchlib/queryeval/nns_index_iterator.h> #include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/global_filter.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> #include <vespa/searchlib/tensor/distance_calculator.h> #include <vespa/searchlib/tensor/distance_function_factory.h> -#include <vespa/searchlib/tensor/serialized_fast_value_attribute.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/test/insertion_operators.h> #include <vespa/vespalib/util/stringfmt.h> @@ -30,8 +29,6 @@ using search::feature_t; using search::tensor::DenseTensorAttribute; using search::tensor::DistanceCalculator; using search::tensor::DistanceFunction; -using search::tensor::SerializedFastValueAttribute; -using search::tensor::TensorAttribute; using vespalib::eval::CellType; using vespalib::eval::SimpleValue; using vespalib::eval::TensorSpec; @@ -41,13 +38,8 @@ using vespalib::eval::ValueType; using namespace search::fef; using namespace search::queryeval; -using BasicType = search::attribute::BasicType; -using CollectionType = search::attribute::CollectionType; -using Config = search::attribute::Config; - vespalib::string denseSpecDouble("tensor(x[2])"); vespalib::string denseSpecFloat("tensor<float>(x[2])"); -vespalib::string mixed_spec("tensor(m{},x[2])"); DistanceFunction::UP euclid_d = search::tensor::make_distance_function(DistanceMetric::Euclidean, CellType::DOUBLE); DistanceFunction::UP euclid_f = search::tensor::make_distance_function(DistanceMetric::Euclidean, CellType::FLOAT); @@ -57,45 +49,43 @@ std::unique_ptr<Value> createTensor(const TensorSpec &spec) { } std::unique_ptr<Value> createTensor(const vespalib::string& type_spec, double v1, double v2) { - auto type = vespalib::eval::ValueType::from_spec(type_spec); - if (type.is_dense()) { - return createTensor(TensorSpec(type_spec).add({{"x", 0}}, v1) - .add({{"x", 1}}, v2)); - } else { - return createTensor(TensorSpec(type_spec).add({{"m", "a"},{"x", 0}}, v1) - .add({{"m", "a"},{"x", 1}}, v2)); - } + return createTensor(TensorSpec(type_spec).add({{"x", 0}}, v1) + .add({{"x", 1}}, v2)); } -std::shared_ptr<TensorAttribute> make_attr(const vespalib::string& name, const Config& cfg) { - if (cfg.tensorType().is_dense()) { - return std::make_shared<DenseTensorAttribute>(name, cfg); - } else { - return std::make_shared<SerializedFastValueAttribute>(name, cfg); - } -} +struct Fixture +{ + using BasicType = search::attribute::BasicType; + using CollectionType = search::attribute::CollectionType; + using Config = search::attribute::Config; -struct Fixture { Config _cfg; vespalib::string _name; vespalib::string _typeSpec; - std::shared_ptr<TensorAttribute> _attr; + std::shared_ptr<DenseTensorAttribute> _tensorAttr; + std::shared_ptr<AttributeVector> _attr; std::shared_ptr<GlobalFilter> _global_filter; Fixture(const vespalib::string &typeSpec) : _cfg(BasicType::TENSOR, CollectionType::SINGLE), _name("test"), _typeSpec(typeSpec), + _tensorAttr(), _attr(), _global_filter(GlobalFilter::create()) { _cfg.setTensorType(ValueType::from_spec(typeSpec)); - _attr = make_attr(_name, _cfg); + _tensorAttr = makeAttr(); + _attr = _tensorAttr; _attr->addReservedDoc(); } ~Fixture() {} + std::shared_ptr<DenseTensorAttribute> makeAttr() { + return std::make_shared<DenseTensorAttribute>(_name, _cfg); + } + void ensureSpace(uint32_t docId) { while (_attr->getNumDocs() <= docId) { uint32_t newDocId = 0u; @@ -111,7 +101,7 @@ struct Fixture { void setTensor(uint32_t docId, const Value &tensor) { ensureSpace(docId); - _attr->setTensor(docId, tensor); + _tensorAttr->setTensor(docId, tensor); _attr->commit(); } @@ -133,7 +123,7 @@ template <bool strict> SimpleResult find_matches(Fixture &env, const Value &qtv, double threshold = std::numeric_limits<double>::max()) { auto md = MatchData::makeTestInstance(2, 2); auto &tfmd = *(md->resolveTermField(0)); - auto &attr = *(env._attr); + auto &attr = *(env._tensorAttr); DistanceCalculator dist_calc(attr, qtv, env.dist_fun()); NearestNeighborDistanceHeap dh(2); dh.set_distance_threshold(env.dist_fun().convert_threshold(threshold)); @@ -182,32 +172,12 @@ verify_iterator_returns_expected_results(const vespalib::string& attribute_tenso EXPECT_EQ(result, far_thr4_exp); result = find_matches<false>(fixture, *farTensor, 4.0); EXPECT_EQ(result, far_thr4_exp); -} - -struct TestParam { - vespalib::string attribute_tensor_type_spec; - vespalib::string query_tensor_type_spec; - TestParam(const vespalib::string& attribute_tensor_type_spec_in, - const vespalib::string& query_tensor_type_spec_in) - : attribute_tensor_type_spec(attribute_tensor_type_spec_in), - query_tensor_type_spec(query_tensor_type_spec_in) - {} - ~TestParam() {} -}; - -struct NnsIndexIteratorParameterizedTest : public ::testing::TestWithParam<TestParam> {}; -INSTANTIATE_TEST_SUITE_P(NnsTestSuite, - NnsIndexIteratorParameterizedTest, - ::testing::Values( - TestParam(denseSpecDouble, denseSpecDouble), - TestParam(denseSpecFloat, denseSpecFloat), - TestParam(mixed_spec, denseSpecDouble) - )); +} -TEST_P(NnsIndexIteratorParameterizedTest, require_that_iterator_returns_expected_results) { - auto param = GetParam(); - verify_iterator_returns_expected_results(param.attribute_tensor_type_spec, param.query_tensor_type_spec); +TEST(NnsIndexIteratorTest, require_that_iterator_returns_expected_results) { + verify_iterator_returns_expected_results(denseSpecDouble, denseSpecDouble); + verify_iterator_returns_expected_results(denseSpecFloat, denseSpecFloat); } void @@ -237,16 +207,16 @@ verify_iterator_returns_filtered_results(const vespalib::string& attribute_tenso EXPECT_EQ(result, farExpect); } -TEST_P(NnsIndexIteratorParameterizedTest, require_that_iterator_returns_filtered_results) { - auto param = GetParam(); - verify_iterator_returns_filtered_results(param.attribute_tensor_type_spec, param.query_tensor_type_spec); +TEST(NnsIndexIteratorTest, require_that_iterator_returns_filtered_results) { + verify_iterator_returns_filtered_results(denseSpecDouble, denseSpecDouble); + verify_iterator_returns_filtered_results(denseSpecFloat, denseSpecFloat); } template <bool strict> std::vector<feature_t> get_rawscores(Fixture &env, const Value &qtv) { auto md = MatchData::makeTestInstance(2, 2); auto &tfmd = *(md->resolveTermField(0)); - auto &attr = *(env._attr); + auto &attr = *(env._tensorAttr); DistanceCalculator dist_calc(attr, qtv, env.dist_fun()); NearestNeighborDistanceHeap dh(2); auto dummy_filter = GlobalFilter::create(); @@ -291,29 +261,9 @@ verify_iterator_sets_expected_rawscore(const vespalib::string& attribute_tensor_ } } -TEST_P(NnsIndexIteratorParameterizedTest, require_that_iterator_sets_expected_rawscore) { - auto param = GetParam(); - verify_iterator_sets_expected_rawscore(param.attribute_tensor_type_spec, param.query_tensor_type_spec); -} - -void expect_match(SearchIterator& itr, uint32_t docid) { - bool match = itr.seek(docid); - EXPECT_TRUE(match); - EXPECT_FALSE(itr.isAtEnd()); - EXPECT_EQ(docid, itr.getDocId()); -} - -void expect_not_match(SearchIterator& itr, uint32_t curr_docid, uint32_t exp_next_docid) { - bool match = itr.seek(curr_docid); - EXPECT_FALSE(match); - EXPECT_FALSE(itr.isAtEnd()); - EXPECT_EQ(exp_next_docid, itr.getDocId()); -} - -void expect_at_end(SearchIterator& itr, uint32_t docid) { - bool match = itr.seek(docid); - EXPECT_FALSE(match); - EXPECT_TRUE(itr.isAtEnd()); +TEST(NnsIndexIteratorTest, require_that_iterator_sets_expected_rawscore) { + verify_iterator_sets_expected_rawscore(denseSpecDouble, denseSpecDouble); + verify_iterator_sets_expected_rawscore(denseSpecFloat, denseSpecFloat); } TEST(NnsIndexIteratorTest, require_that_iterator_works_as_expected) { @@ -321,29 +271,65 @@ TEST(NnsIndexIteratorTest, require_that_iterator_works_as_expected) { auto md = MatchData::makeTestInstance(2, 2); auto &tfmd = *(md->resolveTermField(0)); auto search = NnsIndexIterator::create(tfmd, hits, *euclid_d); + uint32_t docid = 1; search->initFullRange(); - expect_not_match(*search, 1, 2); - expect_match(*search, 2); - search->unpack(2); + bool match = search->seek(docid); + EXPECT_FALSE(match); + EXPECT_FALSE(search->isAtEnd()); + EXPECT_EQ(2u, search->getDocId()); + docid = 2; + match = search->seek(docid); + EXPECT_TRUE(match); + EXPECT_FALSE(search->isAtEnd()); + EXPECT_EQ(docid, search->getDocId()); + search->unpack(docid); EXPECT_NEAR(1.0/(1.0+2.0), tfmd.getRawScore(), EPS); - expect_match(*search, 3); - search->unpack(3); + docid = 3; + match = search->seek(docid); + EXPECT_TRUE(match); + EXPECT_FALSE(search->isAtEnd()); + EXPECT_EQ(docid, search->getDocId()); + search->unpack(docid); EXPECT_NEAR(1.0/(1.0+3.0), tfmd.getRawScore(), EPS); - expect_not_match(*search, 4, 5); - expect_not_match(*search, 6, 8); - search->unpack(8); - EXPECT_NEAR(1.0/(1.0+4.0), tfmd.getRawScore(), EPS); + docid = 4; + match = search->seek(docid); + EXPECT_FALSE(match); + EXPECT_FALSE(search->isAtEnd()); + EXPECT_EQ(5u, search->getDocId()); - expect_match(*search, 9); - expect_at_end(*search, 10); + docid = 6; + match = search->seek(docid); + EXPECT_FALSE(match); + EXPECT_FALSE(search->isAtEnd()); + EXPECT_EQ(8u, search->getDocId()); + docid = 8; + search->unpack(docid); + EXPECT_NEAR(1.0/(1.0+4.0), tfmd.getRawScore(), EPS); + docid = 9; + match = search->seek(docid); + EXPECT_TRUE(match); + EXPECT_FALSE(search->isAtEnd()); + docid = 10; + match = search->seek(docid); + EXPECT_FALSE(match); + EXPECT_TRUE(search->isAtEnd()); - search->initRange(4, 7); - expect_not_match(*search, 4, 5); - search->unpack(5); + docid = 4; + search->initRange(docid, 7); + match = search->seek(docid); + EXPECT_FALSE(match); + EXPECT_FALSE(search->isAtEnd()); + EXPECT_EQ(5u, search->getDocId()); + docid = 5; + search->unpack(docid); EXPECT_NEAR(1.0/(1.0+1.0), tfmd.getRawScore(), EPS); - expect_at_end(*search, 6); + EXPECT_FALSE(search->isAtEnd()); + docid = 6; + match = search->seek(docid); + EXPECT_FALSE(match); + EXPECT_TRUE(search->isAtEnd()); } GTEST_MAIN_RUN_ALL_TESTS() |