diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-06-19 15:46:24 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-19 15:46:24 +0200 |
commit | f3955d3cd4d3f01c41f2d5d96e05666e6d0c827d (patch) | |
tree | 28283443527696a87ee196989075276539895a41 | |
parent | 92b73034306ca58f6841f158149bd048bddb374f (diff) | |
parent | a4348c5da0e375b4f73c78771ba5d39ce41abee2 (diff) |
Merge pull request #13640 from vespa-engine/toregge/wire-in-nearest-neighbor-brute-foce-limit
Wire in nearest neighbor brute force limit.
16 files changed, 112 insertions, 11 deletions
diff --git a/searchcore/src/tests/proton/matching/request_context/request_context_test.cpp b/searchcore/src/tests/proton/matching/request_context/request_context_test.cpp index c3338a973c4..3b54768f223 100644 --- a/searchcore/src/tests/proton/matching/request_context/request_context_test.cpp +++ b/searchcore/src/tests/proton/matching/request_context/request_context_test.cpp @@ -3,10 +3,12 @@ #include <vespa/eval/eval/tensor_spec.h> #include <vespa/eval/tensor/default_tensor_engine.h> #include <vespa/searchcore/proton/matching/requestcontext.h> +#include <vespa/searchlib/attribute/attribute_blueprint_params.h> #include <vespa/searchlib/fef/properties.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/objects/nbostream.h> +using search::attribute::AttributeBlueprintParams; using search::attribute::IAttributeContext; using search::attribute::IAttributeFunctor; using search::attribute::IAttributeVector; @@ -45,7 +47,7 @@ public: _doom(_clock, vespalib::steady_time(), vespalib::steady_time(), false), _attr_ctx(), _props(), - _request_ctx(_doom, _attr_ctx, _props), + _request_ctx(_doom, _attr_ctx, _props, AttributeBlueprintParams()), _query_tensor(DefaultTensorEngine::ref().from_spec(TensorSpec("tensor(x[2])") .add({{"x", 0}}, 3).add({{"x", 1}}, 5))) { diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index f19b416b92f..fadea4b7962 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -9,6 +9,7 @@ #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/searchlib/attribute/diversity.h> #include <vespa/searchlib/attribute/attribute_operation.h> +#include <vespa/searchlib/attribute/attribute_blueprint_params.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/log/log.h> @@ -19,6 +20,7 @@ using search::queryeval::IRequestContext; using search::queryeval::IDiversifier; using search::attribute::diversity::DiversityFilter; using search::attribute::BasicType; +using search::attribute::AttributeBlueprintParams; using namespace search::fef; using namespace search::fef::indexproperties::matchphase; @@ -64,6 +66,12 @@ extractDiversityParams(const RankSetup &rankSetup, const Properties &rankPropert AttributeLimiter::toDiversityCutoffStrategy(DiversityCutoffStrategy::lookup(rankProperties, rankSetup.getDiversityCutoffStrategy()))); } +AttributeBlueprintParams +extractAttributeBlueprintParams(const RankSetup& rank_setup, const Properties &rankProperties) +{ + return AttributeBlueprintParams(NearestNeighborBruteForceLimit::lookup(rankProperties, rank_setup.get_nearest_neighbor_brute_force_limit())); +} + } // namespace proton::matching::<unnamed> void @@ -161,7 +169,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter, const Properties & rankProperties, const Properties & featureOverrides) : _queryLimiter(queryLimiter), - _requestContext(doom, attributeContext, rankProperties), + _requestContext(doom, attributeContext, rankProperties, extractAttributeBlueprintParams(rankSetup, rankProperties)), _query(), _match_limiter(), _queryEnv(indexEnv, attributeContext, rankProperties, searchContext.getIndexes()), diff --git a/searchcore/src/vespa/searchcore/proton/matching/requestcontext.cpp b/searchcore/src/vespa/searchcore/proton/matching/requestcontext.cpp index c30854c051f..86afeb5de24 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/requestcontext.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/requestcontext.cpp @@ -15,10 +15,12 @@ namespace proton { using search::attribute::IAttributeVector; RequestContext::RequestContext(const Doom & doom, IAttributeContext & attributeContext, - const search::fef::Properties& rank_properties) + const search::fef::Properties& rank_properties, + const search::attribute::AttributeBlueprintParams& attribute_blueprint_params) : _doom(doom), _attributeContext(attributeContext), - _rank_properties(rank_properties) + _rank_properties(rank_properties), + _attribute_blueprint_params(attribute_blueprint_params) { } @@ -57,4 +59,10 @@ RequestContext::get_query_tensor(const vespalib::string& tensor_name) const return vespalib::eval::Value::UP(); } +const search::attribute::AttributeBlueprintParams& +RequestContext::get_attribute_blueprint_params() const +{ + return _attribute_blueprint_params; +} + } diff --git a/searchcore/src/vespa/searchcore/proton/matching/requestcontext.h b/searchcore/src/vespa/searchcore/proton/matching/requestcontext.h index 31d3d573a20..ada11167983 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/requestcontext.h +++ b/searchcore/src/vespa/searchcore/proton/matching/requestcontext.h @@ -5,6 +5,7 @@ #include <vespa/eval/tensor/tensor.h> #include <vespa/searchlib/queryeval/irequestcontext.h> #include <vespa/searchcommon/attribute/iattributecontext.h> +#include <vespa/searchlib/attribute/attribute_blueprint_params.h> #include <vespa/vespalib/util/doom.h> namespace search::fef { class Properties; } @@ -19,7 +20,8 @@ public: using IAttributeFunctor = search::attribute::IAttributeFunctor; using Doom = vespalib::Doom; RequestContext(const Doom & softDoom, IAttributeContext & attributeContext, - const search::fef::Properties& rank_properties); + const search::fef::Properties& rank_properties, + const search::attribute::AttributeBlueprintParams& attribute_blueprint_params); const Doom & getDoom() const override { return _doom; } const search::attribute::IAttributeVector *getAttribute(const vespalib::string &name) const override; @@ -30,11 +32,13 @@ public: vespalib::eval::Value::UP get_query_tensor(const vespalib::string& tensor_name) const override; + const search::attribute::AttributeBlueprintParams& get_attribute_blueprint_params() const override; private: const Doom _doom; IAttributeContext & _attributeContext; const search::fef::Properties & _rank_properties; + search::attribute::AttributeBlueprintParams _attribute_blueprint_params; }; } diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 0a8b59c7d7e..6608959662f 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -893,7 +893,7 @@ public: field, as_dense_tensor(), createDenseTensor(vec_2d(17, 42)), - 3, true, 5); + 3, true, 5, 0.05); EXPECT_EQUAL(11u, bp->getState().estimate().estHits); EXPECT_TRUE(bp->may_approximate()); return bp; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 74a23db8b95..84bed6bb5e9 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -5,6 +5,7 @@ #include "i_document_weight_attribute.h" #include "iterator_pack.h" #include "predicate_attribute.h" +#include "attribute_blueprint_params.h" #include <vespa/eval/eval/value.h> #include <vespa/eval/tensor/dense/dense_tensor_view.h> #include <vespa/searchlib/common/location.h> @@ -655,7 +656,8 @@ public: std::move(dense_query_tensor_up), n.get_target_num_hits(), n.get_allow_approximate(), - n.get_explore_additional_hits())); + n.get_explore_additional_hits(), + getRequestContext().get_attribute_blueprint_params().nearest_neighbor_brute_force_limit)); } }; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h new file mode 100644 index 00000000000..4378858415a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h @@ -0,0 +1,25 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search::attribute { + +/** + * Parameters for attribute blueprints from rank profile and query. + */ +struct AttributeBlueprintParams +{ + double nearest_neighbor_brute_force_limit; + + AttributeBlueprintParams(double nearest_neighbor_brute_force_limit_in) + : nearest_neighbor_brute_force_limit(nearest_neighbor_brute_force_limit_in) + { + } + + AttributeBlueprintParams() + : AttributeBlueprintParams(0.05) + { + } +}; + +} diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 5e7523f53c5..fb44b986301 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -274,6 +274,22 @@ MinHitsPerThread::lookup(const Properties &props, uint32_t defaultValue) return lookupUint32(props, NAME, defaultValue); } +const vespalib::string NearestNeighborBruteForceLimit::NAME("vespa.matching.nearest_neighbor.brute_force_limit"); + +const double NearestNeighborBruteForceLimit::DEFAULT_VALUE(0.05); + +double +NearestNeighborBruteForceLimit::lookup(const Properties &props) +{ + return lookup(props, DEFAULT_VALUE); +} + +double +NearestNeighborBruteForceLimit::lookup(const Properties &props, double defaultValue) +{ + return lookupDouble(props, NAME, defaultValue); +} + } // namespace matching namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 9fa28bfaff2..30c726caeba 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -204,6 +204,20 @@ namespace matching { static uint32_t lookup(const Properties &props); static uint32_t lookup(const Properties &props, uint32_t defaultValue); }; + + /** + * Property to control fallback to brute force search for nearest + * neighbor query terms. If the ratio of candidates in the global + * filter (which tracks the documents that can match the query + * based on the other parts of the query) is less than this limit + * then use brute force search. + **/ + struct NearestNeighborBruteForceLimit { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + static double lookup(const Properties &props, double defaultValue); + }; } namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 88f4a07d95d..e197f095852 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -61,7 +61,8 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _diversityCutoffStrategy("loose"), _softTimeoutEnabled(false), _softTimeoutTailCost(0.1), - _softTimeoutFactor(0.5) + _softTimeoutFactor(0.5), + _nearest_neighbor_brute_force_limit(0.05) { } RankSetup::~RankSetup() = default; @@ -104,6 +105,7 @@ RankSetup::configure() setSoftTimeoutEnabled(softtimeout::Enabled::lookup(_indexEnv.getProperties())); setSoftTimeoutTailCost(softtimeout::TailCost::lookup(_indexEnv.getProperties())); setSoftTimeoutFactor(softtimeout::Factor::lookup(_indexEnv.getProperties())); + set_nearest_neighbor_brute_force_limit(matching::NearestNeighborBruteForceLimit::lookup(_indexEnv.getProperties())); } void diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index e1cd78d41a9..ad793eeaceb 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -59,6 +59,7 @@ private: bool _softTimeoutEnabled; double _softTimeoutTailCost; double _softTimeoutFactor; + double _nearest_neighbor_brute_force_limit; public: @@ -365,6 +366,9 @@ public: void setSoftTimeoutFactor(double v) { _softTimeoutFactor = v; } double getSoftTimeoutFactor() const { return _softTimeoutFactor; } + void set_nearest_neighbor_brute_force_limit(double v) { _nearest_neighbor_brute_force_limit = v; } + double get_nearest_neighbor_brute_force_limit() const { return _nearest_neighbor_brute_force_limit; } + /** * This method may be used to indicate that certain features * should be dumped during a full feature dump. diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp index 28af2c14781..6c9e516879d 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp @@ -9,10 +9,17 @@ FakeRequestContext::FakeRequestContext(attribute::IAttributeContext * context, v _doom(_clock, softDoom, hardDoom, false), _attributeContext(context), _query_tensor_name(), - _query_tensor() + _query_tensor(), + _attribute_blueprint_params() { } FakeRequestContext::~FakeRequestContext() = default; +const search::attribute::AttributeBlueprintParams& +FakeRequestContext::get_attribute_blueprint_params() const +{ + return _attribute_blueprint_params; +} + } diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h index 3de464224f9..94257363c60 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h +++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h @@ -7,6 +7,7 @@ #include <vespa/eval/tensor/default_tensor_engine.h> #include <vespa/searchcommon/attribute/iattributecontext.h> #include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/attribute_blueprint_params.h> #include <vespa/searchlib/queryeval/irequestcontext.h> #include <vespa/vespalib/util/doom.h> #include <limits> @@ -42,12 +43,15 @@ public: _query_tensor = std::make_unique<vespalib::eval::TensorSpec>(tensor_spec); } + const search::attribute::AttributeBlueprintParams& get_attribute_blueprint_params() const override; + private: vespalib::Clock _clock; const vespalib::Doom _doom; attribute::IAttributeContext *_attributeContext; vespalib::string _query_tensor_name; std::unique_ptr<vespalib::eval::TensorSpec> _query_tensor; + search::attribute::AttributeBlueprintParams _attribute_blueprint_params; }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h b/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h index 037d72f1bbf..935c703d0a7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h +++ b/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h @@ -4,6 +4,7 @@ #include <vespa/vespalib/stllike/string.h> +namespace search::attribute { struct AttributeBlueprintParams; } namespace search::attribute { class IAttributeVector; } namespace vespalib::eval { struct Value; } namespace vespalib { class Doom; } @@ -36,6 +37,8 @@ public: * Returns nullptr if the tensor is not found or if it is not a tensor. */ virtual std::unique_ptr<vespalib::eval::Value> get_query_tensor(const vespalib::string& tensor_name) const = 0; + + virtual const search::attribute::AttributeBlueprintParams& get_attribute_blueprint_params() const = 0; }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index d8b63909142..fcf8b78056d 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -53,13 +53,14 @@ struct ConvertCellsSelector NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& field, const tensor::DenseTensorAttribute& attr_tensor, std::unique_ptr<vespalib::tensor::DenseTensorView> query_tensor, - uint32_t target_num_hits, bool approximate, uint32_t explore_additional_hits) + uint32_t target_num_hits, bool approximate, uint32_t explore_additional_hits, double brute_force_limit) : ComplexLeafBlueprint(field), _attr_tensor(attr_tensor), _query_tensor(std::move(query_tensor)), _target_num_hits(target_num_hits), _approximate(approximate), _explore_additional_hits(explore_additional_hits), + _brute_force_limit(brute_force_limit), _fallback_dist_fun(), _distance_heap(target_num_hits), _found_hits(), diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h index 3e402b46a43..8656e5b4bf2 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h @@ -24,6 +24,7 @@ private: uint32_t _target_num_hits; bool _approximate; uint32_t _explore_additional_hits; + double _brute_force_limit; search::tensor::DistanceFunction::UP _fallback_dist_fun; const search::tensor::DistanceFunction *_dist_fun; mutable NearestNeighborDistanceHeap _distance_heap; @@ -35,7 +36,7 @@ public: NearestNeighborBlueprint(const queryeval::FieldSpec& field, const tensor::DenseTensorAttribute& attr_tensor, std::unique_ptr<vespalib::tensor::DenseTensorView> query_tensor, - uint32_t target_num_hits, bool approximate, uint32_t explore_additional_hits); + uint32_t target_num_hits, bool approximate, uint32_t explore_additional_hits, double brute_force_limit); NearestNeighborBlueprint(const NearestNeighborBlueprint&) = delete; NearestNeighborBlueprint& operator=(const NearestNeighborBlueprint&) = delete; ~NearestNeighborBlueprint(); |