diff options
21 files changed, 806 insertions, 165 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 290ac63939b..413e74adab9 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -127,6 +127,7 @@ vespa_define_module( src/tests/features/element_completeness src/tests/features/element_similarity_feature src/tests/features/euclidean_distance + src/tests/features/imported_dot_product src/tests/features/item_raw_score src/tests/features/native_dot_product src/tests/features/ranking_expression diff --git a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp index b0d97902728..b947f1a71e2 100644 --- a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp +++ b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp @@ -76,7 +76,7 @@ struct ExecFixture for (const auto &attr : attrs) { attr->addReservedDoc(); attr->addDocs(1); - test.getIndexEnv().getAttributeManager().add(attr); + test.getIndexEnv().getAttributeMap().add(attr); } IntegerAttribute *aint = static_cast<IntegerAttribute *>(attrs[0].get()); diff --git a/searchlib/src/tests/features/featurebenchmark.cpp b/searchlib/src/tests/features/featurebenchmark.cpp index ed8af1cdf14..a20ec88fef3 100644 --- a/searchlib/src/tests/features/featurebenchmark.cpp +++ b/searchlib/src/tests/features/featurebenchmark.cpp @@ -356,7 +356,7 @@ Benchmark::runAttributeMatch(Config & cfg) FtFeatureTest ft(_factory, feature); ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); - ft.getIndexEnv().getAttributeManager().add(createAttributeVector("foo", "single", numDocs, 10, 10)); + ft.getIndexEnv().getAttributeMap().add(createAttributeVector("foo", "single", numDocs, 10, 10)); ft.getQueryEnv().getBuilder().addAttributeNode("foo"); setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); ASSERT_TRUE(ft.setup()); @@ -398,7 +398,7 @@ Benchmark::runAttribute(Config & cfg) FtFeatureTest ft(_factory, feature); ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); - ft.getIndexEnv().getAttributeManager().add(createStringAttributeVector("foo", "wset", numDocs, values)); + ft.getIndexEnv().getAttributeMap().add(createStringAttributeVector("foo", "wset", numDocs, values)); ASSERT_TRUE(ft.setup()); MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); @@ -438,15 +438,15 @@ Benchmark::runDotProduct(Config & cfg) values.add(vespalib::make_string("str%u", i)); } - ft.getIndexEnv().getAttributeManager().add(createStringAttributeVector("wsstr", collectionType, numDocs, values)); + ft.getIndexEnv().getAttributeMap().add(createStringAttributeVector("wsstr", collectionType, numDocs, values)); } else if (dataType == "int") { - ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::INT32, "wsstr", collectionType, numDocs, 0, numValues)); + ft.getIndexEnv().getAttributeMap().add(createAttributeVector(AVBT::INT32, "wsstr", collectionType, numDocs, 0, numValues)); } else if (dataType == "long") { - ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::INT64, "wsstr", collectionType, numDocs, 0, numValues)); + ft.getIndexEnv().getAttributeMap().add(createAttributeVector(AVBT::INT64, "wsstr", collectionType, numDocs, 0, numValues)); } else if (dataType == "float") { - ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::FLOAT, "wsstr", collectionType, numDocs, 0, numValues)); + ft.getIndexEnv().getAttributeMap().add(createAttributeVector(AVBT::FLOAT, "wsstr", collectionType, numDocs, 0, numValues)); } else if (dataType == "double") { - ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::DOUBLE, "wsstr", collectionType, numDocs, 0, numValues)); + ft.getIndexEnv().getAttributeMap().add(createAttributeVector(AVBT::DOUBLE, "wsstr", collectionType, numDocs, 0, numValues)); } else { std::cerr << "Illegal data type '" << dataType << std::endl; } diff --git a/searchlib/src/tests/features/imported_dot_product/CMakeLists.txt b/searchlib/src/tests/features/imported_dot_product/CMakeLists.txt new file mode 100644 index 00000000000..1216b7e8906 --- /dev/null +++ b/searchlib/src/tests/features/imported_dot_product/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_imported_dot_product_test_app TEST + SOURCES + imported_dot_product_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_imported_dot_product_test_app COMMAND searchlib_imported_dot_product_test_app) diff --git a/searchlib/src/tests/features/imported_dot_product/imported_dot_product_test.cpp b/searchlib/src/tests/features/imported_dot_product/imported_dot_product_test.cpp new file mode 100644 index 00000000000..a01b1ad1575 --- /dev/null +++ b/searchlib/src/tests/features/imported_dot_product/imported_dot_product_test.cpp @@ -0,0 +1,233 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/features/dotproductfeature.h> +#include <vespa/searchlib/test/imported_attribute_fixture.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/fef/test/rankresult.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using namespace search; +using namespace search::attribute; +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +template <typename T> +std::unique_ptr<fef::Anything> create_param(const vespalib::string& param) { + Properties props; + props.add("foo", param); + return std::make_unique<dotproduct::ArrayParam<T>>(props.lookup("foo")); +} + +struct FixtureBase : ImportedAttributeFixture { + + BlueprintFactory _factory; + FixtureBase() { + DotProductBlueprint bp; + _factory.addPrototype(bp.createInstance()); + } + + // Both array and wset attributes can have integer "key" types, so we let specific + // sub-fixtures implement the mappings. + virtual void setup_integer_mappings(BasicType int_type) = 0; + + void check_single_execution(feature_t expected, + const vespalib::string& vector, + DocId doc_id, + std::unique_ptr<fef::Anything> pre_parsed = std::unique_ptr<fef::Anything>()) { + RankResult result; + result.addScore("dotProduct(" + imported_attr->getName() + ",vector)", expected); + result.setEpsilon(0.00001); + FtFeatureTest feature(_factory, result.getKeys()); + + feature.getQueryEnv().getProperties().add("dotProduct.vector", vector); + if (pre_parsed) { + feature.getQueryEnv().getObjectStore().add("dotProduct.vector.object", std::move(pre_parsed)); + } + feature.getIndexEnv().getAttributeMap().add(imported_attr); + fef::CollectionType collection_type( + (imported_attr->getCollectionType() == attribute::CollectionType::ARRAY) + ? fef::CollectionType::ARRAY : fef::CollectionType::WEIGHTEDSET); + feature.getIndexEnv().getBuilder().addField( + FieldType::ATTRIBUTE, collection_type, imported_attr->getName()); + ASSERT_TRUE(feature.setup()); + EXPECT_TRUE(feature.execute(result, doc_id)); + } + + template <typename BaseFullWidthType, typename PerTypeSetupFunctor> + void check_executions(PerTypeSetupFunctor setup_func, + const std::vector<BasicType>& types, + feature_t expected, + const vespalib::string& vector, + DocId doc_id, + const vespalib::string& shared_param = "") { + for (auto type : types) { + setup_func(type); + std::unique_ptr<fef::Anything> pre_parsed; + if (!shared_param.empty()) { + pre_parsed = create_param<BaseFullWidthType>(shared_param); + } + check_single_execution(expected, vector, doc_id, std::move(pre_parsed)); + } + } + + void check_all_integer_executions(feature_t expected, + const vespalib::string& vector, + DocId doc_id, + const vespalib::string& shared_param = "") { + check_executions<int64_t>([this](auto int_type){ this->setup_integer_mappings(int_type); }, + {{BasicType::INT32, BasicType::INT64}}, + expected, vector, doc_id, shared_param); + } +}; + +struct ArrayFixture : FixtureBase { + + void setup_integer_mappings(BasicType int_type) override { + reset_with_array_value_reference_mappings<IntegerAttribute, int64_t>( + int_type, + {{DocId(1), dummy_gid(3), DocId(3), {{2, 3, 5}}}, + {DocId(3), dummy_gid(7), DocId(7), {{7, 11}}}, + {DocId(5), dummy_gid(8), DocId(8), {{13, 17, 19, 23}}}}); + } + + void setup_float_mappings(BasicType float_type) { + reset_with_array_value_reference_mappings<FloatingPointAttribute, double>( + float_type, + {{DocId(2), dummy_gid(4), DocId(4), {{2.2, 3.3, 5.5}}}, + {DocId(4), dummy_gid(8), DocId(8), {{7.7, 11.11}}}, + {DocId(6), dummy_gid(9), DocId(9), {{13.1, 17.2, 19.3, 23.4}}}}); + } + + template <typename ExpectedType> + void check_prepare_state_output(const vespalib::string& input_vector) { + FtFeatureTest feature(_factory, ""); + DotProductBlueprint bp; + DummyDependencyHandler dependency_handler(bp); + ParameterList params({Parameter(ParameterType::ATTRIBUTE, imported_attr->getName()), + Parameter(ParameterType::STRING, "fancyvector")}); + + feature.getIndexEnv().getAttributeMap().add(imported_attr); + feature.getIndexEnv().getBuilder().addField( + FieldType::ATTRIBUTE, fef::CollectionType::ARRAY, imported_attr->getName()); + + bp.setup(feature.getIndexEnv(), params); + feature.getQueryEnv().getProperties().add("dotProduct.fancyvector", input_vector); + auto& obj_store = feature.getQueryEnv().getObjectStore(); + bp.prepareSharedState(feature.getQueryEnv(), obj_store); + // Resulting name is very implementation defined. But at least the tests will break if it changes. + const auto* parsed = obj_store.get("dotProduct.fancyvector.object"); + ASSERT_TRUE(parsed != nullptr); + const auto* as_object = dynamic_cast<const ExpectedType*>(parsed); + ASSERT_TRUE(as_object != nullptr); + // We don't test the parsed output values here; that's the responsibility of other tests. + } + + void check_all_float_executions(feature_t expected, + const vespalib::string& vector, + DocId doc_id, + const vespalib::string& shared_param = "") { + check_executions<double>([this](auto float_type){ this->setup_float_mappings(float_type); }, + {{BasicType::FLOAT, BasicType::DOUBLE}}, + expected, vector, doc_id, shared_param); + } +}; + +TEST_F("Dense i32/i64 array dot products can be evaluated with string parameter", ArrayFixture) { + f.check_all_integer_executions(2*2 + 3*3 + 5*4, "[2 3 4]", DocId(1)); +} + +TEST_F("Dense float/double array dot products can be evaluated with string parameter", ArrayFixture) { + f.check_all_float_executions(2.2*7.7 + 3.3*11.11 + 5.5*13.13, "[7.7 11.11 13.13]", DocId(2)); +} + +TEST_F("Zero-length i32/i64 array query vector evaluates to zero", ArrayFixture) { + f.check_all_integer_executions(0, "[]", DocId(1)); +} + +TEST_F("Zero-length float/double array query vector evaluates to zero", ArrayFixture) { + f.check_all_float_executions(0, "[]", DocId(1)); +} + +TEST_F("prepareSharedState emits i64 vector for i32 imported attribute", ArrayFixture) { + f.setup_integer_mappings(BasicType::INT32); + f.template check_prepare_state_output<dotproduct::ArrayParam<int64_t>>("[101 202 303]"); +} + +TEST_F("prepareSharedState emits i64 vector for i64 imported attribute", ArrayFixture) { + f.setup_integer_mappings(BasicType::INT64); + f.template check_prepare_state_output<dotproduct::ArrayParam<int64_t>>("[101 202 303]"); +} + +TEST_F("prepareSharedState emits double vector for float imported attribute", ArrayFixture) { + f.setup_float_mappings(BasicType::FLOAT); + f.template check_prepare_state_output<dotproduct::ArrayParam<double>>("[10.1 20.2 30.3]"); +} + +TEST_F("prepareSharedState emits double vector for double imported attribute", ArrayFixture) { + f.setup_float_mappings(BasicType::DOUBLE); + f.template check_prepare_state_output<dotproduct::ArrayParam<double>>("[10.1 20.2 30.3]"); +} + +TEST_F("Dense i32/i64 array dot product can be evaluated with pre-parsed object parameter", ArrayFixture) { + f.check_all_integer_executions(2*5 + 3*6 + 5*7, "[2 3 4]", DocId(1), "[5 6 7]"); // String input is ignored in favor of stored object +} + +TEST_F("Dense float/double array dot product can be evaluated with pre-parsed object parameter", ArrayFixture) { + f.check_all_float_executions(2.2*7.7 + 3.3*11.11 + 5.5*13.13, "[2.0 3.0 4.0]", DocId(2), "[7.7 11.11 13.13]"); +} + +TEST_F("Sparse i32/i64 array dot products can be evaluated with string parameter", ArrayFixture) { + // Have an outlier index to prevent auto-flattening of sparse input + f.check_all_integer_executions(2*13 + 4*23, "{0:2,3:4,50:100}", DocId(5)); +} + +TEST_F("Sparse float/double array dot products can be evaluated with string parameter", ArrayFixture) { + f.check_all_float_executions(2.5*13.1 + 4.25*23.4, "{0:2.5,3:4.25,50:100.1}", DocId(6)); +} + +TEST_F("Sparse i32/i64 array dot products can be evaluated with pre-parsed object parameter", ArrayFixture) { + // As before, we cheat a bit by having a different raw string vector than the pre-parsed vector. + f.check_all_integer_executions(2*13 + 4*23, "[0 0 0]", DocId(5), "{0:2,3:4,50:100}"); +} + +TEST_F("Sparse float/double array dot products can be evaluated with pre-parsed object parameter", ArrayFixture) { + f.check_all_float_executions(2.5*13.1 + 4.25*23.4, "[0 0 0]", DocId(6), "{0:2.5,3:4.25,50:100.1}"); +} + +struct WsetFixture : FixtureBase { + void setup_integer_mappings(BasicType int_type) override { + const std::vector<WeightedInt> doc7_values({WeightedInt(200, 7), WeightedInt(300, 13)}); + reset_with_wset_value_reference_mappings<IntegerAttribute, WeightedInt>( + int_type, + {{DocId(3), dummy_gid(7), DocId(7), doc7_values}}); + } +}; + +TEST_F("i32/i64 wset dot products can be evaluated with string parameter", WsetFixture) { + f.check_all_integer_executions(21*7 + 19*13, "{200:21,300:19,999:1234}", DocId(3)); +} + +TEST_F("string wset dot products can be evaluated with string parameter", WsetFixture) { + std::vector<WeightedString> doc7_values{{WeightedString("bar", 7), WeightedString("baz", 41)}}; + reset_with_wset_value_reference_mappings<StringAttribute, WeightedString>( + f, BasicType::STRING, + {{DocId(3), dummy_gid(7), DocId(7), doc7_values}}); + f.check_single_execution(5*7 + 3*41, "{bar:5,baz:3,nosuchkey:1234}", DocId(3)); +} + +TEST_F("integer enum dot products can be evaluated with string parameter", WsetFixture) { + const std::vector<WeightedInt> doc7_values({WeightedInt(200, 7), WeightedInt(300, 13)}); + // We only check i32 here, since the enum (fast search) aspect is what matters here. + reset_with_wset_value_reference_mappings<IntegerAttribute, WeightedInt>( + f, BasicType::INT32, + {{DocId(3), dummy_gid(7), DocId(7), doc7_values}}, + FastSearchConfig::ExplicitlyEnabled); + f.check_single_execution(21*7 + 19*13, "{200:21,300:19,999:1234}", DocId(3)); +} + +// Observed TODOs out of scope for these tests: +// - pre-parsed vectors not currently implemented for weighted sets. +// - non-imported cases should also be tested for prepareSharedState. + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 4efd140b871..3bdb1faa240 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -209,7 +209,7 @@ Test::setupForAgeTest(FtFeatureTest & ft, uint64_t docTime) ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "doctime"); doctime->addReservedDoc(); doctime->addDocs(1); - ft.getIndexEnv().getAttributeManager().add(doctime); + ft.getIndexEnv().getAttributeMap().add(doctime); (static_cast<IntegerAttribute *>(doctime.get()))->update(1, docTime); doctime->commit(); } @@ -395,7 +395,7 @@ Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env) for (uint32_t i = 0; i < avs.size(); ++i) { avs[i]->addReservedDoc(); avs[i]->addDocs(1); - ft.getIndexEnv().getAttributeManager().add(avs[i]); + ft.getIndexEnv().getAttributeMap().add(avs[i]); } // integer attributes @@ -831,7 +831,7 @@ Test::testDistance() FtFeatureTest ft(_factory, "distance(pos)"); AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::FLOAT, AVCT::SINGLE)); pos->commit(); - ft.getIndexEnv().getAttributeManager().add(pos); + ft.getIndexEnv().getAttributeMap().add(pos); ft.getQueryEnv().getLocation().setValid(true); ASSERT_TRUE(ft.setup()); ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); @@ -840,7 +840,7 @@ Test::testDistance() FtFeatureTest ft(_factory, "distance(pos)"); AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::STRING, AVCT::SINGLE)); pos->commit(); - ft.getIndexEnv().getAttributeManager().add(pos); + ft.getIndexEnv().getAttributeMap().add(pos); ft.getQueryEnv().getLocation().setValid(true); ASSERT_TRUE(ft.setup()); ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); @@ -849,7 +849,7 @@ Test::testDistance() FtFeatureTest ft(_factory, "distance(pos)"); AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::INT64, AVCT::WSET)); pos->commit(); - ft.getIndexEnv().getAttributeManager().add(pos); + ft.getIndexEnv().getAttributeMap().add(pos); ft.getQueryEnv().getLocation().setValid(true); ASSERT_TRUE(ft.setup()); ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); @@ -866,7 +866,7 @@ Test::setupForDistanceTest(FtFeatureTest &ft, const vespalib::string & attrName, pos->addReservedDoc(); pos->addDocs(1); - ft.getIndexEnv().getAttributeManager().add(pos); + ft.getIndexEnv().getAttributeMap().add(pos); IntegerAttribute * ia = static_cast<IntegerAttribute *>(pos.get()); for (uint32_t i = 0; i < positions.size(); ++i) { @@ -984,7 +984,7 @@ Test::testDistanceToPath() FtFeatureTest ft(_factory, "distanceToPath(pos)"); AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::FLOAT, AVCT::SINGLE)); att->commit(); - ft.getIndexEnv().getAttributeManager().add(att); + ft.getIndexEnv().getAttributeMap().add(att); ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); ASSERT_TRUE(ft.setup()); ASSERT_TRUE(ft.execute(res)); @@ -994,7 +994,7 @@ Test::testDistanceToPath() FtFeatureTest ft(_factory, "distanceToPath(pos)"); AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::STRING, AVCT::SINGLE)); att->commit(); - ft.getIndexEnv().getAttributeManager().add(att); + ft.getIndexEnv().getAttributeMap().add(att); ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); ASSERT_TRUE(ft.setup()); ASSERT_TRUE(ft.execute(res)); @@ -1004,7 +1004,7 @@ Test::testDistanceToPath() FtFeatureTest ft(_factory, "distanceToPath(pos)"); AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::INT64, AVCT::WSET)); att->commit(); - ft.getIndexEnv().getAttributeManager().add(att); + ft.getIndexEnv().getAttributeMap().add(att); ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); ASSERT_TRUE(ft.setup()); ASSERT_TRUE(ft.execute(res)); @@ -1037,7 +1037,7 @@ Test::setupForDocumentTest(FtFeatureTest &ft, const vespalib::string & attrName, type->addReservedDoc(); type->addDocs(1); - ft.getIndexEnv().getAttributeManager().add(type); + ft.getIndexEnv().getAttributeMap().add(type); (static_cast<StringAttribute *>(type.get()))->update(1, docType); type->commit(); @@ -1066,8 +1066,8 @@ Test::testDotProduct() { // string enum vector FtFeatureTest ft(_factory, "value(0)"); setupForDotProductTest(ft); - search::AttributeGuard::UP ag(ft.getIndexEnv().getAttributeManager().getAttribute("wsstr")); - const search::attribute::IAttributeVector * sv = ag->operator->(); + const search::attribute::IAttributeVector * sv(ft.getIndexEnv().getAttributeMap().getAttribute("wsstr")); + ASSERT_TRUE(sv != nullptr); EXPECT_TRUE(sv->hasEnum()); search::attribute::EnumHandle e; { @@ -1271,7 +1271,7 @@ Test::setupForDotProductTest(FtFeatureTest & ft) cfg.name); baf->addReservedDoc(); baf->addDocs(2); - ft.getIndexEnv().getAttributeManager().add(baf); + ft.getIndexEnv().getAttributeMap().add(baf); for (size_t i(1); i < 6; i++) { IntegerAttribute * ia = dynamic_cast<IntegerAttribute *>(baf.get()); if (ia) { @@ -1288,9 +1288,9 @@ Test::setupForDotProductTest(FtFeatureTest & ft) c->addReservedDoc(); a->addDocs(2); c->addDocs(2); - ft.getIndexEnv().getAttributeManager().add(a); - ft.getIndexEnv().getAttributeManager().add(c); - ft.getIndexEnv().getAttributeManager().add(d); + ft.getIndexEnv().getAttributeMap().add(a); + ft.getIndexEnv().getAttributeMap().add(c); + ft.getIndexEnv().getAttributeMap().add(d); StringAttribute * sa = static_cast<StringAttribute *>(a.get()); sa->append(1, "a", 1); diff --git a/searchlib/src/tests/features/prod_features_attributematch.cpp b/searchlib/src/tests/features/prod_features_attributematch.cpp index fc69061b4ef..7ccfd1dea1a 100644 --- a/searchlib/src/tests/features/prod_features_attributematch.cpp +++ b/searchlib/src/tests/features/prod_features_attributematch.cpp @@ -283,8 +283,8 @@ Test::testAttributeMatch() AttributePtr wint = AttributeFactory::createAttribute("wint", AVC(AVBT::INT32, AVCT::WSET)); aint->addReservedDoc(); wint->addReservedDoc(); - ft.getIndexEnv().getAttributeManager().add(aint); - ft.getIndexEnv().getAttributeManager().add(wint); + ft.getIndexEnv().getAttributeMap().add(aint); + ft.getIndexEnv().getAttributeMap().add(wint); aint->addDocs(1); aint->commit(); ASSERT_TRUE(aint->getValueCount(0) == 0); diff --git a/searchlib/src/tests/features/tensor/tensor_test.cpp b/searchlib/src/tests/features/tensor/tensor_test.cpp index 34a5df23395..a2477b1cc8e 100644 --- a/searchlib/src/tests/features/tensor/tensor_test.cpp +++ b/searchlib/src/tests/features/tensor/tensor_test.cpp @@ -111,7 +111,7 @@ struct ExecFixture attr->clearDoc(1); attr->clearDoc(2); attr->commit(); - test.getIndexEnv().getAttributeManager().add(attr); + test.getIndexEnv().getAttributeMap().add(attr); } TensorAttribute *tensorAttr = diff --git a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp index 6f413da004c..565184cb7e5 100644 --- a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp +++ b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp @@ -99,7 +99,7 @@ struct ExecFixture for (const auto &attr : attrs) { attr->addReservedDoc(); attr->addDocs(1); - test.getIndexEnv().getAttributeManager().add(attr); + test.getIndexEnv().getAttributeMap().add(attr); } StringAttribute *astr = static_cast<StringAttribute *>(attrs[0].get()); diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp index d8dec88c418..171c7791877 100644 --- a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp +++ b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp @@ -100,7 +100,7 @@ struct ExecFixture for (const auto &attr : attrs) { attr->addReservedDoc(); attr->addDocs(1); - test.getIndexEnv().getAttributeManager().add(attr); + test.getIndexEnv().getAttributeMap().add(attr); } StringAttribute *wsstr = static_cast<StringAttribute *>(attrs[0].get()); diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp index 6b1e4d0d57d..d5097e70479 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp @@ -6,10 +6,13 @@ #include "array_parser.hpp" #include <vespa/searchlib/fef/properties.h> #include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/imported_attribute_vector.h> #include <vespa/searchlib/attribute/floatbase.h> #include <vespa/searchlib/attribute/multinumericattribute.h> +#include <type_traits> #include <vespa/log/log.h> + LOG_SETUP(".features.dotproduct"); using namespace search::attribute; @@ -61,12 +64,31 @@ StringVector::~StringVector() { } namespace array { +template <typename BaseType> +DotProductExecutorBase<BaseType>::DotProductExecutorBase(const V & vector) + : FeatureExecutor(), + _multiplier(IAccelrated::getAccelrator()), + _vector(vector) +{ +} + +template <typename BaseType> +DotProductExecutorBase<BaseType>::~DotProductExecutorBase() { } + +template <typename BaseType> +void DotProductExecutorBase<BaseType>::execute(uint32_t docId) { + const AT *values(nullptr); + size_t count = getAttributeValues(docId, values); + size_t commonRange = std::min(count, _vector.size()); + static_assert(std::is_same<typename AT::ValueType, BaseType>::value); + outputs().set_number(0, _multiplier->dotProduct( + &_vector[0], reinterpret_cast<const typename AT::ValueType *>(values), commonRange)); +} + template <typename A> DotProductExecutor<A>::DotProductExecutor(const A * attribute, const V & vector) : - FeatureExecutor(), - _attribute(attribute), - _multiplier(IAccelrated::getAccelrator()), - _vector(vector) + DotProductExecutorBase<typename A::BaseType>(vector), + _attribute(attribute) { } @@ -81,16 +103,6 @@ DotProductExecutor<A>::getAttributeValues(uint32_t docId, const AT * & values) } template <typename A> -void -DotProductExecutor<A>::execute(uint32_t docId) -{ - const AT *values(NULL); - size_t count = getAttributeValues(docId, values); - size_t commonRange = std::min(count, _vector.size()); - outputs().set_number(0, _multiplier->dotProduct(&_vector[0], reinterpret_cast<const typename A::BaseType *>(values), commonRange)); -} - -template <typename A> SparseDotProductExecutor<A>::SparseDotProductExecutor(const A * attribute, const V & values, const IV & indexes) : DotProductExecutor<A>(attribute, values), _indexes(indexes), @@ -159,18 +171,90 @@ SparseDotProductByCopyExecutor<A>::getAttributeValues(uint32_t docId, const AT * } size_t i(0); for (const IV & iv(this->_indexes); (i < iv.size()) && (iv[i] < count); i++) { - if (i != iv[i]) { - _copy[i] = _copy[iv[i]]; - } + _copy[i] = _copy[iv[i]]; } values = reinterpret_cast<const AT *>(&_copy[0]); return i; } +template <typename BaseType> +DotProductByContentFillExecutor<BaseType>::DotProductByContentFillExecutor( + const attribute::IAttributeVector * attribute, + const V & vector) + : DotProductExecutorBase<BaseType>(vector), + _attribute(attribute), + _filler() +{ + _filler.allocate(attribute->getMaxValueCount()); +} + +template <typename BaseType> +DotProductByContentFillExecutor<BaseType>::~DotProductByContentFillExecutor() { +} + +namespace { + +template<typename T> struct IsNonWeightedType : std::false_type {}; +template<typename BaseType> struct IsNonWeightedType<multivalue::Value<BaseType>> : std::true_type {}; + +// Compile-time sanity check for type compatibility of gnarly BaseType <-> multivalue::Value +// reinterpret_cast used by some getAttributeValues calls. +template <typename BaseType, typename AttributeValueType, typename FillerValueType> +constexpr void sanity_check_reinterpret_cast_compatibility() { + static_assert(IsNonWeightedType<AttributeValueType>::value); + static_assert(sizeof(BaseType) == sizeof(AttributeValueType)); + static_assert(sizeof(BaseType) == sizeof(FillerValueType)); + static_assert(std::is_same<BaseType, typename AttributeValueType::ValueType>::value); } } +template <typename BaseType> +size_t DotProductByContentFillExecutor<BaseType>::getAttributeValues(uint32_t docid, const AT * & values) { + _filler.fill(*_attribute, docid); + sanity_check_reinterpret_cast_compatibility<BaseType, AT, decltype(*_filler.data())>(); + values = reinterpret_cast<const AT *>(_filler.data()); + return _filler.size(); +} + +template <typename BaseType> +SparseDotProductByContentFillExecutor<BaseType>::SparseDotProductByContentFillExecutor( + const attribute::IAttributeVector * attribute, + const V & vector, + const IV & indexes) + : DotProductExecutorBase<BaseType>(vector), + _attribute(attribute), + _indexes(indexes), + _filler() +{ + _filler.allocate(std::max(static_cast<size_t>(attribute->getMaxValueCount()), indexes.size())); +} + +template <typename BaseType> +SparseDotProductByContentFillExecutor<BaseType>::~SparseDotProductByContentFillExecutor() { +} + +template <typename BaseType> +size_t SparseDotProductByContentFillExecutor<BaseType>::getAttributeValues(uint32_t docid, const AT * & values) { + _filler.fill(*_attribute, docid); + + const size_t count = _filler.size(); + BaseType * data = _filler.data(); + size_t i = 0; + for (; (i < _indexes.size()) && (_indexes[i] < count); ++i) { + data[i] = data[_indexes[i]]; + } + + sanity_check_reinterpret_cast_compatibility<BaseType, AT, decltype(*_filler.data())>(); + values = reinterpret_cast<const AT *>(data); + return i; +} + + +} // namespace array + +} // namespace dotproduct + DotProductBlueprint::DotProductBlueprint() : Blueprint("dotProduct"), _defaultAttribute(), @@ -220,22 +304,22 @@ namespace { template <typename T> void -parseVectors(const Property & prop, std::vector<T> & values, std::vector<uint32_t> & indexes) +parseVectors(const Property& prop, std::vector<T>& values, std::vector<uint32_t>& indexes) { typedef std::vector<ArrayParser::ValueAndIndex<T>> SparseV; SparseV sparse; ArrayParser::parsePartial(prop.get(), sparse); if ( ! sparse.empty()) { std::sort(sparse.begin(), sparse.end()); - if ((sparse.back().getIndex()+1)/sparse.size() < 10) { - values.resize(sparse.back().getIndex()+1); - for(const typename SparseV::value_type & a : sparse) { + if ((sparse.back().getIndex() + 1) / sparse.size() < 10) { + values.resize(sparse.back().getIndex() + 1); + for (const typename SparseV::value_type & a : sparse) { values[a.getIndex()] = a.getValue(); } } else { values.reserve(sparse.size()); indexes.reserve(sparse.size()); - for(const typename SparseV::value_type & a : sparse) { + for (const typename SparseV::value_type & a : sparse) { values.push_back(a.getValue()); indexes.push_back(a.getIndex()); } @@ -243,155 +327,293 @@ parseVectors(const Property & prop, std::vector<T> & values, std::vector<uint32_ } } +} + +namespace dotproduct { + template <typename T> -struct ArrayParam : public fef::Anything -{ - ArrayParam(const Property & prop) { - parseVectors(prop, values, indexes); +ArrayParam<T>::ArrayParam(const Property & prop) { + parseVectors(prop, values, indexes); +} + +// Explicit instantiation since these are inspected by unit tests. +// FIXME this feels a bit dirty, consider breaking up ArrayParam to remove dependencies +// on templated vector parsing. This is why it's defined in this translation unit as it is. +template class ArrayParam<int64_t>; +template class ArrayParam<double>; + +} // namespace dotproduct + +namespace { + +bool isImportedAttribute(const IAttributeVector& attribute) noexcept { + return dynamic_cast<const ImportedAttributeVector*>(&attribute) != nullptr; +} + +using dotproduct::ArrayParam; + +template <typename A> +bool supportsGetRawValues(const A & attr) noexcept { + try { + const multivalue::Value<typename A::BaseType> * tmp = nullptr; + attr.getRawValues(0, tmp); // Throws if unsupported + return true; + } catch (const std::runtime_error & e) { + (void) e; + return false; } - std::vector<T> values; - std::vector<uint32_t> indexes; -}; +} +// Precondition: isImportedAttribute(*attribute) == false template <typename A> FeatureExecutor & -createForArrayImpl(const IAttributeVector * attribute, - const std::vector<typename A::BaseType> & values, - const std::vector<uint32_t> & indexes, - vespalib::Stash & stash) +createForDirectArrayImpl(const IAttributeVector * attribute, + const std::vector<typename A::BaseType> & values, + const std::vector<uint32_t> & indexes, + vespalib::Stash & stash) { if (values.empty()) { return stash.create<SingleZeroValueExecutor>(); } const A * iattr = dynamic_cast<const A *>(attribute); if (indexes.empty()) { - try { + if (supportsGetRawValues(*iattr)) { using T = typename A::BaseType; using VT = multivalue::Value<T>; - const VT * tmp; - iattr->getRawValues(0, tmp); using ExactA = MultiValueNumericAttribute<A, VT>; + const ExactA * exactA = dynamic_cast<const ExactA *>(iattr); if (exactA != nullptr) { return stash.create<dotproduct::array::DotProductExecutor<ExactA>>(exactA, values); } return stash.create<dotproduct::array::DotProductExecutor<A>>(iattr, values); - } catch (const std::runtime_error & e) { - (void) e; + } else { return stash.create<dotproduct::array::DotProductByCopyExecutor<A>>(iattr, values); } } else { - try { - const multivalue::Value<typename A::BaseType> * tmp; - iattr->getRawValues(0, tmp); + if (supportsGetRawValues(*iattr)) { return stash.create<dotproduct::array::SparseDotProductExecutor<A>>(iattr, values, indexes); - } catch (const std::runtime_error & e) { - (void) e; + } else { return stash.create<dotproduct::array::SparseDotProductByCopyExecutor<A>>(iattr, values, indexes); } } return stash.create<SingleZeroValueExecutor>(); } +template <typename BaseType> +FeatureExecutor & +createForImportedArrayImpl(const IAttributeVector * attribute, + const std::vector<BaseType> & values, + const std::vector<uint32_t> & indexes, + vespalib::Stash & stash) { + if (values.empty()) { + return stash.create<SingleZeroValueExecutor>(); + } + if (indexes.empty()) { + using ExecutorType = dotproduct::array::DotProductByContentFillExecutor<BaseType>; + return stash.create<ExecutorType>(attribute, values); + } else { + using ExecutorType = dotproduct::array::SparseDotProductByContentFillExecutor<BaseType>; + return stash.create<ExecutorType>(attribute, values, indexes); + } +} + +template <typename BaseType> +FeatureExecutor& +createForImportedArray(const IAttributeVector * attribute, + const Property & prop, + vespalib::Stash & stash) { + std::vector<BaseType> values; + std::vector<uint32_t> indexes; + parseVectors(prop, values, indexes); + return createForImportedArrayImpl<BaseType>(attribute, values, indexes, stash); +} + +template <typename BaseType> +FeatureExecutor& +createForImportedArray(const IAttributeVector * attribute, + const ArrayParam<BaseType> & arguments, + vespalib::Stash & stash) { + return createForImportedArrayImpl<BaseType>(attribute, arguments.values, arguments.indexes, stash); +} + template <typename A> FeatureExecutor & -createForArray(const IAttributeVector * attribute, - const Property & prop, - vespalib::Stash & stash) { +createForDirectArray(const IAttributeVector * attribute, + const Property & prop, + vespalib::Stash & stash) { std::vector<typename A::BaseType> values; std::vector<uint32_t> indexes; parseVectors(prop, values, indexes); - return createForArrayImpl<A>(attribute, values, indexes, stash); + return createForDirectArrayImpl<A>(attribute, values, indexes, stash); } template <typename A> FeatureExecutor & -createForArray(const IAttributeVector * attribute, - const ArrayParam<typename A::BaseType> & arguments, - vespalib::Stash & stash) { - return createForArrayImpl<A>(attribute, arguments.values, arguments.indexes, stash); +createForDirectArray(const IAttributeVector * attribute, + const ArrayParam<typename A::BaseType> & arguments, + vespalib::Stash & stash) { + return createForDirectArrayImpl<A>(attribute, arguments.values, arguments.indexes, stash); } -//const char * BINARY = "binary"; const char * OBJECT = "object"; - FeatureExecutor & createFromObject(const IAttributeVector * attribute, const fef::Anything & object, vespalib::Stash &stash) { if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) { + if (!isImportedAttribute(*attribute)) { + switch (attribute->getBasicType()) { + case BasicType::INT32: + return createForDirectArray<IntegerAttributeTemplate<int32_t>>(attribute, dynamic_cast<const ArrayParam<int32_t> &>(object), stash); + case BasicType::INT64: + return createForDirectArray<IntegerAttributeTemplate<int64_t>>(attribute, dynamic_cast<const ArrayParam<int64_t> &>(object), stash); + case BasicType::FLOAT: + return createForDirectArray<FloatingPointAttributeTemplate<float>>(attribute, dynamic_cast<const ArrayParam<float> &>(object), stash); + case BasicType::DOUBLE: + return createForDirectArray<FloatingPointAttributeTemplate<double>>(attribute, dynamic_cast<const ArrayParam<double> &>(object), stash); + default: + break; + } + } else { + switch (attribute->getBasicType()) { + case BasicType::INT32: + case BasicType::INT64: + return createForImportedArray<int64_t>(attribute, dynamic_cast<const ArrayParam<int64_t> &>(object), stash); + case BasicType::FLOAT: + case BasicType::DOUBLE: + return createForImportedArray<double>(attribute, dynamic_cast<const ArrayParam<double> &>(object), stash); + default: + break; + } + } + } + // TODO: Add support for creating executor for weighted set string / integer attribute + // where the query vector is represented as an object instead of a string. + LOG(warning, "The attribute vector '%s' is NOT of type array<int/long/float/double>" + ", returning executor with default value.", attribute->getName().c_str()); + return stash.create<SingleZeroValueExecutor>(); +} + +FeatureExecutor * createTypedArrayExecutor(const IAttributeVector * attribute, + const Property & prop, + vespalib::Stash & stash) { + if (!isImportedAttribute(*attribute)) { + switch (attribute->getBasicType()) { + case BasicType::INT32: + return &createForDirectArray<IntegerAttributeTemplate<int32_t>>(attribute, prop, stash); + case BasicType::INT64: + return &createForDirectArray<IntegerAttributeTemplate<int64_t>>(attribute, prop, stash); + case BasicType::FLOAT: + return &createForDirectArray<FloatingPointAttributeTemplate<float>>(attribute, prop, stash); + case BasicType::DOUBLE: + return &createForDirectArray<FloatingPointAttributeTemplate<double>>(attribute, prop, stash); + default: + break; + } + } else { + // When using AttributeContent, integers are always extracted as largeint_t and + // floats always as double. This means that we cannot allow type specializations + // on int32_t or float, or reinterpreting type casts will end up pointing at + // data that is not of the correct size. Which would be Bad(tm). switch (attribute->getBasicType()) { case BasicType::INT32: - return createForArray<IntegerAttributeTemplate<int32_t>>(attribute, dynamic_cast<const ArrayParam<int32_t> &>(object), stash); case BasicType::INT64: - return createForArray<IntegerAttributeTemplate<int64_t>>(attribute, dynamic_cast<const ArrayParam<int64_t> &>(object), stash); + return &createForImportedArray<IAttributeVector::largeint_t>(attribute, prop, stash); case BasicType::FLOAT: - return createForArray<FloatingPointAttributeTemplate<float>>(attribute, dynamic_cast<const ArrayParam<float> &>(object), stash); case BasicType::DOUBLE: - return createForArray<FloatingPointAttributeTemplate<double>>(attribute, dynamic_cast<const ArrayParam<double> &>(object), stash); + return &createForImportedArray<double>(attribute, prop, stash); default: break; } } - // TODO: Add support for creating executor for weighted set string / integer attribute - // where the query vector is represented as an object instead of a string. - LOG(warning, "The attribute vector '%s' is NOT of type array<int/long/float/double>" - ", returning executor with default value.", attribute->getName().c_str()); - return stash.create<SingleZeroValueExecutor>(); + return nullptr; +} + +FeatureExecutor * createTypedWsetExecutor(const IAttributeVector * attribute, + const Property & prop, + vespalib::Stash & stash) { + if (attribute->isStringType()) { + if (attribute->hasEnum()) { + dotproduct::wset::EnumVector vector(attribute); + WeightedSetParser::parse(prop.get(), vector); + return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>>(attribute, vector); + } else { + dotproduct::wset::StringVector vector; + WeightedSetParser::parse(prop.get(), vector); + return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::StringVector, WeightedConstCharContent>>(attribute, vector); + } + } else if (attribute->isIntegerType()) { + if (attribute->hasEnum()) { + dotproduct::wset::EnumVector vector(attribute); + WeightedSetParser::parse(prop.get(), vector); + return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>>(attribute, vector); + + } else { + dotproduct::wset::IntegerVector vector; + WeightedSetParser::parse(prop.get(), vector); + return &stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::IntegerVector, WeightedIntegerContent>>(attribute, vector); + } + } + return nullptr; } FeatureExecutor & createFromString(const IAttributeVector * attribute, const Property & prop, vespalib::Stash &stash) { + FeatureExecutor * executor = nullptr; if (attribute->getCollectionType() == attribute::CollectionType::WSET) { - if (attribute->isStringType()) { - if (attribute->hasEnum()) { - dotproduct::wset::EnumVector vector(attribute); - WeightedSetParser::parse(prop.get(), vector); - return stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>>(attribute, vector); - } else { - dotproduct::wset::StringVector vector; - WeightedSetParser::parse(prop.get(), vector); - return stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::StringVector, WeightedConstCharContent>>(attribute, vector); - } - } else if (attribute->isIntegerType()) { - if (attribute->hasEnum()) { - dotproduct::wset::EnumVector vector(attribute); - WeightedSetParser::parse(prop.get(), vector); - return stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent>>(attribute, vector); - - } else { - dotproduct::wset::IntegerVector vector; - WeightedSetParser::parse(prop.get(), vector); - return stash.create<dotproduct::wset::DotProductExecutor<dotproduct::wset::IntegerVector, WeightedIntegerContent>>(attribute, vector); - } - } + executor = createTypedWsetExecutor(attribute, prop, stash); } else if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) { - switch (attribute->getBasicType()) { + executor = createTypedArrayExecutor(attribute, prop, stash); + } + + if (executor == nullptr) { + LOG(warning, "The attribute vector '%s' is not of type weighted set string/integer nor" + " array<int/long/float/double>, returning executor with default value.", attribute->getName().c_str()); + executor = &stash.create<SingleZeroValueExecutor>(); + } + return *executor; +} + +fef::Anything::UP attemptParseArrayQueryVector(const IAttributeVector & attribute, const Property & prop) { + if (!isImportedAttribute(attribute)) { + switch (attribute.getBasicType()) { + case BasicType::INT32: + return std::make_unique<ArrayParam<int32_t>>(prop); + case BasicType::INT64: + return std::make_unique<ArrayParam<int64_t>>(prop); + case BasicType::FLOAT: + return std::make_unique<ArrayParam<float>>(prop); + case BasicType::DOUBLE: + return std::make_unique<ArrayParam<double>>(prop); + default: + break; + } + } else { + // See rationale in createTypedArrayExecutor() as to why we promote < 64 bit types + // to their full-width equivalent when dealing with imported attributes. + switch (attribute.getBasicType()) { case BasicType::INT32: - return createForArray<IntegerAttributeTemplate<int32_t>>(attribute, prop, stash); case BasicType::INT64: - return createForArray<IntegerAttributeTemplate<int64_t>>(attribute, prop, stash); + return std::make_unique<ArrayParam<int64_t>>(prop); case BasicType::FLOAT: - return createForArray<FloatingPointAttributeTemplate<float>>(attribute, prop, stash); case BasicType::DOUBLE: - return createForArray<FloatingPointAttributeTemplate<double>>(attribute, prop, stash); + return std::make_unique<ArrayParam<double>>(prop); default: break; } } - LOG(warning, "The attribute vector '%s' is not of type weighted set string/integer nor" - " array<int/long/float/double>, returning executor with default value.", attribute->getName().c_str()); - return stash.create<SingleZeroValueExecutor>(); + return std::unique_ptr<fef::Anything>(); } -} +} // anon ns void DotProductBlueprint::prepareSharedState(const IQueryEnvironment & env, IObjectStore & store) const { const IAttributeVector * attribute = env.getAttributeContext().getAttribute(getAttribute(env)); - if (attribute != NULL) { + if (attribute != nullptr) { if ((attribute->getCollectionType() == attribute::CollectionType::WSET) && attribute->hasEnum() && (attribute->isStringType() || attribute->isIntegerType())) @@ -414,25 +636,11 @@ DotProductBlueprint::prepareSharedState(const IQueryEnvironment & env, IObjectSt WeightedSetParser::parse(prop.get(), vector); } } + // TODO actually use the parsed output for wset operations! } else if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) { - switch (attribute->getBasicType()) { - case BasicType::INT32: - arguments.reset(new ArrayParam<int32_t>(prop)); - break; - case BasicType::INT64: - arguments.reset(new ArrayParam<int64_t>(prop)); - break; - case BasicType::FLOAT: - arguments.reset(new ArrayParam<float>(prop)); - break; - case BasicType::DOUBLE: - arguments.reset(new ArrayParam<double>(prop)); - break; - default: - break; - } + arguments = attemptParseArrayQueryVector(*attribute, prop); } - if ( arguments.get()) { + if (arguments.get()) { store.add(getBaseName() + "." + _queryVector + "." + OBJECT, std::move(arguments)); } } @@ -443,7 +651,7 @@ FeatureExecutor & DotProductBlueprint::createExecutor(const IQueryEnvironment & env, vespalib::Stash &stash) const { const IAttributeVector * attribute = env.getAttributeContext().getAttribute(getAttribute(env)); - if (attribute == NULL) { + if (attribute == nullptr) { LOG(warning, "The attribute vector '%s' was not found in the attribute manager, returning executor with default value.", getAttribute(env).c_str()); return stash.create<SingleZeroValueExecutor>(); @@ -455,7 +663,7 @@ DotProductBlueprint::createExecutor(const IQueryEnvironment & env, vespalib::Sta attribute = env.getAttributeContext().getAttributeStableEnum(getAttribute(env)); } const fef::Anything * argument = env.getObjectStore().get(getBaseName() + "." + _queryVector + "." + OBJECT); - if (argument != NULL) { + if (argument != nullptr) { return createFromObject(attribute, *argument, stash); } else { Property prop = env.getProperties().lookup(getBaseName(), _queryVector); diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.h b/searchlib/src/vespa/searchlib/features/dotproductfeature.h index 5ac623082ee..5a5b809875b 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.h +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.h @@ -11,6 +11,10 @@ #include <vespa/vespalib/stllike/hash_map.hpp> namespace search { +namespace fef { +class Property; +} + namespace features { namespace dotproduct { @@ -31,6 +35,13 @@ struct Converter<vespalib::string, const char *> { const char * convert(const vespalib::string & value) const { return value.c_str(); } }; +template <typename T> +struct ArrayParam : public fef::Anything { + ArrayParam(const fef::Property & prop); + std::vector<T> values; + std::vector<uint32_t> indexes; +}; + namespace wset { template <typename DimensionVType, typename DimensionHType, typename ComponentType, typename HashMapComparator = std::equal_to<DimensionHType> > @@ -115,23 +126,40 @@ public: namespace array { /** + * Common base for handling execution for all array dot product executors. + * Only cares about the underlying value type, not the concrete type of the + * attribute vector itself. + */ +template <typename BaseType> +class DotProductExecutorBase : public fef::FeatureExecutor { +public: + using AT = multivalue::Value<BaseType>; + using V = std::vector<BaseType>; +private: + vespalib::hwaccelrated::IAccelrated::UP _multiplier; + V _vector; + virtual size_t getAttributeValues(uint32_t docid, const AT * & count) = 0; +public: + DotProductExecutorBase(const V & vector); + ~DotProductExecutorBase(); + void execute(uint32_t docId) final override; +}; + +/** * Implements the executor for the dotproduct feature. */ template <typename A> -class DotProductExecutor : public fef::FeatureExecutor { +class DotProductExecutor : public DotProductExecutorBase<typename A::BaseType> { public: - typedef multivalue::Value<typename A::BaseType> AT; - typedef std::vector<typename A::BaseType> V; + using AT = typename DotProductExecutorBase<typename A::BaseType>::AT; + using V = typename DotProductExecutorBase<typename A::BaseType>::V; protected: const A * _attribute; private: - vespalib::hwaccelrated::IAccelrated::UP _multiplier; - V _vector; virtual size_t getAttributeValues(uint32_t docid, const AT * & count); public: DotProductExecutor(const A * attribute, const V & vector); ~DotProductExecutor(); - void execute(uint32_t docId) override; }; template <typename A> @@ -146,6 +174,33 @@ private: std::vector<typename A::BaseType> _copy; }; +/** + * Dot product executor which uses AttributeContent for the specified base value type + * to extract array elements from a given attribute vector. Used for "synthetic" + * attribute vectors such as imported attributes, where we cannot directly access + * the memory of the underlying attribute store. + * + * Some caveats: + * - 64 bit value type width is enforced, so 32-bit value types will not benefit + * from extra SIMD register capacity. + * - Additional overhead caused by call indirection and copy step. + */ +template <typename BaseType> +class DotProductByContentFillExecutor : public DotProductExecutorBase<BaseType> { +public: + using V = typename DotProductExecutorBase<BaseType>::V; + using AT = typename DotProductExecutorBase<BaseType>::AT; + using ValueFiller = attribute::AttributeContent<BaseType>; + + DotProductByContentFillExecutor(const attribute::IAttributeVector * attribute, const V & vector); + ~DotProductByContentFillExecutor(); +private: + size_t getAttributeValues(uint32_t docid, const AT * & values) final override; + + const attribute::IAttributeVector* _attribute; + ValueFiller _filler; +}; + template <typename A> class SparseDotProductExecutor : public DotProductExecutor<A> { public: @@ -174,6 +229,30 @@ private: std::vector<typename A::BaseType> _copy; }; +/** + * Dot product executor which uses AttributeContent for fetching values. See + * DotProductByContentFillExecutor for a more in-depth description and caveats. + */ +template <typename BaseType> +class SparseDotProductByContentFillExecutor : public DotProductExecutorBase<BaseType> { +public: + using IV = std::vector<uint32_t>; + using V = typename DotProductExecutorBase<BaseType>::V; + using AT = typename DotProductExecutorBase<BaseType>::AT; + using ValueFiller = attribute::AttributeContent<BaseType>; + + SparseDotProductByContentFillExecutor(const attribute::IAttributeVector * attribute, + const V & vector, + const IV & indexes); + ~SparseDotProductByContentFillExecutor(); +private: + size_t getAttributeValues(uint32_t docid, const AT * & values) final override; + + const attribute::IAttributeVector* _attribute; + IV _indexes; + ValueFiller _filler; +}; + } } diff --git a/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt index aa4a0aa5ddf..ab44c30b142 100644 --- a/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt @@ -7,6 +7,7 @@ vespa_add_library(searchlib_fef_test OBJECT indexenvironment.cpp indexenvironmentbuilder.cpp matchdatabuilder.cpp + mock_attribute_context.cpp queryenvironment.cpp queryenvironmentbuilder.cpp rankresult.cpp diff --git a/searchlib/src/vespa/searchlib/fef/test/attribute_map.h b/searchlib/src/vespa/searchlib/fef/test/attribute_map.h new file mode 100644 index 00000000000..db62cbebdab --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/attribute_map.h @@ -0,0 +1,41 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "mock_attribute_context.h" +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <memory> +#include <map> + +#pragma once + +namespace search { +namespace fef { +namespace test { + +class AttributeMap { + std::map<vespalib::string, std::shared_ptr<attribute::IAttributeVector>> _attributes; +public: + using IAttributeVector = attribute::IAttributeVector; + + void add(std::shared_ptr<attribute::IAttributeVector> attr) { + _attributes.emplace(attr->getName(), std::move(attr)); + } + + const IAttributeVector * getAttribute(const vespalib::string & name) const { + auto iter = _attributes.find(name); + return (iter != _attributes.end() ? iter->second.get() : nullptr); + } + + void getAttributeList(std::vector<const IAttributeVector *> & list) const { + for (const auto& attr : _attributes) { + list.emplace_back(attr.second.get()); + } + } + + attribute::IAttributeContext::UP createContext() const { + return std::make_unique<MockAttributeContext>(*this); + } +}; + +} // test +} // fef +} // search diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp index 490130ec0ef..bfc2dd7fe05 100644 --- a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp @@ -24,7 +24,7 @@ IndexEnvironment::Constant notFoundError(ValueType::error_type(), IndexEnvironment::IndexEnvironment() : _properties(), _fields(), - _attrMan(), + _attrMap(), _tableMan(), _constants() { diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h index 3f6686ddefa..cdc5083b4b5 100644 --- a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h +++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h @@ -1,6 +1,7 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include "attribute_map.h" #include <vespa/searchlib/attribute/attributemanager.h> #include <vespa/searchlib/fef/iindexenvironment.h> #include <vespa/searchlib/fef/properties.h> @@ -89,8 +90,8 @@ public: /** Returns a const reference to the list of fields of this. */ const std::vector<FieldInfo> &getFields() const { return _fields; } - /** Returns a reference to the attribute manager of this. */ - AttributeManager &getAttributeManager() { return _attrMan; } + /** Returns a reference to the attribute map of this. */ + AttributeMap &getAttributeMap() { return _attrMap; } /** Returns a reference to the table manager of this. */ TableManager &getTableManager() { return _tableMan; } @@ -108,7 +109,7 @@ private: private: Properties _properties; std::vector<FieldInfo> _fields; - AttributeManager _attrMan; + AttributeMap _attrMap; TableManager _tableMan; ConstantsMap _constants; }; diff --git a/searchlib/src/vespa/searchlib/fef/test/mock_attribute_context.cpp b/searchlib/src/vespa/searchlib/fef/test/mock_attribute_context.cpp new file mode 100644 index 00000000000..7beb5f9a2cb --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/mock_attribute_context.cpp @@ -0,0 +1,22 @@ +#include "mock_attribute_context.h" +#include "attribute_map.h" + +namespace search { +namespace fef { +namespace test { + +using IAttributeVector = attribute::IAttributeVector; + +const IAttributeVector * MockAttributeContext::getAttribute(const string & name) const { + return _attributes.getAttribute(name); +} +const IAttributeVector * MockAttributeContext::getAttributeStableEnum(const string & name) const { + return getAttribute(name); +} +void MockAttributeContext::getAttributeList(std::vector<const IAttributeVector *> & list) const { + _attributes.getAttributeList(list); +} + +} // test +} // fef +} // search diff --git a/searchlib/src/vespa/searchlib/fef/test/mock_attribute_context.h b/searchlib/src/vespa/searchlib/fef/test/mock_attribute_context.h new file mode 100644 index 00000000000..adfa7048d67 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/mock_attribute_context.h @@ -0,0 +1,32 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <memory> +#include <map> + +#pragma once + +namespace search { +namespace fef { +namespace test { + +class AttributeMap; + +class MockAttributeContext : public attribute::IAttributeContext { + const AttributeMap& _attributes; +public: + using IAttributeVector = attribute::IAttributeVector; + + explicit MockAttributeContext(const AttributeMap& attributes) + : _attributes(attributes) + { + } + + const IAttributeVector * getAttribute(const string & name) const override; + const IAttributeVector * getAttributeStableEnum(const string & name) const override; + void getAttributeList(std::vector<const IAttributeVector *> & list) const override; +}; + +} // test +} // fef +} // search diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp index f2ce596dbea..e10a25771ca 100644 --- a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp @@ -11,7 +11,7 @@ QueryEnvironment::QueryEnvironment(IndexEnvironment *env) _terms(), _properties(), _location(), - _attrCtx((env == NULL) ? attribute::IAttributeContext::UP() : env->getAttributeManager().createContext()) + _attrCtx((env == NULL) ? attribute::IAttributeContext::UP() : env->getAttributeMap().createContext()) { } diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h index 047af1152d5..6c797dc5934 100644 --- a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h @@ -62,8 +62,8 @@ public: /** Sets the index environment of this. */ QueryEnvironment &setIndexEnv(IndexEnvironment *indexEnv) { _indexEnv = indexEnv; - _attrCtx = ((indexEnv == NULL) ? search::attribute::IAttributeContext::UP() : - indexEnv->getAttributeManager().createContext()); + _attrCtx = ((indexEnv == NULL) ? search::attribute::IAttributeContext::UP() : + indexEnv->getAttributeMap().createContext()); return *this; } diff --git a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h index a1c6737cd5c..4b376df2bdd 100644 --- a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h +++ b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.h @@ -38,27 +38,39 @@ std::shared_ptr<ReferenceAttribute> create_reference_attribute(vespalib::stringr return std::make_shared<ReferenceAttribute>(name, Config(BasicType::REFERENCE)); } +enum class FastSearchConfig { + ExplicitlyEnabled, + Default +}; + template<typename AttrVecType> std::shared_ptr<AttrVecType> create_typed_attribute(BasicType basic_type, CollectionType collection_type, + FastSearchConfig fast_search = FastSearchConfig::Default, vespalib::stringref name = "parent") { + Config cfg(basic_type, collection_type); + if (fast_search == FastSearchConfig::ExplicitlyEnabled) { + cfg.setFastSearch(true); + } return std::dynamic_pointer_cast<AttrVecType>( - AttributeFactory::createAttribute(name, Config(basic_type, collection_type))); + AttributeFactory::createAttribute(name, std::move(cfg))); } template<typename AttrVecType> std::shared_ptr<AttrVecType> create_single_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::SINGLE, name); + return create_typed_attribute<AttrVecType>(type, CollectionType::SINGLE, FastSearchConfig::Default, name); } template<typename AttrVecType> std::shared_ptr<AttrVecType> create_array_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::ARRAY, name); + return create_typed_attribute<AttrVecType>(type, CollectionType::ARRAY, FastSearchConfig::Default, name); } template<typename AttrVecType> -std::shared_ptr<AttrVecType> create_wset_attribute(BasicType type, vespalib::stringref name = "parent") { - return create_typed_attribute<AttrVecType>(type, CollectionType::WSET, name); +std::shared_ptr<AttrVecType> create_wset_attribute(BasicType type, + FastSearchConfig fast_search = FastSearchConfig::Default, + vespalib::stringref name = "parent") { + return create_typed_attribute<AttrVecType>(type, CollectionType::WSET, fast_search, name); } template<typename VectorType> @@ -83,7 +95,7 @@ struct ImportedAttributeFixture { ImportedAttributeFixture(); - ~ImportedAttributeFixture(); + virtual ~ImportedAttributeFixture(); void map_reference(DocId from_lid, GlobalId via_gid, DocId to_lid) { assert(from_lid < reference_attr->getNumDocs()); @@ -176,8 +188,9 @@ struct ImportedAttributeFixture { template<typename AttrVecType, typename WeightedValueType> void reset_with_wset_value_reference_mappings( BasicType type, - const std::vector<LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { - reset_with_new_target_attr(create_wset_attribute<AttrVecType>(type)); + const std::vector<LidToLidMapping<std::vector<WeightedValueType>>> &mappings, + FastSearchConfig fast_search = FastSearchConfig::Default) { + reset_with_new_target_attr(create_wset_attribute<AttrVecType>(type, fast_search)); set_up_and_map<AttrVecType>(mappings, [this](auto &target_vec, auto &mapping) { for (const auto &v : mapping._value_in_target_attr) { ASSERT_TRUE(target_vec.append(mapping._to_lid, v.value(), v.weight())); @@ -237,8 +250,9 @@ template<typename AttrVecType, typename WeightedValueType> void reset_with_wset_value_reference_mappings( ImportedAttributeFixture &f, BasicType type, - const std::vector<ImportedAttributeFixture::LidToLidMapping<std::vector<WeightedValueType>>> &mappings) { - f.reset_with_wset_value_reference_mappings<AttrVecType, WeightedValueType>(type, mappings); + const std::vector<ImportedAttributeFixture::LidToLidMapping<std::vector<WeightedValueType>>> &mappings, + FastSearchConfig fast_search = FastSearchConfig::Default) { + f.reset_with_wset_value_reference_mappings<AttrVecType, WeightedValueType>(type, mappings, fast_search); } bool has_active_enum_guards(AttributeVector &attr) { |