diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-06-30 13:43:43 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-30 13:43:43 +0200 |
commit | 01b380ed97dbfe1cce41068ea8a806502a2d2bec (patch) | |
tree | dad429bf2b7c1e72eb4a0efdeb073fa5fc0794c1 /searchlib | |
parent | 99f8036a385fa9c5573a6a786350ed8ec3040ef7 (diff) | |
parent | 43575b1d6514953e08583999f0d0d29f23cf3ba1 (diff) |
Merge pull request #13741 from vespa-engine/balder/unique-2-globalsequence
Rename unique feature to globalsequence.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/tests/features/prod_features.cpp | 27 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/CMakeLists.txt | 2 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp | 64 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/global_sequence_feature.h (renamed from searchlib/src/vespa/searchlib/features/uniquefeature.h) | 8 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/setup.cpp | 4 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/uniquefeature.cpp | 64 |
6 files changed, 92 insertions, 77 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index c50c7a12698..f886ba59c1c 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -34,7 +34,7 @@ #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/features/termfeature.h> #include <vespa/searchlib/features/utils.h> -#include <vespa/searchlib/features/uniquefeature.h> +#include <vespa/searchlib/features/global_sequence_feature.h> #include <vespa/searchlib/features/weighted_set_parser.hpp> #include <vespa/searchlib/fef/featurenamebuilder.h> #include <vespa/searchlib/fef/indexproperties.h> @@ -1565,22 +1565,33 @@ Test::testMatchCount() } } +void verifySequence(uint64_t first, uint64_t second) { + ASSERT_GREATER(first, second); + ASSERT_GREATER(double(first), double(second)); +} + void Test::testUnique() { { - UniqueBlueprint bp; - EXPECT_TRUE(assertCreateInstance(bp, "unique")); + GlobalSequenceBlueprint bp; + EXPECT_TRUE(assertCreateInstance(bp, "globalSequence")); FtFeatureTest ft(_factory, ""); StringList params, in, out; FT_SETUP_OK(bp, ft.getIndexEnv(), params, in, out.add("out")); - FT_DUMP_EMPTY(_factory, "unique"); + FT_DUMP_EMPTY(_factory, "globalSequence"); } - FtFeatureTest ft(_factory, "unique"); + FtFeatureTest ft(_factory, "globalSequence"); ASSERT_TRUE(ft.setup()); - EXPECT_TRUE(ft.execute(0x10003,0, 1)); - EXPECT_TRUE(ft.execute(0x70003,0, 7)); - + TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 0), GlobalSequenceBlueprint::globalSequence(1,1))); + TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 1), GlobalSequenceBlueprint::globalSequence(1,2))); + TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 1), GlobalSequenceBlueprint::globalSequence(2,1))); + TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 1), GlobalSequenceBlueprint::globalSequence(2,2))); + TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 2), GlobalSequenceBlueprint::globalSequence(2,3))); + TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 2), GlobalSequenceBlueprint::globalSequence(3,0))); + ASSERT_EQUAL(0xfffffffefffdul, (1ul << 48) - 0x10003l); + EXPECT_TRUE(ft.execute(0xfffffffefffdul, 0, 1)); + EXPECT_TRUE(ft.execute(0xfffffff8fffdul, 0, 7)); } void diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index a3ce67c4bf6..215b6ade9fd 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -26,6 +26,7 @@ vespa_add_library(searchlib_features OBJECT flow_completeness_feature.cpp foreachfeature.cpp freshnessfeature.cpp + global_sequence_feature.cpp internal_max_reduce_prod_join_feature.cpp item_raw_score_feature.cpp jarowinklerdistancefeature.cpp @@ -63,7 +64,6 @@ vespa_add_library(searchlib_features OBJECT termfeature.cpp terminfofeature.cpp text_similarity_feature.cpp - uniquefeature.cpp utils.cpp valuefeature.cpp weighted_set_parser.cpp diff --git a/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp b/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp new file mode 100644 index 00000000000..255b033a592 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp @@ -0,0 +1,64 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "global_sequence_feature.h" +#include <vespa/vespalib/util/stash.h> + +using namespace search::fef; + +namespace search::features { + +namespace { + +/** + * Implements the executor for combining lid and distribution key to form a globally unique value. + */ +class GlobalSequenceExecutor : public fef::FeatureExecutor { +private: + uint32_t _distributionKey; + +public: + GlobalSequenceExecutor(uint32_t distributionKey) + : _distributionKey(distributionKey) + { + } + + void execute(uint32_t docId) override { + outputs().set_number(0, GlobalSequenceBlueprint::globalSequence(docId, _distributionKey)); + } +}; + +} + +GlobalSequenceBlueprint::GlobalSequenceBlueprint() : + Blueprint("globalSequence"), + _distributionKey(0) +{ +} + +void +GlobalSequenceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const +{ +} + +bool +GlobalSequenceBlueprint::setup(const IIndexEnvironment & env, const ParameterList & ) +{ + _distributionKey = env.getDistributionKey(); + assert( _distributionKey < 0x10000); + describeOutput("out", "Returns (1 << 48) - ((lid << 16) | distributionKey)"); + return true; +} + +Blueprint::UP +GlobalSequenceBlueprint::createInstance() const +{ + return std::make_unique<GlobalSequenceBlueprint>(); +} + +FeatureExecutor & +GlobalSequenceBlueprint::createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const +{ + return stash.create<GlobalSequenceExecutor>(_distributionKey); +} + +} diff --git a/searchlib/src/vespa/searchlib/features/uniquefeature.h b/searchlib/src/vespa/searchlib/features/global_sequence_feature.h index f21a427762a..3678a260b00 100644 --- a/searchlib/src/vespa/searchlib/features/uniquefeature.h +++ b/searchlib/src/vespa/searchlib/features/global_sequence_feature.h @@ -14,12 +14,12 @@ namespace search::features { * It will change if documents change lid. */ -class UniqueBlueprint : public fef::Blueprint +class GlobalSequenceBlueprint : public fef::Blueprint { private: uint32_t _distributionKey; public: - UniqueBlueprint(); + GlobalSequenceBlueprint(); void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; fef::Blueprint::UP createInstance() const override; fef::ParameterDescriptions getDescriptions() const override { @@ -27,6 +27,10 @@ public: } bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; + + static uint64_t globalSequence(uint32_t docId, uint32_t distrKey) { + return (1ul << 48) - ((uint64_t(docId) << 16)| distrKey); + } }; } diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index ea6ec842a00..bd79f1d4fb5 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -53,7 +53,7 @@ #include "termfeature.h" #include "terminfofeature.h" #include "text_similarity_feature.h" -#include "uniquefeature.h" +#include "global_sequence_feature.h" #include "valuefeature.h" #include "max_reduce_prod_join_replacer.h" @@ -122,7 +122,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(std::make_shared<TermEditDistanceBlueprint>()); registry.addPrototype(std::make_shared<TermFieldMdBlueprint>()); registry.addPrototype(std::make_shared<ConstantBlueprint>()); - registry.addPrototype(std::make_shared<UniqueBlueprint>()); + registry.addPrototype(std::make_shared<GlobalSequenceBlueprint>()); // Ranking Expression diff --git a/searchlib/src/vespa/searchlib/features/uniquefeature.cpp b/searchlib/src/vespa/searchlib/features/uniquefeature.cpp deleted file mode 100644 index 73ac4a1178e..00000000000 --- a/searchlib/src/vespa/searchlib/features/uniquefeature.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "uniquefeature.h" -#include <vespa/vespalib/util/stash.h> - -using namespace search::fef; - -namespace search::features { - -namespace { - -/** - * Implements the executor for combining lid and distribution key to form a globally unique value. - */ -class UniqueLidAndDistributionKeyExecutor : public fef::FeatureExecutor { -private: - uint32_t _distributionKey; - -public: - UniqueLidAndDistributionKeyExecutor(uint32_t distributionKey) - : _distributionKey(distributionKey) - { - assert( _distributionKey < 0x10000); - } - - void execute(uint32_t docId) override { - outputs().set_number(0, (uint64_t(docId) << 16u) | _distributionKey); - } -}; - -} - -UniqueBlueprint::UniqueBlueprint() : - Blueprint("unique"), - _distributionKey(0) -{ -} - -void -UniqueBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const -{ -} - -bool -UniqueBlueprint::setup(const IIndexEnvironment & env, const ParameterList & ) -{ - _distributionKey = env.getDistributionKey(); - describeOutput("out", "Returns (lid << 16) | distributionKey"); - return true; -} - -Blueprint::UP -UniqueBlueprint::createInstance() const -{ - return std::make_unique<UniqueBlueprint>(); -} - -FeatureExecutor & -UniqueBlueprint::createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const -{ - return stash.create<UniqueLidAndDistributionKeyExecutor>(_distributionKey); -} - -} |