diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-06-29 13:03:15 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2020-06-29 13:08:42 +0000 |
commit | af54254095fae1bdea8d101a9c0f4811ddf69f56 (patch) | |
tree | c5d9ca1ae926e0159e2145d9e3e54ef9bfb73546 /searchlib | |
parent | bb7d188e04ff44318906dfa9ede5b532409ebf9f (diff) |
Rename unique feature to globalsequence.
Invert so that the first doc from the first node will sort first,
followed by the first doc from the second node, and the last doc of the last node at the end.
It will produce numbers in the range (1 << 48) down to zero.
Sequence = (1 << 48) - ((docId << 16) | distributionKey)
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/tests/features/prod_features.cpp | 30 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/CMakeLists.txt | 2 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp | 64 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/global_sequence_feature.h (renamed from searchlib/src/vespa/searchlib/features/uniquefeature.h) | 4 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/setup.cpp | 4 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/uniquefeature.cpp | 64 |
6 files changed, 91 insertions, 77 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index c50c7a12698..cc90008eca8 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -34,7 +34,7 @@ #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/features/termfeature.h> #include <vespa/searchlib/features/utils.h> -#include <vespa/searchlib/features/uniquefeature.h> +#include <vespa/searchlib/features/global_sequence_feature.h> #include <vespa/searchlib/features/weighted_set_parser.hpp> #include <vespa/searchlib/fef/featurenamebuilder.h> #include <vespa/searchlib/fef/indexproperties.h> @@ -1565,22 +1565,36 @@ Test::testMatchCount() } } +uint64_t globalSequence(uint32_t docId, uint32_t distrKey) { + return (1ul << 48) - ((uint64_t(docId) << 16)| distrKey); +} + +void verifySequence(uint64_t first, uint64_t second) { + ASSERT_GREATER(first, second); + ASSERT_GREATER(double(first), double(second)); +} void Test::testUnique() { { - UniqueBlueprint bp; - EXPECT_TRUE(assertCreateInstance(bp, "unique")); + GlobalSequenceBlueprint bp; + EXPECT_TRUE(assertCreateInstance(bp, "globalsequence")); FtFeatureTest ft(_factory, ""); StringList params, in, out; FT_SETUP_OK(bp, ft.getIndexEnv(), params, in, out.add("out")); - FT_DUMP_EMPTY(_factory, "unique"); + FT_DUMP_EMPTY(_factory, "globalsequence"); } - FtFeatureTest ft(_factory, "unique"); + FtFeatureTest ft(_factory, "globalsequence"); ASSERT_TRUE(ft.setup()); - EXPECT_TRUE(ft.execute(0x10003,0, 1)); - EXPECT_TRUE(ft.execute(0x70003,0, 7)); - + TEST_DO(verifySequence(globalSequence(1, 0), globalSequence(1,1))); + TEST_DO(verifySequence(globalSequence(1, 1), globalSequence(1,2))); + TEST_DO(verifySequence(globalSequence(1, 1), globalSequence(2,1))); + TEST_DO(verifySequence(globalSequence(2, 1), globalSequence(2,2))); + TEST_DO(verifySequence(globalSequence(2, 2), globalSequence(2,3))); + TEST_DO(verifySequence(globalSequence(2, 2), 
globalSequence(3,0))); + ASSERT_EQUAL(0xfffffffefffdul, (1ul << 48) - 0x10003l); + EXPECT_TRUE(ft.execute(0xfffffffefffdul, 0, 1)); + EXPECT_TRUE(ft.execute(0xfffffff8fffdul, 0, 7)); } void diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index a3ce67c4bf6..215b6ade9fd 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -26,6 +26,7 @@ vespa_add_library(searchlib_features OBJECT flow_completeness_feature.cpp foreachfeature.cpp freshnessfeature.cpp + global_sequence_feature.cpp internal_max_reduce_prod_join_feature.cpp item_raw_score_feature.cpp jarowinklerdistancefeature.cpp @@ -63,7 +64,6 @@ vespa_add_library(searchlib_features OBJECT termfeature.cpp terminfofeature.cpp text_similarity_feature.cpp - uniquefeature.cpp utils.cpp valuefeature.cpp weighted_set_parser.cpp diff --git a/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp b/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp new file mode 100644 index 00000000000..e1d9d4733c1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp @@ -0,0 +1,64 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "global_sequence_feature.h" +#include <vespa/vespalib/util/stash.h> + +using namespace search::fef; + +namespace search::features { + +namespace { + +/** + * Implements the executor for combining lid and distribution key to form a globally unique value. 
+ */ +class GlobalSequenceExecutor : public fef::FeatureExecutor { +private: + uint32_t _distributionKey; + +public: + GlobalSequenceExecutor(uint32_t distributionKey) + : _distributionKey(distributionKey) + { + } + + void execute(uint32_t docId) override { + outputs().set_number(0, ((1ul << 48u) - ((uint64_t(docId) << 16u) | _distributionKey))); + } +}; + +} + +GlobalSequenceBlueprint::GlobalSequenceBlueprint() : + Blueprint("globalsequence"), + _distributionKey(0) +{ +} + +void +GlobalSequenceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const +{ +} + +bool +GlobalSequenceBlueprint::setup(const IIndexEnvironment & env, const ParameterList & ) +{ + _distributionKey = env.getDistributionKey(); + assert( _distributionKey < 0x80000); + describeOutput("out", "Returns (1 << 48) - ((lid << 16) | distributionKey)"); + return true; +} + +Blueprint::UP +GlobalSequenceBlueprint::createInstance() const +{ + return std::make_unique<GlobalSequenceBlueprint>(); +} + +FeatureExecutor & +GlobalSequenceBlueprint::createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const +{ + return stash.create<GlobalSequenceExecutor>(_distributionKey); +} + +} diff --git a/searchlib/src/vespa/searchlib/features/uniquefeature.h b/searchlib/src/vespa/searchlib/features/global_sequence_feature.h index f21a427762a..04e3b09c72a 100644 --- a/searchlib/src/vespa/searchlib/features/uniquefeature.h +++ b/searchlib/src/vespa/searchlib/features/global_sequence_feature.h @@ -14,12 +14,12 @@ namespace search::features { * It will change if documents change lid. 
*/ -class UniqueBlueprint : public fef::Blueprint +class GlobalSequenceBlueprint : public fef::Blueprint { private: uint32_t _distributionKey; public: - UniqueBlueprint(); + GlobalSequenceBlueprint(); void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; fef::Blueprint::UP createInstance() const override; fef::ParameterDescriptions getDescriptions() const override { diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index ea6ec842a00..bd79f1d4fb5 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -53,7 +53,7 @@ #include "termfeature.h" #include "terminfofeature.h" #include "text_similarity_feature.h" -#include "uniquefeature.h" +#include "global_sequence_feature.h" #include "valuefeature.h" #include "max_reduce_prod_join_replacer.h" @@ -122,7 +122,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(std::make_shared<TermEditDistanceBlueprint>()); registry.addPrototype(std::make_shared<TermFieldMdBlueprint>()); registry.addPrototype(std::make_shared<ConstantBlueprint>()); - registry.addPrototype(std::make_shared<UniqueBlueprint>()); + registry.addPrototype(std::make_shared<GlobalSequenceBlueprint>()); // Ranking Expression diff --git a/searchlib/src/vespa/searchlib/features/uniquefeature.cpp b/searchlib/src/vespa/searchlib/features/uniquefeature.cpp deleted file mode 100644 index 73ac4a1178e..00000000000 --- a/searchlib/src/vespa/searchlib/features/uniquefeature.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "uniquefeature.h" -#include <vespa/vespalib/util/stash.h> - -using namespace search::fef; - -namespace search::features { - -namespace { - -/** - * Implements the executor for combining lid and distribution key to form a globally unique value. - */ -class UniqueLidAndDistributionKeyExecutor : public fef::FeatureExecutor { -private: - uint32_t _distributionKey; - -public: - UniqueLidAndDistributionKeyExecutor(uint32_t distributionKey) - : _distributionKey(distributionKey) - { - assert( _distributionKey < 0x10000); - } - - void execute(uint32_t docId) override { - outputs().set_number(0, (uint64_t(docId) << 16u) | _distributionKey); - } -}; - -} - -UniqueBlueprint::UniqueBlueprint() : - Blueprint("unique"), - _distributionKey(0) -{ -} - -void -UniqueBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const -{ -} - -bool -UniqueBlueprint::setup(const IIndexEnvironment & env, const ParameterList & ) -{ - _distributionKey = env.getDistributionKey(); - describeOutput("out", "Returns (lid << 16) | distributionKey"); - return true; -} - -Blueprint::UP -UniqueBlueprint::createInstance() const -{ - return std::make_unique<UniqueBlueprint>(); -} - -FeatureExecutor & -UniqueBlueprint::createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const -{ - return stash.create<UniqueLidAndDistributionKeyExecutor>(_distributionKey); -} - -} |