summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-06-29 13:03:15 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2020-06-29 13:08:42 +0000
commitaf54254095fae1bdea8d101a9c0f4811ddf69f56 (patch)
treec5d9ca1ae926e0159e2145d9e3e54ef9bfb73546 /searchlib
parentbb7d188e04ff44318906dfa9ede5b532409ebf9f (diff)
Rename unique feature to globalsequence.
Invert so that the first doc from the first node will sort first, followed by the first doc from the second node, with the last doc of the last node at the end. It will produce numbers in the range (1 << 48) down to zero. Sequence = (1 << 48) - ((docId << 16) | distributionKey)
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/features/prod_features.cpp30
-rw-r--r--searchlib/src/vespa/searchlib/features/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp64
-rw-r--r--searchlib/src/vespa/searchlib/features/global_sequence_feature.h (renamed from searchlib/src/vespa/searchlib/features/uniquefeature.h)4
-rw-r--r--searchlib/src/vespa/searchlib/features/setup.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/features/uniquefeature.cpp64
6 files changed, 91 insertions, 77 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index c50c7a12698..cc90008eca8 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -34,7 +34,7 @@
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/termfeature.h>
#include <vespa/searchlib/features/utils.h>
-#include <vespa/searchlib/features/uniquefeature.h>
+#include <vespa/searchlib/features/global_sequence_feature.h>
#include <vespa/searchlib/features/weighted_set_parser.hpp>
#include <vespa/searchlib/fef/featurenamebuilder.h>
#include <vespa/searchlib/fef/indexproperties.h>
@@ -1565,22 +1565,36 @@ Test::testMatchCount()
}
}
+uint64_t globalSequence(uint32_t docId, uint32_t distrKey) {
+ return (1ul << 48) - ((uint64_t(docId) << 16)| distrKey);
+}
+
+void verifySequence(uint64_t first, uint64_t second) {
+ ASSERT_GREATER(first, second);
+ ASSERT_GREATER(double(first), double(second));
+}
void
Test::testUnique()
{
{
- UniqueBlueprint bp;
- EXPECT_TRUE(assertCreateInstance(bp, "unique"));
+ GlobalSequenceBlueprint bp;
+ EXPECT_TRUE(assertCreateInstance(bp, "globalsequence"));
FtFeatureTest ft(_factory, "");
StringList params, in, out;
FT_SETUP_OK(bp, ft.getIndexEnv(), params, in, out.add("out"));
- FT_DUMP_EMPTY(_factory, "unique");
+ FT_DUMP_EMPTY(_factory, "globalsequence");
}
- FtFeatureTest ft(_factory, "unique");
+ FtFeatureTest ft(_factory, "globalsequence");
ASSERT_TRUE(ft.setup());
- EXPECT_TRUE(ft.execute(0x10003,0, 1));
- EXPECT_TRUE(ft.execute(0x70003,0, 7));
-
+ TEST_DO(verifySequence(globalSequence(1, 0), globalSequence(1,1)));
+ TEST_DO(verifySequence(globalSequence(1, 1), globalSequence(1,2)));
+ TEST_DO(verifySequence(globalSequence(1, 1), globalSequence(2,1)));
+ TEST_DO(verifySequence(globalSequence(2, 1), globalSequence(2,2)));
+ TEST_DO(verifySequence(globalSequence(2, 2), globalSequence(2,3)));
+ TEST_DO(verifySequence(globalSequence(2, 2), globalSequence(3,0)));
+ ASSERT_EQUAL(0xfffffffefffdul, (1ul << 48) - 0x10003l);
+ EXPECT_TRUE(ft.execute(0xfffffffefffdul, 0, 1));
+ EXPECT_TRUE(ft.execute(0xfffffff8fffdul, 0, 7));
}
void
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index a3ce67c4bf6..215b6ade9fd 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -26,6 +26,7 @@ vespa_add_library(searchlib_features OBJECT
flow_completeness_feature.cpp
foreachfeature.cpp
freshnessfeature.cpp
+ global_sequence_feature.cpp
internal_max_reduce_prod_join_feature.cpp
item_raw_score_feature.cpp
jarowinklerdistancefeature.cpp
@@ -63,7 +64,6 @@ vespa_add_library(searchlib_features OBJECT
termfeature.cpp
terminfofeature.cpp
text_similarity_feature.cpp
- uniquefeature.cpp
utils.cpp
valuefeature.cpp
weighted_set_parser.cpp
diff --git a/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp b/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp
new file mode 100644
index 00000000000..e1d9d4733c1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp
@@ -0,0 +1,64 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "global_sequence_feature.h"
+#include <vespa/vespalib/util/stash.h>
+
+using namespace search::fef;
+
+namespace search::features {
+
+namespace {
+
+/**
+ * Executor that combines the local document id (lid) and the distribution key
+ * into one number: (1 << 48) - ((docId << 16) | distributionKey).
+ * The subtraction inverts the ordering, so a smaller packed (docId, distributionKey)
+ * value yields a larger output.
+ */
+class GlobalSequenceExecutor : public fef::FeatureExecutor {
+private:
+    uint32_t _distributionKey;
+
+public:
+    // explicit: a bare integer must not convert implicitly into an executor.
+    explicit GlobalSequenceExecutor(uint32_t distributionKey)
+        : _distributionKey(distributionKey)
+    {
+    }
+
+    void execute(uint32_t docId) override {
+        // Widen before shifting. uint64_t(1) is used instead of 1ul because unsigned long
+        // is only 32 bits on some ABIs, where a shift by 48 would be undefined.
+        const uint64_t packed = (uint64_t(docId) << 16u) | _distributionKey;
+        outputs().set_number(0, (uint64_t(1) << 48u) - packed);
+    }
+};
+
+}
+
+// Passes the feature name "globalsequence" to the Blueprint base and starts with
+// distribution key 0; setup() overwrites it with the key from the index environment.
+GlobalSequenceBlueprint::GlobalSequenceBlueprint()
+    : Blueprint("globalsequence"),
+      _distributionKey(0)
+{
+}
+
+// Intentionally empty: both arguments are ignored and nothing is reported to the visitor.
+void
+GlobalSequenceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
+{
+}
+
+/**
+ * Reads the distribution key from the index environment and declares the single
+ * output "out". The parameter list is ignored. Always returns true.
+ */
+bool
+GlobalSequenceBlueprint::setup(const IIndexEnvironment & env, const ParameterList & )
+{
+    _distributionKey = env.getDistributionKey();
+    // The executor ORs the key into the 16 bits below (lid << 16). A key of 0x10000 or
+    // more would overlap the lid bits, so different (lid, key) pairs could collide.
+    assert(_distributionKey < 0x10000);
+    describeOutput("out", "Returns (1 << 48) - ((lid << 16) | distributionKey)");
+    return true;
+}
+
+// Returns a new default-constructed blueprint of this type.
+Blueprint::UP
+GlobalSequenceBlueprint::createInstance() const
+{
+    Blueprint::UP fresh = std::make_unique<GlobalSequenceBlueprint>();
+    return fresh;
+}
+
+// Creates the executor through the supplied stash, passing on the stored distribution key.
+// The query environment is not used.
+FeatureExecutor &
+GlobalSequenceBlueprint::createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const
+{
+    FeatureExecutor &executor = stash.create<GlobalSequenceExecutor>(_distributionKey);
+    return executor;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/uniquefeature.h b/searchlib/src/vespa/searchlib/features/global_sequence_feature.h
index f21a427762a..04e3b09c72a 100644
--- a/searchlib/src/vespa/searchlib/features/uniquefeature.h
+++ b/searchlib/src/vespa/searchlib/features/global_sequence_feature.h
@@ -14,12 +14,12 @@ namespace search::features {
* It will change if documents change lid.
*/
-class UniqueBlueprint : public fef::Blueprint
+class GlobalSequenceBlueprint : public fef::Blueprint
{
private:
uint32_t _distributionKey;
public:
- UniqueBlueprint();
+ GlobalSequenceBlueprint();
void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
fef::Blueprint::UP createInstance() const override;
fef::ParameterDescriptions getDescriptions() const override {
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index ea6ec842a00..bd79f1d4fb5 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -53,7 +53,7 @@
#include "termfeature.h"
#include "terminfofeature.h"
#include "text_similarity_feature.h"
-#include "uniquefeature.h"
+#include "global_sequence_feature.h"
#include "valuefeature.h"
#include "max_reduce_prod_join_replacer.h"
@@ -122,7 +122,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<TermEditDistanceBlueprint>());
registry.addPrototype(std::make_shared<TermFieldMdBlueprint>());
registry.addPrototype(std::make_shared<ConstantBlueprint>());
- registry.addPrototype(std::make_shared<UniqueBlueprint>());
+ registry.addPrototype(std::make_shared<GlobalSequenceBlueprint>());
// Ranking Expression
diff --git a/searchlib/src/vespa/searchlib/features/uniquefeature.cpp b/searchlib/src/vespa/searchlib/features/uniquefeature.cpp
deleted file mode 100644
index 73ac4a1178e..00000000000
--- a/searchlib/src/vespa/searchlib/features/uniquefeature.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "uniquefeature.h"
-#include <vespa/vespalib/util/stash.h>
-
-using namespace search::fef;
-
-namespace search::features {
-
-namespace {
-
-/**
- * Implements the executor for combining lid and distribution key to form a globally unique value.
- */
-class UniqueLidAndDistributionKeyExecutor : public fef::FeatureExecutor {
-private:
- uint32_t _distributionKey;
-
-public:
- UniqueLidAndDistributionKeyExecutor(uint32_t distributionKey)
- : _distributionKey(distributionKey)
- {
- assert( _distributionKey < 0x10000);
- }
-
- void execute(uint32_t docId) override {
- outputs().set_number(0, (uint64_t(docId) << 16u) | _distributionKey);
- }
-};
-
-}
-
-UniqueBlueprint::UniqueBlueprint() :
- Blueprint("unique"),
- _distributionKey(0)
-{
-}
-
-void
-UniqueBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
-{
-}
-
-bool
-UniqueBlueprint::setup(const IIndexEnvironment & env, const ParameterList & )
-{
- _distributionKey = env.getDistributionKey();
- describeOutput("out", "Returns (lid << 16) | distributionKey");
- return true;
-}
-
-Blueprint::UP
-UniqueBlueprint::createInstance() const
-{
- return std::make_unique<UniqueBlueprint>();
-}
-
-FeatureExecutor &
-UniqueBlueprint::createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const
-{
- return stash.create<UniqueLidAndDistributionKeyExecutor>(_distributionKey);
-}
-
-}