summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2020-06-30 13:43:43 +0200
committer GitHub <noreply@github.com> 2020-06-30 13:43:43 +0200
commit 01b380ed97dbfe1cce41068ea8a806502a2d2bec (patch)
tree dad429bf2b7c1e72eb4a0efdeb073fa5fc0794c1 /searchlib
parent 99f8036a385fa9c5573a6a786350ed8ec3040ef7 (diff)
parent 43575b1d6514953e08583999f0d0d29f23cf3ba1 (diff)
Merge pull request #13741 from vespa-engine/balder/unique-2-globalsequence
Rename unique feature to globalsequence.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/features/prod_features.cpp 27
-rw-r--r-- searchlib/src/vespa/searchlib/features/CMakeLists.txt 2
-rw-r--r-- searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp 64
-rw-r--r-- searchlib/src/vespa/searchlib/features/global_sequence_feature.h (renamed from searchlib/src/vespa/searchlib/features/uniquefeature.h) 8
-rw-r--r-- searchlib/src/vespa/searchlib/features/setup.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/features/uniquefeature.cpp 64
6 files changed, 92 insertions, 77 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index c50c7a12698..f886ba59c1c 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -34,7 +34,7 @@
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/termfeature.h>
#include <vespa/searchlib/features/utils.h>
-#include <vespa/searchlib/features/uniquefeature.h>
+#include <vespa/searchlib/features/global_sequence_feature.h>
#include <vespa/searchlib/features/weighted_set_parser.hpp>
#include <vespa/searchlib/fef/featurenamebuilder.h>
#include <vespa/searchlib/fef/indexproperties.h>
@@ -1565,22 +1565,33 @@ Test::testMatchCount()
}
}
+void verifySequence(uint64_t first, uint64_t second) {
+ ASSERT_GREATER(first, second);
+ ASSERT_GREATER(double(first), double(second));
+}
+
void
Test::testUnique()
{
{
- UniqueBlueprint bp;
- EXPECT_TRUE(assertCreateInstance(bp, "unique"));
+ GlobalSequenceBlueprint bp;
+ EXPECT_TRUE(assertCreateInstance(bp, "globalSequence"));
FtFeatureTest ft(_factory, "");
StringList params, in, out;
FT_SETUP_OK(bp, ft.getIndexEnv(), params, in, out.add("out"));
- FT_DUMP_EMPTY(_factory, "unique");
+ FT_DUMP_EMPTY(_factory, "globalSequence");
}
- FtFeatureTest ft(_factory, "unique");
+ FtFeatureTest ft(_factory, "globalSequence");
ASSERT_TRUE(ft.setup());
- EXPECT_TRUE(ft.execute(0x10003,0, 1));
- EXPECT_TRUE(ft.execute(0x70003,0, 7));
-
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 0), GlobalSequenceBlueprint::globalSequence(1,1)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 1), GlobalSequenceBlueprint::globalSequence(1,2)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 1), GlobalSequenceBlueprint::globalSequence(2,1)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 1), GlobalSequenceBlueprint::globalSequence(2,2)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 2), GlobalSequenceBlueprint::globalSequence(2,3)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 2), GlobalSequenceBlueprint::globalSequence(3,0)));
+ ASSERT_EQUAL(0xfffffffefffdul, (1ul << 48) - 0x10003l);
+ EXPECT_TRUE(ft.execute(0xfffffffefffdul, 0, 1));
+ EXPECT_TRUE(ft.execute(0xfffffff8fffdul, 0, 7));
}
void
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index a3ce67c4bf6..215b6ade9fd 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -26,6 +26,7 @@ vespa_add_library(searchlib_features OBJECT
flow_completeness_feature.cpp
foreachfeature.cpp
freshnessfeature.cpp
+ global_sequence_feature.cpp
internal_max_reduce_prod_join_feature.cpp
item_raw_score_feature.cpp
jarowinklerdistancefeature.cpp
@@ -63,7 +64,6 @@ vespa_add_library(searchlib_features OBJECT
termfeature.cpp
terminfofeature.cpp
text_similarity_feature.cpp
- uniquefeature.cpp
utils.cpp
valuefeature.cpp
weighted_set_parser.cpp
diff --git a/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp b/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp
new file mode 100644
index 00000000000..255b033a592
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/global_sequence_feature.cpp
@@ -0,0 +1,64 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "global_sequence_feature.h"
+#include <vespa/vespalib/util/stash.h>
+
+using namespace search::fef;
+
+namespace search::features {
+
+namespace {
+
+/**
+ * Implements the executor for combining lid and distribution key to form a globally unique value.
+ */
+class GlobalSequenceExecutor : public fef::FeatureExecutor {
+private:
+ uint32_t _distributionKey;
+
+public:
+ GlobalSequenceExecutor(uint32_t distributionKey)
+ : _distributionKey(distributionKey)
+ {
+ }
+
+ void execute(uint32_t docId) override {
+ outputs().set_number(0, GlobalSequenceBlueprint::globalSequence(docId, _distributionKey));
+ }
+};
+
+}
+
+GlobalSequenceBlueprint::GlobalSequenceBlueprint() :
+ Blueprint("globalSequence"),
+ _distributionKey(0)
+{
+}
+
+void
+GlobalSequenceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
+{
+}
+
+bool
+GlobalSequenceBlueprint::setup(const IIndexEnvironment & env, const ParameterList & )
+{
+ _distributionKey = env.getDistributionKey();
+ assert( _distributionKey < 0x10000);
+ describeOutput("out", "Returns (1 << 48) - ((lid << 16) | distributionKey)");
+ return true;
+}
+
+Blueprint::UP
+GlobalSequenceBlueprint::createInstance() const
+{
+ return std::make_unique<GlobalSequenceBlueprint>();
+}
+
+FeatureExecutor &
+GlobalSequenceBlueprint::createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const
+{
+ return stash.create<GlobalSequenceExecutor>(_distributionKey);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/uniquefeature.h b/searchlib/src/vespa/searchlib/features/global_sequence_feature.h
index f21a427762a..3678a260b00 100644
--- a/searchlib/src/vespa/searchlib/features/uniquefeature.h
+++ b/searchlib/src/vespa/searchlib/features/global_sequence_feature.h
@@ -14,12 +14,12 @@ namespace search::features {
* It will change if documents change lid.
*/
-class UniqueBlueprint : public fef::Blueprint
+class GlobalSequenceBlueprint : public fef::Blueprint
{
private:
uint32_t _distributionKey;
public:
- UniqueBlueprint();
+ GlobalSequenceBlueprint();
void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
fef::Blueprint::UP createInstance() const override;
fef::ParameterDescriptions getDescriptions() const override {
@@ -27,6 +27,10 @@ public:
}
bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+
+ static uint64_t globalSequence(uint32_t docId, uint32_t distrKey) {
+ return (1ul << 48) - ((uint64_t(docId) << 16)| distrKey);
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index ea6ec842a00..bd79f1d4fb5 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -53,7 +53,7 @@
#include "termfeature.h"
#include "terminfofeature.h"
#include "text_similarity_feature.h"
-#include "uniquefeature.h"
+#include "global_sequence_feature.h"
#include "valuefeature.h"
#include "max_reduce_prod_join_replacer.h"
@@ -122,7 +122,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<TermEditDistanceBlueprint>());
registry.addPrototype(std::make_shared<TermFieldMdBlueprint>());
registry.addPrototype(std::make_shared<ConstantBlueprint>());
- registry.addPrototype(std::make_shared<UniqueBlueprint>());
+ registry.addPrototype(std::make_shared<GlobalSequenceBlueprint>());
// Ranking Expression
diff --git a/searchlib/src/vespa/searchlib/features/uniquefeature.cpp b/searchlib/src/vespa/searchlib/features/uniquefeature.cpp
deleted file mode 100644
index 73ac4a1178e..00000000000
--- a/searchlib/src/vespa/searchlib/features/uniquefeature.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "uniquefeature.h"
-#include <vespa/vespalib/util/stash.h>
-
-using namespace search::fef;
-
-namespace search::features {
-
-namespace {
-
-/**
- * Implements the executor for combining lid and distribution key to form a globally unique value.
- */
-class UniqueLidAndDistributionKeyExecutor : public fef::FeatureExecutor {
-private:
- uint32_t _distributionKey;
-
-public:
- UniqueLidAndDistributionKeyExecutor(uint32_t distributionKey)
- : _distributionKey(distributionKey)
- {
- assert( _distributionKey < 0x10000);
- }
-
- void execute(uint32_t docId) override {
- outputs().set_number(0, (uint64_t(docId) << 16u) | _distributionKey);
- }
-};
-
-}
-
-UniqueBlueprint::UniqueBlueprint() :
- Blueprint("unique"),
- _distributionKey(0)
-{
-}
-
-void
-UniqueBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
-{
-}
-
-bool
-UniqueBlueprint::setup(const IIndexEnvironment & env, const ParameterList & )
-{
- _distributionKey = env.getDistributionKey();
- describeOutput("out", "Returns (lid << 16) | distributionKey");
- return true;
-}
-
-Blueprint::UP
-UniqueBlueprint::createInstance() const
-{
- return std::make_unique<UniqueBlueprint>();
-}
-
-FeatureExecutor &
-UniqueBlueprint::createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const
-{
- return stash.create<UniqueLidAndDistributionKeyExecutor>(_distributionKey);
-}
-
-}