about summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirstorli@yahoo.no> 2018-06-15 14:57:41 +0200
committer GitHub <noreply@github.com> 2018-06-15 14:57:41 +0200
commit 59b82da9c6fb37328a2ed1f7a7f485972537a9a7 (patch)
tree 0f097e2dbb052e1a9b8468d894e4ff8ee7e73883 /searchlib
parent f966facc1dace916b3cddb564c9b41877bd64176 (diff)
parent 8c150dc0c10ca361f26b1a73b7383a0f2c4f8a95 (diff)
Merge pull request #6190 from vespa-engine/lesters/random-normal-match
Add randomNormal.match
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/features/prod_features.cpp 56
-rw-r--r-- searchlib/src/tests/features/prod_features.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/features/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/features/random_normal_feature.cpp 36
-rw-r--r-- searchlib/src/vespa/searchlib/features/random_normal_feature.h 9
-rw-r--r-- searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp 82
-rw-r--r-- searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h 67
-rw-r--r-- searchlib/src/vespa/searchlib/features/setup.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/util/random_normal.h 67
9 files changed, 275 insertions, 46 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index 345c66ec672..0e57f520673 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -32,6 +32,7 @@ LOG_SETUP("prod_features_test");
#include <vespa/searchlib/features/querytermcountfeature.h>
#include <vespa/searchlib/features/randomfeature.h>
#include <vespa/searchlib/features/random_normal_feature.h>
+#include <vespa/searchlib/features/random_normal_stable_feature.h>
#include <vespa/searchlib/features/rankingexpressionfeature.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/termfeature.h>
@@ -105,6 +106,7 @@ Test::Main()
TEST_DO(testQueryTermCount()); TEST_FLUSH();
TEST_DO(testRandom()); TEST_FLUSH();
TEST_DO(testRandomNormal()); TEST_FLUSH();
+ TEST_DO(testRandomNormalStable()); TEST_FLUSH();
TEST_DO(testRankingExpression()); TEST_FLUSH();
TEST_DO(testTerm()); TEST_FLUSH();
TEST_DO(testTermDistance()); TEST_FLUSH();
@@ -1727,17 +1729,16 @@ Test::testRandom()
}
void
-Test::testRandomNormal()
-{
+Test::testRandomNormal() {
{ // Test blueprint.
RandomNormalBlueprint pt;
EXPECT_TRUE(assertCreateInstance(pt, "randomNormal"));
StringList params, in, out;
- FT_SETUP_OK (pt, params, in, out.add("out"));
- FT_SETUP_OK (pt, params.add("0.5").add("1.0"), in, out);
- FT_SETUP_OK (pt, params.add("val1"), in, out);
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out);
+ FT_SETUP_OK(pt, params.add("val1"), in, out);
FT_DUMP_EMPTY(_factory, "randomNormal");
}
@@ -1766,7 +1767,50 @@ Test::testRandomNormal()
for (uint32_t i = 0; i < 5; ++i) {
rr.clear();
ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
- ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1));
+ ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1));
+ }
+ }
+}
+
+void
+Test::testRandomNormalStable() {
+ { // Test blueprint.
+ RandomNormalStableBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "randomNormalStable"));
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out);
+ FT_SETUP_OK(pt, params.add("val1"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "randomNormalStable");
+ }
+
+ { // Test setting of mean and stddev values, and seed
+ FtFeatureTest ft1(_factory, "randomNormalStable(0.0,0.1)");
+ FtFeatureTest ft2(_factory, "randomNormalStable(1.0,0.2)");
+ ft1.getIndexEnv().getProperties().add("randomNormalStable(0.0,0.1).seed", "100");
+ ft2.getIndexEnv().getProperties().add("randomNormalStable(1.0,0.2).seed", "100");
+ ASSERT_TRUE(ft1.setup());
+ ASSERT_TRUE(ft2.setup());
+ RankResult rr;
+ for (uint32_t i = 0; i < 5; ++i) {
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalStable(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1));
+ }
+ }
+ { // Test executor (randomNormalStable)
+ FtFeatureTest ft1(_factory, "randomNormalStable");
+ FtFeatureTest ft2(_factory, "randomNormalStable");
+ ASSERT_TRUE(ft1.setup());
+ ASSERT_TRUE(ft2.setup());
+ RankResult rr;
+ for (uint32_t i = 0; i < 5; ++i) {
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalStable"), EPS, i + 1));
}
}
}
diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h
index 0d234ca674e..d7bf001bedf 100644
--- a/searchlib/src/tests/features/prod_features.h
+++ b/searchlib/src/tests/features/prod_features.h
@@ -35,6 +35,7 @@ public:
void testQueryTermCount();
void testRandom();
void testRandomNormal();
+ void testRandomNormalStable();
void testRankingExpression();
void testTerm();
void testTermDistance();
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index bd847fe35b5..16401a67424 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -44,6 +44,7 @@ vespa_add_library(searchlib_features OBJECT
queryterm.cpp
querytermcountfeature.cpp
random_normal_feature.cpp
+ random_normal_stable_feature.cpp
randomfeature.cpp
rankingexpressionfeature.cpp
raw_score_feature.cpp
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
index 67f30c4eb93..c83ec80f6b5 100644
--- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
@@ -4,7 +4,6 @@
#include "utils.h"
#include <vespa/searchlib/fef/properties.h>
#include <vespa/fastos/time.h>
-#include <cmath>
#include <vespa/log/log.h>
LOG_SETUP(".features.randomnormalfeature");
@@ -14,46 +13,18 @@ namespace features {
RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) :
search::fef::FeatureExecutor(),
- _rnd(),
- _mean(mean),
- _stddev(stddev),
- _hasSpare(false),
- _spare(0.0)
-
+ _rnd(mean, stddev, true)
{
LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev);
- _rnd.srand48(seed);
+ _rnd.seed(seed);
}
-/**
- * Draws a random number from the Gaussian distribution
- * using the Marsaglia polar method.
- */
void
RandomNormalExecutor::execute(uint32_t)
{
- feature_t result = _spare;
- if (_hasSpare) {
- _hasSpare = false;
- } else {
- _hasSpare = true;
-
- feature_t u, v, s;
- do {
- u = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0;
- v = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0;
- s = u * u + v * v;
- } while ( (s >= 1.0) || (s == 0.0) );
- s = std::sqrt(-2.0 * std::log(s) / s);
-
- _spare = v * s; // saved for next invocation
- result = u * s;
- }
-
- outputs().set_number(0, _mean + _stddev * result);
+ outputs().set_number(0, _rnd.next());
}
-
RandomNormalBlueprint::RandomNormalBlueprint() :
search::fef::Blueprint("randomNormal"),
_seed(0),
@@ -82,7 +53,6 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env,
if (p.found()) {
_seed = util::strToNum<uint64_t>(p.get());
}
-
if (params.size() > 0) {
_mean = params[0].asDouble();
}
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h
index 92aa3117b91..2d2429371d9 100644
--- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h
+++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h
@@ -4,7 +4,7 @@
#include <vespa/searchlib/fef/blueprint.h>
#include <vespa/searchlib/fef/featureexecutor.h>
-#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/util/random_normal.h>
namespace search {
namespace features {
@@ -17,12 +17,7 @@ namespace features {
**/
class RandomNormalExecutor : public fef::FeatureExecutor {
private:
- Rand48 _rnd;
- double _mean;
- double _stddev;
-
- bool _hasSpare;
- double _spare;
+ RandomNormal _rnd; // seeded once per query
public:
RandomNormalExecutor(uint64_t seed, double mean, double stddev);
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp
new file mode 100644
index 00000000000..5f3cf7fd063
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp
@@ -0,0 +1,82 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "random_normal_stable_feature.h"
+#include "utils.h"
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/fastos/time.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".features.randomnormalstablefeature");
+
+namespace search {
+namespace features {
+
+RandomNormalStableExecutor::RandomNormalStableExecutor(uint64_t seed, double mean, double stddev) :
+ search::fef::FeatureExecutor(),
+ _rnd(mean, stddev, false), // don't use spares, as we reset seed on every generation
+ _seed(seed)
+{
+ LOG(debug, "RandomNormalStableExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev);
+}
+
+void
+RandomNormalStableExecutor::execute(uint32_t docId)
+{
+ _rnd.seed(_seed + docId);
+ outputs().set_number(0, _rnd.next());
+}
+
+RandomNormalStableBlueprint::RandomNormalStableBlueprint() :
+ search::fef::Blueprint("randomNormalStable"),
+ _seed(0),
+ _mean(0.0),
+ _stddev(1.0)
+{
+}
+
+void
+RandomNormalStableBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+}
+
+search::fef::Blueprint::UP
+RandomNormalStableBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new RandomNormalStableBlueprint());
+}
+
+bool
+RandomNormalStableBlueprint::setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params)
+{
+ search::fef::Property p = env.getProperties().lookup(getName(), "seed");
+ if (p.found()) {
+ _seed = util::strToNum<uint64_t>(p.get());
+ }
+ if (params.size() > 0) {
+ _mean = params[0].asDouble();
+ }
+ if (params.size() > 1) {
+ _stddev = params[1].asDouble();
+ }
+
+ describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)");
+
+ return true;
+}
+
+search::fef::FeatureExecutor &
+RandomNormalStableBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
+{
+ uint64_t seed = _seed;
+ if (seed == 0) {
+ seed = util::strToNum<uint64_t>
+ (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed
+ }
+ return stash.create<RandomNormalStableExecutor>(seed, _mean, _stddev);
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h
new file mode 100644
index 00000000000..129c929ba3d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h
@@ -0,0 +1,67 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/util/random_normal.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the random normal stable feature, outputting a
+ * random number drawn from the Gaussian distribution with the
+ * two arguments 'mean' and 'stddev'.
+ * The generator is re-seeded with (seed + docId) on every execution, so the same hit always returns the same random number.
+ **/
+class RandomNormalStableExecutor : public fef::FeatureExecutor {
+private:
+ RandomNormal _rnd; // seeded once per match
+ uint64_t _seed;
+
+public:
+ RandomNormalStableExecutor(uint64_t seed, double mean, double stddev);
+ void execute(uint32_t docId) override;
+};
+
+
+/**
+ * Implements the blueprint for the random normal stable feature.
+ */
+class RandomNormalStableBlueprint : public fef::Blueprint {
+private:
+ uint64_t _seed;
+ double _mean;
+ double _stddev;
+
+public:
+ RandomNormalStableBlueprint();
+
+ void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+ fef::Blueprint::UP createInstance() const override;
+ fef::ParameterDescriptions getDescriptions() const override {
+ return fef::ParameterDescriptions().
+ // Can run without parameters:
+ desc().
+
+ // Can run with two parameters (mean and stddev):
+ desc().
+ number(). // mean
+ number(). // stddev
+
+ // Can run with three parameters:
+ desc().
+ number(). // mean
+ number(). // stddev
+ string(); // in order to name different features
+ }
+
+ bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+ fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index 867f058931f..1d3c59f5b3d 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -38,6 +38,7 @@
#include "querytermcountfeature.h"
#include "randomfeature.h"
#include "random_normal_feature.h"
+#include "random_normal_stable_feature.h"
#include "rankingexpressionfeature.h"
#include "raw_score_feature.h"
#include "reverseproximityfeature.h"
@@ -100,6 +101,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint()));
registry.addPrototype(Blueprint::SP(new RandomBlueprint()));
registry.addPrototype(Blueprint::SP(new RandomNormalBlueprint()));
+ registry.addPrototype(Blueprint::SP(new RandomNormalStableBlueprint()));
registry.addPrototype(Blueprint::SP(new RawScoreBlueprint()));
registry.addPrototype(Blueprint::SP(new SubqueriesBlueprint));
registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint()));
diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h
new file mode 100644
index 00000000000..74596066312
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/random_normal.h
@@ -0,0 +1,67 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/util/rand48.h>
+#include <cmath>
+
+namespace search {
+
+/**
+ * Draws a random number from the Gaussian distribution
+ * using the Marsaglia polar method.
+ */
+class RandomNormal
+{
+private:
+ Rand48 _rnd;
+ double _mean;
+ double _stddev;
+
+ bool _useSpare;
+ bool _hasSpare;
+ feature_t _spare;
+
+ feature_t nextUniform() {
+ return (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0;
+ }
+
+public:
+ RandomNormal(double mean, double stddev, bool useSpare = true) :
+ _rnd(),
+ _mean(mean),
+ _stddev(stddev),
+ _useSpare(useSpare),
+ _hasSpare(false),
+ _spare(0.0)
+ {}
+
+ void seed(long seed) {
+ _rnd.srand48(seed);
+ }
+
+ feature_t next() {
+ feature_t result = _spare;
+ if (_useSpare && _hasSpare) {
+ _hasSpare = false;
+ } else {
+ _hasSpare = true;
+
+ feature_t u, v, s;
+ do {
+ u = nextUniform();
+ v = nextUniform();
+ s = u * u + v * v;
+ } while ( (s >= 1.0) || (s == 0.0) );
+ s = std::sqrt(-2.0 * std::log(s) / s);
+
+ _spare = v * s; // saved for next invocation
+ result = u * s;
+ }
+ return _mean + _stddev * result;
+ }
+
+};
+
+} // search
+