aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLester Solbakken <lesters@oath.com>2018-06-14 15:10:36 +0200
committerLester Solbakken <lesters@oath.com>2018-06-14 15:10:36 +0200
commit9074c9d6db60c9d785f395354aea2284f22d5020 (patch)
tree0a108cd8dfe35d4310ecc02a6e5f8d4cd1e533e8
parent3720104a3ae7e7ba38c34e8eae85a25ceeae74cc (diff)
Move match version of random normal to its own feature
-rw-r--r--searchlib/src/tests/features/prod_features.cpp67
-rw-r--r--searchlib/src/tests/features/prod_features.h1
-rw-r--r--searchlib/src/vespa/searchlib/features/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/features/random_normal_feature.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/features/random_normal_feature.h7
-rw-r--r--searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp79
-rw-r--r--searchlib/src/vespa/searchlib/features/random_normal_match_feature.h67
-rw-r--r--searchlib/src/vespa/searchlib/features/setup.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/util/random_normal.h19
9 files changed, 217 insertions, 57 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index a07785398bf..214323ae7c7 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -32,6 +32,7 @@ LOG_SETUP("prod_features_test");
#include <vespa/searchlib/features/querytermcountfeature.h>
#include <vespa/searchlib/features/randomfeature.h>
#include <vespa/searchlib/features/random_normal_feature.h>
+#include <vespa/searchlib/features/random_normal_match_feature.h>
#include <vespa/searchlib/features/rankingexpressionfeature.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/termfeature.h>
@@ -105,6 +106,7 @@ Test::Main()
TEST_DO(testQueryTermCount()); TEST_FLUSH();
TEST_DO(testRandom()); TEST_FLUSH();
TEST_DO(testRandomNormal()); TEST_FLUSH();
+ TEST_DO(testRandomNormalMatch()); TEST_FLUSH();
TEST_DO(testRankingExpression()); TEST_FLUSH();
TEST_DO(testTerm()); TEST_FLUSH();
TEST_DO(testTermDistance()); TEST_FLUSH();
@@ -1727,17 +1729,16 @@ Test::testRandom()
}
void
-Test::testRandomNormal()
-{
+Test::testRandomNormal() {
{ // Test blueprint.
RandomNormalBlueprint pt;
EXPECT_TRUE(assertCreateInstance(pt, "randomNormal"));
StringList params, in, out;
- FT_SETUP_OK (pt, params, in, out.add("out").add("match"));
- FT_SETUP_OK (pt, params.add("0.5").add("1.0"), in, out);
- FT_SETUP_OK (pt, params.add("val1"), in, out);
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out);
+ FT_SETUP_OK(pt, params.add("val1"), in, out);
FT_DUMP_EMPTY(_factory, "randomNormal");
}
@@ -1766,32 +1767,52 @@ Test::testRandomNormal()
for (uint32_t i = 0; i < 5; ++i) {
rr.clear();
ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
- ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1));
+ ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1));
+ }
+ }
+}
+
+void
+Test::testRandomNormalMatch() {
+ { // Test blueprint.
+ RandomNormalMatchBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "randomNormalMatch"));
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out);
+ FT_SETUP_OK(pt, params.add("val1"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "randomNormalMatch");
+ }
+
+ { // Test setting of mean and stddev values, and seed
+ FtFeatureTest ft1(_factory, "randomNormalMatch(0.0,0.1)");
+ FtFeatureTest ft2(_factory, "randomNormalMatch(1.0,0.2)");
+ ft1.getIndexEnv().getProperties().add("randomNormalMatch(0.0,0.1).seed", "100");
+ ft2.getIndexEnv().getProperties().add("randomNormalMatch(1.0,0.2).seed", "100");
+ ASSERT_TRUE(ft1.setup());
+ ASSERT_TRUE(ft2.setup());
+ RankResult rr;
+ for (uint32_t i = 0; i < 5; ++i) {
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalMatch(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1));
}
}
{ // Test executor (randomNormal.match)
- FtFeatureTest ft1(_factory, "randomNormal.match");
- FtFeatureTest ft2(_factory, "randomNormal.match");
+ FtFeatureTest ft1(_factory, "randomNormalMatch");
+ FtFeatureTest ft2(_factory, "randomNormalMatch");
ASSERT_TRUE(ft1.setup());
ASSERT_TRUE(ft2.setup());
- RankResult rr1;
- RankResult rr2;
+ RankResult rr;
for (uint32_t i = 0; i < 5; ++i) {
- rr1.clear();
- rr2.clear();
- ASSERT_TRUE(ft1.executeOnly(rr1, i + 1));
- ASSERT_TRUE(ft2.executeOnly(rr2, i + 1));
-
- feature_t rn1 = rr1.getScore("randomNormal");
- feature_t rn2 = rr2.getScore("randomNormal");
- ASSERT_NOT_EQUAL(rn1, rn2);
-
- feature_t rnm1 = rr1.getScore("randomNormal.match");
- feature_t rnm2 = rr2.getScore("randomNormal.match");
- ASSERT_EQUAL(rnm1, rnm2);
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalMatch"), EPS, i + 1));
}
}
-
}
void
diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h
index 0d234ca674e..dec860917f0 100644
--- a/searchlib/src/tests/features/prod_features.h
+++ b/searchlib/src/tests/features/prod_features.h
@@ -35,6 +35,7 @@ public:
void testQueryTermCount();
void testRandom();
void testRandomNormal();
+ void testRandomNormalMatch();
void testRankingExpression();
void testTerm();
void testTermDistance();
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index bd847fe35b5..2b92b5ec443 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -44,6 +44,7 @@ vespa_add_library(searchlib_features OBJECT
queryterm.cpp
querytermcountfeature.cpp
random_normal_feature.cpp
+ random_normal_match_feature.cpp
randomfeature.cpp
rankingexpressionfeature.cpp
raw_score_feature.cpp
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
index 40ff8db229e..c83ec80f6b5 100644
--- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
@@ -11,32 +11,18 @@ LOG_SETUP(".features.randomnormalfeature");
namespace search {
namespace features {
-RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev) :
+RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) :
search::fef::FeatureExecutor(),
- _rnd(),
- _matchRnd(),
- _matchSeed(matchSeed),
- _mean(mean),
- _stddev(stddev)
+ _rnd(mean, stddev, true)
{
- LOG(debug, "RandomNormalExecutor: seed=%zu, matchSeed=%zu, mean=%f, stddev=%f", seed, matchSeed, mean, stddev);
+ LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev);
_rnd.seed(seed);
}
void
-RandomNormalExecutor::execute(uint32_t docId)
+RandomNormalExecutor::execute(uint32_t)
{
- _matchRnd.seed(_matchSeed + docId);
-
- feature_t out = _mean + _stddev * _rnd.next();
- feature_t match = _mean + _stddev * _matchRnd.next(false);
-
- outputs().set_number(0, out);
- outputs().set_number(1, match);
-
- // Note: calculating match here almost triples the cost for generating the non-match
- // value. If this turns out to be too costly, we should consider creating an own
- // feature executor for the match.
+ outputs().set_number(0, _rnd.next());
}
RandomNormalBlueprint::RandomNormalBlueprint() :
@@ -75,13 +61,12 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env,
}
describeOutput("out" , "A random value drawn from the Gaussian distribution");
- describeOutput("match" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)");
return true;
}
search::fef::FeatureExecutor &
-RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
+RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &, vespalib::Stash &stash) const
{
uint64_t seed = _seed;
if (seed == 0) {
@@ -90,9 +75,7 @@ RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &env,
seed = static_cast<uint64_t>(time.MicroSecs()) ^
reinterpret_cast<uint64_t>(&seed); // results in different seeds in different threads
}
- uint64_t matchSeed = util::strToNum<uint64_t>
- (env.getProperties().lookup(getName(), "match", "seed").get("1024")); // default seed
- return stash.create<RandomNormalExecutor>(seed, matchSeed, _mean, _stddev);
+ return stash.create<RandomNormalExecutor>(seed, _mean, _stddev);
}
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h
index 9ce8f899446..2d2429371d9 100644
--- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h
+++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h
@@ -18,14 +18,9 @@ namespace features {
class RandomNormalExecutor : public fef::FeatureExecutor {
private:
RandomNormal _rnd; // seeded once per query
- RandomNormal _matchRnd; // seeded once per match
-
- uint64_t _matchSeed;
- double _mean;
- double _stddev;
public:
- RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev);
+ RandomNormalExecutor(uint64_t seed, double mean, double stddev);
void execute(uint32_t docId) override;
};
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp
new file mode 100644
index 00000000000..586835f6b9a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp
@@ -0,0 +1,79 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "random_normal_match_feature.h"
+#include "utils.h"
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/fastos/time.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".features.randomnormalmatchfeature");
+
+namespace search {
+namespace features {
+
+RandomNormalMatchExecutor::RandomNormalMatchExecutor(uint64_t seed, double mean, double stddev) :
+ search::fef::FeatureExecutor(),
+ _rnd(mean, stddev, true),
+ _seed(seed)
+{
+ LOG(debug, "RandomNormalMatchExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev);
+}
+
+void
+RandomNormalMatchExecutor::execute(uint32_t docId)
+{
+ _rnd.seed(_seed + docId);
+ outputs().set_number(0, _rnd.next());
+}
+
+RandomNormalMatchBlueprint::RandomNormalMatchBlueprint() :
+ search::fef::Blueprint("randomNormalMatch"),
+ _seed(0),
+ _mean(0.0),
+ _stddev(1.0)
+{
+}
+
+void
+RandomNormalMatchBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+}
+
+search::fef::Blueprint::UP
+RandomNormalMatchBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new RandomNormalMatchBlueprint());
+}
+
+bool
+RandomNormalMatchBlueprint::setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params)
+{
+ search::fef::Property p = env.getProperties().lookup(getName(), "seed");
+ if (p.found()) {
+ _seed = util::strToNum<uint64_t>(p.get());
+ }
+ if (params.size() > 0) {
+ _mean = params[0].asDouble();
+ }
+ if (params.size() > 1) {
+ _stddev = params[1].asDouble();
+ }
+
+ describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)");
+
+ return true;
+}
+
+search::fef::FeatureExecutor &
+RandomNormalMatchBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
+{
+ uint64_t seed = util::strToNum<uint64_t>
+ (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed
+ return stash.create<RandomNormalMatchExecutor>(seed, _mean, _stddev);
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h
new file mode 100644
index 00000000000..66431a3a60c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h
@@ -0,0 +1,67 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/util/random_normal.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the random normal match feature, outputting
+ * a random number drawn from the Gaussian distribution given by the two
+ * arguments 'mean' and 'stddev'. The generator is re-seeded with
+ * (seed + docId) on each execution, so the same hit always gets the same number.
+ **/
+class RandomNormalMatchExecutor : public fef::FeatureExecutor {
+private:
+ RandomNormal _rnd; // seeded once per match
+ uint64_t _seed;
+
+public:
+ RandomNormalMatchExecutor(uint64_t seed, double mean, double stddev);
+ void execute(uint32_t docId) override;
+};
+
+
+/**
+ * Implements the blueprint for the random normal match feature.
+ */
+class RandomNormalMatchBlueprint : public fef::Blueprint {
+private:
+ uint64_t _seed;
+ double _mean;
+ double _stddev;
+
+public:
+ RandomNormalMatchBlueprint();
+
+ void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+ fef::Blueprint::UP createInstance() const override;
+ fef::ParameterDescriptions getDescriptions() const override {
+ return fef::ParameterDescriptions().
+ // Can run without parameters:
+ desc().
+
+ // Can run with two parameters (mean and stddev):
+ desc().
+ number(). // mean
+ number(). // stddev
+
+ // Can run with three parameters:
+ desc().
+ number(). // mean
+ number(). // stddev
+ string(); // in order to name different features
+ }
+
+ bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+ fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index 867f058931f..c8e0ffb6f4a 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -38,6 +38,7 @@
#include "querytermcountfeature.h"
#include "randomfeature.h"
#include "random_normal_feature.h"
+#include "random_normal_match_feature.h"
#include "rankingexpressionfeature.h"
#include "raw_score_feature.h"
#include "reverseproximityfeature.h"
@@ -100,6 +101,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint()));
registry.addPrototype(Blueprint::SP(new RandomBlueprint()));
registry.addPrototype(Blueprint::SP(new RandomNormalBlueprint()));
+ registry.addPrototype(Blueprint::SP(new RandomNormalMatchBlueprint()));
registry.addPrototype(Blueprint::SP(new RawScoreBlueprint()));
registry.addPrototype(Blueprint::SP(new SubqueriesBlueprint));
registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint()));
diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h
index 0c2da580db6..68e98f871d3 100644
--- a/searchlib/src/vespa/searchlib/util/random_normal.h
+++ b/searchlib/src/vespa/searchlib/util/random_normal.h
@@ -11,6 +11,10 @@ class RandomNormal
{
private:
Rand48 _rnd;
+ double _mean;
+ double _stddev;
+
+ bool _useSpare;
bool _hasSpare;
feature_t _spare;
@@ -19,7 +23,14 @@ private:
}
public:
- RandomNormal() : _rnd(), _hasSpare(false), _spare(0.0) {}
+ RandomNormal(double mean, double stddev, bool useSpare = true) :
+ _rnd(),
+ _mean(mean),
+ _stddev(stddev),
+ _useSpare(useSpare),
+ _hasSpare(false),
+ _spare(0.0)
+ {}
void seed(long seed) {
_rnd.srand48(seed);
@@ -29,9 +40,9 @@ public:
* Draws a random number from the Gaussian distribution
* using the Marsaglia polar method.
*/
- feature_t next(bool useSpare = true) {
+ feature_t next() {
feature_t result = _spare;
- if (_hasSpare && useSpare) {
+ if (_useSpare && _hasSpare) {
_hasSpare = false;
} else {
_hasSpare = true;
@@ -47,7 +58,7 @@ public:
_spare = v * s; // saved for next invocation
result = u * s;
}
- return result;
+ return _mean + _stddev * result;
}
};