From cf97aabc855c1beae74ec1708548c0334f47c438 Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Wed, 13 Jun 2018 13:17:02 +0200 Subject: Add randomNormal.match --- searchlib/src/tests/features/prod_features.cpp | 10 ++++++ .../searchlib/features/random_normal_feature.cpp | 37 +++++++++++++++------- .../searchlib/features/random_normal_feature.h | 7 ++-- 3 files changed, 41 insertions(+), 13 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 345c66ec672..21c509832e3 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1769,6 +1769,16 @@ Test::testRandomNormal() ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1)); } } + { // Test executor (randomNormal.match) + FtFeatureTest ft(_factory, "randomNormal.match"); + ASSERT_TRUE(ft.setup()); + RankResult rr; + for (uint32_t i = 0; i < 5; ++i) { + rr.clear(); + ASSERT_TRUE(ft.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft.execute(rr.getScore("randomNormal.match"), EPS, i + 1)); + } + } } void diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp index 67f30c4eb93..1b274be81a7 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp @@ -12,25 +12,30 @@ LOG_SETUP(".features.randomnormalfeature"); namespace search { namespace features { -RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) : +RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev) : search::fef::FeatureExecutor(), _rnd(), + _matchRnd(), + _matchSeed(matchSeed), _mean(mean), _stddev(stddev), _hasSpare(false), _spare(0.0) - { - LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); + LOG(debug, "RandomNormalExecutor: seed=%zu, matchSeed=%zu, mean=%f, stddev=%f", seed, matchSeed, mean, stddev); _rnd.srand48(seed); } +feature_t generateRandom(Rand48 generator) { + return (generator.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; +} + /** * Draws a random number from the Gaussian distribution * using the Marsaglia polar method. */ void -RandomNormalExecutor::execute(uint32_t) +RandomNormalExecutor::execute(uint32_t docId) { feature_t result = _spare; if (_hasSpare) { @@ -40,8 +45,8 @@ RandomNormalExecutor::execute(uint32_t) feature_t u, v, s; do { - u = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; - v = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; + u = generateRandom(_rnd); + v = generateRandom(_rnd); s = u * u + v * v; } while ( (s >= 1.0) || (s == 0.0) ); s = std::sqrt(-2.0 * std::log(s) / s); @@ -49,10 +54,18 @@ RandomNormalExecutor::execute(uint32_t) _spare = v * s; // saved for next invocation result = u * s; } - outputs().set_number(0, _mean + _stddev * result); -} + _matchRnd.srand48(_matchSeed + docId); + feature_t u, v, s; + do { + u = generateRandom(_matchRnd); + v = generateRandom(_matchRnd); + s = u * u + v * v; + } while ( (s >= 1.0) || (s == 0.0) ); + s = std::sqrt(-2.0 * std::log(s) / s); + outputs().set_number(1, _mean + _stddev * u * s); +} RandomNormalBlueprint::RandomNormalBlueprint() : search::fef::Blueprint("randomNormal"), @@ -82,7 +95,6 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env, if (p.found()) { _seed = util::strToNum(p.get()); } - if (params.size() > 0) { _mean = params[0].asDouble(); } @@ -91,12 +103,13 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env, } describeOutput("out" , "A random value drawn from the Gaussian distribution"); + describeOutput("match" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); return true; } search::fef::FeatureExecutor & -RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &, vespalib::Stash &stash) const +RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const { uint64_t seed = _seed; if (seed == 0) { @@ -105,7 +118,9 @@ RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &, ve seed = static_cast(time.MicroSecs()) ^ reinterpret_cast(&seed); // results in different seeds in different threads } - return stash.create(seed, _mean, _stddev); + uint64_t matchSeed = util::strToNum + (env.getProperties().lookup(getName(), "match", "seed").get("1024")); // default seed + return stash.create(seed, matchSeed, _mean, _stddev); } diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h index 92aa3117b91..f2bc82704bb 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h @@ -17,7 +17,10 @@ namespace features { **/ class RandomNormalExecutor : public fef::FeatureExecutor { private: - Rand48 _rnd; + Rand48 _rnd; // seeded once per query + Rand48 _matchRnd; // seeded once per match + uint64_t _matchSeed; + double _mean; double _stddev; @@ -25,7 +28,7 @@ private: double _spare; public: - RandomNormalExecutor(uint64_t seed, double mean, double stddev); + RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev); void execute(uint32_t docId) override; }; -- cgit v1.2.3 From 2b0c9b5e9614862d9e13aac6d760cc0f8a941b4a Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Wed, 13 Jun 2018 14:22:45 +0200 Subject: Add match to output in randomNormal test --- searchlib/src/tests/features/prod_features.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 21c509832e3..80211192ad5 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1735,7 +1735,7 @@ Test::testRandomNormal() EXPECT_TRUE(assertCreateInstance(pt, "randomNormal")); StringList params, in, out; - FT_SETUP_OK (pt, params, in, out.add("out")); + FT_SETUP_OK (pt, params, in, out.add("out").add("match")); FT_SETUP_OK (pt, params.add("0.5").add("1.0"), in, out); FT_SETUP_OK (pt, params.add("val1"), in, out); -- cgit v1.2.3 From a5cb701cad94076774b900edcc68757b6e20c93e Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Thu, 14 Jun 2018 10:51:06 +0200 Subject: Pass random generator by reference --- searchlib/src/vespa/searchlib/features/random_normal_feature.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp index 1b274be81a7..c15f072aaa6 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp @@ -26,7 +26,7 @@ RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, do _rnd.srand48(seed); } -feature_t generateRandom(Rand48 generator) { +feature_t generateRandom(Rand48& generator) { return (generator.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; } -- cgit v1.2.3 From 03f71b36ce970a0207108702e4d1b6bf9b1fcabb Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Thu, 14 Jun 2018 11:17:31 +0200 Subject: Refactor out normal distributed random generator --- .../searchlib/features/random_normal_feature.cpp | 46 ++---------------- .../searchlib/features/random_normal_feature.h | 11 ++--- searchlib/src/vespa/searchlib/util/random_normal.h | 56 ++++++++++++++++++++++ 3 files changed, 65 insertions(+), 48 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/util/random_normal.h (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp index c15f072aaa6..ddf9f9f016a 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp @@ -4,7 +4,6 @@ #include "utils.h" #include #include -#include #include LOG_SETUP(".features.randomnormalfeature"); @@ -18,53 +17,18 @@ RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, do _matchRnd(), _matchSeed(matchSeed), _mean(mean), - _stddev(stddev), - _hasSpare(false), - _spare(0.0) + _stddev(stddev) { LOG(debug, "RandomNormalExecutor: seed=%zu, matchSeed=%zu, mean=%f, stddev=%f", seed, matchSeed, mean, stddev); - _rnd.srand48(seed); + _rnd.seed(seed); } -feature_t generateRandom(Rand48& generator) { - return (generator.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; -} - -/** - * Draws a random number from the Gaussian distribution - * using the Marsaglia polar method. - */ void RandomNormalExecutor::execute(uint32_t docId) { - feature_t result = _spare; - if (_hasSpare) { - _hasSpare = false; - } else { - _hasSpare = true; - - feature_t u, v, s; - do { - u = generateRandom(_rnd); - v = generateRandom(_rnd); - s = u * u + v * v; - } while ( (s >= 1.0) || (s == 0.0) ); - s = std::sqrt(-2.0 * std::log(s) / s); - - _spare = v * s; // saved for next invocation - result = u * s; - } - outputs().set_number(0, _mean + _stddev * result); - - _matchRnd.srand48(_matchSeed + docId); - feature_t u, v, s; - do { - u = generateRandom(_matchRnd); - v = generateRandom(_matchRnd); - s = u * u + v * v; - } while ( (s >= 1.0) || (s == 0.0) ); - s = std::sqrt(-2.0 * std::log(s) / s); - outputs().set_number(1, _mean + _stddev * u * s); + outputs().set_number(0, _mean + _stddev * _rnd.next()); + _matchRnd.seed(_matchSeed + docId); + outputs().set_number(0, _mean + _stddev * _matchRnd.next(false)); } RandomNormalBlueprint::RandomNormalBlueprint() : diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h index f2bc82704bb..9ce8f899446 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h @@ -4,7 +4,7 @@ #include #include -#include +#include namespace search { namespace features { @@ -17,16 +17,13 @@ namespace features { **/ class RandomNormalExecutor : public fef::FeatureExecutor { private: - Rand48 _rnd; // seeded once per query - Rand48 _matchRnd; // seeded once per match - uint64_t _matchSeed; + RandomNormal _rnd; // seeded once per query + RandomNormal _matchRnd; // seeded once per match + uint64_t _matchSeed; double _mean; double _stddev; - bool _hasSpare; - double _spare; - public: RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev); void execute(uint32_t docId) override; diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h new file mode 100644 index 00000000000..0c2da580db6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/random_normal.h @@ -0,0 +1,56 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { + +class RandomNormal +{ +private: + Rand48 _rnd; + bool _hasSpare; + feature_t _spare; + + feature_t nextUniform() { + return (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; + } + +public: + RandomNormal() : _rnd(), _hasSpare(false), _spare(0.0) {} + + void seed(long seed) { + _rnd.srand48(seed); + } + + /** + * Draws a random number from the Gaussian distribution + * using the Marsaglia polar method. + */ + feature_t next(bool useSpare = true) { + feature_t result = _spare; + if (_hasSpare && useSpare) { + _hasSpare = false; + } else { + _hasSpare = true; + + feature_t u, v, s; + do { + u = nextUniform(); + v = nextUniform(); + s = u * u + v * v; + } while ( (s >= 1.0) || (s == 0.0) ); + s = std::sqrt(-2.0 * std::log(s) / s); + + _spare = v * s; // saved for next invocation + result = u * s; + } + return result; + } + +}; + +} // search + -- cgit v1.2.3 From 68c62a5d2a1b35c3c2859cd5838928c371127f27 Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Thu, 14 Jun 2018 13:16:10 +0200 Subject: Fix random normal match test --- searchlib/src/tests/features/prod_features.cpp | 27 +++++++++++++++++----- .../searchlib/features/random_normal_feature.cpp | 12 ++++++++-- 2 files changed, 31 insertions(+), 8 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 80211192ad5..7f22d779a9e 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1770,15 +1770,30 @@ Test::testRandomNormal() } } { // Test executor (randomNormal.match) - FtFeatureTest ft(_factory, "randomNormal.match"); - ASSERT_TRUE(ft.setup()); - RankResult rr; + FtFeatureTest ft1(_factory, "randomNormal.match"); + FtFeatureTest ft2(_factory, "randomNormal.match"); + ASSERT_TRUE(ft1.setup()); + ASSERT_TRUE(ft2.setup()); + RankResult rr1; + RankResult rr2; for (uint32_t i = 0; i < 5; ++i) { - rr.clear(); - ASSERT_TRUE(ft.executeOnly(rr, i + 1)); - ASSERT_TRUE(ft.execute(rr.getScore("randomNormal.match"), EPS, i + 1)); + rr1.clear(); + rr2.clear(); + ASSERT_TRUE(ft1.executeOnly(rr1, i + 1)); + ASSERT_TRUE(ft2.executeOnly(rr2, i + 1)); + + feature_t rn1 = rr1.getScore("randomNormal"); + feature_t rn2 = rr2.getScore("randomNormal"); + LOG(info, "randomNormal: %f - %f", rn1, rn2); + ASSERT_NOT_EQUAL(rn1, rn2); + + feature_t rnm1 = rr1.getScore("randomNormal.match"); + feature_t rnm2 = rr2.getScore("randomNormal.match"); + LOG(info, "randomNormalMatch: %f - %f", rnm1, rnm2); + ASSERT_EQUAL(rnm1, rnm2); } } + } void diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp index ddf9f9f016a..40ff8db229e 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp @@ -26,9 +26,17 @@ RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, do void RandomNormalExecutor::execute(uint32_t docId) { - outputs().set_number(0, _mean + _stddev * _rnd.next()); _matchRnd.seed(_matchSeed + docId); - outputs().set_number(0, _mean + _stddev * _matchRnd.next(false)); + + feature_t out = _mean + _stddev * _rnd.next(); + feature_t match = _mean + _stddev * _matchRnd.next(false); + + outputs().set_number(0, out); + outputs().set_number(1, match); + + // Note: calculating match here almost triples the cost for generating the non-match + // value. If this turns out to be too costly, we should consider creating an own + // feature executor for the match. } RandomNormalBlueprint::RandomNormalBlueprint() : -- cgit v1.2.3 From 3720104a3ae7e7ba38c34e8eae85a25ceeae74cc Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Thu, 14 Jun 2018 13:22:21 +0200 Subject: Remove unnecessary logging in unit test --- searchlib/src/tests/features/prod_features.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 7f22d779a9e..a07785398bf 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1784,12 +1784,10 @@ Test::testRandomNormal() feature_t rn1 = rr1.getScore("randomNormal"); feature_t rn2 = rr2.getScore("randomNormal"); - LOG(info, "randomNormal: %f - %f", rn1, rn2); ASSERT_NOT_EQUAL(rn1, rn2); feature_t rnm1 = rr1.getScore("randomNormal.match"); feature_t rnm2 = rr2.getScore("randomNormal.match"); - LOG(info, "randomNormalMatch: %f - %f", rnm1, rnm2); ASSERT_EQUAL(rnm1, rnm2); } } -- cgit v1.2.3 From 9074c9d6db60c9d785f395354aea2284f22d5020 Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Thu, 14 Jun 2018 15:10:36 +0200 Subject: Move match version of random normal to it's own feature --- searchlib/src/tests/features/prod_features.cpp | 67 +++++++++++------- searchlib/src/tests/features/prod_features.h | 1 + .../src/vespa/searchlib/features/CMakeLists.txt | 1 + .../searchlib/features/random_normal_feature.cpp | 31 ++------- .../searchlib/features/random_normal_feature.h | 7 +- .../features/random_normal_match_feature.cpp | 79 ++++++++++++++++++++++ .../features/random_normal_match_feature.h | 67 ++++++++++++++++++ searchlib/src/vespa/searchlib/features/setup.cpp | 2 + searchlib/src/vespa/searchlib/util/random_normal.h | 19 ++++-- 9 files changed, 217 insertions(+), 57 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/random_normal_match_feature.h (limited to 'searchlib') diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index a07785398bf..214323ae7c7 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -32,6 +32,7 @@ LOG_SETUP("prod_features_test"); #include #include #include +#include #include #include #include @@ -105,6 +106,7 @@ Test::Main() TEST_DO(testQueryTermCount()); TEST_FLUSH(); TEST_DO(testRandom()); TEST_FLUSH(); TEST_DO(testRandomNormal()); TEST_FLUSH(); + TEST_DO(testRandomNormalMatch()); TEST_FLUSH(); TEST_DO(testRankingExpression()); TEST_FLUSH(); TEST_DO(testTerm()); TEST_FLUSH(); TEST_DO(testTermDistance()); TEST_FLUSH(); @@ -1727,17 +1729,16 @@ Test::testRandom() } void -Test::testRandomNormal() -{ +Test::testRandomNormal() { { // Test blueprint. RandomNormalBlueprint pt; EXPECT_TRUE(assertCreateInstance(pt, "randomNormal")); StringList params, in, out; - FT_SETUP_OK (pt, params, in, out.add("out").add("match")); - FT_SETUP_OK (pt, params.add("0.5").add("1.0"), in, out); - FT_SETUP_OK (pt, params.add("val1"), in, out); + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); + FT_SETUP_OK(pt, params.add("val1"), in, out); FT_DUMP_EMPTY(_factory, "randomNormal"); } @@ -1766,32 +1767,52 @@ Test::testRandomNormal() for (uint32_t i = 0; i < 5; ++i) { rr.clear(); ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); - ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); + } + } +} + +void +Test::testRandomNormalMatch() { + { // Test blueprint. + RandomNormalMatchBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "randomNormalMatch")); + + StringList params, in, out; + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); + FT_SETUP_OK(pt, params.add("val1"), in, out); + + FT_DUMP_EMPTY(_factory, "randomNormalMatch"); + } + + { // Test setting of mean and stddev values, and seed + FtFeatureTest ft1(_factory, "randomNormalMatch(0.0,0.1)"); + FtFeatureTest ft2(_factory, "randomNormalMatch(1.0,0.2)"); + ft1.getIndexEnv().getProperties().add("randomNormalMatch(0.0,0.1).seed", "100"); + ft2.getIndexEnv().getProperties().add("randomNormalMatch(1.0,0.2).seed", "100"); + ASSERT_TRUE(ft1.setup()); + ASSERT_TRUE(ft2.setup()); + RankResult rr; + for (uint32_t i = 0; i < 5; ++i) { + rr.clear(); + ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalMatch(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); } } { // Test executor (randomNormal.match) - FtFeatureTest ft1(_factory, "randomNormal.match"); - FtFeatureTest ft2(_factory, "randomNormal.match"); + FtFeatureTest ft1(_factory, "randomNormalMatch"); + FtFeatureTest ft2(_factory, "randomNormalMatch"); ASSERT_TRUE(ft1.setup()); ASSERT_TRUE(ft2.setup()); - RankResult rr1; - RankResult rr2; + RankResult rr; for (uint32_t i = 0; i < 5; ++i) { - rr1.clear(); - rr2.clear(); - ASSERT_TRUE(ft1.executeOnly(rr1, i + 1)); - ASSERT_TRUE(ft2.executeOnly(rr2, i + 1)); - - feature_t rn1 = rr1.getScore("randomNormal"); - feature_t rn2 = rr2.getScore("randomNormal"); - ASSERT_NOT_EQUAL(rn1, rn2); - - feature_t rnm1 = rr1.getScore("randomNormal.match"); - feature_t rnm2 = rr2.getScore("randomNormal.match"); - ASSERT_EQUAL(rnm1, rnm2); + rr.clear(); + ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalMatch"), EPS, i + 1)); } } - } void diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h index 0d234ca674e..dec860917f0 100644 --- a/searchlib/src/tests/features/prod_features.h +++ b/searchlib/src/tests/features/prod_features.h @@ -35,6 +35,7 @@ public: void testQueryTermCount(); void testRandom(); void testRandomNormal(); + void testRandomNormalMatch(); void testRankingExpression(); void testTerm(); void testTermDistance(); diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index bd847fe35b5..2b92b5ec443 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -44,6 +44,7 @@ vespa_add_library(searchlib_features OBJECT queryterm.cpp querytermcountfeature.cpp random_normal_feature.cpp + random_normal_match_feature.cpp randomfeature.cpp rankingexpressionfeature.cpp raw_score_feature.cpp diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp index 40ff8db229e..c83ec80f6b5 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp @@ -11,32 +11,18 @@ LOG_SETUP(".features.randomnormalfeature"); namespace search { namespace features { -RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev) : +RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) : search::fef::FeatureExecutor(), - _rnd(), - _matchRnd(), - _matchSeed(matchSeed), - _mean(mean), - _stddev(stddev) + _rnd(mean, stddev, true) { - LOG(debug, "RandomNormalExecutor: seed=%zu, matchSeed=%zu, mean=%f, stddev=%f", seed, matchSeed, mean, stddev); + LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); _rnd.seed(seed); } void -RandomNormalExecutor::execute(uint32_t docId) +RandomNormalExecutor::execute(uint32_t) { - _matchRnd.seed(_matchSeed + docId); - - feature_t out = _mean + _stddev * _rnd.next(); - feature_t match = _mean + _stddev * _matchRnd.next(false); - - outputs().set_number(0, out); - outputs().set_number(1, match); - - // Note: calculating match here almost triples the cost for generating the non-match - // value. If this turns out to be too costly, we should consider creating an own - // feature executor for the match. + outputs().set_number(0, _rnd.next()); } RandomNormalBlueprint::RandomNormalBlueprint() : @@ -75,13 +61,12 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env, } describeOutput("out" , "A random value drawn from the Gaussian distribution"); - describeOutput("match" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); return true; } search::fef::FeatureExecutor & -RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const +RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &, vespalib::Stash &stash) const { uint64_t seed = _seed; if (seed == 0) { @@ -90,9 +75,7 @@ RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, seed = static_cast(time.MicroSecs()) ^ reinterpret_cast(&seed); // results in different seeds in different threads } - uint64_t matchSeed = util::strToNum - (env.getProperties().lookup(getName(), "match", "seed").get("1024")); // default seed - return stash.create(seed, matchSeed, _mean, _stddev); + return stash.create(seed, _mean, _stddev); } diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h index 9ce8f899446..2d2429371d9 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h @@ -18,14 +18,9 @@ namespace features { class RandomNormalExecutor : public fef::FeatureExecutor { private: RandomNormal _rnd; // seeded once per query - RandomNormal _matchRnd; // seeded once per match - - uint64_t _matchSeed; - double _mean; - double _stddev; public: - RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev); + RandomNormalExecutor(uint64_t seed, double mean, double stddev); void execute(uint32_t docId) override; }; diff --git a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp new file mode 100644 index 00000000000..586835f6b9a --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp @@ -0,0 +1,79 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "random_normal_match_feature.h" +#include "utils.h" +#include +#include + +#include +LOG_SETUP(".features.randomnormalmatchfeature"); + +namespace search { +namespace features { + +RandomNormalMatchExecutor::RandomNormalMatchExecutor(uint64_t seed, double mean, double stddev) : + search::fef::FeatureExecutor(), + _rnd(mean, stddev, true), + _seed(seed) +{ + LOG(debug, "RandomNormalMatchExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); +} + +void +RandomNormalMatchExecutor::execute(uint32_t docId) +{ + _rnd.seed(_seed + docId); + outputs().set_number(0, _rnd.next()); +} + +RandomNormalMatchBlueprint::RandomNormalMatchBlueprint() : + search::fef::Blueprint("randomNormalMatch"), + _seed(0), + _mean(0.0), + _stddev(1.0) +{ +} + +void +RandomNormalMatchBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ +} + +search::fef::Blueprint::UP +RandomNormalMatchBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new RandomNormalMatchBlueprint()); +} + +bool +RandomNormalMatchBlueprint::setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params) +{ + search::fef::Property p = env.getProperties().lookup(getName(), "seed"); + if (p.found()) { + _seed = util::strToNum(p.get()); + } + if (params.size() > 0) { + _mean = params[0].asDouble(); + } + if (params.size() > 1) { + _stddev = params[1].asDouble(); + } + + describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); + + return true; +} + +search::fef::FeatureExecutor & +RandomNormalMatchBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const +{ + uint64_t seed = util::strToNum + (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed + return stash.create(seed, _mean, _stddev); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h new file mode 100644 index 00000000000..66431a3a60c --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h @@ -0,0 +1,67 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for the random normal feature outputting a + * random number drawn from the Gaussian distribution with the + * two arguments 'mean' and 'stddev'. + * The same hit always returns the same random number. + **/ +class RandomNormalMatchExecutor : public fef::FeatureExecutor { +private: + RandomNormal _rnd; // seeded once per match + uint64_t _seed; + +public: + RandomNormalMatchExecutor(uint64_t seed, double mean, double stddev); + void execute(uint32_t docId) override; +}; + + +/** + * Implements the blueprint for the random normal feature. + */ +class RandomNormalMatchBlueprint : public fef::Blueprint { +private: + uint64_t _seed; + double _mean; + double _stddev; + +public: + RandomNormalMatchBlueprint(); + + void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; + fef::Blueprint::UP createInstance() const override; + fef::ParameterDescriptions getDescriptions() const override { + return fef::ParameterDescriptions(). + // Can run without parameters: + desc(). + + // Can run with two parameters (mean and stddev): + desc(). + number(). // mean + number(). // stddev + + // Can run with three parameters: + desc(). + number(). // mean + number(). // stddev + string(); // in order to name different features + } + + bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index 867f058931f..c8e0ffb6f4a 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -38,6 +38,7 @@ #include "querytermcountfeature.h" #include "randomfeature.h" #include "random_normal_feature.h" +#include "random_normal_match_feature.h" #include "rankingexpressionfeature.h" #include "raw_score_feature.h" #include "reverseproximityfeature.h" @@ -100,6 +101,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint())); registry.addPrototype(Blueprint::SP(new RandomBlueprint())); registry.addPrototype(Blueprint::SP(new RandomNormalBlueprint())); + registry.addPrototype(Blueprint::SP(new RandomNormalMatchBlueprint())); registry.addPrototype(Blueprint::SP(new RawScoreBlueprint())); registry.addPrototype(Blueprint::SP(new SubqueriesBlueprint)); registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint())); diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h index 0c2da580db6..68e98f871d3 100644 --- a/searchlib/src/vespa/searchlib/util/random_normal.h +++ b/searchlib/src/vespa/searchlib/util/random_normal.h @@ -11,6 +11,10 @@ class RandomNormal { private: Rand48 _rnd; + double _mean; + double _stddev; + + bool _useSpare; bool _hasSpare; feature_t _spare; @@ -19,7 +23,14 @@ private: } public: - RandomNormal() : _rnd(), _hasSpare(false), _spare(0.0) {} + RandomNormal(double mean, double stddev, bool useSpare = true) : + _rnd(), + _mean(mean), + _stddev(stddev), + _useSpare(useSpare), + _hasSpare(false), + _spare(0.0) + {} void seed(long seed) { _rnd.srand48(seed); @@ -29,9 +40,9 @@ public: * Draws a random number from the Gaussian distribution * using the Marsaglia polar method. */ - feature_t next(bool useSpare = true) { + feature_t next() { feature_t result = _spare; - if (_hasSpare && useSpare) { + if (_useSpare && _hasSpare) { _hasSpare = false; } else { _hasSpare = true; @@ -47,7 +58,7 @@ public: _spare = v * s; // saved for next invocation result = u * s; } - return result; + return _mean + _stddev * result; } }; -- cgit v1.2.3 From 28be51e5511dd4482c22d9a571b0df78063f2b67 Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Fri, 15 Jun 2018 09:25:19 +0200 Subject: Rename feature randomNormalMatch -> randomNormalStable --- searchlib/src/tests/features/prod_features.cpp | 30 ++++---- searchlib/src/tests/features/prod_features.h | 2 +- .../src/vespa/searchlib/features/CMakeLists.txt | 2 +- .../features/random_normal_match_feature.cpp | 79 ---------------------- .../features/random_normal_match_feature.h | 67 ------------------ .../features/random_normal_stable_feature.cpp | 79 ++++++++++++++++++++++ .../features/random_normal_stable_feature.h | 67 ++++++++++++++++++ searchlib/src/vespa/searchlib/features/setup.cpp | 4 +- 8 files changed, 165 insertions(+), 165 deletions(-) delete mode 100644 searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp delete mode 100644 searchlib/src/vespa/searchlib/features/random_normal_match_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h (limited to 'searchlib') diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 214323ae7c7..0e57f520673 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -32,7 +32,7 @@ LOG_SETUP("prod_features_test"); #include #include #include -#include +#include #include #include #include @@ -106,7 +106,7 @@ Test::Main() TEST_DO(testQueryTermCount()); TEST_FLUSH(); TEST_DO(testRandom()); TEST_FLUSH(); TEST_DO(testRandomNormal()); TEST_FLUSH(); - TEST_DO(testRandomNormalMatch()); TEST_FLUSH(); + TEST_DO(testRandomNormalStable()); TEST_FLUSH(); TEST_DO(testRankingExpression()); TEST_FLUSH(); TEST_DO(testTerm()); TEST_FLUSH(); TEST_DO(testTermDistance()); TEST_FLUSH(); @@ -1773,44 +1773,44 @@ Test::testRandomNormal() { } void -Test::testRandomNormalMatch() { +Test::testRandomNormalStable() { { // Test blueprint. - RandomNormalMatchBlueprint pt; + RandomNormalStableBlueprint pt; - EXPECT_TRUE(assertCreateInstance(pt, "randomNormalMatch")); + EXPECT_TRUE(assertCreateInstance(pt, "randomNormalStable")); StringList params, in, out; FT_SETUP_OK(pt, params, in, out.add("out")); FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); FT_SETUP_OK(pt, params.add("val1"), in, out); - FT_DUMP_EMPTY(_factory, "randomNormalMatch"); + FT_DUMP_EMPTY(_factory, "randomNormalStable"); } { // Test setting of mean and stddev values, and seed - FtFeatureTest ft1(_factory, "randomNormalMatch(0.0,0.1)"); - FtFeatureTest ft2(_factory, "randomNormalMatch(1.0,0.2)"); - ft1.getIndexEnv().getProperties().add("randomNormalMatch(0.0,0.1).seed", "100"); - ft2.getIndexEnv().getProperties().add("randomNormalMatch(1.0,0.2).seed", "100"); + FtFeatureTest ft1(_factory, "randomNormalStable(0.0,0.1)"); + FtFeatureTest ft2(_factory, "randomNormalStable(1.0,0.2)"); + ft1.getIndexEnv().getProperties().add("randomNormalStable(0.0,0.1).seed", "100"); + ft2.getIndexEnv().getProperties().add("randomNormalStable(1.0,0.2).seed", "100"); ASSERT_TRUE(ft1.setup()); ASSERT_TRUE(ft2.setup()); RankResult rr; for (uint32_t i = 0; i < 5; ++i) { rr.clear(); ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); - ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalMatch(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalStable(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); } } - { // Test executor (randomNormal.match) - FtFeatureTest ft1(_factory, "randomNormalMatch"); - FtFeatureTest ft2(_factory, "randomNormalMatch"); + { // Test executor (randomNormalStable) + FtFeatureTest ft1(_factory, "randomNormalStable"); + FtFeatureTest ft2(_factory, "randomNormalStable"); ASSERT_TRUE(ft1.setup()); ASSERT_TRUE(ft2.setup()); RankResult rr; for (uint32_t i = 0; i < 5; ++i) { rr.clear(); ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); - ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalMatch"), EPS, i + 1)); + ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalStable"), EPS, i + 1)); } } } diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h index dec860917f0..d7bf001bedf 100644 --- a/searchlib/src/tests/features/prod_features.h +++ b/searchlib/src/tests/features/prod_features.h @@ -35,7 +35,7 @@ public: void testQueryTermCount(); void testRandom(); void testRandomNormal(); - void testRandomNormalMatch(); + void testRandomNormalStable(); void testRankingExpression(); void testTerm(); void testTermDistance(); diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index 2b92b5ec443..16401a67424 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -44,7 +44,7 @@ vespa_add_library(searchlib_features OBJECT queryterm.cpp querytermcountfeature.cpp random_normal_feature.cpp - random_normal_match_feature.cpp + random_normal_stable_feature.cpp randomfeature.cpp rankingexpressionfeature.cpp raw_score_feature.cpp diff --git a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp deleted file mode 100644 index 586835f6b9a..00000000000 --- a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "random_normal_match_feature.h" -#include "utils.h" -#include -#include - -#include -LOG_SETUP(".features.randomnormalmatchfeature"); - -namespace search { -namespace features { - -RandomNormalMatchExecutor::RandomNormalMatchExecutor(uint64_t seed, double mean, double stddev) : - search::fef::FeatureExecutor(), - _rnd(mean, stddev, true), - _seed(seed) -{ - LOG(debug, "RandomNormalMatchExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); -} - -void -RandomNormalMatchExecutor::execute(uint32_t docId) -{ - _rnd.seed(_seed + docId); - outputs().set_number(0, _rnd.next()); -} - -RandomNormalMatchBlueprint::RandomNormalMatchBlueprint() : - search::fef::Blueprint("randomNormalMatch"), - _seed(0), - _mean(0.0), - _stddev(1.0) -{ -} - -void -RandomNormalMatchBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, - search::fef::IDumpFeatureVisitor &) const -{ -} - -search::fef::Blueprint::UP -RandomNormalMatchBlueprint::createInstance() const -{ - return search::fef::Blueprint::UP(new RandomNormalMatchBlueprint()); -} - -bool -RandomNormalMatchBlueprint::setup(const search::fef::IIndexEnvironment & env, - const search::fef::ParameterList & params) -{ - search::fef::Property p = env.getProperties().lookup(getName(), "seed"); - if (p.found()) { - _seed = util::strToNum(p.get()); - } - if (params.size() > 0) { - _mean = params[0].asDouble(); - } - if (params.size() > 1) { - _stddev = params[1].asDouble(); - } - - describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); - - return true; -} - -search::fef::FeatureExecutor & -RandomNormalMatchBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const -{ - uint64_t seed = util::strToNum - (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed - return stash.create(seed, _mean, _stddev); -} - - -} // namespace features -} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h deleted file mode 100644 index 66431a3a60c..00000000000 --- a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include -#include -#include - -namespace search { -namespace features { - -/** - * Implements the executor for the random normal feature outputting a - * random number drawn from the Gaussian distribution with the - * two arguments 'mean' and 'stddev'. - * The same hit always returns the same random number. - **/ -class RandomNormalMatchExecutor : public fef::FeatureExecutor { -private: - RandomNormal _rnd; // seeded once per match - uint64_t _seed; - -public: - RandomNormalMatchExecutor(uint64_t seed, double mean, double stddev); - void execute(uint32_t docId) override; -}; - - -/** - * Implements the blueprint for the random normal feature. - */ -class RandomNormalMatchBlueprint : public fef::Blueprint { -private: - uint64_t _seed; - double _mean; - double _stddev; - -public: - RandomNormalMatchBlueprint(); - - void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; - fef::Blueprint::UP createInstance() const override; - fef::ParameterDescriptions getDescriptions() const override { - return fef::ParameterDescriptions(). - // Can run without parameters: - desc(). - - // Can run with two parameters (mean and stddev): - desc(). - number(). // mean - number(). // stddev - - // Can run with three parameters: - desc(). - number(). // mean - number(). // stddev - string(); // in order to name different features - } - - bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; - fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; -}; - - -} // namespace features -} // namespace search - diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp new file mode 100644 index 00000000000..55b724285d6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp @@ -0,0 +1,79 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "random_normal_stable_feature.h" +#include "utils.h" +#include +#include + +#include +LOG_SETUP(".features.randomnormalstablefeature"); + +namespace search { +namespace features { + +RandomNormalStableExecutor::RandomNormalStableExecutor(uint64_t seed, double mean, double stddev) : + search::fef::FeatureExecutor(), + _rnd(mean, stddev, true), + _seed(seed) +{ + LOG(debug, "RandomNormalStableExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); +} + +void +RandomNormalStableExecutor::execute(uint32_t docId) +{ + _rnd.seed(_seed + docId); + outputs().set_number(0, _rnd.next()); +} + +RandomNormalStableBlueprint::RandomNormalStableBlueprint() : + search::fef::Blueprint("randomNormalStable"), + _seed(0), + _mean(0.0), + _stddev(1.0) +{ +} + +void +RandomNormalStableBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ +} + +search::fef::Blueprint::UP +RandomNormalStableBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new RandomNormalStableBlueprint()); +} + +bool +RandomNormalStableBlueprint::setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params) +{ + search::fef::Property p = env.getProperties().lookup(getName(), "seed"); + if (p.found()) { + _seed = util::strToNum(p.get()); + } + if (params.size() > 0) { + _mean = params[0].asDouble(); + } + if (params.size() > 1) { + _stddev = params[1].asDouble(); + } + + describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given Stable (document and query)"); + + return true; +} + +search::fef::FeatureExecutor & +RandomNormalStableBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const +{ + uint64_t seed = util::strToNum + (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed + return stash.create(seed, _mean, _stddev); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h new file mode 100644 index 00000000000..e0b3232c5a1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h @@ -0,0 +1,67 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for the random normal feature outputting a + * random number drawn from the Gaussian distribution with the + * two arguments 'mean' and 'stddev'. + * The same hit always returns the same random number. + **/ +class RandomNormalStableExecutor : public fef::FeatureExecutor { +private: + RandomNormal _rnd; // seeded once per match + uint64_t _seed; + +public: + RandomNormalStableExecutor(uint64_t seed, double mean, double stddev); + void execute(uint32_t docId) override; +}; + + +/** + * Implements the blueprint for the random normal feature. + */ +class RandomNormalStableBlueprint : public fef::Blueprint { +private: + uint64_t _seed; + double _mean; + double _stddev; + +public: + RandomNormalStableBlueprint(); + + void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; + fef::Blueprint::UP createInstance() const override; + fef::ParameterDescriptions getDescriptions() const override { + return fef::ParameterDescriptions(). + // Can run without parameters: + desc(). + + // Can run with two parameters (mean and stddev): + desc(). + number(). // mean + number(). // stddev + + // Can run with three parameters: + desc(). + number(). // mean + number(). // stddev + string(); // in order to name different features + } + + bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index c8e0ffb6f4a..1d3c59f5b3d 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -38,7 +38,7 @@ #include "querytermcountfeature.h" #include "randomfeature.h" #include "random_normal_feature.h" -#include "random_normal_match_feature.h" +#include "random_normal_stable_feature.h" #include "rankingexpressionfeature.h" #include "raw_score_feature.h" #include "reverseproximityfeature.h" @@ -101,7 +101,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint())); registry.addPrototype(Blueprint::SP(new RandomBlueprint())); registry.addPrototype(Blueprint::SP(new RandomNormalBlueprint())); - registry.addPrototype(Blueprint::SP(new RandomNormalMatchBlueprint())); + registry.addPrototype(Blueprint::SP(new RandomNormalStableBlueprint())); registry.addPrototype(Blueprint::SP(new RawScoreBlueprint())); registry.addPrototype(Blueprint::SP(new SubqueriesBlueprint)); registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint())); -- cgit v1.2.3 From 8c150dc0c10ca361f26b1a73b7383a0f2c4f8a95 Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Fri, 15 Jun 2018 13:00:48 +0200 Subject: Use correct seed and don't use spare for randomNormalStable --- .../searchlib/features/random_normal_stable_feature.cpp | 13 ++++++++----- .../vespa/searchlib/features/random_normal_stable_feature.h | 6 +++--- searchlib/src/vespa/searchlib/util/random_normal.h | 10 +++++----- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp index 55b724285d6..5f3cf7fd063 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp @@ -1,4 +1,4 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "random_normal_stable_feature.h" #include "utils.h" @@ -13,7 +13,7 @@ namespace features { RandomNormalStableExecutor::RandomNormalStableExecutor(uint64_t seed, double mean, double stddev) : search::fef::FeatureExecutor(), - _rnd(mean, stddev, true), + _rnd(mean, stddev, false), // don't use spares, as we reset seed on every generation _seed(seed) { LOG(debug, "RandomNormalStableExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); @@ -61,7 +61,7 @@ RandomNormalStableBlueprint::setup(const search::fef::IIndexEnvironment & env, _stddev = params[1].asDouble(); } - describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given Stable (document and query)"); + describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); return true; } @@ -69,8 +69,11 @@ RandomNormalStableBlueprint::setup(const search::fef::IIndexEnvironment & env, search::fef::FeatureExecutor & RandomNormalStableBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const { - uint64_t seed = util::strToNum - (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed + uint64_t seed = _seed; + if (seed == 0) { + seed = util::strToNum + (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed + } return stash.create(seed, _mean, _stddev); } diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h index e0b3232c5a1..129c929ba3d 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h +++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h @@ -1,4 +1,4 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once @@ -17,7 +17,7 @@ namespace features { **/ class RandomNormalStableExecutor : public fef::FeatureExecutor { private: - RandomNormal _rnd; // seeded once per match + RandomNormal _rnd; // seeded once per match uint64_t _seed; public: @@ -27,7 +27,7 @@ public: /** - * Implements the blueprint for the random normal feature. + * Implements the blueprint for the random normal stable feature. */ class RandomNormalStableBlueprint : public fef::Blueprint { private: diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h index 68e98f871d3..74596066312 100644 --- a/searchlib/src/vespa/searchlib/util/random_normal.h +++ b/searchlib/src/vespa/searchlib/util/random_normal.h @@ -1,4 +1,4 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once @@ -7,6 +7,10 @@ namespace search { +/** + * Draws a random number from the Gaussian distribution + * using the Marsaglia polar method. + */ class RandomNormal { private: @@ -36,10 +40,6 @@ public: _rnd.srand48(seed); } - /** - * Draws a random number from the Gaussian distribution - * using the Marsaglia polar method. - */ feature_t next() { feature_t result = _spare; if (_useSpare && _hasSpare) { -- cgit v1.2.3