From cf97aabc855c1beae74ec1708548c0334f47c438 Mon Sep 17 00:00:00 2001
From: Lester Solbakken
Date: Wed, 13 Jun 2018 13:17:02 +0200
Subject: Add randomNormal.match

---
Review notes (free text between "---" and the first "diff --git"; not part of
the change itself):

- Purpose: adds a second output, "match", to the randomNormal rank feature.
  Output 0 ("out") keeps drawing from _rnd, which is seeded in the executor
  constructor. Output 1 ("match") reseeds _matchRnd with _matchSeed + docId
  on every execute() call, so its value depends only on that seed and the
  document id.
- Fix relative to the first version of this patch: generateRandom() takes the
  generator by reference. Taken by value, every call drew from a copy, so the
  caller's generator never advanced and the two consecutive draws for u and v
  started from the same state (u == v, and the rejection loop could retry on
  identical values indefinitely).
- NOTE(review): line breaks and indentation were restored from a
  whitespace-collapsed copy. Line counts agree with every hunk header and with
  the diffstat below; the indentation inside lines is a best guess. Template
  arguments that were stripped in that copy (strToNum<uint64_t>,
  static_cast<uint64_t>, reinterpret_cast<uint64_t>,
  stash.create<RandomNormalExecutor>) were re-added by inference -- confirm
  against the tree before applying. The "index" blob ids predate the fix.

 searchlib/src/tests/features/prod_features.cpp     | 10 ++++++
 .../searchlib/features/random_normal_feature.cpp   | 37 +++++++++++++++-------
 .../searchlib/features/random_normal_feature.h     |  7 ++--
 3 files changed, 41 insertions(+), 13 deletions(-)

(limited to 'searchlib')

diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index 345c66ec672..21c509832e3 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -1769,6 +1769,16 @@ Test::testRandomNormal()
             ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1));
         }
     }
+    { // Test executor (randomNormal.match)
+        FtFeatureTest ft(_factory, "randomNormal.match");
+        ASSERT_TRUE(ft.setup());
+        RankResult rr;
+        for (uint32_t i = 0; i < 5; ++i) {
+            rr.clear();
+            ASSERT_TRUE(ft.executeOnly(rr, i + 1));
+            ASSERT_TRUE(ft.execute(rr.getScore("randomNormal.match"), EPS, i + 1));
+        }
+    }
 }
 
 void
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
index 67f30c4eb93..1b274be81a7 100644
--- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
@@ -12,25 +12,30 @@ LOG_SETUP(".features.randomnormalfeature");
 namespace search {
 namespace features {
 
-RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) :
+RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev) :
     search::fef::FeatureExecutor(),
     _rnd(),
+    _matchRnd(),
+    _matchSeed(matchSeed),
     _mean(mean),
     _stddev(stddev),
     _hasSpare(false),
     _spare(0.0)
-
 {
-    LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev);
+    LOG(debug, "RandomNormalExecutor: seed=%zu, matchSeed=%zu, mean=%f, stddev=%f", seed, matchSeed, mean, stddev);
     _rnd.srand48(seed);
 }
 
+feature_t generateRandom(Rand48 &generator) { // by reference: each draw must advance the caller's generator
+    return (generator.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0;
+}
+
 /**
  * Draws a random number from the Gaussian distribution
  * using the Marsaglia polar method.
  */
 void
-RandomNormalExecutor::execute(uint32_t)
+RandomNormalExecutor::execute(uint32_t docId)
 {
     feature_t result = _spare;
     if (_hasSpare) {
@@ -40,8 +45,8 @@ RandomNormalExecutor::execute(uint32_t)
 
         feature_t u, v, s;
         do {
-            u = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0;
-            v = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0;
+            u = generateRandom(_rnd);
+            v = generateRandom(_rnd);
             s = u * u + v * v;
         } while ( (s >= 1.0) || (s == 0.0) );
         s = std::sqrt(-2.0 * std::log(s) / s);
@@ -49,10 +54,18 @@ RandomNormalExecutor::execute(uint32_t)
         _spare = v * s; // saved for next invocation
         result = u * s;
     }
-
     outputs().set_number(0, _mean + _stddev * result);
-}
 
+    _matchRnd.srand48(_matchSeed + docId);
+    feature_t u, v, s;
+    do {
+        u = generateRandom(_matchRnd);
+        v = generateRandom(_matchRnd);
+        s = u * u + v * v;
+    } while ( (s >= 1.0) || (s == 0.0) );
+    s = std::sqrt(-2.0 * std::log(s) / s);
+    outputs().set_number(1, _mean + _stddev * u * s);
+}
 
 RandomNormalBlueprint::RandomNormalBlueprint() :
     search::fef::Blueprint("randomNormal"),
@@ -82,7 +95,6 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env,
     if (p.found()) {
         _seed = util::strToNum<uint64_t>(p.get());
     }
-
     if (params.size() > 0) {
         _mean = params[0].asDouble();
     }
@@ -91,12 +103,13 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env,
     }
 
     describeOutput("out" , "A random value drawn from the Gaussian distribution");
+    describeOutput("match" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)");
 
     return true;
 }
 
 search::fef::FeatureExecutor &
-RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &, vespalib::Stash &stash) const
+RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
 {
     uint64_t seed = _seed;
     if (seed == 0) {
@@ -105,7 +118,9 @@ RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &, ve
         seed = static_cast<uint64_t>(time.MicroSecs()) ^
                 reinterpret_cast<uint64_t>(&seed); // results in different seeds in different threads
     }
-    return stash.create<RandomNormalExecutor>(seed, _mean, _stddev);
+    uint64_t matchSeed = util::strToNum<uint64_t>
+            (env.getProperties().lookup(getName(), "match", "seed").get("1024")); // default seed
+    return stash.create<RandomNormalExecutor>(seed, matchSeed, _mean, _stddev);
 }
 
 
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h
index 92aa3117b91..f2bc82704bb 100644
--- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h
+++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h
@@ -17,7 +17,10 @@ namespace features {
  **/
 class RandomNormalExecutor : public fef::FeatureExecutor {
 private:
-    Rand48 _rnd;
+    Rand48 _rnd;       // seeded once per query
+    Rand48 _matchRnd;  // seeded once per match
+    uint64_t _matchSeed;
+
     double _mean;
     double _stddev;
 
@@ -25,7 +28,7 @@ private:
     double _spare;
 
 public:
-    RandomNormalExecutor(uint64_t seed, double mean, double stddev);
+    RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev);
     void execute(uint32_t docId) override;
 };
 
-- 
cgit v1.2.3