diff options
author | Geir Storli <geirstorli@yahoo.no> | 2018-06-15 14:57:41 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-15 14:57:41 +0200 |
commit | 59b82da9c6fb37328a2ed1f7a7f485972537a9a7 (patch) | |
tree | 0f097e2dbb052e1a9b8468d894e4ff8ee7e73883 /searchlib | |
parent | f966facc1dace916b3cddb564c9b41877bd64176 (diff) | |
parent | 8c150dc0c10ca361f26b1a73b7383a0f2c4f8a95 (diff) |
Merge pull request #6190 from vespa-engine/lesters/random-normal-match
Add randomNormal.match
Diffstat (limited to 'searchlib')
9 files changed, 275 insertions, 46 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 345c66ec672..0e57f520673 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -32,6 +32,7 @@ LOG_SETUP("prod_features_test"); #include <vespa/searchlib/features/querytermcountfeature.h> #include <vespa/searchlib/features/randomfeature.h> #include <vespa/searchlib/features/random_normal_feature.h> +#include <vespa/searchlib/features/random_normal_stable_feature.h> #include <vespa/searchlib/features/rankingexpressionfeature.h> #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/features/termfeature.h> @@ -105,6 +106,7 @@ Test::Main() TEST_DO(testQueryTermCount()); TEST_FLUSH(); TEST_DO(testRandom()); TEST_FLUSH(); TEST_DO(testRandomNormal()); TEST_FLUSH(); + TEST_DO(testRandomNormalStable()); TEST_FLUSH(); TEST_DO(testRankingExpression()); TEST_FLUSH(); TEST_DO(testTerm()); TEST_FLUSH(); TEST_DO(testTermDistance()); TEST_FLUSH(); @@ -1727,17 +1729,16 @@ Test::testRandom() } void -Test::testRandomNormal() -{ +Test::testRandomNormal() { { // Test blueprint. RandomNormalBlueprint pt; EXPECT_TRUE(assertCreateInstance(pt, "randomNormal")); StringList params, in, out; - FT_SETUP_OK (pt, params, in, out.add("out")); - FT_SETUP_OK (pt, params.add("0.5").add("1.0"), in, out); - FT_SETUP_OK (pt, params.add("val1"), in, out); + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); + FT_SETUP_OK(pt, params.add("val1"), in, out); FT_DUMP_EMPTY(_factory, "randomNormal"); } @@ -1766,7 +1767,50 @@ Test::testRandomNormal() for (uint32_t i = 0; i < 5; ++i) { rr.clear(); ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); - ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); + } + } +} + +void +Test::testRandomNormalStable() { + { // Test blueprint. + RandomNormalStableBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "randomNormalStable")); + + StringList params, in, out; + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); + FT_SETUP_OK(pt, params.add("val1"), in, out); + + FT_DUMP_EMPTY(_factory, "randomNormalStable"); + } + + { // Test setting of mean and stddev values, and seed + FtFeatureTest ft1(_factory, "randomNormalStable(0.0,0.1)"); + FtFeatureTest ft2(_factory, "randomNormalStable(1.0,0.2)"); + ft1.getIndexEnv().getProperties().add("randomNormalStable(0.0,0.1).seed", "100"); + ft2.getIndexEnv().getProperties().add("randomNormalStable(1.0,0.2).seed", "100"); + ASSERT_TRUE(ft1.setup()); + ASSERT_TRUE(ft2.setup()); + RankResult rr; + for (uint32_t i = 0; i < 5; ++i) { + rr.clear(); + ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalStable(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); + } + } + { // Test executor (randomNormalStable) + FtFeatureTest ft1(_factory, "randomNormalStable"); + FtFeatureTest ft2(_factory, "randomNormalStable"); + ASSERT_TRUE(ft1.setup()); + ASSERT_TRUE(ft2.setup()); + RankResult rr; + for (uint32_t i = 0; i < 5; ++i) { + rr.clear(); + ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalStable"), EPS, i + 1)); } } } diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h index 0d234ca674e..d7bf001bedf 100644 --- a/searchlib/src/tests/features/prod_features.h +++ b/searchlib/src/tests/features/prod_features.h @@ -35,6 +35,7 @@ public: void testQueryTermCount(); void testRandom(); void testRandomNormal(); + void testRandomNormalStable(); void testRankingExpression(); void testTerm(); void testTermDistance(); diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index bd847fe35b5..16401a67424 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -44,6 +44,7 @@ vespa_add_library(searchlib_features OBJECT queryterm.cpp querytermcountfeature.cpp random_normal_feature.cpp + random_normal_stable_feature.cpp randomfeature.cpp rankingexpressionfeature.cpp raw_score_feature.cpp diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp index 67f30c4eb93..c83ec80f6b5 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp @@ -4,7 +4,6 @@ #include "utils.h" #include <vespa/searchlib/fef/properties.h> #include <vespa/fastos/time.h> -#include <cmath> #include <vespa/log/log.h> LOG_SETUP(".features.randomnormalfeature"); @@ -14,46 +13,18 @@ namespace features { RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) : search::fef::FeatureExecutor(), - _rnd(), - _mean(mean), - _stddev(stddev), - _hasSpare(false), - _spare(0.0) - + _rnd(mean, stddev, true) { LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); - _rnd.srand48(seed); + _rnd.seed(seed); } -/** - * Draws a random number from the Gaussian distribution - * using the Marsaglia polar method. - */ void RandomNormalExecutor::execute(uint32_t) { - feature_t result = _spare; - if (_hasSpare) { - _hasSpare = false; - } else { - _hasSpare = true; - - feature_t u, v, s; - do { - u = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; - v = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; - s = u * u + v * v; - } while ( (s >= 1.0) || (s == 0.0) ); - s = std::sqrt(-2.0 * std::log(s) / s); - - _spare = v * s; // saved for next invocation - result = u * s; - } - - outputs().set_number(0, _mean + _stddev * result); + outputs().set_number(0, _rnd.next()); } - RandomNormalBlueprint::RandomNormalBlueprint() : search::fef::Blueprint("randomNormal"), _seed(0), @@ -82,7 +53,6 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env, if (p.found()) { _seed = util::strToNum<uint64_t>(p.get()); } - if (params.size() > 0) { _mean = params[0].asDouble(); } diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h index 92aa3117b91..2d2429371d9 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h @@ -4,7 +4,7 @@ #include <vespa/searchlib/fef/blueprint.h> #include <vespa/searchlib/fef/featureexecutor.h> -#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/util/random_normal.h> namespace search { namespace features { @@ -17,12 +17,7 @@ namespace features { **/ class RandomNormalExecutor : public fef::FeatureExecutor { private: - Rand48 _rnd; - double _mean; - double _stddev; - - bool _hasSpare; - double _spare; + RandomNormal _rnd; // seeded once per query public: RandomNormalExecutor(uint64_t seed, double mean, double stddev); diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp new file mode 100644 index 00000000000..5f3cf7fd063 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp @@ -0,0 +1,82 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "random_normal_stable_feature.h" +#include "utils.h" +#include <vespa/searchlib/fef/properties.h> +#include <vespa/fastos/time.h> + +#include <vespa/log/log.h> +LOG_SETUP(".features.randomnormalstablefeature"); + +namespace search { +namespace features { + +RandomNormalStableExecutor::RandomNormalStableExecutor(uint64_t seed, double mean, double stddev) : + search::fef::FeatureExecutor(), + _rnd(mean, stddev, false), // don't use spares, as we reset seed on every generation + _seed(seed) +{ + LOG(debug, "RandomNormalStableExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); +} + +void +RandomNormalStableExecutor::execute(uint32_t docId) +{ + _rnd.seed(_seed + docId); + outputs().set_number(0, _rnd.next()); +} + +RandomNormalStableBlueprint::RandomNormalStableBlueprint() : + search::fef::Blueprint("randomNormalStable"), + _seed(0), + _mean(0.0), + _stddev(1.0) +{ +} + +void +RandomNormalStableBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ +} + +search::fef::Blueprint::UP +RandomNormalStableBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new RandomNormalStableBlueprint()); +} + +bool +RandomNormalStableBlueprint::setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params) +{ + search::fef::Property p = env.getProperties().lookup(getName(), "seed"); + if (p.found()) { + _seed = util::strToNum<uint64_t>(p.get()); + } + if (params.size() > 0) { + _mean = params[0].asDouble(); + } + if (params.size() > 1) { + _stddev = params[1].asDouble(); + } + + describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); + + return true; +} + +search::fef::FeatureExecutor & +RandomNormalStableBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const +{ + uint64_t seed = _seed; + if (seed == 0) { + seed = util::strToNum<uint64_t> + (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed + } + return stash.create<RandomNormalStableExecutor>(seed, _mean, _stddev); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h new file mode 100644 index 00000000000..129c929ba3d --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h @@ -0,0 +1,67 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/fef/blueprint.h> +#include <vespa/searchlib/fef/featureexecutor.h> +#include <vespa/searchlib/util/random_normal.h> + +namespace search { +namespace features { + +/** + * Implements the executor for the random normal feature outputting a + * random number drawn from the Gaussian distribution with the + * two arguments 'mean' and 'stddev'. + * The same hit always returns the same random number. + **/ +class RandomNormalStableExecutor : public fef::FeatureExecutor { +private: + RandomNormal _rnd; // seeded once per match + uint64_t _seed; + +public: + RandomNormalStableExecutor(uint64_t seed, double mean, double stddev); + void execute(uint32_t docId) override; +}; + + +/** + * Implements the blueprint for the random normal stable feature. + */ +class RandomNormalStableBlueprint : public fef::Blueprint { +private: + uint64_t _seed; + double _mean; + double _stddev; + +public: + RandomNormalStableBlueprint(); + + void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; + fef::Blueprint::UP createInstance() const override; + fef::ParameterDescriptions getDescriptions() const override { + return fef::ParameterDescriptions(). + // Can run without parameters: + desc(). + + // Can run with two parameters (mean and stddev): + desc(). + number(). // mean + number(). // stddev + + // Can run with three parameters: + desc(). + number(). // mean + number(). // stddev + string(); // in order to name different features + } + + bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index 867f058931f..1d3c59f5b3d 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -38,6 +38,7 @@ #include "querytermcountfeature.h" #include "randomfeature.h" #include "random_normal_feature.h" +#include "random_normal_stable_feature.h" #include "rankingexpressionfeature.h" #include "raw_score_feature.h" #include "reverseproximityfeature.h" @@ -100,6 +101,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint())); registry.addPrototype(Blueprint::SP(new RandomBlueprint())); registry.addPrototype(Blueprint::SP(new RandomNormalBlueprint())); + registry.addPrototype(Blueprint::SP(new RandomNormalStableBlueprint())); registry.addPrototype(Blueprint::SP(new RawScoreBlueprint())); registry.addPrototype(Blueprint::SP(new SubqueriesBlueprint)); registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint())); diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h new file mode 100644 index 00000000000..74596066312 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/random_normal.h @@ -0,0 +1,67 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/util/rand48.h> +#include <cmath> + +namespace search { + +/** + * Draws a random number from the Gaussian distribution + * using the Marsaglia polar method. + */ +class RandomNormal +{ +private: + Rand48 _rnd; + double _mean; + double _stddev; + + bool _useSpare; + bool _hasSpare; + feature_t _spare; + + feature_t nextUniform() { + return (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; + } + +public: + RandomNormal(double mean, double stddev, bool useSpare = true) : + _rnd(), + _mean(mean), + _stddev(stddev), + _useSpare(useSpare), + _hasSpare(false), + _spare(0.0) + {} + + void seed(long seed) { + _rnd.srand48(seed); + } + + feature_t next() { + feature_t result = _spare; + if (_useSpare && _hasSpare) { + _hasSpare = false; + } else { + _hasSpare = true; + + feature_t u, v, s; + do { + u = nextUniform(); + v = nextUniform(); + s = u * u + v * v; + } while ( (s >= 1.0) || (s == 0.0) ); + s = std::sqrt(-2.0 * std::log(s) / s); + + _spare = v * s; // saved for next invocation + result = u * s; + } + return _mean + _stddev * result; + } + +}; + +} // search + |