diff options
author | Lester Solbakken <lesters@oath.com> | 2018-06-14 15:10:36 +0200 |
---|---|---|
committer | Lester Solbakken <lesters@oath.com> | 2018-06-14 15:10:36 +0200 |
commit | 9074c9d6db60c9d785f395354aea2284f22d5020 (patch) | |
tree | 0a108cd8dfe35d4310ecc02a6e5f8d4cd1e533e8 /searchlib | |
parent | 3720104a3ae7e7ba38c34e8eae85a25ceeae74cc (diff) |
Move match version of random normal to its own feature
Diffstat (limited to 'searchlib')
9 files changed, 217 insertions, 57 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index a07785398bf..214323ae7c7 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -32,6 +32,7 @@ LOG_SETUP("prod_features_test"); #include <vespa/searchlib/features/querytermcountfeature.h> #include <vespa/searchlib/features/randomfeature.h> #include <vespa/searchlib/features/random_normal_feature.h> +#include <vespa/searchlib/features/random_normal_match_feature.h> #include <vespa/searchlib/features/rankingexpressionfeature.h> #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/features/termfeature.h> @@ -105,6 +106,7 @@ Test::Main() TEST_DO(testQueryTermCount()); TEST_FLUSH(); TEST_DO(testRandom()); TEST_FLUSH(); TEST_DO(testRandomNormal()); TEST_FLUSH(); + TEST_DO(testRandomNormalMatch()); TEST_FLUSH(); TEST_DO(testRankingExpression()); TEST_FLUSH(); TEST_DO(testTerm()); TEST_FLUSH(); TEST_DO(testTermDistance()); TEST_FLUSH(); @@ -1727,17 +1729,16 @@ Test::testRandom() } void -Test::testRandomNormal() -{ +Test::testRandomNormal() { { // Test blueprint. 
RandomNormalBlueprint pt; EXPECT_TRUE(assertCreateInstance(pt, "randomNormal")); StringList params, in, out; - FT_SETUP_OK (pt, params, in, out.add("out").add("match")); - FT_SETUP_OK (pt, params.add("0.5").add("1.0"), in, out); - FT_SETUP_OK (pt, params.add("val1"), in, out); + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); + FT_SETUP_OK(pt, params.add("val1"), in, out); FT_DUMP_EMPTY(_factory, "randomNormal"); } @@ -1766,32 +1767,52 @@ Test::testRandomNormal() for (uint32_t i = 0; i < 5; ++i) { rr.clear(); ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); - ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); + } + } +} + +void +Test::testRandomNormalMatch() { + { // Test blueprint. + RandomNormalMatchBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "randomNormalMatch")); + + StringList params, in, out; + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); + FT_SETUP_OK(pt, params.add("val1"), in, out); + + FT_DUMP_EMPTY(_factory, "randomNormalMatch"); + } + + { // Test setting of mean and stddev values, and seed + FtFeatureTest ft1(_factory, "randomNormalMatch(0.0,0.1)"); + FtFeatureTest ft2(_factory, "randomNormalMatch(1.0,0.2)"); + ft1.getIndexEnv().getProperties().add("randomNormalMatch(0.0,0.1).seed", "100"); + ft2.getIndexEnv().getProperties().add("randomNormalMatch(1.0,0.2).seed", "100"); + ASSERT_TRUE(ft1.setup()); + ASSERT_TRUE(ft2.setup()); + RankResult rr; + for (uint32_t i = 0; i < 5; ++i) { + rr.clear(); + ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalMatch(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); } } { // Test executor (randomNormal.match) - FtFeatureTest ft1(_factory, "randomNormal.match"); - FtFeatureTest ft2(_factory, 
"randomNormal.match"); + FtFeatureTest ft1(_factory, "randomNormalMatch"); + FtFeatureTest ft2(_factory, "randomNormalMatch"); ASSERT_TRUE(ft1.setup()); ASSERT_TRUE(ft2.setup()); - RankResult rr1; - RankResult rr2; + RankResult rr; for (uint32_t i = 0; i < 5; ++i) { - rr1.clear(); - rr2.clear(); - ASSERT_TRUE(ft1.executeOnly(rr1, i + 1)); - ASSERT_TRUE(ft2.executeOnly(rr2, i + 1)); - - feature_t rn1 = rr1.getScore("randomNormal"); - feature_t rn2 = rr2.getScore("randomNormal"); - ASSERT_NOT_EQUAL(rn1, rn2); - - feature_t rnm1 = rr1.getScore("randomNormal.match"); - feature_t rnm2 = rr2.getScore("randomNormal.match"); - ASSERT_EQUAL(rnm1, rnm2); + rr.clear(); + ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalMatch"), EPS, i + 1)); } } - } void diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h index 0d234ca674e..dec860917f0 100644 --- a/searchlib/src/tests/features/prod_features.h +++ b/searchlib/src/tests/features/prod_features.h @@ -35,6 +35,7 @@ public: void testQueryTermCount(); void testRandom(); void testRandomNormal(); + void testRandomNormalMatch(); void testRankingExpression(); void testTerm(); void testTermDistance(); diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index bd847fe35b5..2b92b5ec443 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -44,6 +44,7 @@ vespa_add_library(searchlib_features OBJECT queryterm.cpp querytermcountfeature.cpp random_normal_feature.cpp + random_normal_match_feature.cpp randomfeature.cpp rankingexpressionfeature.cpp raw_score_feature.cpp diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp index 40ff8db229e..c83ec80f6b5 100644 --- 
a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp @@ -11,32 +11,18 @@ LOG_SETUP(".features.randomnormalfeature"); namespace search { namespace features { -RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev) : +RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) : search::fef::FeatureExecutor(), - _rnd(), - _matchRnd(), - _matchSeed(matchSeed), - _mean(mean), - _stddev(stddev) + _rnd(mean, stddev, true) { - LOG(debug, "RandomNormalExecutor: seed=%zu, matchSeed=%zu, mean=%f, stddev=%f", seed, matchSeed, mean, stddev); + LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); _rnd.seed(seed); } void -RandomNormalExecutor::execute(uint32_t docId) +RandomNormalExecutor::execute(uint32_t) { - _matchRnd.seed(_matchSeed + docId); - - feature_t out = _mean + _stddev * _rnd.next(); - feature_t match = _mean + _stddev * _matchRnd.next(false); - - outputs().set_number(0, out); - outputs().set_number(1, match); - - // Note: calculating match here almost triples the cost for generating the non-match - // value. If this turns out to be too costly, we should consider creating an own - // feature executor for the match. 
+ outputs().set_number(0, _rnd.next()); } RandomNormalBlueprint::RandomNormalBlueprint() : @@ -75,13 +61,12 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env, } describeOutput("out" , "A random value drawn from the Gaussian distribution"); - describeOutput("match" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); return true; } search::fef::FeatureExecutor & -RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const +RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &, vespalib::Stash &stash) const { uint64_t seed = _seed; if (seed == 0) { @@ -90,9 +75,7 @@ RandomNormalBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, seed = static_cast<uint64_t>(time.MicroSecs()) ^ reinterpret_cast<uint64_t>(&seed); // results in different seeds in different threads } - uint64_t matchSeed = util::strToNum<uint64_t> - (env.getProperties().lookup(getName(), "match", "seed").get("1024")); // default seed - return stash.create<RandomNormalExecutor>(seed, matchSeed, _mean, _stddev); + return stash.create<RandomNormalExecutor>(seed, _mean, _stddev); } diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h index 9ce8f899446..2d2429371d9 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h @@ -18,14 +18,9 @@ namespace features { class RandomNormalExecutor : public fef::FeatureExecutor { private: RandomNormal _rnd; // seeded once per query - RandomNormal _matchRnd; // seeded once per match - - uint64_t _matchSeed; - double _mean; - double _stddev; public: - RandomNormalExecutor(uint64_t seed, uint64_t matchSeed, double mean, double stddev); + RandomNormalExecutor(uint64_t seed, double mean, double stddev); void execute(uint32_t docId) 
override; }; diff --git a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp new file mode 100644 index 00000000000..586835f6b9a --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.cpp @@ -0,0 +1,79 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "random_normal_match_feature.h" +#include "utils.h" +#include <vespa/searchlib/fef/properties.h> +#include <vespa/fastos/time.h> + +#include <vespa/log/log.h> +LOG_SETUP(".features.randomnormalmatchfeature"); + +namespace search { +namespace features { + +RandomNormalMatchExecutor::RandomNormalMatchExecutor(uint64_t seed, double mean, double stddev) : + search::fef::FeatureExecutor(), + _rnd(mean, stddev, true), + _seed(seed) +{ + LOG(debug, "RandomNormalMatchExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); +} + +void +RandomNormalMatchExecutor::execute(uint32_t docId) +{ + _rnd.seed(_seed + docId); + outputs().set_number(0, _rnd.next()); +} + +RandomNormalMatchBlueprint::RandomNormalMatchBlueprint() : + search::fef::Blueprint("randomNormalMatch"), + _seed(0), + _mean(0.0), + _stddev(1.0) +{ +} + +void +RandomNormalMatchBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ +} + +search::fef::Blueprint::UP +RandomNormalMatchBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new RandomNormalMatchBlueprint()); +} + +bool +RandomNormalMatchBlueprint::setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params) +{ + search::fef::Property p = env.getProperties().lookup(getName(), "seed"); + if (p.found()) { + _seed = util::strToNum<uint64_t>(p.get()); + } + if (params.size() > 0) { + _mean = params[0].asDouble(); + } + if (params.size() > 1) { + _stddev = params[1].asDouble(); + } + + 
describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); + + return true; +} + +search::fef::FeatureExecutor & +RandomNormalMatchBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const +{ + uint64_t seed = util::strToNum<uint64_t> + (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed + return stash.create<RandomNormalMatchExecutor>(seed, _mean, _stddev); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h new file mode 100644 index 00000000000..66431a3a60c --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_match_feature.h @@ -0,0 +1,67 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/fef/blueprint.h> +#include <vespa/searchlib/fef/featureexecutor.h> +#include <vespa/searchlib/util/random_normal.h> + +namespace search { +namespace features { + +/** + * Implements the executor for the random normal feature outputting a + * random number drawn from the Gaussian distribution with the + * two arguments 'mean' and 'stddev'. + * The same hit always returns the same random number. + **/ +class RandomNormalMatchExecutor : public fef::FeatureExecutor { +private: + RandomNormal _rnd; // seeded once per match + uint64_t _seed; + +public: + RandomNormalMatchExecutor(uint64_t seed, double mean, double stddev); + void execute(uint32_t docId) override; +}; + + +/** + * Implements the blueprint for the random normal feature. 
+ */ +class RandomNormalMatchBlueprint : public fef::Blueprint { +private: + uint64_t _seed; + double _mean; + double _stddev; + +public: + RandomNormalMatchBlueprint(); + + void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; + fef::Blueprint::UP createInstance() const override; + fef::ParameterDescriptions getDescriptions() const override { + return fef::ParameterDescriptions(). + // Can run without parameters: + desc(). + + // Can run with two parameters (mean and stddev): + desc(). + number(). // mean + number(). // stddev + + // Can run with three parameters: + desc(). + number(). // mean + number(). // stddev + string(); // in order to name different features + } + + bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index 867f058931f..c8e0ffb6f4a 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -38,6 +38,7 @@ #include "querytermcountfeature.h" #include "randomfeature.h" #include "random_normal_feature.h" +#include "random_normal_match_feature.h" #include "rankingexpressionfeature.h" #include "raw_score_feature.h" #include "reverseproximityfeature.h" @@ -100,6 +101,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint())); registry.addPrototype(Blueprint::SP(new RandomBlueprint())); registry.addPrototype(Blueprint::SP(new RandomNormalBlueprint())); + registry.addPrototype(Blueprint::SP(new RandomNormalMatchBlueprint())); registry.addPrototype(Blueprint::SP(new RawScoreBlueprint())); registry.addPrototype(Blueprint::SP(new 
SubqueriesBlueprint)); registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint())); diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h index 0c2da580db6..68e98f871d3 100644 --- a/searchlib/src/vespa/searchlib/util/random_normal.h +++ b/searchlib/src/vespa/searchlib/util/random_normal.h @@ -11,6 +11,10 @@ class RandomNormal { private: Rand48 _rnd; + double _mean; + double _stddev; + + bool _useSpare; bool _hasSpare; feature_t _spare; @@ -19,7 +23,14 @@ private: } public: - RandomNormal() : _rnd(), _hasSpare(false), _spare(0.0) {} + RandomNormal(double mean, double stddev, bool useSpare = true) : + _rnd(), + _mean(mean), + _stddev(stddev), + _useSpare(useSpare), + _hasSpare(false), + _spare(0.0) + {} void seed(long seed) { _rnd.srand48(seed); @@ -29,9 +40,9 @@ public: * Draws a random number from the Gaussian distribution * using the Marsaglia polar method. */ - feature_t next(bool useSpare = true) { + feature_t next() { feature_t result = _spare; - if (_hasSpare && useSpare) { + if (_useSpare && _hasSpare) { _hasSpare = false; } else { _hasSpare = true; @@ -47,7 +58,7 @@ public: _spare = v * s; // saved for next invocation result = u * s; } - return result; + return _mean + _stddev * result; } }; |