diff options
author | Arne Juul <arnej@yahoo-inc.com> | 2018-06-20 09:48:02 +0200 |
---|---|---|
committer | Arne Juul <arnej@yahoo-inc.com> | 2018-06-20 09:48:02 +0200 |
commit | 44fc1380b66867958f89c47ac8752926b3787a4d (patch) | |
tree | ea324d387b055c3ccf0921f9f973c95b16bad2e2 /searchlib | |
parent | 016e584f0ad5a071e13d75eb8ad5ddb46b8c54f5 (diff) | |
parent | 79e7562d34a20ed28621a9ec0dc296eda0881428 (diff) |
Merge branch 'master' into arnej/use-log-abort
Diffstat (limited to 'searchlib')
24 files changed, 420 insertions, 72 deletions
diff --git a/searchlib/src/apps/tests/memoryindexstress_test.cpp b/searchlib/src/apps/tests/memoryindexstress_test.cpp index 1ba264e0bfe..b911284a1b4 100644 --- a/searchlib/src/apps/tests/memoryindexstress_test.cpp +++ b/searchlib/src/apps/tests/memoryindexstress_test.cpp @@ -19,6 +19,7 @@ #include <vespa/document/fieldvalue/document.h> #include <vespa/document/fieldvalue/stringfieldvalue.h> #include <vespa/document/repo/configbuilder.h> +#include <vespa/document/repo/fixedtyperepo.h> #include <vespa/document/annotation/spanlist.h> #include <vespa/document/annotation/spantree.h> #include <vespa/searchlib/util/rand48.h> diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp index 5fcc49fc228..5148ab2be34 100644 --- a/searchlib/src/tests/attribute/attribute_test.cpp +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -32,8 +32,7 @@ using search::index::DummyFileHeaderContext; using search::attribute::BasicType; using search::attribute::IAttributeVector; -namespace -{ +namespace { vespalib::string empty; vespalib::string tmpDir("tmp"); @@ -2315,6 +2314,23 @@ AttributeTest::testPendingCompaction() populateSimple(iv, 1, 2); // should not trigger new compaction } +void testNamePrefix() { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + AttributeVector::SP vFlat = createAttribute("sfsint32_pc", cfg); + AttributeVector::SP vS1 = createAttribute("sfsint32_pc.abc", cfg); + AttributeVector::SP vS2 = createAttribute("sfsint32_pc.xyz", cfg); + AttributeVector::SP vSS1 = createAttribute("sfsint32_pc.xyz.abc", cfg); + EXPECT_EQUAL("sfsint32_pc", vFlat->getName()); + EXPECT_EQUAL("sfsint32_pc", vFlat->getNamePrefix()); + EXPECT_EQUAL("sfsint32_pc.abc", vS1->getName()); + EXPECT_EQUAL("sfsint32_pc", vS1->getNamePrefix()); + EXPECT_EQUAL("sfsint32_pc.xyz", vS2->getName()); + EXPECT_EQUAL("sfsint32_pc", vS2->getNamePrefix()); + EXPECT_EQUAL("sfsint32_pc.xyz.abc", vSS1->getName()); + EXPECT_EQUAL("sfsint32_pc", 
vSS1->getNamePrefix()); + +} + void deleteDataDirs() { @@ -2361,6 +2377,7 @@ int AttributeTest::Main() TEST_DO(requireThatAddressSpaceUsageIsReported()); testReaderDuringLastUpdate(); TEST_DO(testPendingCompaction()); + TEST_DO(testNamePrefix()); deleteDataDirs(); TEST_DONE(); diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp index 805a6d3b962..f8ab03d7710 100644 --- a/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp +++ b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp @@ -122,8 +122,8 @@ TEST_F("require that task with same string component id are serialized", Fixture std::shared_ptr<TestObj> tv(std::make_shared<TestObj>()); EXPECT_EQUAL(0, tv->_val); auto test2 = [=]() { tv->modify(14, 42); }; - f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); }); - f._threads.execute("0", test2); + f._threads.execute(f._threads.getExecutorId("0"), [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute(f._threads.getExecutorId("0"), test2); tv->wait(2); EXPECT_EQUAL(0, tv->_fail); EXPECT_EQUAL(42, tv->_val); @@ -132,8 +132,7 @@ TEST_F("require that task with same string component id are serialized", Fixture EXPECT_EQUAL(42, tv->_val); } -namespace -{ +namespace { int detectSerializeFailure(Fixture &f, vespalib::stringref altComponentId, int tryLimit) { @@ -141,8 +140,8 @@ int detectSerializeFailure(Fixture &f, vespalib::stringref altComponentId, int t for (tryCnt = 0; tryCnt < tryLimit; ++tryCnt) { std::shared_ptr<TestObj> tv(std::make_shared<TestObj>()); EXPECT_EQUAL(0, tv->_val); - f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); }); - f._threads.execute(altComponentId, [=]() { tv->modify(14, 42); }); + f._threads.execute(f._threads.getExecutorId("0"), [=]() { usleep(2000); tv->modify(0, 14); }); + 
f._threads.execute(f._threads.getExecutorId(altComponentId), [=]() { tv->modify(14, 42); }); tv->wait(2); if (tv->_fail != 1) { continue; diff --git a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp index 1da912ccb3a..0cf13443142 100644 --- a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp +++ b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp @@ -16,7 +16,8 @@ using namespace search::fef::test; using namespace search::features; using CollectionType = FieldInfo::CollectionType; -const std::string featureName("nativeDotProduct(foo)"); +const std::string fooFeatureName("nativeDotProduct(foo)"); +const std::string anyFeatureName("nativeDotProduct"); struct BlueprintFactoryFixture { BlueprintFactory factory; @@ -78,7 +79,8 @@ struct RankFixture : BlueprintFactoryFixture, IndexFixture { std::vector<TermFieldHandle> fooHandles; std::vector<TermFieldHandle> barHandles; RankFixture(const std::vector<uint32_t> &fooWeights, - const std::vector<uint32_t> &barWeights) + const std::vector<uint32_t> &barWeights, + const vespalib::string &featureName = fooFeatureName) : queryEnv(&indexEnv), rankSetup(factory, indexEnv), mdl(), match_data(), rankProgram(), fooHandles(), barHandles() { @@ -152,6 +154,12 @@ TEST_FF("require that setup fails for unknown field", NativeDotProductBlueprint, EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "unknown"))); } +TEST_FF("require that setup can be done without field", NativeDotProductBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>())); +} + TEST_F("require that not searching a field will give it 0.0 dot product", RankFixture(vec(), vec(1, 2, 3))) { EXPECT_EQUAL(0.0, f1.getScore(10)); } @@ 
-183,11 +191,18 @@ TEST_F("require that data from other fields is ignored", RankFixture(vec(1, 3), EXPECT_EQUAL(14, f1.getScore(10)); } +TEST_F("require that not specifying field includes all term/field combinations", RankFixture(vec(1, 3), vec(5, 7), anyFeatureName)) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 10, 4); + f1.setBarWeight(0, 10, 6); + f1.setBarWeight(1, 10, 8); + EXPECT_EQUAL(100, f1.getScore(10)); +} + TEST_F("require that negative weights in the index works", RankFixture(vec(1, 3), vec())) { f1.setFooWeight(0, 10, 2); f1.setFooWeight(1, 10, -4); EXPECT_EQUAL(-10, f1.getScore(10)); } - TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 345c66ec672..0e57f520673 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -32,6 +32,7 @@ LOG_SETUP("prod_features_test"); #include <vespa/searchlib/features/querytermcountfeature.h> #include <vespa/searchlib/features/randomfeature.h> #include <vespa/searchlib/features/random_normal_feature.h> +#include <vespa/searchlib/features/random_normal_stable_feature.h> #include <vespa/searchlib/features/rankingexpressionfeature.h> #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/features/termfeature.h> @@ -105,6 +106,7 @@ Test::Main() TEST_DO(testQueryTermCount()); TEST_FLUSH(); TEST_DO(testRandom()); TEST_FLUSH(); TEST_DO(testRandomNormal()); TEST_FLUSH(); + TEST_DO(testRandomNormalStable()); TEST_FLUSH(); TEST_DO(testRankingExpression()); TEST_FLUSH(); TEST_DO(testTerm()); TEST_FLUSH(); TEST_DO(testTermDistance()); TEST_FLUSH(); @@ -1727,17 +1729,16 @@ Test::testRandom() } void -Test::testRandomNormal() -{ +Test::testRandomNormal() { { // Test blueprint. 
RandomNormalBlueprint pt; EXPECT_TRUE(assertCreateInstance(pt, "randomNormal")); StringList params, in, out; - FT_SETUP_OK (pt, params, in, out.add("out")); - FT_SETUP_OK (pt, params.add("0.5").add("1.0"), in, out); - FT_SETUP_OK (pt, params.add("val1"), in, out); + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); + FT_SETUP_OK(pt, params.add("val1"), in, out); FT_DUMP_EMPTY(_factory, "randomNormal"); } @@ -1766,7 +1767,50 @@ Test::testRandomNormal() for (uint32_t i = 0; i < 5; ++i) { rr.clear(); ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); - ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); + } + } +} + +void +Test::testRandomNormalStable() { + { // Test blueprint. + RandomNormalStableBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "randomNormalStable")); + + StringList params, in, out; + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out); + FT_SETUP_OK(pt, params.add("val1"), in, out); + + FT_DUMP_EMPTY(_factory, "randomNormalStable"); + } + + { // Test setting of mean and stddev values, and seed + FtFeatureTest ft1(_factory, "randomNormalStable(0.0,0.1)"); + FtFeatureTest ft2(_factory, "randomNormalStable(1.0,0.2)"); + ft1.getIndexEnv().getProperties().add("randomNormalStable(0.0,0.1).seed", "100"); + ft2.getIndexEnv().getProperties().add("randomNormalStable(1.0,0.2).seed", "100"); + ASSERT_TRUE(ft1.setup()); + ASSERT_TRUE(ft2.setup()); + RankResult rr; + for (uint32_t i = 0; i < 5; ++i) { + rr.clear(); + ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalStable(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1)); + } + } + { // Test executor (randomNormalStable) + FtFeatureTest ft1(_factory, "randomNormalStable"); + FtFeatureTest ft2(_factory, 
"randomNormalStable"); + ASSERT_TRUE(ft1.setup()); + ASSERT_TRUE(ft2.setup()); + RankResult rr; + for (uint32_t i = 0; i < 5; ++i) { + rr.clear(); + ASSERT_TRUE(ft1.executeOnly(rr, i + 1)); + ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalStable"), EPS, i + 1)); } } } diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h index 0d234ca674e..d7bf001bedf 100644 --- a/searchlib/src/tests/features/prod_features.h +++ b/searchlib/src/tests/features/prod_features.h @@ -35,6 +35,7 @@ public: void testQueryTermCount(); void testRandom(); void testRandomNormal(); + void testRandomNormalStable(); void testRankingExpression(); void testTerm(); void testTermDistance(); diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp index d89883bc417..45ebdd78fb3 100644 --- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp +++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp @@ -5,17 +5,24 @@ #include <vespa/searchlib/queryeval/leaf_blueprints.h> #include <vespa/searchlib/queryeval/simpleresult.h> #include <vespa/searchlib/queryeval/same_element_blueprint.h> +#include <vespa/searchlib/queryeval/same_element_search.h> +#include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchcommon/attribute/i_search_context.h> +#include <vespa/searchlib/attribute/elementiterator.h> using namespace search::fef; using namespace search::queryeval; +using search::attribute::ElementIterator; -std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResult> &children) { +std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResult> &children, bool fake_attr = false) { auto result = std::make_unique<SameElementBlueprint>(); for (size_t i = 0; i < children.size(); ++i) { uint32_t field_id = i; vespalib::string field_name = vespalib::make_string("f%u", field_id); FieldSpec field = 
result->getNextChildField(field_name, field_id); - result->addTerm(std::make_unique<FakeBlueprint>(field, children[i])); + auto fake = std::make_unique<FakeBlueprint>(field, children[i]); + fake->is_attr(fake_attr); + result->addTerm(std::move(fake)); } return result; } @@ -96,4 +103,17 @@ TEST("require that children are sorted") { EXPECT_EQUAL(dynamic_cast<SameElementBlueprint&>(*bp).terms()[2]->getState().estimate().estHits, 4u); } +TEST("require that attribute iterators are wrapped for element unpacking") { + auto a = make_result({{5, {1,3,7}}}); + auto b = make_result({{5, {3,5,10}}}); + auto bp = finalize(make_blueprint({a,b}, true), true); + auto md = MatchData::makeTestInstance(0, 0); + auto search = bp->createSearch(*md, false); + SameElementSearch *se = dynamic_cast<SameElementSearch*>(search.get()); + ASSERT_TRUE(se != nullptr); + ASSERT_EQUAL(se->children().size(), 2u); + EXPECT_TRUE(dynamic_cast<ElementIterator*>(se->children()[0].get()) != nullptr); + EXPECT_TRUE(dynamic_cast<ElementIterator*>(se->children()[1].get()) != nullptr); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h b/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h index 05347e790fb..a8b2a722c01 100644 --- a/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h +++ b/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h @@ -89,14 +89,14 @@ public: * call sync before tearing down pointed to/referenced data. * All tasks must be scheduled from same thread. 
* - * @param componentId component id - * @param function function to be wrapped in a task and later executed + * @param id executor id + * @param function function to be wrapped in a task and later executed */ template <class FunctionType> - void execute(vespalib::stringref componentId, FunctionType &&function) { - ExecutorId id = getExecutorId(componentId); + void execute(ExecutorId id, FunctionType &&function) { executeTask(id, vespalib::makeLambdaTask(std::forward<FunctionType>(function))); } + }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index bd847fe35b5..16401a67424 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -44,6 +44,7 @@ vespa_add_library(searchlib_features OBJECT queryterm.cpp querytermcountfeature.cpp random_normal_feature.cpp + random_normal_stable_feature.cpp randomfeature.cpp rankingexpressionfeature.cpp raw_score_feature.cpp diff --git a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp index cb8136e8b7f..7865e32849f 100644 --- a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp @@ -8,6 +8,23 @@ using namespace search::fef; namespace search { namespace features { +NativeDotProductExecutor::NativeDotProductExecutor(const search::fef::IQueryEnvironment &env) + : FeatureExecutor(), + _pairs(), + _md(nullptr) +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + const search::fef::ITermData *td = env.getTerm(i); + auto weight = td->getWeight(); + for (size_t f = 0; f < td->numFields(); ++f) { + auto handle = td->field(f).getHandle(); + if (handle != search::fef::IllegalHandle) { + _pairs.emplace_back(handle, weight); + } + } + } +} + 
NativeDotProductExecutor::NativeDotProductExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId) : FeatureExecutor(), _pairs(), @@ -46,15 +63,21 @@ bool NativeDotProductBlueprint::setup(const IIndexEnvironment &, const ParameterList &params) { - _field = params[0].asField(); - describeOutput("out", "dot product between query term weights and match weights for the given field"); + if (params.size() > 0) { + _field = params[0].asField(); + } + describeOutput("out", "dot product between query term weights and match weights"); return true; } FeatureExecutor & NativeDotProductBlueprint::createExecutor(const IQueryEnvironment &queryEnv, vespalib::Stash &stash) const { - return stash.create<NativeDotProductExecutor>(queryEnv, _field->id()); + if (_field) { + return stash.create<NativeDotProductExecutor>(queryEnv, _field->id()); + } else { + return stash.create<NativeDotProductExecutor>(queryEnv); + } } } // namespace features diff --git a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h index a71d23f3158..33c5c89c88b 100644 --- a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h +++ b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h @@ -20,6 +20,7 @@ private: void handle_bind_match_data(const fef::MatchData &md) override; public: + NativeDotProductExecutor(const fef::IQueryEnvironment &env); NativeDotProductExecutor(const fef::IQueryEnvironment &env, uint32_t fieldId); void execute(uint32_t docId) override; }; @@ -31,13 +32,13 @@ class NativeDotProductBlueprint : public fef::Blueprint private: const fef::FieldInfo *_field; public: - NativeDotProductBlueprint() : Blueprint("nativeDotProduct"), _field(0) {} + NativeDotProductBlueprint() : Blueprint("nativeDotProduct"), _field(nullptr) {} void visitDumpFeatures(const fef::IIndexEnvironment &, fef::IDumpFeatureVisitor &) const override {} fef::Blueprint::UP createInstance() const override { 
return Blueprint::UP(new NativeDotProductBlueprint()); } fef::ParameterDescriptions getDescriptions() const override { - return fef::ParameterDescriptions().desc().field(); + return fef::ParameterDescriptions().desc().field().desc(); } bool setup(const fef::IIndexEnvironment &env, const fef::ParameterList &params) override; fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp index 67f30c4eb93..c83ec80f6b5 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp @@ -4,7 +4,6 @@ #include "utils.h" #include <vespa/searchlib/fef/properties.h> #include <vespa/fastos/time.h> -#include <cmath> #include <vespa/log/log.h> LOG_SETUP(".features.randomnormalfeature"); @@ -14,46 +13,18 @@ namespace features { RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) : search::fef::FeatureExecutor(), - _rnd(), - _mean(mean), - _stddev(stddev), - _hasSpare(false), - _spare(0.0) - + _rnd(mean, stddev, true) { LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); - _rnd.srand48(seed); + _rnd.seed(seed); } -/** - * Draws a random number from the Gaussian distribution - * using the Marsaglia polar method. 
- */ void RandomNormalExecutor::execute(uint32_t) { - feature_t result = _spare; - if (_hasSpare) { - _hasSpare = false; - } else { - _hasSpare = true; - - feature_t u, v, s; - do { - u = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; - v = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; - s = u * u + v * v; - } while ( (s >= 1.0) || (s == 0.0) ); - s = std::sqrt(-2.0 * std::log(s) / s); - - _spare = v * s; // saved for next invocation - result = u * s; - } - - outputs().set_number(0, _mean + _stddev * result); + outputs().set_number(0, _rnd.next()); } - RandomNormalBlueprint::RandomNormalBlueprint() : search::fef::Blueprint("randomNormal"), _seed(0), @@ -82,7 +53,6 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env, if (p.found()) { _seed = util::strToNum<uint64_t>(p.get()); } - if (params.size() > 0) { _mean = params[0].asDouble(); } diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h index 92aa3117b91..2d2429371d9 100644 --- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h +++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h @@ -4,7 +4,7 @@ #include <vespa/searchlib/fef/blueprint.h> #include <vespa/searchlib/fef/featureexecutor.h> -#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/util/random_normal.h> namespace search { namespace features { @@ -17,12 +17,7 @@ namespace features { **/ class RandomNormalExecutor : public fef::FeatureExecutor { private: - Rand48 _rnd; - double _mean; - double _stddev; - - bool _hasSpare; - double _spare; + RandomNormal _rnd; // seeded once per query public: RandomNormalExecutor(uint64_t seed, double mean, double stddev); diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp new file mode 100644 index 00000000000..5f3cf7fd063 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp @@ -0,0 +1,82 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "random_normal_stable_feature.h" +#include "utils.h" +#include <vespa/searchlib/fef/properties.h> +#include <vespa/fastos/time.h> + +#include <vespa/log/log.h> +LOG_SETUP(".features.randomnormalstablefeature"); + +namespace search { +namespace features { + +RandomNormalStableExecutor::RandomNormalStableExecutor(uint64_t seed, double mean, double stddev) : + search::fef::FeatureExecutor(), + _rnd(mean, stddev, false), // don't use spares, as we reset seed on every generation + _seed(seed) +{ + LOG(debug, "RandomNormalStableExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev); +} + +void +RandomNormalStableExecutor::execute(uint32_t docId) +{ + _rnd.seed(_seed + docId); + outputs().set_number(0, _rnd.next()); +} + +RandomNormalStableBlueprint::RandomNormalStableBlueprint() : + search::fef::Blueprint("randomNormalStable"), + _seed(0), + _mean(0.0), + _stddev(1.0) +{ +} + +void +RandomNormalStableBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ +} + +search::fef::Blueprint::UP +RandomNormalStableBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new RandomNormalStableBlueprint()); +} + +bool +RandomNormalStableBlueprint::setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params) +{ + search::fef::Property p = env.getProperties().lookup(getName(), "seed"); + if (p.found()) { + _seed = util::strToNum<uint64_t>(p.get()); + } + if (params.size() > 0) { + _mean = params[0].asDouble(); + } + if (params.size() > 1) { + _stddev = params[1].asDouble(); + } + + describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)"); + + return true; +} + 
+search::fef::FeatureExecutor & +RandomNormalStableBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const +{ + uint64_t seed = _seed; + if (seed == 0) { + seed = util::strToNum<uint64_t> + (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed + } + return stash.create<RandomNormalStableExecutor>(seed, _mean, _stddev); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h new file mode 100644 index 00000000000..129c929ba3d --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h @@ -0,0 +1,67 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/fef/blueprint.h> +#include <vespa/searchlib/fef/featureexecutor.h> +#include <vespa/searchlib/util/random_normal.h> + +namespace search { +namespace features { + +/** + * Implements the executor for the random normal feature outputting a + * random number drawn from the Gaussian distribution with the + * two arguments 'mean' and 'stddev'. + * The same hit always returns the same random number. + **/ +class RandomNormalStableExecutor : public fef::FeatureExecutor { +private: + RandomNormal _rnd; // seeded once per match + uint64_t _seed; + +public: + RandomNormalStableExecutor(uint64_t seed, double mean, double stddev); + void execute(uint32_t docId) override; +}; + + +/** + * Implements the blueprint for the random normal stable feature. 
+ */ +class RandomNormalStableBlueprint : public fef::Blueprint { +private: + uint64_t _seed; + double _mean; + double _stddev; + +public: + RandomNormalStableBlueprint(); + + void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; + fef::Blueprint::UP createInstance() const override; + fef::ParameterDescriptions getDescriptions() const override { + return fef::ParameterDescriptions(). + // Can run without parameters: + desc(). + + // Can run with two parameters (mean and stddev): + desc(). + number(). // mean + number(). // stddev + + // Can run with three parameters: + desc(). + number(). // mean + number(). // stddev + string(); // in order to name different features + } + + bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index 867f058931f..1d3c59f5b3d 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -38,6 +38,7 @@ #include "querytermcountfeature.h" #include "randomfeature.h" #include "random_normal_feature.h" +#include "random_normal_stable_feature.h" #include "rankingexpressionfeature.h" #include "raw_score_feature.h" #include "reverseproximityfeature.h" @@ -100,6 +101,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint())); registry.addPrototype(Blueprint::SP(new RandomBlueprint())); registry.addPrototype(Blueprint::SP(new RandomNormalBlueprint())); + registry.addPrototype(Blueprint::SP(new RandomNormalStableBlueprint())); registry.addPrototype(Blueprint::SP(new RawScoreBlueprint())); registry.addPrototype(Blueprint::SP(new 
SubqueriesBlueprint)); registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint())); diff --git a/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp index ec00b7d2f90..0eba912fafd 100644 --- a/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp +++ b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp @@ -136,7 +136,7 @@ FeatureNameBuilder::buildName() const vespalib::string ret; if (!_baseName.empty()) { ret = _baseName; - if (!_parameters.empty() > 0) { + if (!_parameters.empty()) { ret += "("; for (uint32_t i = 0; i < _parameters.size(); ++i) { if (i > 0) { diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.h b/searchlib/src/vespa/searchlib/index/docbuilder.h index 1d170483783..73c60304f50 100644 --- a/searchlib/src/vespa/searchlib/index/docbuilder.h +++ b/searchlib/src/vespa/searchlib/index/docbuilder.h @@ -4,6 +4,7 @@ #include "doctypebuilder.h" #include <vespa/document/datatype/datatypes.h> +#include <vespa/document/repo/fixedtyperepo.h> #include <vespa/document/fieldvalue/fieldvalues.h> #include <vespa/document/annotation/annotation.h> #include <vespa/document/annotation/span.h> diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp index 226f41a53c0..3165cd9b68a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp @@ -4,10 +4,37 @@ #include <vespa/searchlib/fef/termfieldmatchdataposition.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/vespalib/objects/visit.h> +#include <vespa/searchcommon/attribute/i_search_context.h> namespace search { namespace queryeval { +namespace { + +struct FakeContext : search::attribute::ISearchContext { + int32_t onFind(DocId, int32_t, int32_t &) const override { return -1; } + int32_t onFind(DocId, int32_t) const override { return -1; } + unsigned int 
approximateHits() const override { return 0; } + std::unique_ptr<SearchIterator> createIterator(fef::TermFieldMatchData *, bool) override { abort(); } + void fetchPostings(bool) override { } + bool valid() const override { return true; } + search::Int64Range getAsIntegerTerm() const override { abort(); } + const search::QueryTermBase &queryTerm() const override { abort(); } + const vespalib::string &attributeName() const override { abort(); } +}; + +} // namespace search::queryeval::<unnamed> + +void +FakeSearch::is_attr(bool value) +{ + if (value) { + _ctx = std::make_unique<FakeContext>(); + } else { + _ctx.reset(); + } +} + void FakeSearch::doSeek(uint32_t docid) { @@ -49,5 +76,5 @@ FakeSearch::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "term", _term); } -} // namespace queryeval +} // namespace search::queryeval } // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.h b/searchlib/src/vespa/searchlib/queryeval/fake_search.h index c320d497edc..aa6df480a21 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.h @@ -5,6 +5,7 @@ #include "searchiterator.h" #include "fake_result.h" #include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchcommon/attribute/i_search_context.h> namespace search { namespace queryeval { @@ -18,6 +19,7 @@ private: FakeResult _result; uint32_t _offset; fef::TermFieldMatchDataArray _tfmda; + std::unique_ptr<attribute::ISearchContext> _ctx; bool valid() const { return _offset < _result.inspect().size(); } uint32_t currId() const { return _result.inspect()[_offset].docId; } @@ -34,10 +36,12 @@ public: { assert(_tfmda.size() == 1); } + void is_attr(bool value); void doSeek(uint32_t docid) override; void doUnpack(uint32_t docid) override; const PostingInfo *getPostingInfo() const override { return _result.postingInfo(); } void visitMembers(vespalib::ObjectVisitor &visitor) const override; + const 
attribute::ISearchContext *getAttributeSearchContext() const override { return _ctx.get(); } }; } // namespace queryeval diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp index bbfa487ae7d..a140fb146d5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp @@ -64,7 +64,9 @@ SimpleBlueprint::tag(const vespalib::string &t) SearchIterator::UP FakeBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const { - return std::make_unique<FakeSearch>(_tag, _field.getName(), _term, _result, tfmda); + auto result = std::make_unique<FakeSearch>(_tag, _field.getName(), _term, _result, tfmda); + result->is_attr(_is_attr); + return result; } FakeBlueprint::FakeBlueprint(const FieldSpec &field, const FakeResult &result) @@ -72,7 +74,8 @@ FakeBlueprint::FakeBlueprint(const FieldSpec &field, const FakeResult &result) _tag("<tag>"), _term("<term>"), _field(field), - _result(result) + _result(result), + _is_attr(false) { setEstimate(HitEstimate(result.inspect().size(), result.inspect().empty())); } diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h index 698e2235690..85d30aaf003 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h @@ -51,6 +51,7 @@ private: vespalib::string _term; FieldSpec _field; FakeResult _result; + bool _is_attr; protected: SearchIterator::UP @@ -66,6 +67,12 @@ public: } const vespalib::string &tag() const { return _tag; } + FakeBlueprint &is_attr(bool value) { + _is_attr = value; + return *this; + } + bool is_attr() const { return _is_attr; } + FakeBlueprint &term(const vespalib::string &t) { _term = t; return *this; diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h 
b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h index 6a116c76e73..1fd381eb1ae 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h @@ -39,6 +39,7 @@ public: void doSeek(uint32_t docid) override; void doUnpack(uint32_t) override {} void visitMembers(vespalib::ObjectVisitor &visitor) const override; + const std::vector<SearchIterator::UP> &children() const { return _children; } }; } diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h new file mode 100644 index 00000000000..74596066312 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/random_normal.h @@ -0,0 +1,67 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/util/rand48.h> +#include <cmath> + +namespace search { + +/** + * Draws a random number from the Gaussian distribution + * using the Marsaglia polar method. + */ +class RandomNormal +{ +private: + Rand48 _rnd; + double _mean; + double _stddev; + + bool _useSpare; + bool _hasSpare; + feature_t _spare; + + feature_t nextUniform() { + return (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; + } + +public: + RandomNormal(double mean, double stddev, bool useSpare = true) : + _rnd(), + _mean(mean), + _stddev(stddev), + _useSpare(useSpare), + _hasSpare(false), + _spare(0.0) + {} + + void seed(long seed) { + _rnd.srand48(seed); + } + + feature_t next() { + feature_t result = _spare; + if (_useSpare && _hasSpare) { + _hasSpare = false; + } else { + _hasSpare = true; + + feature_t u, v, s; + do { + u = nextUniform(); + v = nextUniform(); + s = u * u + v * v; + } while ( (s >= 1.0) || (s == 0.0) ); + s = std::sqrt(-2.0 * std::log(s) / s); + + _spare = v * s; // saved for next invocation + result = u * s; + } + return _mean + _stddev * result; + } + +}; + +} // search + |