summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Arne Juul <arnej@yahoo-inc.com> 2018-06-20 09:48:02 +0200
committer Arne Juul <arnej@yahoo-inc.com> 2018-06-20 09:48:02 +0200
commit 44fc1380b66867958f89c47ac8752926b3787a4d (patch)
tree ea324d387b055c3ccf0921f9f973c95b16bad2e2 /searchlib
parent 016e584f0ad5a071e13d75eb8ad5ddb46b8c54f5 (diff)
parent 79e7562d34a20ed28621a9ec0dc296eda0881428 (diff)
Merge branch 'master' into arnej/use-log-abort
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/apps/tests/memoryindexstress_test.cpp 1
-rw-r--r-- searchlib/src/tests/attribute/attribute_test.cpp 21
-rw-r--r-- searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp 11
-rw-r--r-- searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp 21
-rw-r--r-- searchlib/src/tests/features/prod_features.cpp 56
-rw-r--r-- searchlib/src/tests/features/prod_features.h 1
-rw-r--r-- searchlib/src/tests/queryeval/same_element/same_element_test.cpp 24
-rw-r--r-- searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h 8
-rw-r--r-- searchlib/src/vespa/searchlib/features/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp 29
-rw-r--r-- searchlib/src/vespa/searchlib/features/native_dot_product_feature.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/features/random_normal_feature.cpp 36
-rw-r--r-- searchlib/src/vespa/searchlib/features/random_normal_feature.h 9
-rw-r--r-- searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp 82
-rw-r--r-- searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h 67
-rw-r--r-- searchlib/src/vespa/searchlib/features/setup.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/index/docbuilder.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/fake_search.cpp 29
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/fake_search.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h 7
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/same_element_search.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/util/random_normal.h 67
24 files changed, 420 insertions, 72 deletions
diff --git a/searchlib/src/apps/tests/memoryindexstress_test.cpp b/searchlib/src/apps/tests/memoryindexstress_test.cpp
index 1ba264e0bfe..b911284a1b4 100644
--- a/searchlib/src/apps/tests/memoryindexstress_test.cpp
+++ b/searchlib/src/apps/tests/memoryindexstress_test.cpp
@@ -19,6 +19,7 @@
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/fieldvalue/stringfieldvalue.h>
#include <vespa/document/repo/configbuilder.h>
+#include <vespa/document/repo/fixedtyperepo.h>
#include <vespa/document/annotation/spanlist.h>
#include <vespa/document/annotation/spantree.h>
#include <vespa/searchlib/util/rand48.h>
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
index 5fcc49fc228..5148ab2be34 100644
--- a/searchlib/src/tests/attribute/attribute_test.cpp
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -32,8 +32,7 @@ using search::index::DummyFileHeaderContext;
using search::attribute::BasicType;
using search::attribute::IAttributeVector;
-namespace
-{
+namespace {
vespalib::string empty;
vespalib::string tmpDir("tmp");
@@ -2315,6 +2314,23 @@ AttributeTest::testPendingCompaction()
populateSimple(iv, 1, 2); // should not trigger new compaction
}
+void testNamePrefix() {
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ AttributeVector::SP vFlat = createAttribute("sfsint32_pc", cfg);
+ AttributeVector::SP vS1 = createAttribute("sfsint32_pc.abc", cfg);
+ AttributeVector::SP vS2 = createAttribute("sfsint32_pc.xyz", cfg);
+ AttributeVector::SP vSS1 = createAttribute("sfsint32_pc.xyz.abc", cfg);
+ EXPECT_EQUAL("sfsint32_pc", vFlat->getName());
+ EXPECT_EQUAL("sfsint32_pc", vFlat->getNamePrefix());
+ EXPECT_EQUAL("sfsint32_pc.abc", vS1->getName());
+ EXPECT_EQUAL("sfsint32_pc", vS1->getNamePrefix());
+ EXPECT_EQUAL("sfsint32_pc.xyz", vS2->getName());
+ EXPECT_EQUAL("sfsint32_pc", vS2->getNamePrefix());
+ EXPECT_EQUAL("sfsint32_pc.xyz.abc", vSS1->getName());
+ EXPECT_EQUAL("sfsint32_pc", vSS1->getNamePrefix());
+
+}
+
void
deleteDataDirs()
{
@@ -2361,6 +2377,7 @@ int AttributeTest::Main()
TEST_DO(requireThatAddressSpaceUsageIsReported());
testReaderDuringLastUpdate();
TEST_DO(testPendingCompaction());
+ TEST_DO(testNamePrefix());
deleteDataDirs();
TEST_DONE();
diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp
index 805a6d3b962..f8ab03d7710 100644
--- a/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp
+++ b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp
@@ -122,8 +122,8 @@ TEST_F("require that task with same string component id are serialized", Fixture
std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
EXPECT_EQUAL(0, tv->_val);
auto test2 = [=]() { tv->modify(14, 42); };
- f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); });
- f._threads.execute("0", test2);
+ f._threads.execute(f._threads.getExecutorId("0"), [=]() { usleep(2000); tv->modify(0, 14); });
+ f._threads.execute(f._threads.getExecutorId("0"), test2);
tv->wait(2);
EXPECT_EQUAL(0, tv->_fail);
EXPECT_EQUAL(42, tv->_val);
@@ -132,8 +132,7 @@ TEST_F("require that task with same string component id are serialized", Fixture
EXPECT_EQUAL(42, tv->_val);
}
-namespace
-{
+namespace {
int detectSerializeFailure(Fixture &f, vespalib::stringref altComponentId, int tryLimit)
{
@@ -141,8 +140,8 @@ int detectSerializeFailure(Fixture &f, vespalib::stringref altComponentId, int t
for (tryCnt = 0; tryCnt < tryLimit; ++tryCnt) {
std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
EXPECT_EQUAL(0, tv->_val);
- f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); });
- f._threads.execute(altComponentId, [=]() { tv->modify(14, 42); });
+ f._threads.execute(f._threads.getExecutorId("0"), [=]() { usleep(2000); tv->modify(0, 14); });
+ f._threads.execute(f._threads.getExecutorId(altComponentId), [=]() { tv->modify(14, 42); });
tv->wait(2);
if (tv->_fail != 1) {
continue;
diff --git a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp
index 1da912ccb3a..0cf13443142 100644
--- a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp
+++ b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp
@@ -16,7 +16,8 @@ using namespace search::fef::test;
using namespace search::features;
using CollectionType = FieldInfo::CollectionType;
-const std::string featureName("nativeDotProduct(foo)");
+const std::string fooFeatureName("nativeDotProduct(foo)");
+const std::string anyFeatureName("nativeDotProduct");
struct BlueprintFactoryFixture {
BlueprintFactory factory;
@@ -78,7 +79,8 @@ struct RankFixture : BlueprintFactoryFixture, IndexFixture {
std::vector<TermFieldHandle> fooHandles;
std::vector<TermFieldHandle> barHandles;
RankFixture(const std::vector<uint32_t> &fooWeights,
- const std::vector<uint32_t> &barWeights)
+ const std::vector<uint32_t> &barWeights,
+ const vespalib::string &featureName = fooFeatureName)
: queryEnv(&indexEnv), rankSetup(factory, indexEnv),
mdl(), match_data(), rankProgram(), fooHandles(), barHandles()
{
@@ -152,6 +154,12 @@ TEST_FF("require that setup fails for unknown field", NativeDotProductBlueprint,
EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "unknown")));
}
+TEST_FF("require that setup can be done without field", NativeDotProductBlueprint, IndexFixture) {
+ DummyDependencyHandler deps(f1);
+ f1.setName(vespalib::make_string("%s", f1.getBaseName().c_str()));
+ EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>()));
+}
+
TEST_F("require that not searching a field will give it 0.0 dot product", RankFixture(vec(), vec(1, 2, 3))) {
EXPECT_EQUAL(0.0, f1.getScore(10));
}
@@ -183,11 +191,18 @@ TEST_F("require that data from other fields is ignored", RankFixture(vec(1, 3),
EXPECT_EQUAL(14, f1.getScore(10));
}
+TEST_F("require that not specifying field includes all term/field combinations", RankFixture(vec(1, 3), vec(5, 7), anyFeatureName)) {
+ f1.setFooWeight(0, 10, 2);
+ f1.setFooWeight(1, 10, 4);
+ f1.setBarWeight(0, 10, 6);
+ f1.setBarWeight(1, 10, 8);
+ EXPECT_EQUAL(100, f1.getScore(10));
+}
+
TEST_F("require that negative weights in the index works", RankFixture(vec(1, 3), vec())) {
f1.setFooWeight(0, 10, 2);
f1.setFooWeight(1, 10, -4);
EXPECT_EQUAL(-10, f1.getScore(10));
}
-
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index 345c66ec672..0e57f520673 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -32,6 +32,7 @@ LOG_SETUP("prod_features_test");
#include <vespa/searchlib/features/querytermcountfeature.h>
#include <vespa/searchlib/features/randomfeature.h>
#include <vespa/searchlib/features/random_normal_feature.h>
+#include <vespa/searchlib/features/random_normal_stable_feature.h>
#include <vespa/searchlib/features/rankingexpressionfeature.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/termfeature.h>
@@ -105,6 +106,7 @@ Test::Main()
TEST_DO(testQueryTermCount()); TEST_FLUSH();
TEST_DO(testRandom()); TEST_FLUSH();
TEST_DO(testRandomNormal()); TEST_FLUSH();
+ TEST_DO(testRandomNormalStable()); TEST_FLUSH();
TEST_DO(testRankingExpression()); TEST_FLUSH();
TEST_DO(testTerm()); TEST_FLUSH();
TEST_DO(testTermDistance()); TEST_FLUSH();
@@ -1727,17 +1729,16 @@ Test::testRandom()
}
void
-Test::testRandomNormal()
-{
+Test::testRandomNormal() {
{ // Test blueprint.
RandomNormalBlueprint pt;
EXPECT_TRUE(assertCreateInstance(pt, "randomNormal"));
StringList params, in, out;
- FT_SETUP_OK (pt, params, in, out.add("out"));
- FT_SETUP_OK (pt, params.add("0.5").add("1.0"), in, out);
- FT_SETUP_OK (pt, params.add("val1"), in, out);
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out);
+ FT_SETUP_OK(pt, params.add("val1"), in, out);
FT_DUMP_EMPTY(_factory, "randomNormal");
}
@@ -1766,7 +1767,50 @@ Test::testRandomNormal()
for (uint32_t i = 0; i < 5; ++i) {
rr.clear();
ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
- ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)")-0.0)/0.1) * 0.2 + 1.0, EPS, i + 1));
+ ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1));
+ }
+ }
+}
+
+void
+Test::testRandomNormalStable() {
+ { // Test blueprint.
+ RandomNormalStableBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "randomNormalStable"));
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out);
+ FT_SETUP_OK(pt, params.add("val1"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "randomNormalStable");
+ }
+
+ { // Test setting of mean and stddev values, and seed
+ FtFeatureTest ft1(_factory, "randomNormalStable(0.0,0.1)");
+ FtFeatureTest ft2(_factory, "randomNormalStable(1.0,0.2)");
+ ft1.getIndexEnv().getProperties().add("randomNormalStable(0.0,0.1).seed", "100");
+ ft2.getIndexEnv().getProperties().add("randomNormalStable(1.0,0.2).seed", "100");
+ ASSERT_TRUE(ft1.setup());
+ ASSERT_TRUE(ft2.setup());
+ RankResult rr;
+ for (uint32_t i = 0; i < 5; ++i) {
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalStable(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1));
+ }
+ }
+ { // Test executor (randomNormalStable)
+ FtFeatureTest ft1(_factory, "randomNormalStable");
+ FtFeatureTest ft2(_factory, "randomNormalStable");
+ ASSERT_TRUE(ft1.setup());
+ ASSERT_TRUE(ft2.setup());
+ RankResult rr;
+ for (uint32_t i = 0; i < 5; ++i) {
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalStable"), EPS, i + 1));
}
}
}
diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h
index 0d234ca674e..d7bf001bedf 100644
--- a/searchlib/src/tests/features/prod_features.h
+++ b/searchlib/src/tests/features/prod_features.h
@@ -35,6 +35,7 @@ public:
void testQueryTermCount();
void testRandom();
void testRandomNormal();
+ void testRandomNormalStable();
void testRankingExpression();
void testTerm();
void testTermDistance();
diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
index d89883bc417..45ebdd78fb3 100644
--- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
+++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
@@ -5,17 +5,24 @@
#include <vespa/searchlib/queryeval/leaf_blueprints.h>
#include <vespa/searchlib/queryeval/simpleresult.h>
#include <vespa/searchlib/queryeval/same_element_blueprint.h>
+#include <vespa/searchlib/queryeval/same_element_search.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchcommon/attribute/i_search_context.h>
+#include <vespa/searchlib/attribute/elementiterator.h>
using namespace search::fef;
using namespace search::queryeval;
+using search::attribute::ElementIterator;
-std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResult> &children) {
+std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResult> &children, bool fake_attr = false) {
auto result = std::make_unique<SameElementBlueprint>();
for (size_t i = 0; i < children.size(); ++i) {
uint32_t field_id = i;
vespalib::string field_name = vespalib::make_string("f%u", field_id);
FieldSpec field = result->getNextChildField(field_name, field_id);
- result->addTerm(std::make_unique<FakeBlueprint>(field, children[i]));
+ auto fake = std::make_unique<FakeBlueprint>(field, children[i]);
+ fake->is_attr(fake_attr);
+ result->addTerm(std::move(fake));
}
return result;
}
@@ -96,4 +103,17 @@ TEST("require that children are sorted") {
EXPECT_EQUAL(dynamic_cast<SameElementBlueprint&>(*bp).terms()[2]->getState().estimate().estHits, 4u);
}
+TEST("require that attribute iterators are wrapped for element unpacking") {
+ auto a = make_result({{5, {1,3,7}}});
+ auto b = make_result({{5, {3,5,10}}});
+ auto bp = finalize(make_blueprint({a,b}, true), true);
+ auto md = MatchData::makeTestInstance(0, 0);
+ auto search = bp->createSearch(*md, false);
+ SameElementSearch *se = dynamic_cast<SameElementSearch*>(search.get());
+ ASSERT_TRUE(se != nullptr);
+ ASSERT_EQUAL(se->children().size(), 2u);
+ EXPECT_TRUE(dynamic_cast<ElementIterator*>(se->children()[0].get()) != nullptr);
+ EXPECT_TRUE(dynamic_cast<ElementIterator*>(se->children()[1].get()) != nullptr);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h b/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h
index 05347e790fb..a8b2a722c01 100644
--- a/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h
+++ b/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h
@@ -89,14 +89,14 @@ public:
* call sync before tearing down pointed to/referenced data.
* All tasks must be scheduled from same thread.
*
- * @param componentId component id
- * @param function function to be wrapped in a task and later executed
+ * @param id executor id
+ * @param function function to be wrapped in a task and later executed
*/
template <class FunctionType>
- void execute(vespalib::stringref componentId, FunctionType &&function) {
- ExecutorId id = getExecutorId(componentId);
+ void execute(ExecutorId id, FunctionType &&function) {
executeTask(id, vespalib::makeLambdaTask(std::forward<FunctionType>(function)));
}
+
};
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index bd847fe35b5..16401a67424 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -44,6 +44,7 @@ vespa_add_library(searchlib_features OBJECT
queryterm.cpp
querytermcountfeature.cpp
random_normal_feature.cpp
+ random_normal_stable_feature.cpp
randomfeature.cpp
rankingexpressionfeature.cpp
raw_score_feature.cpp
diff --git a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp
index cb8136e8b7f..7865e32849f 100644
--- a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp
@@ -8,6 +8,23 @@ using namespace search::fef;
namespace search {
namespace features {
+NativeDotProductExecutor::NativeDotProductExecutor(const search::fef::IQueryEnvironment &env)
+ : FeatureExecutor(),
+ _pairs(),
+ _md(nullptr)
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ const search::fef::ITermData *td = env.getTerm(i);
+ auto weight = td->getWeight();
+ for (size_t f = 0; f < td->numFields(); ++f) {
+ auto handle = td->field(f).getHandle();
+ if (handle != search::fef::IllegalHandle) {
+ _pairs.emplace_back(handle, weight);
+ }
+ }
+ }
+}
+
NativeDotProductExecutor::NativeDotProductExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId)
: FeatureExecutor(),
_pairs(),
@@ -46,15 +63,21 @@ bool
NativeDotProductBlueprint::setup(const IIndexEnvironment &,
const ParameterList &params)
{
- _field = params[0].asField();
- describeOutput("out", "dot product between query term weights and match weights for the given field");
+ if (params.size() > 0) {
+ _field = params[0].asField();
+ }
+ describeOutput("out", "dot product between query term weights and match weights");
return true;
}
FeatureExecutor &
NativeDotProductBlueprint::createExecutor(const IQueryEnvironment &queryEnv, vespalib::Stash &stash) const
{
- return stash.create<NativeDotProductExecutor>(queryEnv, _field->id());
+ if (_field) {
+ return stash.create<NativeDotProductExecutor>(queryEnv, _field->id());
+ } else {
+ return stash.create<NativeDotProductExecutor>(queryEnv);
+ }
}
} // namespace features
diff --git a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h
index a71d23f3158..33c5c89c88b 100644
--- a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h
+++ b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h
@@ -20,6 +20,7 @@ private:
void handle_bind_match_data(const fef::MatchData &md) override;
public:
+ NativeDotProductExecutor(const fef::IQueryEnvironment &env);
NativeDotProductExecutor(const fef::IQueryEnvironment &env, uint32_t fieldId);
void execute(uint32_t docId) override;
};
@@ -31,13 +32,13 @@ class NativeDotProductBlueprint : public fef::Blueprint
private:
const fef::FieldInfo *_field;
public:
- NativeDotProductBlueprint() : Blueprint("nativeDotProduct"), _field(0) {}
+ NativeDotProductBlueprint() : Blueprint("nativeDotProduct"), _field(nullptr) {}
void visitDumpFeatures(const fef::IIndexEnvironment &, fef::IDumpFeatureVisitor &) const override {}
fef::Blueprint::UP createInstance() const override {
return Blueprint::UP(new NativeDotProductBlueprint());
}
fef::ParameterDescriptions getDescriptions() const override {
- return fef::ParameterDescriptions().desc().field();
+ return fef::ParameterDescriptions().desc().field().desc();
}
bool setup(const fef::IIndexEnvironment &env, const fef::ParameterList &params) override;
fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
index 67f30c4eb93..c83ec80f6b5 100644
--- a/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.cpp
@@ -4,7 +4,6 @@
#include "utils.h"
#include <vespa/searchlib/fef/properties.h>
#include <vespa/fastos/time.h>
-#include <cmath>
#include <vespa/log/log.h>
LOG_SETUP(".features.randomnormalfeature");
@@ -14,46 +13,18 @@ namespace features {
RandomNormalExecutor::RandomNormalExecutor(uint64_t seed, double mean, double stddev) :
search::fef::FeatureExecutor(),
- _rnd(),
- _mean(mean),
- _stddev(stddev),
- _hasSpare(false),
- _spare(0.0)
-
+ _rnd(mean, stddev, true)
{
LOG(debug, "RandomNormalExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev);
- _rnd.srand48(seed);
+ _rnd.seed(seed);
}
-/**
- * Draws a random number from the Gaussian distribution
- * using the Marsaglia polar method.
- */
void
RandomNormalExecutor::execute(uint32_t)
{
- feature_t result = _spare;
- if (_hasSpare) {
- _hasSpare = false;
- } else {
- _hasSpare = true;
-
- feature_t u, v, s;
- do {
- u = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0;
- v = (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0;
- s = u * u + v * v;
- } while ( (s >= 1.0) || (s == 0.0) );
- s = std::sqrt(-2.0 * std::log(s) / s);
-
- _spare = v * s; // saved for next invocation
- result = u * s;
- }
-
- outputs().set_number(0, _mean + _stddev * result);
+ outputs().set_number(0, _rnd.next());
}
-
RandomNormalBlueprint::RandomNormalBlueprint() :
search::fef::Blueprint("randomNormal"),
_seed(0),
@@ -82,7 +53,6 @@ RandomNormalBlueprint::setup(const search::fef::IIndexEnvironment & env,
if (p.found()) {
_seed = util::strToNum<uint64_t>(p.get());
}
-
if (params.size() > 0) {
_mean = params[0].asDouble();
}
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_feature.h
index 92aa3117b91..2d2429371d9 100644
--- a/searchlib/src/vespa/searchlib/features/random_normal_feature.h
+++ b/searchlib/src/vespa/searchlib/features/random_normal_feature.h
@@ -4,7 +4,7 @@
#include <vespa/searchlib/fef/blueprint.h>
#include <vespa/searchlib/fef/featureexecutor.h>
-#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/util/random_normal.h>
namespace search {
namespace features {
@@ -17,12 +17,7 @@ namespace features {
**/
class RandomNormalExecutor : public fef::FeatureExecutor {
private:
- Rand48 _rnd;
- double _mean;
- double _stddev;
-
- bool _hasSpare;
- double _spare;
+ RandomNormal _rnd; // seeded once per query
public:
RandomNormalExecutor(uint64_t seed, double mean, double stddev);
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp
new file mode 100644
index 00000000000..5f3cf7fd063
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.cpp
@@ -0,0 +1,82 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "random_normal_stable_feature.h"
+#include "utils.h"
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/fastos/time.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".features.randomnormalstablefeature");
+
+namespace search {
+namespace features {
+
+RandomNormalStableExecutor::RandomNormalStableExecutor(uint64_t seed, double mean, double stddev) :
+ search::fef::FeatureExecutor(),
+ _rnd(mean, stddev, false), // don't use spares, as we reset seed on every generation
+ _seed(seed)
+{
+ LOG(debug, "RandomNormalStableExecutor: seed=%zu, mean=%f, stddev=%f", seed, mean, stddev);
+}
+
+void
+RandomNormalStableExecutor::execute(uint32_t docId)
+{
+ _rnd.seed(_seed + docId);
+ outputs().set_number(0, _rnd.next());
+}
+
+RandomNormalStableBlueprint::RandomNormalStableBlueprint() :
+ search::fef::Blueprint("randomNormalStable"),
+ _seed(0),
+ _mean(0.0),
+ _stddev(1.0)
+{
+}
+
+void
+RandomNormalStableBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+ search::fef::IDumpFeatureVisitor &) const
+{
+}
+
+search::fef::Blueprint::UP
+RandomNormalStableBlueprint::createInstance() const
+{
+ return search::fef::Blueprint::UP(new RandomNormalStableBlueprint());
+}
+
+bool
+RandomNormalStableBlueprint::setup(const search::fef::IIndexEnvironment & env,
+ const search::fef::ParameterList & params)
+{
+ search::fef::Property p = env.getProperties().lookup(getName(), "seed");
+ if (p.found()) {
+ _seed = util::strToNum<uint64_t>(p.get());
+ }
+ if (params.size() > 0) {
+ _mean = params[0].asDouble();
+ }
+ if (params.size() > 1) {
+ _stddev = params[1].asDouble();
+ }
+
+ describeOutput("out" , "A random value drawn from the Gaussian distribution that is stable for a given match (document and query)");
+
+ return true;
+}
+
+search::fef::FeatureExecutor &
+RandomNormalStableBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
+{
+ uint64_t seed = _seed;
+ if (seed == 0) {
+ seed = util::strToNum<uint64_t>
+ (env.getProperties().lookup(getName(), "seed").get("1024")); // default seed
+ }
+ return stash.create<RandomNormalStableExecutor>(seed, _mean, _stddev);
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h
new file mode 100644
index 00000000000..129c929ba3d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/random_normal_stable_feature.h
@@ -0,0 +1,67 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/util/random_normal.h>
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor for the random normal stable feature outputting a
+ * random number drawn from the Gaussian distribution with the
+ * two arguments 'mean' and 'stddev'.
+ * The same hit always returns the same random number.
+ **/
+class RandomNormalStableExecutor : public fef::FeatureExecutor {
+private:
+ RandomNormal _rnd; // seeded once per match
+ uint64_t _seed;
+
+public:
+ RandomNormalStableExecutor(uint64_t seed, double mean, double stddev);
+ void execute(uint32_t docId) override;
+};
+
+
+/**
+ * Implements the blueprint for the random normal stable feature.
+ */
+class RandomNormalStableBlueprint : public fef::Blueprint {
+private:
+ uint64_t _seed;
+ double _mean;
+ double _stddev;
+
+public:
+ RandomNormalStableBlueprint();
+
+ void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+ fef::Blueprint::UP createInstance() const override;
+ fef::ParameterDescriptions getDescriptions() const override {
+ return fef::ParameterDescriptions().
+ // Can run without parameters:
+ desc().
+
+ // Can run with two parameters (mean and stddev):
+ desc().
+ number(). // mean
+ number(). // stddev
+
+ // Can run with three parameters:
+ desc().
+ number(). // mean
+ number(). // stddev
+ string(); // in order to name different features
+ }
+
+ bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+ fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index 867f058931f..1d3c59f5b3d 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -38,6 +38,7 @@
#include "querytermcountfeature.h"
#include "randomfeature.h"
#include "random_normal_feature.h"
+#include "random_normal_stable_feature.h"
#include "rankingexpressionfeature.h"
#include "raw_score_feature.h"
#include "reverseproximityfeature.h"
@@ -100,6 +101,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint()));
registry.addPrototype(Blueprint::SP(new RandomBlueprint()));
registry.addPrototype(Blueprint::SP(new RandomNormalBlueprint()));
+ registry.addPrototype(Blueprint::SP(new RandomNormalStableBlueprint()));
registry.addPrototype(Blueprint::SP(new RawScoreBlueprint()));
registry.addPrototype(Blueprint::SP(new SubqueriesBlueprint));
registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint()));
diff --git a/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp
index ec00b7d2f90..0eba912fafd 100644
--- a/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp
+++ b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp
@@ -136,7 +136,7 @@ FeatureNameBuilder::buildName() const
vespalib::string ret;
if (!_baseName.empty()) {
ret = _baseName;
- if (!_parameters.empty() > 0) {
+ if (!_parameters.empty()) {
ret += "(";
for (uint32_t i = 0; i < _parameters.size(); ++i) {
if (i > 0) {
diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.h b/searchlib/src/vespa/searchlib/index/docbuilder.h
index 1d170483783..73c60304f50 100644
--- a/searchlib/src/vespa/searchlib/index/docbuilder.h
+++ b/searchlib/src/vespa/searchlib/index/docbuilder.h
@@ -4,6 +4,7 @@
#include "doctypebuilder.h"
#include <vespa/document/datatype/datatypes.h>
+#include <vespa/document/repo/fixedtyperepo.h>
#include <vespa/document/fieldvalue/fieldvalues.h>
#include <vespa/document/annotation/annotation.h>
#include <vespa/document/annotation/span.h>
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
index 226f41a53c0..3165cd9b68a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
@@ -4,10 +4,37 @@
#include <vespa/searchlib/fef/termfieldmatchdataposition.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/vespalib/objects/visit.h>
+#include <vespa/searchcommon/attribute/i_search_context.h>
namespace search {
namespace queryeval {
+namespace {
+
+struct FakeContext : search::attribute::ISearchContext {
+ int32_t onFind(DocId, int32_t, int32_t &) const override { return -1; }
+ int32_t onFind(DocId, int32_t) const override { return -1; }
+ unsigned int approximateHits() const override { return 0; }
+ std::unique_ptr<SearchIterator> createIterator(fef::TermFieldMatchData *, bool) override { abort(); }
+ void fetchPostings(bool) override { }
+ bool valid() const override { return true; }
+ search::Int64Range getAsIntegerTerm() const override { abort(); }
+ const search::QueryTermBase &queryTerm() const override { abort(); }
+ const vespalib::string &attributeName() const override { abort(); }
+};
+
+} // namespace search::queryeval::<unnamed>
+
+void
+FakeSearch::is_attr(bool value)
+{
+ if (value) {
+ _ctx = std::make_unique<FakeContext>();
+ } else {
+ _ctx.reset();
+ }
+}
+
void
FakeSearch::doSeek(uint32_t docid)
{
@@ -49,5 +76,5 @@ FakeSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
visit(visitor, "term", _term);
}
-} // namespace queryeval
+} // namespace search::queryeval
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.h b/searchlib/src/vespa/searchlib/queryeval/fake_search.h
index c320d497edc..aa6df480a21 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.h
@@ -5,6 +5,7 @@
#include "searchiterator.h"
#include "fake_result.h"
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchcommon/attribute/i_search_context.h>
namespace search {
namespace queryeval {
@@ -18,6 +19,7 @@ private:
FakeResult _result;
uint32_t _offset;
fef::TermFieldMatchDataArray _tfmda;
+ std::unique_ptr<attribute::ISearchContext> _ctx; // owned search context returned by getAttributeSearchContext(); set or cleared through is_attr()
bool valid() const { return _offset < _result.inspect().size(); }
uint32_t currId() const { return _result.inspect()[_offset].docId; }
@@ -34,10 +36,12 @@ public:
{
assert(_tfmda.size() == 1);
}
+ void is_attr(bool value); // true installs a stub attribute search context, false removes it (defined in fake_search.cpp)
void doSeek(uint32_t docid) override;
void doUnpack(uint32_t docid) override;
const PostingInfo *getPostingInfo() const override { return _result.postingInfo(); }
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+ const attribute::ISearchContext *getAttributeSearchContext() const override { return _ctx.get(); } // non-owning pointer to _ctx; null while no context is installed
};
} // namespace queryeval
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
index bbfa487ae7d..a140fb146d5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
@@ -64,7 +64,9 @@ SimpleBlueprint::tag(const vespalib::string &t)
SearchIterator::UP
FakeBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const // Builds the FakeSearch leaf iterator; the unnamed bool argument is ignored.
{
- return std::make_unique<FakeSearch>(_tag, _field.getName(), _term, _result, tfmda);
+ auto result = std::make_unique<FakeSearch>(_tag, _field.getName(), _term, _result, tfmda); // keep the concrete FakeSearch type so is_attr() can be called before returning
+ result->is_attr(_is_attr); // propagate this blueprint's attribute flag to the iterator
+ return result;
}
FakeBlueprint::FakeBlueprint(const FieldSpec &field, const FakeResult &result)
@@ -72,7 +74,8 @@ FakeBlueprint::FakeBlueprint(const FieldSpec &field, const FakeResult &result)
_tag("<tag>"),
_term("<term>"),
_field(field),
- _result(result)
+ _result(result),
+ _is_attr(false)
{
setEstimate(HitEstimate(result.inspect().size(), result.inspect().empty()));
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
index 698e2235690..85d30aaf003 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
@@ -51,6 +51,7 @@ private:
vespalib::string _term;
FieldSpec _field;
FakeResult _result;
+ bool _is_attr; // flag forwarded to FakeSearch::is_attr() by createLeafSearch(); the constructor initializes it to false
protected:
SearchIterator::UP
@@ -66,6 +67,12 @@ public:
}
const vespalib::string &tag() const { return _tag; }
+ FakeBlueprint &is_attr(bool value) { // setter: stores the flag and returns *this so calls can be chained
+ _is_attr = value;
+ return *this;
+ }
+ bool is_attr() const { return _is_attr; } // getter: current value of the flag
+
FakeBlueprint &term(const vespalib::string &t) {
_term = t;
return *this;
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h
index 6a116c76e73..1fd381eb1ae 100644
--- a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h
@@ -39,6 +39,7 @@ public:
void doSeek(uint32_t docid) override;
void doUnpack(uint32_t) override {}
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+ const std::vector<SearchIterator::UP> &children() const { return _children; } // read-only reference to the _children vector; ownership of the iterators is not transferred
};
}
diff --git a/searchlib/src/vespa/searchlib/util/random_normal.h b/searchlib/src/vespa/searchlib/util/random_normal.h
new file mode 100644
index 00000000000..74596066312
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/random_normal.h
@@ -0,0 +1,67 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/util/rand48.h>
+#include <cmath>
+
+namespace search {
+
+/**
+ * Generator of normally distributed values with a configurable mean and standard deviation,
+ * using the Marsaglia polar method; each fresh draw yields two variates, the second kept as a "spare".
+ */
+class RandomNormal
+{
+private:
+ Rand48 _rnd; // underlying uniform generator; default-constructed, reseeded through seed()
+ double _mean; // added to every scaled variate in next()
+ double _stddev; // multiplies every variate in next()
+
+ bool _useSpare; // when false, next() never consumes the cached spare
+ bool _hasSpare; // true when _spare holds a variate that has not been handed out yet
+ feature_t _spare; // second variate from the most recent fresh draw (unscaled) -- NOTE(review): feature_t is not declared by the includes visible here; presumably it arrives via rand48.h, confirm
+
+ feature_t nextUniform() { // maps one lrand48() draw onto a symmetric interval around zero
+ return (_rnd.lrand48() / (feature_t)0x80000000u) * 2.0 - 1.0; // assumes lrand48() yields values in [0, 2^31), giving a result in [-1, 1) -- TODO confirm against Rand48
+ }
+
+public:
+ RandomNormal(double mean, double stddev, bool useSpare = true) : // useSpare selects whether next() may return the cached second variate
+ _rnd(),
+ _mean(mean),
+ _stddev(stddev),
+ _useSpare(useSpare),
+ _hasSpare(false),
+ _spare(0.0)
+ {}
+
+ void seed(long seed) { // reseeds the uniform generator only -- NOTE(review): _hasSpare is not cleared, so with _useSpare set a spare cached before reseeding can still be returned by the next call
+ _rnd.srand48(seed);
+ }
+
+ feature_t next() { // returns _mean + _stddev * z, where z is either the cached spare or a freshly drawn variate
+ feature_t result = _spare; // provisional value; overwritten below unless the spare is consumed
+ if (_useSpare && _hasSpare) {
+ _hasSpare = false; // consume the cached spare
+ } else {
+ _hasSpare = true; // set even when _useSpare is false; harmless because the condition above tests _useSpare first
+
+ feature_t u, v, s;
+ do {
+ u = nextUniform();
+ v = nextUniform();
+ s = u * u + v * v;
+ } while ( (s >= 1.0) || (s == 0.0) ); // retry until (u, v) lies strictly inside the unit circle and is not the origin, where log(s) / s is undefined
+ s = std::sqrt(-2.0 * std::log(s) / s); // common scale factor applied to both u and v
+
+ _spare = v * s; // second variate of the pair, saved for the next invocation
+ result = u * s;
+ }
+ return _mean + _stddev * result; // shift and scale to the configured mean and standard deviation
+ }
+
+};
+
+} // search
+