summaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/features/prod_features_test.cpp
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2023-08-30 09:29:14 +0000
committerGeir Storli <geirst@yahooinc.com>2023-08-30 13:00:55 +0000
commit1efa4e7ef74e8d3d650c72e52e1596f43d0aa690 (patch)
tree0fc44ce13d904171b50614e9c5738e64548307ab /searchlib/src/tests/features/prod_features_test.cpp
parent306dce28ed3cfa0d2f4679fd2519db3d1ac780c7 (diff)
Use "_test" suffix for unit test cpp files.
Diffstat (limited to 'searchlib/src/tests/features/prod_features_test.cpp')
-rw-r--r--searchlib/src/tests/features/prod_features_test.cpp2256
1 files changed, 2256 insertions, 0 deletions
diff --git a/searchlib/src/tests/features/prod_features_test.cpp b/searchlib/src/tests/features/prod_features_test.cpp
new file mode 100644
index 00000000000..10d1a9bdc8e
--- /dev/null
+++ b/searchlib/src/tests/features/prod_features_test.cpp
@@ -0,0 +1,2256 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "prod_features_test.h"
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/floatbase.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/singleboolattribute.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/features/agefeature.h>
+#include <vespa/searchlib/features/array_parser.hpp>
+#include <vespa/searchlib/features/attributefeature.h>
+#include <vespa/searchlib/features/closenessfeature.h>
+#include <vespa/searchlib/features/distancefeature.h>
+#include <vespa/searchlib/features/dotproductfeature.h>
+#include <vespa/searchlib/features/fieldlengthfeature.h>
+#include <vespa/searchlib/features/fieldmatchfeature.h>
+#include <vespa/searchlib/features/firstphasefeature.h>
+#include <vespa/searchlib/features/foreachfeature.h>
+#include <vespa/searchlib/features/freshnessfeature.h>
+#include <vespa/searchlib/features/global_sequence_feature.h>
+#include <vespa/searchlib/features/great_circle_distance_feature.h>
+#include <vespa/searchlib/features/matchcountfeature.h>
+#include <vespa/searchlib/features/matchesfeature.h>
+#include <vespa/searchlib/features/matchfeature.h>
+#include <vespa/searchlib/features/nowfeature.h>
+#include <vespa/searchlib/features/queryfeature.h>
+#include <vespa/searchlib/features/querytermcountfeature.h>
+#include <vespa/searchlib/features/random_normal_feature.h>
+#include <vespa/searchlib/features/random_normal_stable_feature.h>
+#include <vespa/searchlib/features/randomfeature.h>
+#include <vespa/searchlib/features/rankingexpressionfeature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/features/termfeature.h>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/features/weighted_set_parser.hpp>
+#include <vespa/searchlib/fef/featurenamebuilder.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/queryproperties.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/fef/test/plugin/setup.h>
+#include <vespa/searchlib/test/attribute_builder.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include <vespa/vespalib/util/rand48.h>
+#include <vespa/vespalib/util/string_hash.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <cmath>
+
+#include <vespa/log/log.h>
+LOG_SETUP("prod_features_test");
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+using search::AttributeFactory;
+using search::AttributeVector;
+using search::FloatingPointAttribute;
+using search::IntegerAttribute;
+using search::SingleBoolAttribute;
+using search::StringAttribute;
+using search::WeightedSetStringExtAttribute;
+using search::attribute::WeightedEnumContent;
+using search::attribute::test::AttributeBuilder;
+using search::common::GeoLocation;
+using search::common::GeoLocationSpec;
+using vespalib::eval::ValueType;
+
+using AttributePtr = AttributeVector::SP;
+using AVC = search::attribute::Config;
+using AVBT = search::attribute::BasicType;
+using AVCT = search::attribute::CollectionType;
+using CollectionType = FieldInfo::CollectionType;
+using DataType = FieldInfo::DataType;
+
+const double EPS = 10e-6;
+
+
+TEST_APPHOOK(Test);
+
+int
+Test::Main()
+{
+ TEST_INIT("prod_features_test");
+
+ // Configure factory with all known blueprints.
+ setup_fef_test_plugin(_factory);
+ setup_search_features(_factory);
+
+ // Test all features.
+ TEST_DO(testFramework()); TEST_FLUSH();
+ TEST_DO(testFtLib()); TEST_FLUSH();
+ TEST_DO(testAge()); TEST_FLUSH();
+ TEST_DO(testAttribute()); TEST_FLUSH();
+ TEST_DO(testAttributeMatch()); TEST_FLUSH();
+ TEST_DO(testCloseness()); TEST_FLUSH();
+ TEST_DO(testMatchCount()); TEST_FLUSH();
+ TEST_DO(testGreatCircleDistance()); TEST_FLUSH();
+ TEST_DO(testDistance()); TEST_FLUSH();
+ TEST_DO(testDistanceToPath()); TEST_FLUSH();
+ TEST_DO(testDotProduct()); TEST_FLUSH();
+ TEST_DO(testFieldLength()); TEST_FLUSH();
+ TEST_DO(testFieldMatch()); TEST_FLUSH();
+ TEST_DO(testFieldTermMatch()); TEST_FLUSH();
+ TEST_DO(testFirstPhase()); TEST_FLUSH();
+ TEST_DO(testForeach()); TEST_FLUSH();
+ TEST_DO(testFreshness()); TEST_FLUSH();
+ TEST_DO(testMatch()); TEST_FLUSH();
+ TEST_DO(testMatches()); TEST_FLUSH();
+ TEST_DO(testNow()); TEST_FLUSH();
+ TEST_DO(testQuery()); TEST_FLUSH();
+ TEST_DO(testQueryTermCount()); TEST_FLUSH();
+ TEST_DO(testRandom()); TEST_FLUSH();
+ TEST_DO(testRandomNormal()); TEST_FLUSH();
+ TEST_DO(testRandomNormalStable()); TEST_FLUSH();
+ TEST_DO(testRankingExpression()); TEST_FLUSH();
+ TEST_DO(testTerm()); TEST_FLUSH();
+ TEST_DO(testTermDistance()); TEST_FLUSH();
+ TEST_DO(testUtils()); TEST_FLUSH();
+ TEST_DO(testUnique()); TEST_FLUSH();
+
+ TEST_DONE();
+ return 0;
+}
+
+
+void
+Test::testFtLib()
+{
+ { // toQuery
+ FtQuery q = FtUtil::toQuery("a b!50 0.5:c!200%0.5 d%0.3 e!300 0.3:f ");
+ ASSERT_TRUE(q.size() == 6);
+ EXPECT_EQUAL(q[0].term, vespalib::string("a"));
+ EXPECT_EQUAL(q[0].termWeight.percent(), 100);
+ EXPECT_APPROX(q[0].connexity, 0.1f, EPS);
+ EXPECT_APPROX(q[0].significance, 0.1f, EPS);
+ EXPECT_EQUAL(q[1].term, vespalib::string("b"));
+ EXPECT_EQUAL(q[1].termWeight.percent(), 50);
+ EXPECT_APPROX(q[1].connexity, 0.1f, EPS);
+ EXPECT_APPROX(q[1].significance, 0.1f, EPS);
+ EXPECT_EQUAL(q[2].term, vespalib::string("c"));
+ EXPECT_EQUAL(q[2].termWeight.percent(), 200);
+ EXPECT_APPROX(q[2].connexity, 0.5f, EPS);
+ EXPECT_APPROX(q[2].significance, 0.5f, EPS);
+ EXPECT_EQUAL(q[3].term, vespalib::string("d"));
+ EXPECT_EQUAL(q[3].termWeight.percent(), 100);
+ EXPECT_APPROX(q[3].connexity, 0.1f, EPS);
+ EXPECT_APPROX(q[3].significance, 0.3f, EPS);
+ EXPECT_EQUAL(q[4].term, vespalib::string("e"));
+ EXPECT_EQUAL(q[4].termWeight.percent(), 300);
+ EXPECT_APPROX(q[4].connexity, 0.1f, EPS);
+ EXPECT_APPROX(q[4].significance, 0.1f, EPS);
+ EXPECT_EQUAL(q[5].term, vespalib::string("f"));
+ EXPECT_EQUAL(q[5].termWeight.percent(), 100);
+ EXPECT_APPROX(q[5].connexity, 0.3f, EPS);
+ EXPECT_APPROX(q[5].significance, 0.1f, EPS);
+ }
+ { // toRankResult
+ RankResult rr = toRankResult("foo", "a:0.5 b:-0.5 c:2 d:3 ");
+ std::vector<vespalib::string> keys = rr.getKeys();
+ ASSERT_TRUE(keys.size() == 4);
+ EXPECT_EQUAL(keys[0], vespalib::string("foo.a"));
+ EXPECT_EQUAL(keys[1], vespalib::string("foo.b"));
+ EXPECT_EQUAL(keys[2], vespalib::string("foo.c"));
+ EXPECT_EQUAL(keys[3], vespalib::string("foo.d"));
+ EXPECT_APPROX(rr.getScore("foo.a"), 0.5f, EPS);
+ EXPECT_APPROX(rr.getScore("foo.b"), -0.5f, EPS);
+ EXPECT_APPROX(rr.getScore("foo.c"), 2.0f, EPS);
+ EXPECT_APPROX(rr.getScore("foo.d"), 3.0f, EPS);
+ }
+}
+
+
+void
+Test::testAge()
+{
+ { // Test blueprint
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "datetime")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "datetime2");
+
+ AgeBlueprint pt;
+ EXPECT_TRUE(assertCreateInstance(pt, "age"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, idx_env, params);
+ FT_SETUP_OK(pt, idx_env, params.add("datetime"), in.add("now"), out.add("out"));
+ FT_SETUP_FAIL(pt, idx_env, params.add("datetime2"));
+
+ FT_DUMP_EMPTY(_factory, "age");
+ }
+
+ { // Test executor
+ assertAge(0, "doctime", 60, 120);
+ assertAge(60, "doctime", 180, 120);
+ assertAge(15000000000, "doctime", 20000000000, 5000000000);
+ }
+}
+
+void
+Test::assertAge(feature_t expAge, const vespalib::string & attr, uint64_t now, uint64_t docTime)
+{
+ vespalib::string feature = "age(" + attr + ")";
+ FtFeatureTest ft(_factory, feature);
+ setupForAgeTest(ft, docTime);
+ ft.getQueryEnv().getProperties().add(queryproperties::now::SystemTime::NAME,
+ vespalib::make_string("%" PRIu64, now));
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expAge)));
+}
+
+void
+Test::setupForAgeTest(FtFeatureTest & ft, int64_t docTime)
+{
+ auto doctime = AttributeBuilder("doctime", AVC(AVBT::INT64, AVCT::SINGLE)).
+ fill({docTime}).get();
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "doctime");
+ ft.getIndexEnv().getAttributeMap().add(doctime);
+}
+
+void
+Test::testAttribute()
+{
+ AttributeBlueprint prototype;
+ {
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+
+ EXPECT_TRUE(assertCreateInstance(prototype, "attribute"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(prototype, idx_env, params); // expects 1 - 2 params
+
+ FT_SETUP_OK(prototype, idx_env, params.add("bar"), in,
+ out.add("value").add("weight").add("contains").add("count"));
+ FT_SETUP_OK(prototype, idx_env, params.add("0"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "attribute");
+ }
+ { // single attributes
+ RankResult exp;
+ exp.addScore("attribute(sint)", 10).
+ addScore("attribute(sint,0)", 10).
+ addScore("attribute(slong)", 20).
+ addScore("attribute(sbyte)", 37).
+ addScore("attribute(sbool)", 1).
+ addScore("attribute(sebool)", 0).
+ addScore("attribute(sfloat)", 60.5f).
+ addScore("attribute(sdouble)", 67.5f).
+ addScore("attribute(sstr)", vespalib::hash2d("foo")).
+ addScore("attribute(sint).count", 1).
+ addScore("attribute(sfloat).count", 1).
+ addScore("attribute(sstr).count", 1).
+ addScore("attribute(udefint)", search::attribute::getUndefined<feature_t>()).
+ addScore("attribute(udeffloat)", search::attribute::getUndefined<feature_t>()).
+ addScore("attribute(udefstr)", vespalib::hash2d(""));
+
+ FtFeatureTest ft(_factory, exp.getKeys());
+ ft.getIndexEnv().getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "slong")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sbyte")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL, "sbool")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL, "sebool")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sdouble")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udeffloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefstr");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ { // array attributes
+ RankResult exp;
+ exp.addScore("attribute(aint)", 0).
+ addScore("attribute(aint,0)", 20).
+ addScore("attribute(aint,1)", 30).
+ addScore("attribute(aint,2)", 0).
+ addScore("attribute(afloat,0)", 70.5f).
+ addScore("attribute(afloat,1)", 80.5f).
+ addScore("attribute(astr,0)", vespalib::hash2d("bar")).
+ addScore("attribute(astr,1)", vespalib::hash2d("baz")).
+ addScore("attribute(aint).count", 2).
+ addScore("attribute(aint,0).count", 0).
+ addScore("attribute(afloat).count", 2).
+ addScore("attribute(afloat,0).count", 0).
+ addScore("attribute(astr).count", 2).
+ addScore("attribute(astr,0).count", 0);
+
+ FtFeatureTest ft(_factory, exp.getKeys());
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint").
+ addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat").
+ addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "astr");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ { // weighted set attributes
+ RankResult exp;
+ exp.addScore("attribute(wsint).value", 0).
+ addScore("attribute(wsint).weight", 0).
+ addScore("attribute(wsint).contains", 0).
+ addScore("attribute(wsint,100).value", 0).
+ addScore("attribute(wsint,100).weight", 0).
+ addScore("attribute(wsint,100).contains", 0).
+ addScore("attribute(wsint,40).value", 40).
+ addScore("attribute(wsint,40).weight", 10).
+ addScore("attribute(wsint,40).contains", 1).
+ addScore("attribute(wsint,50).value", 50).
+ addScore("attribute(wsint,50).weight", 20).
+ addScore("attribute(wsint,50).contains", 1).
+ addScore("attribute(wsfloat).value", 0).
+ addScore("attribute(wsfloat).weight", 0).
+ addScore("attribute(wsfloat).contains", 0).
+ addScore("attribute(wsfloat,1000.5).value", 0).
+ addScore("attribute(wsfloat,1000.5).weight", 0).
+ addScore("attribute(wsfloat,1000.5).contains", 0).
+ addScore("attribute(wsfloat,90.5).value", 90.5f).
+ addScore("attribute(wsfloat,90.5).weight", -30).
+ addScore("attribute(wsfloat,90.5).contains", 1).
+ addScore("attribute(wsfloat,100.5).value", 100.5f).
+ addScore("attribute(wsfloat,100.5).weight", -40).
+ addScore("attribute(wsfloat,100.5).contains", 1).
+ addScore("attribute(wsstr).value", 0).
+ addScore("attribute(wsstr).weight", 0).
+ addScore("attribute(wsstr).contains", 0).
+ addScore("attribute(wsstr,foo).value", 0).
+ addScore("attribute(wsstr,foo).weight", 0).
+ addScore("attribute(wsstr,foo).contains", 0).
+ addScore("attribute(wsstr,qux).value", vespalib::hash2d("qux")).
+ addScore("attribute(wsstr,qux).weight", 11).
+ addScore("attribute(wsstr,qux).contains", 1).
+ addScore("attribute(wsstr,quux).value", vespalib::hash2d("quux")).
+ addScore("attribute(wsstr,quux).weight", 12).
+ addScore("attribute(wsstr,quux).contains", 1).
+ addScore("attribute(wsint).count", 2).
+ addScore("attribute(wsint,40).count", 0).
+ addScore("attribute(wsfloat).count", 2).
+ addScore("attribute(wsfloat,90.5).count", 0).
+ addScore("attribute(wsstr).count", 2).
+ addScore("attribute(wsstr,qux).count", 0);
+
+ FtFeatureTest ft(_factory, exp.getKeys());
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint").
+ addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat").
+ addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr");
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ { // unique only attribute
+ RankResult exp;
+ exp.addScore("attribute(unique).value", 0).
+ addScore("attribute(unique).weight", 0).
+ addScore("attribute(unique).contains", 0).
+ addScore("attribute(unique).count", 0);
+
+ FtFeatureTest ft(_factory, exp.getKeys());
+ setupForAttributeTest(ft);
+ ASSERT_TRUE(ft.setup());
+ //ASSERT_TRUE(ft.execute(exp));
+ }
+}
+
+
+void
+Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env)
+{
+ // setup an original attribute manager with attributes
+ std::vector<AttributePtr> avs;
+ avs.push_back(AttributeFactory::createAttribute("sint", AVC(AVBT::INT32, AVCT::SINGLE))); // 0
+ avs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY))); // 1
+ avs.push_back(AttributeFactory::createAttribute("wsint", AVC(AVBT::INT32, AVCT::WSET))); // 2
+ avs.push_back(AttributeFactory::createAttribute("sfloat", AVC(AVBT::FLOAT, AVCT::SINGLE))); // 3
+ avs.push_back(AttributeFactory::createAttribute("afloat", AVC(AVBT::FLOAT, AVCT::ARRAY))); // 4
+ avs.push_back(AttributeFactory::createAttribute("wsfloat",AVC(AVBT::FLOAT, AVCT::WSET))); // 5
+ avs.push_back(AttributeFactory::createAttribute("sstr", AVC(AVBT::STRING, AVCT::SINGLE))); // 6
+ avs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY))); // 7
+ avs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET))); // 8
+ avs.push_back(AttributeFactory::createAttribute("udefint", AVC(AVBT::INT32, AVCT::SINGLE))); // 9
+ avs.push_back(AttributeFactory::createAttribute("udeffloat", AVC(AVBT::FLOAT, AVCT::SINGLE))); // 10
+ avs.push_back(AttributeFactory::createAttribute("udefstr", AVC(AVBT::STRING, AVCT::SINGLE))); // 11
+ avs.push_back(AttributeFactory::createAttribute("sbyte", AVC(AVBT::INT64, AVCT::SINGLE))); // 12
+ avs.push_back(AttributeFactory::createAttribute("slong", AVC(AVBT::INT64, AVCT::SINGLE))); // 13
+ avs.push_back(AttributeFactory::createAttribute("sbool", AVC(AVBT::BOOL, AVCT::SINGLE))); // 14
+ avs.push_back(AttributeFactory::createAttribute("sebool", AVC(AVBT::BOOL, AVCT::SINGLE))); // 15
+ avs.push_back(AttributeFactory::createAttribute("sdouble", AVC(AVBT::DOUBLE, AVCT::SINGLE))); // 16
+ {
+ AVC cfg(AVBT::TENSOR, AVCT::SINGLE);
+ cfg.setTensorType(ValueType::from_spec("tensor(x[2])"));
+ avs.push_back(AttributeFactory::createAttribute("tensor", cfg));
+ }
+ avs.push_back(AttributeFactory::createAttribute("predicate", AVC(AVBT::PREDICATE, AVCT::SINGLE))); // 18
+ avs.push_back(AttributeFactory::createAttribute("reference", AVC(AVBT::REFERENCE, AVCT::SINGLE))); // 19
+ avs.push_back(AttributeFactory::createAttribute("raw", AVC(AVBT::RAW, AVCT::SINGLE))); // 20
+
+ // simulate a unique only attribute as specified in sd
+ AVC cfg(AVBT::INT32, AVCT::SINGLE);
+ cfg.setFastSearch(true);
+ avs.push_back(AttributeFactory::createAttribute("unique", cfg)); // 9
+
+ if (setup_env) {
+ // register attributes in index environment
+ ft.getIndexEnv().getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "astr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udeffloat")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefstr")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "unique")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "slong")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sdouble")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sbyte")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sbool")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sebool")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "tensor")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOLEANTREE, "predicate")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::REFERENCE, "reference")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::RAW, "raw");
+ }
+
+ for (const auto & attr : avs) {
+ attr->addReservedDoc();
+ attr->addDocs(1);
+ ft.getIndexEnv().getAttributeMap().add(attr);
+ }
+
+ // integer attributes
+ (dynamic_cast<IntegerAttribute *>(avs[0].get()))->update(1, 10);
+ (dynamic_cast<IntegerAttribute *>(avs[12].get()))->update(1, 37);
+ (dynamic_cast<IntegerAttribute *>(avs[13].get()))->update(1, 20);
+ (dynamic_cast<SingleBoolAttribute *>(avs[14].get()))->update(1, 1);
+ (dynamic_cast<SingleBoolAttribute *>(avs[15].get()))->update(1, 0);
+ (dynamic_cast<IntegerAttribute *>(avs[1].get()))->append(1, 20, 0);
+ (dynamic_cast<IntegerAttribute *>(avs[1].get()))->append(1, 30, 0);
+ (dynamic_cast<IntegerAttribute *>(avs[2].get()))->append(1, 40, 10);
+ (dynamic_cast<IntegerAttribute *>(avs[2].get()))->append(1, 50, 20);
+ (dynamic_cast<IntegerAttribute *>(avs[9].get()))->update(1, search::attribute::getUndefined<int32_t>());
+ // feature_t attributes
+ (dynamic_cast<FloatingPointAttribute *>(avs[3].get()))->update(1, 60.5f);
+ (dynamic_cast<FloatingPointAttribute *>(avs[4].get()))->append(1, 70.5f, 0);
+ (dynamic_cast<FloatingPointAttribute *>(avs[4].get()))->append(1, 80.5f, 0);
+ (dynamic_cast<FloatingPointAttribute *>(avs[5].get()))->append(1, 90.5f, -30);
+ (dynamic_cast<FloatingPointAttribute *>(avs[5].get()))->append(1, 100.5f, -40);
+ (dynamic_cast<FloatingPointAttribute *>(avs[10].get()))->update(1, search::attribute::getUndefined<float>());
+ (dynamic_cast<FloatingPointAttribute *>(avs[16].get()))->update(1, 67.5);
+ // string attributes
+ (dynamic_cast<StringAttribute *>(avs[6].get()))->update(1, "foo");
+ (dynamic_cast<StringAttribute *>(avs[7].get()))->append(1, "bar", 0);
+ (dynamic_cast<StringAttribute *>(avs[7].get()))->append(1, "baz", 0);
+ (dynamic_cast<StringAttribute *>(avs[8].get()))->append(1, "qux", 11);
+ (dynamic_cast<StringAttribute *>(avs[8].get()))->append(1, "quux", 12);
+ (dynamic_cast<StringAttribute *>(avs[11].get()))->update(1, "");
+
+ for (uint32_t i = 0; i < avs.size() - 1; ++i) { // do not commit the noupdate attribute
+ avs[i]->commit();
+ }
+
+ // save 'sint' and load it into 'unique' (only way to set a noupdate attribute)
+ ASSERT_TRUE(avs[0]->save(avs[9]->getBaseFileName()));
+ avs[9] = AttributeFactory::createAttribute("udefint", AVC(AVBT::INT32, AVCT::SINGLE));
+ ASSERT_TRUE(avs[9]->load());
+}
+
+void
+Test::testCloseness()
+{
+ { // Test blueprint.
+ ClosenessBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "closeness"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_OK(pt, params.add("name"), in.add("distance(name)"), out.add("out").add("logscale"));
+
+ FT_DUMP_EMPTY(_factory, "closeness");
+ }
+
+ { // Test executor.
+ TEST_DO(assertCloseness(1, "pos", 0));
+ assertCloseness(0.8, "pos", 1802661);
+ assertCloseness(0, "pos", 9013306);
+ // two-argument version
+ TEST_DO(assertCloseness(0.8, "field,pos", 1802661));
+
+ // use non-default maxDistance
+ assertCloseness(1, "pos", 0, 100);
+ assertCloseness(0.5, "pos", 50, 100);
+ assertCloseness(0, "pos", 100, 100);
+ assertCloseness(0, "pos", 101, 100);
+
+ // test logscale using halfResponse (define that x = 10 should give 0.5 -> s = -10^2/(2*10 - 100) = 1.25 (scale distance))
+ assertCloseness(1, "pos", 0, 100, 10);
+ assertCloseness(0.5, "pos", 10, 100, 10);
+ assertCloseness(0, "pos", 100, 100, 10);
+ assertCloseness(0, "pos", 101, 100, 10);
+ }
+}
+
+void
+Test::assertCloseness(feature_t exp, const vespalib::string & attr, double distance, double maxDistance, double halfResponse)
+{
+ vespalib::string feature = "closeness(" + attr + ")";
+ FtFeatureTest ft(_factory, feature);
+ std::vector<std::pair<int32_t, int32_t> > positions;
+ int32_t x = 0;
+ positions.emplace_back(x, x);
+ setupForDistanceTest(ft, "pos", positions, false);
+ GeoLocation::Point p{int32_t(distance), 0};
+ ft.getQueryEnv().addLocation(GeoLocationSpec{attr, p});
+ if (maxDistance > 0) {
+ ft.getIndexEnv().getProperties().add(feature + ".maxDistance",
+ vespalib::make_string("%u", (unsigned int)maxDistance));
+ }
+ if (halfResponse > 0) {
+ ft.getIndexEnv().getProperties().add(feature + ".halfResponse",
+ vespalib::make_string("%f", halfResponse));
+ feature.append(".logscale");
+ }
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore(feature, exp)));
+}
+
+void
+Test::testFieldLength()
+{
+ FieldLengthBlueprint pt;
+
+ { // Test blueprint.
+ EXPECT_TRUE(assertCreateInstance(pt, "fieldLength"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FtIndexEnvironment ie;
+ ie.getBuilder()
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "foo")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar")
+ .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo")
+ .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo");
+ FT_SETUP_FAIL(pt, params.add("qux")); // does not exists
+ FT_SETUP_FAIL(pt, params.clear().add("bar")); // not an index
+ FT_SETUP_FAIL(pt, params.clear().add("afoo")); // wrong collection type
+ FT_SETUP_FAIL(pt, params.clear().add("wfoo")); // wrong collection type
+ FT_SETUP_OK(pt, ie, params.clear().add("foo"), in, out.add("out"));
+
+ FT_DUMP_EMPTY(_factory, "fieldLength");
+ FT_DUMP_EMPTY(_factory, "fieldLength", ie);
+ }
+
+ { // Test executor.
+ for (uint32_t i = 0; i < 10; ++i) {
+ StringList features;
+ features.add("fieldLength(foo)").add("fieldLength(baz)");
+ FtFeatureTest ft(_factory, features);
+ ASSERT_TRUE(!ft.setup());
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo").
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar").addField(FieldType::INDEX, CollectionType::SINGLE, "baz");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+
+ search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, i));
+ ASSERT_TRUE(mdb->setFieldLength("foo", i + 10));
+ ASSERT_TRUE(mdb->addOccurence("baz", 0, i));
+ ASSERT_TRUE(mdb->setFieldLength("baz", i + 20));
+ ASSERT_TRUE(mdb->apply(1));
+ ASSERT_TRUE(ft.execute(RankResult()
+ .addScore("fieldLength(foo)", (feature_t)i + 10)
+ .addScore("fieldLength(baz)", (feature_t)i + 20)));
+ }
+ }
+}
+
+
+void
+Test::assertFieldMatch(const vespalib::string & spec,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ const fieldmatch::Params * params,
+ uint32_t totalTermWeight,
+ feature_t totalSignificance)
+{
+ LOG(info, "assertFieldMatch('%s', '%s', '%s', (%u))", spec.c_str(), query.c_str(), field.c_str(), totalTermWeight);
+
+ // Setup feature test.
+ vespalib::string feature = "fieldMatch(foo)";
+ FtFeatureTest ft(_factory, feature);
+
+ setupFieldMatch(ft, "foo", query, field, params, totalTermWeight, totalSignificance, 1);
+
+ // Execute and compare results.
+ RankResult rr = toRankResult(feature, spec);
+ rr.setEpsilon(1e-4); // same as java tests
+ ASSERT_TRUE(ft.execute(rr));
+}
+
+void
+Test::assertFieldMatch(const vespalib::string & spec,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ uint32_t totalTermWeight)
+{
+ assertFieldMatch(spec, query, field, nullptr, totalTermWeight);
+}
+
+void
+Test::assertFieldMatchTS(const vespalib::string & spec,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ feature_t totalSignificance)
+{
+ assertFieldMatch(spec, query, field, nullptr, 0, totalSignificance);
+}
+
+
+void
+Test::testFirstPhase()
+{
+ { // Test blueprint.
+ FirstPhaseBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "firstPhase"));
+
+ FtIndexEnvironment ie;
+ ie.getProperties().add(indexproperties::rank::FirstPhase::NAME, "random"); // override nativeRank dependency
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, ie, params, in.add("random"), out.add("score"));
+ FT_SETUP_FAIL(pt, params.add("foo"));
+ params.clear();
+
+ FT_DUMP(_factory, "firstPhase", ie, StringList().add("firstPhase"));
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "firstPhase");
+ ft.getIndexEnv().getProperties().add(indexproperties::rank::FirstPhase::NAME, "value(10)");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(10.0f));
+ }
+}
+
+void
+Test::testForeach()
+{
+ { // Test blueprint.
+ ForeachBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "foreach"));
+
+ StringList params, in, out;
+ out.add("value");
+ FT_SETUP_FAIL(pt, params);
+ // illegal dimension
+ FT_SETUP_FAIL(pt, params.add("squares").add("N").add("foo").add("true").add("sum"));
+ // illegal condition
+ FT_SETUP_FAIL(pt, params.clear().add("fields").add("N").add("foo").add("false").add("sum"));
+ // illegal operation
+ FT_SETUP_FAIL(pt, params.clear().add("fields").add("N").add("foo").add("true").add("dotproduct"));
+
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz");
+
+ // various dimensions
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo(N)").add("true").add("sum"),
+ in.clear().add("foo(0)").add("foo(1)").add("foo(2)").add("foo(3)").add("foo(4)").
+ add("foo(5)").add("foo(6)").add("foo(7)").add("foo(8)").add("foo(9)").
+ add("foo(10)").add("foo(11)").add("foo(12)").add("foo(13)").add("foo(14)").add("foo(15)"), out);
+ ie.getProperties().add("foreach.maxTerms", "1");
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"),
+ in.clear().add("foo"), out);
+ FT_SETUP_OK(pt, ie, params.clear().add("fields").add("N").add("foo(N)").add("true").add("sum"),
+ in.clear().add("foo(foo)").add("foo(bar)"), out);
+ FT_SETUP_OK(pt, ie, params.clear().add("attributes").add("N").add("foo(N)").add("true").add("sum"),
+ in.clear().add("foo(baz)"), out);
+
+ // various conditions
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), in.clear().add("foo"), out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("<4").add("sum"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add(">4").add("sum"), in, out);
+ // various operations
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("product"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("average"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("max"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("min"), in, out);
+ FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("count"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "foreach");
+ }
+ { // Test executor
+ // single loop
+ assertForeachOperation( 16.5, "true", "sum");
+ assertForeachOperation(-2106, "true", "product");
+ assertForeachOperation( 3.3, "true", "average");
+ assertForeachOperation( 8, "true", "max");
+ assertForeachOperation( -4.5, "true", "min");
+ assertForeachOperation( 5, "true", "count");
+
+ assertForeachOperation(3, "\">4\"", "count");
+ assertForeachOperation(2, "\">4.5\"", "count");
+ assertForeachOperation(2, "\"<4\"", "count");
+ assertForeachOperation(2, "\"<4.5\"", "count");
+ assertForeachOperation(4, "\">0\"", "count");
+ assertForeachOperation(1, "\"<0\"", "count");
+ assertForeachOperation(4, "\">-4.5\"", "count");
+ assertForeachOperation(1, "\"<-4.4\"", "count");
+
+ { // average without any values
+ FtFeatureTest ft(_factory, "foreach(fields,N,value(N),true,average)");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(0));
+ }
+
+ { // double loop
+ vespalib::string feature =
+ "foreach(fields,N,foreach(attributes,M,rankingExpression(\"value(N)+value(M)\"),true,product),true,sum)";
+ LOG(info, "double loop feature: '%s'", feature.c_str());
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getProperties().add("foreach.maxTerms", "1");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "1");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "2");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "3");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "4");
+ // ((1 + 3) * (1 + 4)) + ((2 + 3) * (2 + 4)) = 4 * 5 + 5 * 6 = 20 + 30 = 50
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(50));
+ ASSERT_TRUE(ft.execute(50)); // check that reset works
+ }
+ }
+}
+
+void
+Test::assertForeachOperation(feature_t exp, const vespalib::string & cond, const vespalib::string & op)
+{
+ vespalib::string feature = "foreach(fields,N,value(N)," + cond + "," + op + ")";
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "4.5");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "2");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "8");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "6.5");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "-4.5");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ ASSERT_TRUE(ft.execute(exp)); // check that reset works
+}
+
+
+void
+Test::testFreshness()
+{
+ { // Test blueprint.
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "name");
+
+ FreshnessBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "freshness"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, idx_env, params);
+ FT_SETUP_OK(pt, idx_env, params.add("name"), in.add("age(name)"), out.add("out").add("logscale"));
+
+ FT_DUMP_EMPTY(_factory, "freshness");
+ }
+
+ { // Test executor.
+ assertFreshness(1, "doctime", 0);
+ assertFreshness(0.5, "doctime", 3*15*24*60*60);
+ assertFreshness(0, "doctime", 3*30*24*60*60);
+ // use non-default maxAge
+ assertFreshness(1, "doctime", 0, 120);
+ assertFreshness(0.75, "doctime", 30, 120);
+ assertFreshness(0.5, "doctime", 60, 120);
+ assertFreshness(0, "doctime", 120, 120);
+ assertFreshness(0, "doctime", 121, 120);
+
+ // test logscale
+ assertFreshness(1, "doctime", 0, 0, 0, true);
+ assertFreshness(0.5, "doctime", 7*24*60*60, 0, 0, true);
+ assertFreshness(0, "doctime", 3*30*24*60*60, 0, 0, true);
+ // use non-default maxAge & halfResponse
+ assertFreshness(1, "doctime", 0, 120, 30, true);
+ assertFreshness(0.5, "doctime", 30, 120, 30, true); // half response after 30 secs
+ assertFreshness(0, "doctime", 120, 120, 30, true);
+ assertFreshness(0, "doctime", 121, 120, 30, true);
+ // test invalid half response
+ assertFreshness(0.5, "doctime", 1, 120, 0.5, true); // half response is set to 1
+ assertFreshness(0.5, "doctime", 59, 120, 70, true); // half response is set to 120/2 - 1
+ }
+}
+
+void
+Test::assertFreshness(feature_t expFreshness, const vespalib::string & attr, uint32_t age, uint32_t maxAge, double halfResponse, bool logScale)
+{
+ vespalib::string feature = "freshness(" + attr + ")";
+ FtFeatureTest ft(_factory, feature);
+ setupForAgeTest(ft, 60); // time = 60
+ if (maxAge > 0) {
+ ft.getIndexEnv().getProperties().add("freshness(" + attr + ").maxAge",
+ vespalib::make_string("%u", maxAge));
+ }
+ if (halfResponse > 0) {
+ ft.getIndexEnv().getProperties().add("freshness(" + attr + ").halfResponse",
+ vespalib::make_string("%f", halfResponse));
+ }
+ if (logScale) {
+ feature.append(".logscale");
+ }
+ ft.getQueryEnv().getProperties().add(queryproperties::now::SystemTime::NAME,
+ vespalib::make_string("%u", age + 60)); // now = age + 60
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expFreshness).setEpsilon(EPS)));
+}
+
+namespace {
+
+struct AirPort {
+ const char *tla;
+ double lat;
+ double lng;
+};
+
+std::pair<int32_t, int32_t> toXY(const AirPort &p) {
+ return std::make_pair(int(p.lng * 1.0e6), int(p.lat * 1.0e6));
+}
+
+GeoLocation toGL(const AirPort &p) {
+ auto x = int(p.lng * 1.0e6);
+ auto y = int(p.lat * 1.0e6);
+ GeoLocation::Point gp{x, y};
+ return GeoLocation{gp};
+}
+
+}
+
+void
+Test::testGreatCircleDistance()
+{
+ { // Test blueprint.
+ GreatCircleDistanceBlueprint pt;
+ EXPECT_TRUE(assertCreateInstance(pt, "great_circle_distance"));
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FtIndexEnvironment idx_env;
+ idx_env
+ .getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "pos_zcurve");
+ FT_SETUP_OK(pt, idx_env, params.add("pos"), in,
+ out.add("km").add("latitude").add("longitude"));
+ FT_DUMP_EMPTY(_factory, "great_circle_distance");
+ }
+ { // Test executor.
+ FtFeatureTest ft(_factory, "great_circle_distance(pos)");
+ const AirPort SFO = { "SFO", 37.618806, -122.375416 };
+ const AirPort TRD = { "TRD", 63.457556, 10.924250 };
+ std::vector<std::pair<int32_t,int32_t>> pos = { toXY(SFO), toXY(TRD) };
+ setupForDistanceTest(ft, "pos_zcurve", pos, true);
+ const AirPort LHR = { "LHR", 51.477500, -0.461388 };
+ const AirPort JFK = { "JFK", 40.639928, -73.778692 };
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", toGL(LHR)});
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", toGL(JFK)});
+ ASSERT_TRUE(ft.setup());
+ double exp = 1494; // according to gcmap.com
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(10.0).
+ addScore("great_circle_distance(pos)", exp)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(10.0).
+ addScore("great_circle_distance(pos).km", exp)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9).
+ addScore("great_circle_distance(pos).latitude", TRD.lat)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9).
+ addScore("great_circle_distance(pos).longitude", TRD.lng)));
+ }
+}
+
+void
+Test::testDistance()
+{
+ { // Test blueprint.
+ DistanceBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "distance"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FtIndexEnvironment idx_env;
+ idx_env
+ .getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "pos");
+ FT_SETUP_OK(pt, idx_env, params.add("pos"), in,
+ out.add("out").add("index").add("latitude").add("longitude").add("km"));
+ FT_DUMP_EMPTY(_factory, "distance");
+ }
+
+ { // Test executor.
+
+ { // test 2D single location (zcurve)
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(650.0f)), "5:-5", 10, 20);
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(250.0f)), "5:-5", 10, -20);
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(450.0f)), "5:-5", -10, -20);
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(850.0f)), "5:-5", -10, 20);
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(250.0f)), "5:-5", 15, -20, 0x80000000); // 2^31
+ }
+
+ { // test 2D multi location (zcurve)
+ vespalib::string positions = "5:-5,35:0,5:40,35:-40";
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(425.0f)), positions, 10, 20, 0, 2);
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(250.0f)), positions, 10, -20, 0, 0);
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(450.0f)), positions, -10, -20, 0, 0);
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(625.0f)), positions, -10, 20, 0, 2);
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(250.0f)), positions, 15, -20, 0x80000000, 0); // 2^31
+ assert2DZDistance(static_cast<feature_t>(std::sqrt(425.0f)), positions, 45, -20, 0x80000000, 1); // 2^31
+ }
+
+ { // test geo multi location (zcurve)
+ vespalib::string positions = "0:0,100:100,-200:200,-300:-300,400:-400";
+ assert2DZDistance(static_cast<feature_t>(0.0f), positions, 0, 0, 0x40000000, 0);
+ assert2DZDistance(static_cast<feature_t>(1.0f), positions, 100, 101, 0x40000000, 1);
+ assert2DZDistance(static_cast<feature_t>(0.0f), positions, -200, 200, 0x40000000, 2);
+ assert2DZDistance(static_cast<feature_t>(13.0f), positions, -320, -312, 0x40000000, 3);
+ assert2DZDistance(static_cast<feature_t>(5.0f), positions, 416, -403, 0x40000000, 4);
+ assert2DZDistance(static_cast<feature_t>(5.0f), positions, 112, 104, 0x40000000, 1);
+ }
+
+ { // test default distance
+ { // non-existing attribute
+ FtFeatureTest ft(_factory, "distance(pos)");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "pos");
+ GeoLocation::Point p{0, 0};
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", p});
+
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0)));
+ }
+ { // label
+ FtFeatureTest ft(_factory, "distance(label,foo)");
+ GeoLocation::Point p{0, 0};
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", p});
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(label,foo)", std::numeric_limits<feature_t>::max())));
+ }
+ { // wrong attribute type (float)
+ FtFeatureTest ft(_factory, "distance(pos)");
+ auto pos = AttributeBuilder("pos", AVC(AVBT::FLOAT, AVCT::SINGLE)).get();
+ ft.getIndexEnv().getAttributeMap().add(pos);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "pos");
+ GeoLocation::Point p{0, 0};
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", p});
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0)));
+ }
+ { // wrong attribute type (string)
+ FtFeatureTest ft(_factory, "distance(pos)");
+ auto pos = AttributeBuilder("pos", AVC(AVBT::STRING, AVCT::SINGLE)).get();
+ ft.getIndexEnv().getAttributeMap().add(pos);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "pos");
+ GeoLocation::Point p{0, 0};
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", p});
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0)));
+ }
+ { // wrong attribute collection type (weighted set)
+ FtFeatureTest ft(_factory, "distance(pos)");
+ auto pos = AttributeBuilder("pos", AVC(AVBT::INT64, AVCT::WSET)).get();
+ ft.getIndexEnv().getAttributeMap().add(pos);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, DataType::INT64, "pos");
+ GeoLocation::Point p{0, 0};
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", p});
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0)));
+ }
+ }
+ }
+}
+
+void
+Test::setupForDistanceTest(FtFeatureTest &ft, const vespalib::string & attrName,
+ const std::vector<std::pair<int32_t, int32_t> > & positions, bool zcurve)
+{
+ auto pos = AttributeBuilder(attrName, AVC(AVBT::INT64, AVCT::ARRAY)).docs(1).get();
+ ft.getIndexEnv().getAttributeMap().add(pos);
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, DataType::INT64, attrName);
+
+ auto ia = dynamic_cast<IntegerAttribute *>(pos.get());
+ for (const auto & p : positions) {
+ if (zcurve) {
+ ia->append(1, vespalib::geo::ZCurve::encode(p.first, p.second), 0);
+ } else {
+ ia->append(1, p.first, 0);
+ }
+ }
+ pos->commit();
+}
+
+void
+Test::assert2DZDistance(feature_t exp, const vespalib::string & positions,
+ int32_t xquery, int32_t yquery, uint32_t xAspect,
+ uint32_t hit_index)
+{
+ LOG(info, "assert2DZDistance(%g, %s, %d, %d, %u, %u)", exp, positions.c_str(), xquery, yquery, xAspect, hit_index);
+ FtFeatureTest ft(_factory, "distance(pos)");
+ std::vector<vespalib::string> ta = FtUtil::tokenize(positions, ",");
+ std::vector<std::pair<int32_t, int32_t> > pos;
+ for (const auto & s : ta) {
+ std::vector<vespalib::string> tb = FtUtil::tokenize(s, ":");
+ auto x = util::strToNum<int32_t>(tb[0]);
+ auto y = util::strToNum<int32_t>(tb[1]);
+ pos.emplace_back(x, y);
+ }
+ setupForDistanceTest(ft, "pos", pos, true);
+ GeoLocation::Point p{xquery, yquery};
+ GeoLocation::Aspect aspect{xAspect};
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", {p, aspect}});
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4).
+ addScore("distance(pos)", exp)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4).
+ addScore("distance(pos).km", exp * 0.00011119508023)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-30).
+ addScore("distance(pos).index", hit_index)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9).
+ addScore("distance(pos).latitude", pos[hit_index].second * 1e-6)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9).
+ addScore("distance(pos).longitude", pos[hit_index].first * 1e-6)));
+}
+
+void
+Test::testDistanceToPath()
+{
+ {
+ // Test blueprint.
+ DistanceToPathBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "distanceToPath"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_OK(pt, params.add("pos"), in, out.add("distance").add("traveled").add("product"));
+ FT_SETUP_FAIL(pt, params.add("foo"));
+
+ FT_DUMP_EMPTY(_factory, "distanceToPath");
+ }
+
+ {
+ // Test executor.
+ std::vector<std::pair<int32_t, int32_t> > pos;
+ pos.emplace_back(0, 0);
+
+ // invalid path
+ assertDistanceToPath(pos, "");
+ assertDistanceToPath(pos, "()");
+ assertDistanceToPath(pos, "a");
+ assertDistanceToPath(pos, "(");
+ assertDistanceToPath(pos, "(a");
+ assertDistanceToPath(pos, "(a)");
+ assertDistanceToPath(pos, "(-1)");
+ assertDistanceToPath(pos, "(-1,1)");
+ assertDistanceToPath(pos, "(-1,1,1)");
+ assertDistanceToPath(pos, "(-1 1 1 1)");
+
+ // path on either side of document
+ assertDistanceToPath(pos, "(-1,1,1,1)", 1, 0.5, 2);
+ assertDistanceToPath(pos, "(-1,-1,1,-1)", 1, 0.5, -2);
+
+ // zero length path
+ assertDistanceToPath(pos, "(0,0,0,0)", 0, 0);
+ assertDistanceToPath(pos, "(0,0,0,0,0,0)", 0, 0);
+ assertDistanceToPath(pos, "(0,1,0,1)", 1, 0);
+ assertDistanceToPath(pos, "(0,1,0,1,0,1)", 1, 0);
+
+ // path crosses document
+ assertDistanceToPath(pos, "(-1,1,1,-1)", 0, 0.5);
+ assertDistanceToPath(pos, "(-2,2,2,-2)", 0, 0.5);
+ assertDistanceToPath(pos, "(-1,1,3,-3)", 0, 0.25);
+
+ // intersection outside segments
+ assertDistanceToPath(pos, "(1,0,2,0)", 1, 0); // before
+ assertDistanceToPath(pos, "(0,1,0,2)", 1, 0);
+ assertDistanceToPath(pos, "(-2,0,-1,0)", 1, 1); // after
+ assertDistanceToPath(pos, "(0,-2,0,-1)", 1, 1);
+
+ // various paths
+ assertDistanceToPath(pos, "(-3,1,2,1,2,-2,-2,-2)", 1, 0.25, 5);
+ assertDistanceToPath(pos, "(-3,2,2,2,2,-1,0,-1)", 1, 1, 2);
+
+ // multiple document locations
+ pos.emplace_back(0, 1);
+ assertDistanceToPath(pos, "(-1,1,1,1)", 0, 0.5);
+ assertDistanceToPath(pos, "(-2,-1,-1,1)", 1, 1, 2);
+ assertDistanceToPath(pos, "(-1,0.25,1,0.25)", 0.25, 0.5, 0.5);
+
+ {
+ // Test defaults.
+ RankResult res;
+ res.addScore("distanceToPath(pos).distance", DistanceExecutor::DEFAULT_DISTANCE);
+ res.addScore("distanceToPath(pos).traveled", 1);
+ {
+ // Non-existing attribute.
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(res));
+ }
+ {
+ // Wrong attribute type (float).
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ auto att = AttributeBuilder("pos", AVC(AVBT::FLOAT, AVCT::SINGLE)).get();
+ ft.getIndexEnv().getAttributeMap().add(att);
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(res));
+ }
+ {
+ // Wrong attribute type (string).
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ auto att = AttributeBuilder("pos", AVC(AVBT::STRING, AVCT::SINGLE)).get();
+ ft.getIndexEnv().getAttributeMap().add(att);
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(res));
+ }
+ {
+ // Wrong attribute collection type (weighted set).
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ auto att = AttributeBuilder("pos", AVC(AVBT::INT64, AVCT::WSET)).get();
+ ft.getIndexEnv().getAttributeMap().add(att);
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(res));
+ }
+ }
+ }
+}
+
+void
+Test::assertDistanceToPath(const std::vector<std::pair<int32_t, int32_t> > & pos,
+ const vespalib::string &path, feature_t distance, feature_t traveled, feature_t product)
+{
+ LOG(info, "Testing distance to path '%s' with %zd document locations.", path.c_str(), pos.size());
+
+ FtFeatureTest ft(_factory, "distanceToPath(pos)");
+ setupForDistanceTest(ft, "pos", pos, true);
+
+ ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", path);
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult()
+ .addScore("distanceToPath(pos).distance", distance)
+ .addScore("distanceToPath(pos).traveled", traveled)
+ .addScore("distanceToPath(pos).product", product)));
+}
+
+namespace {
+
+void
+verifyCorrectDotProductExecutor(BlueprintFactory & factory, vespalib::stringref attrName,
+ vespalib::stringref queryVector, vespalib::stringref expected)
+{
+ ParameterList params = {{ParameterType::ATTRIBUTE, attrName}, {ParameterType::STRING, "vector"}};
+ FtFeatureTest ft(factory, "value(0)");
+ Test::setupForDotProductTest(ft);
+ ft.getQueryEnv().getProperties().add("dotProduct.vector", queryVector);
+ DotProductBlueprint bp;
+ DummyDependencyHandler deps(bp);
+ EXPECT_TRUE(bp.setup(ft.getIndexEnv(), params));
+ vespalib::Stash stash;
+ FeatureExecutor &exc = bp.createExecutor(ft.getQueryEnv(), stash);
+ // check that we have the optimized enum version
+ EXPECT_EQUAL(expected, exc.getClassName());
+ EXPECT_EQUAL(1u, deps.output.size());
+}
+
+template<typename T>
+void verifyArrayParser()
+{
+ std::vector<vespalib::string> v = {"(0:2,7:-3,1:-3)", "{0:2,7:-3,1:-3}", "[2 -3 0 0 0 0 0 -3]"};
+ for(const vespalib::string & s : v) {
+ std::vector<T> out;
+ ArrayParser::parse(s, out);
+ EXPECT_EQUAL(8u, out.size());
+ EXPECT_EQUAL(2, out[0]);
+ EXPECT_EQUAL(-3, out[1]);
+ EXPECT_EQUAL(0, out[2]);
+ EXPECT_EQUAL(0, out[3]);
+ EXPECT_EQUAL(0, out[4]);
+ EXPECT_EQUAL(0, out[5]);
+ EXPECT_EQUAL(0, out[6]);
+ EXPECT_EQUAL(-3, out[7]);
+ }
+}
+
+}
+
+void
+Test::testDotProduct()
+{
+ { // Test blueprint.
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "attribute");
+
+ DotProductBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "dotProduct"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, idx_env, params);
+ FT_SETUP_OK(pt, idx_env, params.add("attribute").add("vector"), in, out.add("scalar"));
+
+ FT_DUMP_EMPTY(_factory, "dotProduct");
+ }
+
+ { // Test vector parser
+ { // string enum vector
+ FtFeatureTest ft(_factory, "value(0)");
+ setupForDotProductTest(ft);
+ const search::attribute::IAttributeVector * sv(ft.getIndexEnv().getAttributeMap().getAttribute("wsstr"));
+ ASSERT_TRUE(sv != nullptr);
+ EXPECT_TRUE(sv->hasEnum());
+ search::attribute::EnumHandle e;
+ {
+ dotproduct::wset::EnumVector out(sv);
+ WeightedSetParser::parse("", out);
+ EXPECT_EQUAL(out.getVector().size(), 0u);
+ WeightedSetParser::parse("()", out);
+ EXPECT_EQUAL(out.getVector().size(), 0u);
+ WeightedSetParser::parse("(a;1)", out);
+ EXPECT_EQUAL(out.getVector().size(), 0u);
+ WeightedSetParser::parse("(a:1)", out);
+ EXPECT_EQUAL(out.getVector().size(), 1u);
+ EXPECT_TRUE(sv->findEnum("a", e));
+ EXPECT_EQUAL(out.getVector()[0].first, e);
+ EXPECT_EQUAL(out.getVector()[0].second, 1.0);
+ }
+ std::vector<vespalib::string> v = {"(b:2.5,c:-3.5)", "{b:2.5,c:-3.5}"};
+ for(const vespalib::string & s : v) {
+ dotproduct::wset::EnumVector out(sv);
+ WeightedSetParser::parse(s, out);
+ EXPECT_EQUAL(out.getVector().size(), 2u);
+ EXPECT_TRUE(sv->findEnum("b", e));
+ EXPECT_EQUAL(out.getVector()[0].first, e);
+ EXPECT_EQUAL(out.getVector()[0].second, 2.5);
+ EXPECT_TRUE(sv->findEnum("c", e));
+ EXPECT_EQUAL(out.getVector()[1].first, e);
+ EXPECT_EQUAL(out.getVector()[1].second, -3.5);
+ }
+ { // test funky syntax
+ dotproduct::wset::EnumVector out(sv);
+ WeightedSetParser::parse("( a: 1, b:2 ,c: , :3)", out);
+ EXPECT_EQUAL(out.getVector().size(), 4u);
+ EXPECT_TRUE(sv->findEnum("a", e));
+ EXPECT_EQUAL(out.getVector()[0].first, e);
+ EXPECT_EQUAL(out.getVector()[0].second, 1);
+ EXPECT_TRUE(sv->findEnum("b", e));
+ EXPECT_EQUAL(out.getVector()[1].first, e);
+ EXPECT_EQUAL(out.getVector()[1].second, 2);
+ EXPECT_TRUE(sv->findEnum("c", e));
+ EXPECT_EQUAL(out.getVector()[2].first, e);
+ EXPECT_EQUAL(out.getVector()[2].second, 0);
+ EXPECT_TRUE(sv->findEnum("", e));
+ EXPECT_EQUAL(out.getVector()[3].first, e);
+ EXPECT_EQUAL(out.getVector()[3].second, 3);
+ }
+ { // strings not in attribute vector
+ dotproduct::wset::EnumVector out(sv);
+ WeightedSetParser::parse("(not:1)", out);
+ EXPECT_EQUAL(out.getVector().size(), 0u);
+ }
+ }
+ { // string vector
+ dotproduct::wset::StringVector out;
+ WeightedSetParser::parse("(b:2.5,c:-3.5)", out);
+ EXPECT_EQUAL(out.getVector().size(), 2u);
+ EXPECT_EQUAL(out.getVector()[0].first, "b");
+ EXPECT_EQUAL(out.getVector()[0].second, 2.5);
+ EXPECT_EQUAL(out.getVector()[1].first, "c");
+ EXPECT_EQUAL(out.getVector()[1].second, -3.5);
+ }
+ { // integer vector
+ dotproduct::wset::IntegerVector out;
+ WeightedSetParser::parse("(20:2.5,30:-3.5)", out);
+ EXPECT_EQUAL(out.getVector().size(), 2u);
+ EXPECT_EQUAL(out.getVector()[0].first, 20);
+ EXPECT_EQUAL(out.getVector()[0].second, 2.5);
+ EXPECT_EQUAL(out.getVector()[1].first, 30);
+ EXPECT_EQUAL(out.getVector()[1].second, -3.5);
+ }
+ }
+ verifyArrayParser<int8_t>();
+ verifyArrayParser<int16_t>();
+ verifyArrayParser<int32_t>();
+ verifyArrayParser<int64_t>();
+ verifyArrayParser<float>();
+ verifyArrayParser<double>();
+ {
+ vespalib::string s = "[[1:3]]";
+ std::vector<int32_t> out;
+ ArrayParser::parse(s, out);
+ EXPECT_EQUAL(0u, out.size());
+ }
+
+ { // Test executor.
+ { // string enum attribute
+ // docId = 1
+ assertDotProduct(0, "()");
+ assertDotProduct(0, "(f:5)");
+ assertDotProduct(0, "(f:5,g:5)");
+ assertDotProduct(-5, "(a:-5)");
+ assertDotProduct(25, "(e:5)");
+ assertDotProduct(-5.5, "(a:-5.5)");
+ assertDotProduct(27.5, "(e:5.5)");
+ assertDotProduct(55, "(a:1,b:2,c:3,d:4,e:5)");
+ assertDotProduct(20, "(b:10,b:15)");
+ // docId = 2
+ assertDotProduct(0, "()", 2);
+ assertDotProduct(0, "(a:1,b:2,c:3,d:4,e:5)", 2);
+ }
+ { // string attribute
+ assertDotProduct(0, "(f:5,g:5)", 1, "wsextstr");
+ assertDotProduct(550, "(a:1,b:2,c:3,d:4,e:5)", 1, "wsextstr");
+ }
+ for (const char * name : {"wsbyte", "wsint", "wsint_fast"}) {
+ TEST_DO(assertDotProduct(0, "()", 1, name));
+ TEST_DO(assertDotProduct(0, "(6:5,7:5)", 1, name));
+ TEST_DO(assertDotProduct(18, "(4:4.5)", 1, name));
+ TEST_DO(assertDotProduct(57, "(1:1,2:2,3:3,4:4.5,5:5)", 1, name));
+ }
+ for (const char * name : {"arrbyte", "arrint", "arrfloat", "arrint_fast", "arrfloat_fast"}) {
+ assertDotProduct(0, "()", 1, name);
+ assertDotProduct(0, "(6:5,7:5)", 1, name);
+ assertDotProduct(55, "(0:1,1:2,2:3,3:4,4:5)", 1, name);
+ assertDotProduct(55, "[1 2 3 4 5]", 1, name);
+ assertDotProduct(41, "{3:4,4:5}", 1, name);
+ }
+ { // float array attribute
+ assertDotProduct(55, "[1.0 2.0 3.0 4.0 5.0]", 1, "arrfloat");
+ assertDotProduct(41, "{3:4,4:5.0}", 1, "arrfloat");
+ }
+ { // Sparse float array attribute.
+ assertDotProduct(17, "(0:1,3:4,50:97)", 1, "arrfloat");
+ }
+
+ assertDotProduct(0, "(0:1,3:4,50:97)", 1, "sint"); // attribute of the wrong type
+ assertDotProduct(17, "(0:1,3:4,50:97)", 1, "sint", "arrfloat"); // attribute override
+ assertDotProduct(0, "(0:1,3:4,50:97)", 1, "sint", "arrfloat_non_existing"); // incorrect attribute override
+ }
+ TEST_DO(verifyCorrectDotProductExecutor(_factory, "wsstr", "{a:1,b:2}", "search::features::dotproduct::wset::(anonymous namespace)::DotProductExecutorByEnum"));
+ TEST_DO(verifyCorrectDotProductExecutor(_factory, "wsstr", "{a:1}", "search::features::dotproduct::wset::(anonymous namespace)::SingleDotProductExecutorByEnum"));
+ TEST_DO(verifyCorrectDotProductExecutor(_factory, "wsstr", "{unknown:1}", "search::features::SingleZeroValueExecutor"));
+ TEST_DO(verifyCorrectDotProductExecutor(_factory, "wsint", "{1:1, 2:3}", "search::features::dotproduct::wset::DotProductByWeightedSetReadViewExecutor<int>"));
+ TEST_DO(verifyCorrectDotProductExecutor(_factory, "wsint", "{1:1}", "search::features::dotproduct::wset::(anonymous namespace)::SingleDotProductByWeightedValueExecutor<int>"));
+ TEST_DO(verifyCorrectDotProductExecutor(_factory, "wsint", "{}", "search::features::SingleZeroValueExecutor"));
+
+}
+
+void
+Test::assertDotProduct(feature_t exp, const vespalib::string & vector, uint32_t docId,
+ const vespalib::string & attribute, const vespalib::string & attributeOverride)
+{
+ RankResult rr;
+ rr.addScore("dotProduct(" + attribute + ",vector)", exp);
+ FtFeatureTest ft(_factory, rr.getKeys());
+ setupForDotProductTest(ft);
+ ft.getQueryEnv().getProperties().add("dotProduct.vector", vector);
+ if ( ! attributeOverride.empty() ) {
+ ft.getQueryEnv().getProperties().add("dotProduct." + attribute + ".override.name", attributeOverride);
+ }
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(rr, docId));
+}
+
+void
+Test::setupForDotProductTest(FtFeatureTest & ft)
+{
+ struct Config {
+ Config() : name(nullptr), dataType(AVBT::BOOL), collectionType(AVCT::SINGLE), fastSearch(false) {}
+ Config(const char *n, AVBT dt, AVCT ct, bool fs) : name(n), dataType(dt), collectionType(ct), fastSearch(fs) {}
+ const char * name;
+ AVBT dataType;
+ AVCT collectionType;
+ bool fastSearch;
+ };
+ std::vector<Config> cfgList = { {"wsint", AVBT::INT32, AVCT::WSET, false},
+ {"wsbyte", AVBT::INT8, AVCT::WSET, false},
+ {"wsint_fast", AVBT::INT8, AVCT::WSET, true},
+ {"arrbyte", AVBT::INT8, AVCT::ARRAY, false},
+ {"arrint", AVBT::INT32, AVCT::ARRAY, false},
+ {"arrfloat", AVBT::FLOAT, AVCT::ARRAY, false},
+ {"arrint_fast", AVBT::INT32, AVCT::ARRAY, true},
+ {"arrfloat_fast", AVBT::FLOAT, AVCT::ARRAY, true}
+ };
+ auto a = AttributeBuilder("wsstr", AVC(AVBT::STRING, AVCT::WSET)).
+ fill_wset({{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"e", 5}}, {}}).get();
+ auto c = AttributeBuilder("sint", AVC(AVBT::INT32, AVCT::SINGLE)).docs(2).get();
+ auto d = std::make_shared<search::WeightedSetStringExtAttribute>("wsextstr");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsextstr");
+ for (const Config & cfg : cfgList) {
+ AttributeBuilder builder(cfg.name, AVC(cfg.dataType,
+ cfg.collectionType,
+ cfg.fastSearch));
+ auto baf = builder.get();
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE,
+ cfg.collectionType==AVCT::ARRAY
+ ? CollectionType::ARRAY
+ : CollectionType::WEIGHTEDSET,
+ cfg.name);
+ ft.getIndexEnv().getAttributeMap().add(baf);
+ if (baf->isIntegerType()) {
+ using WIL = AttributeBuilder::WeightedIntList;
+ builder.fill_wset({WIL{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}}, {}});
+ } else {
+ using WDL = AttributeBuilder::WeightedDoubleList;
+ builder.fill_wset({WDL{{1.0, 1}, {2.0, 2}, {3.0, 3}, {4.0, 4}, {5.0, 5}}, {}});
+ }
+ }
+
+ ft.getIndexEnv().getAttributeMap().add(a);
+ ft.getIndexEnv().getAttributeMap().add(c);
+ ft.getIndexEnv().getAttributeMap().add(d);
+
+ EXPECT_TRUE(!d->hasEnum());
+ uint32_t docId;
+ d->addDoc(docId); // reserved doc
+ d->addDoc(docId);
+ d->add("a", 10);
+ d->add("b", 20);
+ d->add("c", 30);
+ d->add("d", 40);
+ d->add("e", 50);
+ d->addDoc(docId);
+}
+
+void
+Test::testNow()
+{
+ {
+ // Test blueprint.
+ NowBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "now"));
+
+ StringList params, in, out;
+ FT_SETUP_OK (pt, params, in, out.add("out"));
+ FT_SETUP_FAIL(pt, params.add("foo"));
+
+ FT_DUMP_EMPTY(_factory, "now");
+ }
+
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, "now");
+ ASSERT_TRUE(ft.setup());
+
+ RankResult res;
+ res.addScore("now", 0.0f);
+ for (uint32_t i = 1; i <= 10; ++i) {
+ feature_t last = res.getScore("now");
+ res.clear();
+ ASSERT_TRUE(ft.executeOnly(res, i));
+ ASSERT_TRUE(last <= res.getScore("now"));
+ }
+ }
+
+ {
+ // Test executor with ms resolution
+ FtFeatureTest ft(_factory, "now");
+ ft.getQueryEnv().getProperties().add("vespa.now", "15000000000");
+ ASSERT_TRUE(ft.setup());
+
+ RankResult res;
+ ASSERT_TRUE(ft.executeOnly(res, 1));
+ feature_t now = 15000000000;
+ ASSERT_EQUAL(now, res.getScore("now"));
+ }
+}
+
+
+void
+Test::testMatch()
+{
+ { // Test blueprint.
+ MatchBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "match"));
+
+ FtFeatureTest ft(_factory, "");
+ setupForAttributeTest(ft);
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "tensor");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "predicate");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "reference");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "raw");
+
+ FtIndexEnvironment idx_env;
+ idx_env.getBuilder()
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "foo")
+ .addField(FieldType::INDEX, CollectionType::ARRAY, "bar")
+ .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "tensor")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOLEANTREE, "predicate")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::REFERENCE, "reference")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::RAW, "raw");
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("score").add("totalWeight"));
+ FT_SETUP_OK(pt, idx_env, params, in
+ .add("fieldMatch(foo)")
+ .add("elementCompleteness(bar)")
+ .add("elementCompleteness(baz)")
+ .add("attributeMatch(sint)")
+ .add("attributeMatch(aint)")
+ .add("attributeMatch(wsint)"), out
+ .add("weight.foo")
+ .add("weight.bar")
+ .add("weight.baz")
+ .add("weight.sint")
+ .add("weight.aint")
+ .add("weight.wsint"));
+ FT_SETUP_FAIL(pt, idx_env, params.add("1")); // expects 0 parameters
+
+ FT_DUMP_EMPTY(_factory, "match");
+ }
+
+ { // Test executor
+ FtFeatureTest ft(_factory, "match");
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.foo", "100"); // assign weight to all fields, simulate sd behaviour
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.bar", "200");
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.sint", "300");
+ ft.getIndexEnv().getProperties().add("vespa.fieldweight.aint", "400");
+
+ // search in field 'foo'
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // term id 0
+
+ // search in field 'sint'
+ ft.getQueryEnv().getBuilder().addAttributeNode("sint"); // term id 1
+ setupForAttributeTest(ft, false);
+
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ // add hit for field 'foo' for search term 0
+ ASSERT_TRUE(mdb->setFieldLength("foo", 1));
+ ASSERT_TRUE(mdb->addOccurence("foo", 0, 0));
+ ASSERT_TRUE(mdb->setWeight("sint", 1, 0));
+ ASSERT_TRUE(mdb->apply(1));
+
+ RankResult rr = toRankResult("match", "score:1 totalWeight:400 weight.foo:100 weight.bar:200 weight.baz:100 weight.sint:300 weight.aint:400 weight.wsint:100");
+ rr.setEpsilon(1e-4); // same as java tests
+ ASSERT_TRUE(ft.execute(rr));
+ }
+
+ { // Test executor
+ FtFeatureTest ft(_factory, "match");
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ // search in field 'foo'
+ ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // term id 0
+ ASSERT_TRUE(ft.setup());
+
+ // must create this so that term match data is configured with the term data object
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+
+ // no hits on docId 1
+ RankResult rr = toRankResult("match", "score:0 totalWeight:0 weight.foo:100");
+ ASSERT_TRUE(ft.execute(rr, 1));
+ }
+}
+
+void
+Test::testMatchCount()
+{
+ { // Test blueprint.
+ MatchCountBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "matchCount"));
+
+ FtFeatureTest ft(_factory, "");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // expects 1 parameter
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params.add("baz")); // cannot find the field
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("foo"), in, out.add("out"));
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("bar"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "matchCount");
+ }
+ { // Test executor for index fields
+ EXPECT_TRUE(assertMatches(0, "x", "a", "matchCount(foo)"));
+ EXPECT_TRUE(assertMatches(1, "a", "a", "matchCount(foo)"));
+ EXPECT_TRUE(assertMatches(2, "a b", "a b", "matchCount(foo)"));
+ // change docId to indicate no matches in the field
+ EXPECT_TRUE(assertMatches(0, "a", "a", "matchCount(foo)", 2));
+ }
+ { // Test executor for attribute fields
+ FtFeatureTest ft(_factory, StringList().add("matchCount(foo)").
+ add("matchCount(baz)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz");
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != nullptr); // query term 0, hit in foo
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("bar") != nullptr); // query term 1, hit in bar
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != nullptr); // query term 2, hit in foo
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("foo", 0, 0);
+ mdb->setWeight("bar", 1, 0);
+ mdb->setWeight("foo", 2, 0);
+ mdb->apply(1);
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matchCount(foo)", 2)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matchCount(baz)", 0)));
+ }
+}
+
+void verifySequence(uint64_t first, uint64_t second) {
+ ASSERT_GREATER(first, second);
+ ASSERT_GREATER(double(first), double(second));
+}
+
+void
+Test::testUnique()
+{
+ {
+ GlobalSequenceBlueprint bp;
+ EXPECT_TRUE(assertCreateInstance(bp, "globalSequence"));
+ FtFeatureTest ft(_factory, "");
+ StringList params, in, out;
+ FT_SETUP_OK(bp, ft.getIndexEnv(), params, in, out.add("out"));
+ FT_DUMP_EMPTY(_factory, "globalSequence");
+ }
+ FtFeatureTest ft(_factory, "globalSequence");
+ ASSERT_TRUE(ft.setup());
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 0), GlobalSequenceBlueprint::globalSequence(1,1)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 1), GlobalSequenceBlueprint::globalSequence(1,2)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(1, 1), GlobalSequenceBlueprint::globalSequence(2,1)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 1), GlobalSequenceBlueprint::globalSequence(2,2)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 2), GlobalSequenceBlueprint::globalSequence(2,3)));
+ TEST_DO(verifySequence(GlobalSequenceBlueprint::globalSequence(2, 2), GlobalSequenceBlueprint::globalSequence(3,0)));
+ ASSERT_EQUAL(0xfffffffefffdul, (1ul << 48) - 0x10003l);
+ EXPECT_TRUE(ft.execute(0xfffffffefffdul, 0, 1));
+ EXPECT_TRUE(ft.execute(0xfffffff8fffdul, 0, 7));
+}
+
+void
+Test::testMatches()
+{
+ { // Test blueprint.
+ MatchesBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "matches"));
+
+ FtFeatureTest ft(_factory, "");
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // expects 1-2 parameters
+ FT_SETUP_FAIL(pt, ft.getIndexEnv(), params.add("baz")); // cannot find the field
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("foo"), in, out.add("out"));
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("1"), in, out);
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("bar"), in, out);
+ FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("1"), in, out);
+
+ FT_DUMP(_factory, "matches", ft.getIndexEnv(), StringList().add("matches(foo)").add("matches(bar)"));
+ }
+ { // Test executor for index fields
+ EXPECT_TRUE(assertMatches(0, "x", "a", "matches(foo)"));
+ EXPECT_TRUE(assertMatches(1, "a", "a", "matches(foo)"));
+ EXPECT_TRUE(assertMatches(1, "a b", "a b", "matches(foo)"));
+ // change docId to indicate no matches in the field
+ EXPECT_TRUE(assertMatches(0, "a", "a", "matches(foo)", 2));
+ // specify termIdx as second parameter
+ EXPECT_TRUE(assertMatches(0, "x", "a", "matches(foo,0)"));
+ EXPECT_TRUE(assertMatches(1, "a", "a", "matches(foo,0)"));
+ EXPECT_TRUE(assertMatches(0, "a", "a", "matches(foo,1)"));
+ EXPECT_TRUE(assertMatches(0, "x b", "a b", "matches(foo,0)"));
+ EXPECT_TRUE(assertMatches(1, "x b", "a b", "matches(foo,1)"));
+ }
+ { // Test executor for attribute fields
+ FtFeatureTest ft(_factory, StringList().add("matches(foo)").
+ add("matches(baz)").
+ add("matches(foo,0)").
+ add("matches(foo,1)").
+ add("matches(foo,2)").
+ add("matches(foo,3)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz");
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != nullptr); // query term 0, hit in foo
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("bar") != nullptr); // query term 1, hit in bar
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != nullptr); // query term 2, hit in foo
+ ASSERT_TRUE(ft.setup());
+
+ MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("foo", 0, 0);
+ mdb->setWeight("bar", 1, 0);
+ mdb->apply(1);
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo)", 1)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(baz)", 0)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,0)", 1)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,1)", 0)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,2)", 0)));
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,3)", 0)));
+ }
+ { // Test executor for virtual fields
+ FtFeatureTest ft(_factory, StringList().add("matches(foo)"));
+ ft.getIndexEnv().getBuilder().addField(FieldType::VIRTUAL, CollectionType::ARRAY, "foo");
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().add_virtual_node("foo") != nullptr); // query term 0 hits in foo
+ ASSERT_TRUE(ft.setup());
+
+ auto mdb = ft.createMatchDataBuilder();
+ mdb->setWeight("foo", 0, 100);
+ mdb->apply(1);
+ EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo)", 1)));
+ }
+}
+
+bool
+Test::assertMatches(uint32_t output,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ const vespalib::string & feature,
+ uint32_t docId)
+{
+ LOG(info, "assertMatches(%u, '%s', '%s', '%s')", output, query.c_str(), field.c_str(), feature.c_str());
+
+ // Setup feature test.
+ FtFeatureTest ft(_factory, feature);
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ std::map<vespalib::string, std::vector<vespalib::string> > index;
+ index["foo"] = FtUtil::tokenize(field);
+ FT_SETUP(ft, FtUtil::toQuery(query), index, 1);
+
+ ASSERT_TRUE(ft.execute(output, EPS, docId));
+ // Execute and compare results.
+ return EXPECT_TRUE(ft.execute(output, EPS, docId));
+}
+
+void
+Test::testQuery()
+{
+ { // Test blueprint.
+ QueryBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "query"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_OK(pt, params.add("foo"), in, out.add("out"));
+
+ FT_DUMP_EMPTY(_factory, "query");
+ }
+
+ { // Test executor.
+ RankResult exp;
+ exp.addScore("query(def1)", 1.0).
+ addScore("query(def2)", 2.0).
+ addScore("query(def3)", 0.0).
+ addScore("query(val1)", 1.1).
+ addScore("query(val2)", 2.2).
+ addScore("query(hash1)", vespalib::hash2d("foo")).
+ addScore("query(hash2)", vespalib::hash2d("2")).
+ addScore("query(hash3)", vespalib::hash2d("foo")).
+ addScore("query(hash4)", vespalib::hash2d("'foo"));
+ FtFeatureTest ft(_factory, exp.getKeys());
+ ft.getIndexEnv().getProperties()
+ .add("query(def1)", "1.0")
+ .add("$def2", "2.0");
+ ft.getQueryEnv().getProperties()
+ .add("val1", "1.1")
+ .add("$val2", "2.2")
+ .add("hash1", "foo")
+ .add("hash2", "'2")
+ .add("hash3", "'foo")
+ .add("hash4", "''foo");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(exp));
+ }
+}
+
+void
+Test::testQueryTermCount()
+{
+ { // Test blueprint.
+ QueryTermCountBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "queryTermCount"));
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_FAIL(pt, params.add("foo"));
+
+ StringList dump;
+ FT_DUMP(_factory, "queryTermCount", dump.add("queryTermCount"));
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "queryTermCount");
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 0)));
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "queryTermCount");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 1)));
+ }
+
+ { // Test executor.
+ FtFeatureTest ft(_factory, "queryTermCount");
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ft.getQueryEnv().getBuilder().addAllFields();
+ ASSERT_TRUE(ft.setup());
+ ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 2)));
+ }
+}
+
+void
+Test::testRandom()
+{
+ { // Test blueprint.
+ RandomBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "random"));
+
+ StringList params, in, out;
+ FT_SETUP_OK (pt, params, in, out.add("out").add("match"));
+ FT_SETUP_OK (pt, params.add("1"), in, out);
+ FT_SETUP_FAIL(pt, params.add("2"));
+
+ FT_DUMP_EMPTY(_factory, "random");
+ }
+
+ { // Test executor (seed specified through config)
+ FtFeatureTest ft(_factory, "random");
+ ft.getIndexEnv().getProperties().add("random.seed", "100");
+ ASSERT_TRUE(ft.setup());
+ vespalib::Rand48 rnd;
+ rnd.srand48(100);
+ for (uint32_t i = 0; i < 5; ++i) {
+ feature_t exp = static_cast<feature_t>(rnd.lrand48()) / static_cast<feature_t>(0x80000000u);
+ ASSERT_TRUE(ft.execute(exp, EPS, i + 1));
+ }
+ }
+ { // Test executor (current time used as seed)
+ FtFeatureTest ft(_factory, "random");
+ ASSERT_TRUE(ft.setup());
+ RankResult rr;
+ rr.addScore("random", 1.0f);
+ for (uint32_t i = 0; i < 5; ++i) {
+ feature_t last = rr.getScore("random");
+ rr.clear();
+ ASSERT_TRUE(ft.executeOnly(rr, i + 1));
+ ASSERT_TRUE(last != rr.getScore("random"));
+ }
+ }
+ { // Test executor (random.match)
+ FtFeatureTest ft(_factory, "random.match");
+ ft.getQueryEnv().getProperties().add("random.match.seed", "100");
+ ASSERT_TRUE(ft.setup());
+ vespalib::Rand48 rnd;
+ for (uint32_t i = 1; i <= 5; ++i) {
+ rnd.srand48(100 + i); // seed + lid
+ feature_t exp = static_cast<feature_t>(rnd.lrand48()) / static_cast<feature_t>(0x80000000u);
+ ASSERT_TRUE(ft.execute(exp, EPS, i));
+ }
+ }
+}
+
+void
+Test::testRandomNormal() {
+ { // Test blueprint.
+ RandomNormalBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "randomNormal"));
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out);
+ FT_SETUP_OK(pt, params.add("val1"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "randomNormal");
+ }
+
+ { // Test executor (current time used as seed)
+ FtFeatureTest ft(_factory, "randomNormal");
+ ASSERT_TRUE(ft.setup());
+ RankResult rr;
+ rr.addScore("randomNormal", 1000.0);
+ for (uint32_t i = 0; i < 5; ++i) {
+ feature_t last = rr.getScore("randomNormal");
+ rr.clear();
+ ASSERT_TRUE(ft.executeOnly(rr, i + 1));
+ ASSERT_TRUE(last != rr.getScore("randomNormal"));
+ }
+ }
+
+ { // Test setting of mean and stddev values, and seed
+ FtFeatureTest ft1(_factory, "randomNormal(0.0,0.1)");
+ FtFeatureTest ft2(_factory, "randomNormal(1.0,0.2)");
+ ft1.getIndexEnv().getProperties().add("randomNormal(0.0,0.1).seed", "100");
+ ft2.getIndexEnv().getProperties().add("randomNormal(1.0,0.2).seed", "100");
+ ASSERT_TRUE(ft1.setup());
+ ASSERT_TRUE(ft2.setup());
+ RankResult rr;
+ for (uint32_t i = 0; i < 5; ++i) {
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormal(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1));
+ }
+ }
+}
+
+void
+Test::testRandomNormalStable() {
+ { // Test blueprint.
+ RandomNormalStableBlueprint pt;
+
+ EXPECT_TRUE(assertCreateInstance(pt, "randomNormalStable"));
+
+ StringList params, in, out;
+ FT_SETUP_OK(pt, params, in, out.add("out"));
+ FT_SETUP_OK(pt, params.add("0.5").add("1.0"), in, out);
+ FT_SETUP_OK(pt, params.add("val1"), in, out);
+
+ FT_DUMP_EMPTY(_factory, "randomNormalStable");
+ }
+
+ { // Test setting of mean and stddev values, and seed
+ FtFeatureTest ft1(_factory, "randomNormalStable(0.0,0.1)");
+ FtFeatureTest ft2(_factory, "randomNormalStable(1.0,0.2)");
+ ft1.getIndexEnv().getProperties().add("randomNormalStable(0.0,0.1).seed", "100");
+ ft2.getIndexEnv().getProperties().add("randomNormalStable(1.0,0.2).seed", "100");
+ ASSERT_TRUE(ft1.setup());
+ ASSERT_TRUE(ft2.setup());
+ RankResult rr;
+ for (uint32_t i = 0; i < 5; ++i) {
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(((rr.getScore("randomNormalStable(0.0,0.1)") - 0.0) / 0.1) * 0.2 + 1.0, EPS, i + 1));
+ }
+ }
+ { // Test executor (randomNormalStable)
+ FtFeatureTest ft1(_factory, "randomNormalStable");
+ FtFeatureTest ft2(_factory, "randomNormalStable");
+ ASSERT_TRUE(ft1.setup());
+ ASSERT_TRUE(ft2.setup());
+ RankResult rr;
+ for (uint32_t i = 0; i < 5; ++i) {
+ rr.clear();
+ ASSERT_TRUE(ft1.executeOnly(rr, i + 1));
+ ASSERT_TRUE(ft2.execute(rr.getScore("randomNormalStable"), EPS, i + 1));
+ }
+ }
+}
+
+void
+Test::testRankingExpression()
+{
+ { // Test blueprint.
+ RankingExpressionBlueprint prototype;
+
+ EXPECT_TRUE(assertCreateInstance(prototype, "rankingExpression"));
+
+ StringList params, in, out;
+ FT_SETUP_FAIL(prototype, params); // requires config to run without params
+ FT_SETUP_OK (prototype, params.add("foo.out"), in.add("foo.out"), out.add("out"));
+ FT_SETUP_FAIL(prototype, params.add("bar.out"));
+ FT_SETUP_OK (prototype, params.clear().add("log((1 + 2)- 3 * 4 / 5 )"), in.clear(), out);
+ FT_SETUP_OK (prototype,
+ params.clear().add("if(if(f1.out<1,0,1)<if(f2.out<2,0,1),f3.out,3)"),
+ in.clear().add("f1.out").add("f2.out").add("f3.out"), out);
+
+ FT_DUMP_EMPTY(_factory, "rankingExpression");
+ }
+
+ { // Test executor.
+ {
+ FtFeatureTest ft(_factory, getExpression("if(1<2,3,4)"));
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(3.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, getExpression("sqrt(100)"));
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(10.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, getExpression("mysum(value(4),value(4))"));
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(8.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, getExpression("if(mysum(value(4),value(4))>3+4,1,0)"));
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(1.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, "rankingExpression");
+ ft.getIndexEnv().getProperties().add("rankingExpression.rankingScript", "if(1<2,3,4)");
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(3.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, "rankingExpression(foo)");
+ ft.getIndexEnv().getProperties().add("rankingExpression(foo).rankingScript", "if(1<2,3,4)");
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(3.0f));
+ }
+ {
+ FtFeatureTest ft(_factory, "rankingExpression");
+ ft.getIndexEnv().getProperties()
+ .add("rankingExpression.rankingScript", "if(")
+ .add("rankingExpression.rankingScript", "1<")
+ .add("rankingExpression.rankingScript", "2,")
+ .add("rankingExpression.rankingScript", "3,")
+ .add("rankingExpression.rankingScript", "4)");
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(3.0f));
+ }
+ {
+ // test interpreted expression
+ vespalib::string my_expr("3.0 + value(4.0) + reduce(tensorFromWeightedSet(query(my_tensor)),sum)");
+ FtFeatureTest ft(_factory, getExpression(my_expr));
+ ft.getQueryEnv().getProperties().add("my_tensor", "{a:1,b:2,c:3}");
+ ASSERT_TRUE(ft.setup());
+ EXPECT_TRUE(ft.execute(13.0));
+ }
+ }
+}
+
+vespalib::string
+Test::getExpression(const vespalib::string &parameter) const
+{
+ using FNB = search::fef::FeatureNameBuilder;
+ return FNB().baseName("rankingExpression").parameter(parameter).buildName();
+}
+
+void
+Test::testTerm()
+{
+ {
+ // Test blueprint.
+ TermBlueprint pt;
+ {
+ EXPECT_TRUE(assertCreateInstance(pt, "term"));
+
+ StringList params, in, out;
+ FT_SETUP_OK (pt, params.add("0"), in, out.add("connectedness").add("significance").add("weight"));
+ FT_SETUP_FAIL(pt, params.add("1"));
+ }
+ {
+ StringList dump;
+ for (uint32_t term = 0; term < 3; ++term) {
+ vespalib::string bn = vespalib::make_string("term(%u)", term);
+ dump.add(bn + ".connectedness").add(bn + ".significance").add(bn + ".weight");
+ }
+ FtIndexEnvironment ie;
+ ie.getProperties().add("term.numTerms", "3");
+ FT_DUMP(_factory, "term", ie, dump); // check override
+
+ for (uint32_t term = 3; term < 5; ++term) {
+ vespalib::string bn = vespalib::make_string("term(%u)", term);
+ dump.add(bn + ".connectedness").add(bn + ".significance").add(bn + ".weight");
+ }
+ FT_DUMP(_factory, "term", dump); // check default
+ }
+ }
+
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, "term(0)");
+ ASSERT_TRUE(ft.setup());
+
+ RankResult exp;
+ exp .addScore("term(0).connectedness", 0)
+ .addScore("term(0).significance", 0)
+ .addScore("term(0).weight", 0);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, StringList().add("term(1)").add("term(2)"));
+ ft.getIndexEnv().getBuilder()
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "idx1") // field 0
+ .addField(FieldType::INDEX, CollectionType::SINGLE, "idx2") // field 1
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "attr"); // field 2
+ ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0);
+ ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(1)
+ .setWeight(search::query::Weight(200)).lookupField(0)->setDocFreq(50, 100);
+ ft.getQueryEnv().getBuilder().addAttributeNode("attr")->setUniqueId(2)
+ .setWeight(search::query::Weight(400)).lookupField(2)->setDocFreq(25, 100);
+ // setup connectedness between term 1 and term 0
+ ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0");
+ ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0.7");
+ ASSERT_TRUE(ft.setup());
+
+ RankResult exp;
+ exp.addScore("term(1).significance", util::getSignificance(0.50)).
+ addScore("term(1).weight", 200.0f).
+ addScore("term(1).connectedness", 0.7f).
+ addScore("term(2).significance", util::getSignificance(0.25)).
+ addScore("term(2).weight", 400.0f).
+ addScore("term(2).connectedness", 0.1f). // default connectedness
+ setEpsilon(10e-6);
+ ASSERT_TRUE(ft.execute(exp));
+ }
+ {
+ // Test executor.
+ FtFeatureTest ft(_factory, "term(0)");
+ ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0);
+ // setup significance for term 0
+ ft.getQueryEnv().getProperties().add("vespa.term.0.significance", "0.3");
+ ASSERT_TRUE(ft.setup());
+
+ ASSERT_TRUE(ft.execute(RankResult().addScore("term(0).significance", 0.3f).setEpsilon(10e-6)));
+ }
+}
+
+void
+Test::testTermDistance()
+{
+ { // test blueprint
+ TermDistanceBlueprint pt;
+ {
+ EXPECT_TRUE(assertCreateInstance(pt, "termDistance"));
+
+ StringList params, in, out;
+ FtIndexEnvironment ie;
+ ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+ FT_SETUP_FAIL(pt, params);
+ FT_SETUP_FAIL(pt, ie, params.add("baz").add("0").add("0"));
+ FT_SETUP_FAIL(pt, ie, params.clear().add("bar").add("0").add("0"));
+
+ FT_SETUP_OK(pt, ie, params.clear().add("foo").add("0").add("0"),
+ in, out.add("forward").add("forwardTermPosition")
+ .add("reverse").add("reverseTermPosition"));
+ }
+ {
+ FT_DUMP_EMPTY(_factory, "termDistance");
+ }
+ }
+
+ { // test executor
+ using Result = TermDistanceCalculator::Result;
+ const uint32_t UV = TermDistanceCalculator::UNDEFINED_VALUE;
+
+ EXPECT_TRUE(assertTermDistance(Result(), "a b", "x x"));
+ EXPECT_TRUE(assertTermDistance(Result(), "a b", "a x"));
+ EXPECT_TRUE(assertTermDistance(Result(), "a b", "x b"));
+ EXPECT_TRUE(assertTermDistance(Result(), "a", "a b"));
+ EXPECT_TRUE(assertTermDistance(Result(), "a", "a a"));
+ EXPECT_TRUE(assertTermDistance(Result(1,0,UV,UV), "a b", "a b"));
+ EXPECT_TRUE(assertTermDistance(Result(2,0,UV,UV), "a b", "a x b"));
+ EXPECT_TRUE(assertTermDistance(Result(UV,UV,1,0), "a b", "b a"));
+ EXPECT_TRUE(assertTermDistance(Result(UV,UV,2,0), "a b", "b x a"));
+ EXPECT_TRUE(assertTermDistance(Result(2,18,1,20), "a b", "a x x x x x b x x x x a x x x b x x a x b a"));
+ EXPECT_TRUE(assertTermDistance(Result(1,0,2,1), "a b", "a b x a x x b x x x a x x x x b x x x x x a"));
+ EXPECT_TRUE(assertTermDistance(Result(1,0,1,1), "a b", "a b a b a")); // first best is kept
+ EXPECT_TRUE(assertTermDistance(Result(1,0,1,0), "a a", "a a"));
+ EXPECT_TRUE(assertTermDistance(Result(2,0,2,0), "a a", "a x a"));
+ }
+}
+
+bool
+Test::assertTermDistance(const TermDistanceCalculator::Result & exp,
+ const vespalib::string & query,
+ const vespalib::string & field,
+ uint32_t docId)
+{
+ LOG(info, "assertTermDistance('%s', '%s')", query.c_str(), field.c_str());
+
+ vespalib::string feature = "termDistance(foo,0,1)";
+ FtFeatureTest ft(_factory, feature);
+
+ ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+ StringVectorMap index;
+ index["foo"] = FtUtil::tokenize(field);
+ FT_SETUP(ft, FtUtil::toQuery(query), index, 1);
+
+ RankResult rr;
+ rr.addScore(feature + ".forward", exp.forwardDist);
+ rr.addScore(feature + ".forwardTermPosition", exp.forwardTermPos);
+ rr.addScore(feature + ".reverse", exp.reverseDist);
+ rr.addScore(feature + ".reverseTermPosition", exp.reverseTermPos);
+ return EXPECT_TRUE(ft.execute(rr, docId));
+}
+
+void
+Test::testUtils()
+{
+ { // getSignificance
+ EXPECT_APPROX(util::getSignificance(0.0), 1, EPS);
+ EXPECT_APPROX(util::getSignificance(0.0 + 1.0e-7), 1, EPS);
+ EXPECT_APPROX(util::getSignificance(1.0), 0.5, EPS);
+ EXPECT_APPROX(util::getSignificance(1.0 + 1.0e-7), 0.5, EPS);
+ feature_t last = 1;
+ for (uint32_t i = 2; i <= 100; i = i + 1) {
+ feature_t s = util::getSignificance(i * 1.0e-6);
+ EXPECT_GREATER(s, 0);
+ EXPECT_LESS(s, 1);
+ EXPECT_LESS(s, last);
+ last = s;
+ }
+ for (uint32_t i = 999900; i <= 1000000; i = i + 1) {
+ feature_t s = util::getSignificance(i * 1.0e-6);
+ EXPECT_GREATER(s, 0);
+ EXPECT_LESS(s, 1);
+ EXPECT_LESS(s, last);
+ last = s;
+ }
+ }
+}
+