summaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/features/beta/beta_features_test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests/features/beta/beta_features_test.cpp')
-rw-r--r--searchlib/src/tests/features/beta/beta_features_test.cpp712
1 files changed, 712 insertions, 0 deletions
diff --git a/searchlib/src/tests/features/beta/beta_features_test.cpp b/searchlib/src/tests/features/beta/beta_features_test.cpp
new file mode 100644
index 00000000000..622228ff168
--- /dev/null
+++ b/searchlib/src/tests/features/beta/beta_features_test.cpp
@@ -0,0 +1,712 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+#include <vespa/searchlib/features/flow_completeness_feature.h>
+#include <vespa/searchlib/features/jarowinklerdistancefeature.h>
+#include <vespa/searchlib/features/proximityfeature.h>
+#include <vespa/searchlib/features/querycompletenessfeature.h>
+#include <vespa/searchlib/features/rankingexpressionfeature.h>
+#include <vespa/searchlib/features/reverseproximityfeature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/features/termeditdistancefeature.h>
+#include <vespa/searchlib/features/utils.h>
+#include <vespa/searchlib/fef/test/plugin/setup.h>
+#include <vespa/vespalib/util/rand48.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+using CollectionType = FieldInfo::CollectionType;
+
+//---------------------------------------------------------------------------------------------------------------------
+// Test
+//---------------------------------------------------------------------------------------------------------------------
+/**
+ * Test application covering the beta rank features exercised in this file:
+ * jaroWinklerDistance, proximity, flowCompleteness, queryCompleteness,
+ * reverseProximity and termEditDistance.
+ */
+class Test : public FtTestApp {
+public:
+    Test();
+    ~Test() override;
+
+    /** Configures the blueprint factory and then runs every feature test below. */
+    int Main() override;
+
+    // One test routine per feature.
+    void testJaroWinklerDistance();
+    void testProximity();
+    void testFlowCompleteness();
+    void testQueryCompleteness();
+    void testReverseProximity();
+    void testTermEditDistance();
+
+private:
+    // Assertion helpers shared by the test routines above.
+    void assertJaroWinklerDistance(const vespalib::string &query, const vespalib::string &field, feature_t expected);
+    void assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hits, uint32_t miss);
+    void assertTermEditDistance(const vespalib::string &query, const vespalib::string &field,
+                                uint32_t expectedDel, uint32_t expectedIns, uint32_t expectedSub);
+
+    // Blueprint factory handed to every feature test and dump check in this file.
+    search::fef::BlueprintFactory _factory;
+};
+
+TEST_APPHOOK(Test); // NOTE(review): presumably installs Test as the program entry point - confirm against the testkit macro
+
+Test::Test() = default; // defaulted: _factory is default-constructed
+Test::~Test() = default; // defaulted: members are destroyed by their own destructors
+
+/**
+ * Application entry point: registers all blueprints with the factory and then
+ * runs each feature test in turn, invoking TEST_FLUSH() after every one.
+ */
+int
+Test::Main()
+{
+    TEST_INIT("beta_features_test");
+
+    // The factory must know every blueprint before any feature test runs.
+    setup_fef_test_plugin(_factory);
+    setup_search_features(_factory);
+
+    // Feature tests, listed in execution order.
+    using TestMethod = void (Test::*)();
+    const TestMethod featureTests[] = {
+        &Test::testJaroWinklerDistance,
+        &Test::testProximity,
+        &Test::testFlowCompleteness,
+        &Test::testQueryCompleteness,
+        &Test::testReverseProximity,
+        &Test::testTermEditDistance,
+    };
+    for (TestMethod runTest : featureTests) {
+        (this->*runTest)();
+        TEST_FLUSH();
+    }
+
+    TEST_DONE();
+    return 0;
+}
+
+/**
+ * Checks the jaroWinklerDistance blueprint (setup and dump) and then runs the
+ * executor against a list of name pairs with known similarity measures.
+ */
+void
+Test::testJaroWinklerDistance()
+{
+    { // Blueprint checks.
+        JaroWinklerDistanceBlueprint prototype;
+        { // Setup.
+            EXPECT_TRUE(assertCreateInstance(prototype, "jaroWinklerDistance"));
+
+            // Note: params keeps accumulating entries across consecutive calls until cleared.
+            StringList params, in, out;
+            FT_SETUP_FAIL(prototype, params);
+            FT_SETUP_FAIL(prototype, params.add("foo"));
+            FT_SETUP_FAIL(prototype, params.add("0"));
+            params.clear();
+
+            FtIndexEnvironment indexEnv;
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "afoo");
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo");
+            FT_SETUP_FAIL(prototype, indexEnv, params);
+            FT_SETUP_OK  (prototype, indexEnv, params.add("foo"), in.add("fieldLength(foo)"), out.add("out"));
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("afoo"));
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("wfoo"));
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("1"));
+        }
+        { // Dump.
+            FT_DUMP_EMPTY(_factory, "jaroWinklerDistance");
+
+            FtIndexEnvironment indexEnv;
+            indexEnv.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar");
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar");
+            FT_DUMP_EMPTY(_factory, "jaroWinklerDistance", indexEnv); // must be a single value index field
+
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+            // The expected dump is left empty here; "jaroWinklerDistance(bar).out" is deliberately not added.
+            StringList expectedDump;
+            FT_DUMP(_factory, "jaroWinklerDistance", indexEnv, expectedDump);
+        }
+    }
+    { // Executor checks.
+        // These measures are taken from table 6 in the paper "Overview of Record Linkage and Current Research
+        // Directions" by William E. Winkler. It is available at: http://www.census.gov/srd/papers/pdf/rrs2006-02.pdf
+        //
+        // The strings used as query and field are transformed into query and field terms, so every character must
+        // be unique within a string. The second occurrence of a character in the names below is therefore
+        // capitalized. A comment is given whenever our result differs from what is presented in the paper (only 2
+        // of 17 are actually different).
+        struct Sample {
+            const char *query;
+            const char *field;
+            float       similarity; // the expected feature output is 1 - similarity
+        };
+        const Sample samples[] = {
+            { "shackleford", "shackelford", 0.982f },
+            { "dunNigham",   "cunnigham",   0.852f }, // 3x'n' in query, removed one
+            { "nichlesoN",   "nichulsoN",   0.956f },
+            { "jones",       "johnsoN",     0.832f },
+            { "masSey",      "masSie",      0.933f },
+            { "abroms",      "abrAms",      0.922f },
+            { "hardin",      "martinez",    0.722f }, // no measure was given
+            { "itman",       "smith",       0.622f }, // no measure was given
+            { "jeraldinE",   "geraldinE",   0.926f },
+            { "marhtA",      "marthA",      0.961f },
+            { "micheLlE",    "michael",     0.921f },
+            { "julies",      "juliUs",      0.933f },
+            { "tanyA",       "tonyA",       0.880f },
+            { "dwayne",      "duane",       0.765f }, // was 0.840 in paper
+            { "sean",        "suSan",       0.672f }, // was 0.805 in paper
+            { "jon",         "john",        0.933f },
+            { "jon",         "jan",         0.800f }, // no measure was given
+        };
+        for (const Sample &sample : samples) {
+            assertJaroWinklerDistance(sample.query, sample.field, 1 - sample.similarity);
+        }
+    }
+}
+
+/**
+ * Runs jaroWinklerDistance(foo) for the given query against a single-value
+ * index field "foo" holding 'field', and asserts that the ".out" score equals
+ * 'expected' within an epsilon of 0.001.
+ */
+void
+Test::assertJaroWinklerDistance(const vespalib::string &query, const vespalib::string &field, feature_t expected)
+{
+    const vespalib::string feature = "jaroWinklerDistance(foo)";
+    FtFeatureTest ft(_factory, feature);
+    ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+
+    StringMap index;
+    index.add("foo", field);
+    FT_SETUP(ft, query, index, 1);
+
+    RankResult expectedResult;
+    expectedResult.setEpsilon(0.001);
+    expectedResult.addScore(feature + ".out", expected);
+    ASSERT_TRUE(ft.execute(expectedResult));
+}
+
+/**
+ * Checks the proximity blueprint (setup and dump) and its executor.
+ *
+ * The executor checks use proximity(foo,0,1) and verify the three outputs
+ * (.out, .posA, .posB) for no matches, for incrementally added occurrences,
+ * and for every (a, b) position pair in [0,10) x [0,10).
+ */
+void
+Test::testProximity()
+{
+
+    { // Test blueprint.
+        ProximityBlueprint prototype;
+        {
+            EXPECT_TRUE(assertCreateInstance(prototype, "proximity"));
+
+            // Note: params keeps accumulating entries across consecutive calls until cleared.
+            StringList params, in, out;
+            FT_SETUP_FAIL(prototype, params);
+            FT_SETUP_FAIL(prototype, params.add("foo"));
+            FT_SETUP_FAIL(prototype, params.add("0"));
+            FT_SETUP_FAIL(prototype, params.add("1"));
+            FT_SETUP_FAIL(prototype, params.add("2"));
+            params.clear();
+
+            FtIndexEnvironment ie;
+            ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+            FT_SETUP_FAIL(prototype, ie, params.add("foo"));
+            FT_SETUP_FAIL(prototype, ie, params.add("0"));
+            FT_SETUP_OK  (prototype, ie, params.add("1"), in, out.add("out").add("posA").add("posB"));
+            FT_SETUP_FAIL(prototype, ie, params.add("2"));
+        }
+
+        {
+            FT_DUMP_EMPTY(_factory, "proximity");
+
+            FtIndexEnvironment ie;
+            ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+            FT_DUMP_EMPTY(_factory, "proximity", ie); // must be an index field
+
+            StringList dump;
+            ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+#ifdef VISIT_BETA_FEATURES
+            for (uint32_t a = 0; a < 5; ++a) {
+                for (uint32_t b = a + 1; b < 6; ++b) {
+                    vespalib::string bn = vespalib::make_string("proximity(bar,%u,%u)", a, b);
+                    dump.add(bn + ".out");
+                    dump.add(bn + ".posA");
+                    dump.add(bn + ".posB");
+                }
+            }
+#endif
+            FT_DUMP(_factory, "proximity", ie, dump);
+        }
+    }
+    {
+        // Test executor: no query terms and no match data.
+        FtFeatureTest ft(_factory, "proximity(foo,0,1)");
+        ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        ASSERT_TRUE(ft.setup());
+
+        RankResult exp;
+        exp.addScore("proximity(foo,0,1).out", util::FEATURE_MAX).
+            addScore("proximity(foo,0,1).posA", util::FEATURE_MAX).
+            addScore("proximity(foo,0,1).posB", util::FEATURE_MIN);
+        ASSERT_TRUE(ft.execute(exp, 1));
+    }
+    {
+        // Test executor: occurrences are added step by step to the same match data builder.
+        FtFeatureTest ft(_factory, "proximity(foo,0,1)");
+        // Setup is asserted to fail at this point, before field "foo" has been added.
+        ASSERT_TRUE(!ft.setup());
+
+        ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ASSERT_TRUE(ft.setup());
+
+        MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+        ASSERT_TRUE(mdb->setFieldLength("foo", 50));
+        // Only term 0 present (position 30).
+        ASSERT_TRUE(mdb->addOccurence("foo", 0, 30));
+        RankResult exp;
+        exp.addScore("proximity(foo,0,1).out", util::FEATURE_MAX).
+            addScore("proximity(foo,0,1).posA", util::FEATURE_MAX).
+            addScore("proximity(foo,0,1).posB", util::FEATURE_MIN);
+        ASSERT_TRUE(mdb->apply(1));
+        ASSERT_TRUE(ft.execute(exp, 1));
+
+        // Term 1 at position 20, i.e. before term 0: the same expectation still holds.
+        ASSERT_TRUE(mdb->addOccurence("foo", 1, 20));
+        ASSERT_TRUE(mdb->apply(2));
+        ASSERT_TRUE(ft.execute(exp, 2));
+
+        // Term 0 also at position 10: now (10, 20) is expected with distance 10.
+        ASSERT_TRUE(mdb->addOccurence("foo", 0, 10));
+        ASSERT_TRUE(mdb->apply(3));
+        exp .clear()
+            .addScore("proximity(foo,0,1).out", 10.0f)
+            .addScore("proximity(foo,0,1).posA", 10.0f)
+            .addScore("proximity(foo,0,1).posB", 20.0f);
+        ASSERT_TRUE(ft.execute(exp, 3));
+    }
+    {
+        // Test executor: term 0 at position a and term 1 at position b, for all pairs.
+        for (int a = 0; a < 10; ++a) {
+            for (int b = 0; b < 10; ++b) {
+                FtFeatureTest ft(_factory, "proximity(foo,0,1)");
+                ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+                ft.getQueryEnv().getBuilder().addAllFields();
+                ft.getQueryEnv().getBuilder().addAllFields();
+                ASSERT_TRUE(ft.setup());
+
+                MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+                ASSERT_TRUE(mdb->setFieldLength("foo", 10));
+                ASSERT_TRUE(mdb->addOccurence("foo", 0, a));
+                ASSERT_TRUE(mdb->addOccurence("foo", 1, b));
+                ASSERT_TRUE(mdb->apply(1));
+
+                // A distance is only expected when term 0 comes strictly before term 1.
+                RankResult exp;
+                exp .addScore("proximity(foo,0,1).out", a < b ? b - a : util::FEATURE_MAX)
+                    .addScore("proximity(foo,0,1).posA", a < b ? a : util::FEATURE_MAX)
+                    .addScore("proximity(foo,0,1).posB", a < b ? b : util::FEATURE_MIN);
+                // a and b are signed ints, so they are formatted with %d.
+                TEST_STATE(vespalib::make_string("a=%d, b=%d", a, b).c_str());
+                { // reset lazy evaluation
+                    RankResult dummy;
+                    ft.executeOnly(dummy, 0);
+                }
+                EXPECT_TRUE(ft.execute(exp));
+            }
+        }
+    }
+}
+
+/**
+ * Checks the queryCompleteness blueprint (setup and dump) and its executor,
+ * first over a whole field with five query terms and then with an explicit
+ * window given as queryCompleteness(foo,5,10).
+ */
+void
+Test::testQueryCompleteness()
+{
+    { // Blueprint: setup and dump.
+        QueryCompletenessBlueprint prototype;
+
+        EXPECT_TRUE(assertCreateInstance(prototype, "queryCompleteness"));
+
+        StringList params, in, out;
+        FT_SETUP_FAIL(prototype, params);
+        FT_SETUP_FAIL(prototype, params.add("foo"));
+        FT_SETUP_FAIL(prototype, params.add("0"));
+        params.clear();
+
+        FtIndexEnvironment indexEnv;
+        indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        // params grows by one entry per call: (foo), (foo,0), (foo,0,1), (foo,0,1,2).
+        FT_SETUP_OK  (prototype, indexEnv, params.add("foo"), in, out.add("hit").add("miss"));
+        FT_SETUP_OK  (prototype, indexEnv, params.add("0"), in, out);
+        FT_SETUP_OK  (prototype, indexEnv, params.add("1"), in, out);
+        FT_SETUP_FAIL(prototype, indexEnv, params.add("2"));
+
+        FT_DUMP_EMPTY(_factory, "queryCompleteness");
+        FT_DUMP_EMPTY(_factory, "queryCompleteness", indexEnv);
+    }
+
+    { // Executor over the whole field.
+        const vespalib::string feature = "queryCompleteness(foo)";
+        const uint32_t numTerms = 5;
+
+        FtFeatureTest ft(_factory, feature);
+        ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        // add one term node per query term
+        for (uint32_t term = 0; term < numTerms; ++term) {
+            ft.getQueryEnv().getBuilder().addAllFields();
+        }
+        ASSERT_TRUE(ft.setup());
+
+        // from 0 to 5 hits (5 to 0 misses)
+        for (uint32_t numHits = 0; numHits <= numTerms; ++numHits) {
+            MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+            mdb->setFieldLength("foo", 10);
+            for (uint32_t term = 0; term < numHits; ++term) {
+                mdb->addOccurence("foo", term, term);
+            }
+            ASSERT_TRUE(mdb->apply(1));
+
+            RankResult expected;
+            expected.addScore(feature + ".hit", static_cast<feature_t>(numHits));
+            expected.addScore(feature + ".miss", static_cast<feature_t>(numTerms - numHits));
+            { // reset lazy evaluation
+                RankResult dummy;
+                ft.executeOnly(dummy, 0);
+            }
+            EXPECT_TRUE(ft.execute(expected));
+        }
+    }
+    { // Executor with an explicit window.
+        FtFeatureTest ft(_factory, "queryCompleteness(foo,5,10)");
+        ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ASSERT_TRUE(ft.setup());
+
+        struct WindowCase {
+            uint32_t firstOcc;
+            uint32_t hits;
+            uint32_t miss;
+        };
+        const WindowCase windowCases[] = {
+            {  4, 0, 1 }, // before window
+            {  5, 1, 0 }, // inside window
+            {  9, 1, 0 }, // inside window
+            { 10, 0, 1 }, // after window
+        };
+        for (const WindowCase &wc : windowCases) {
+            assertQueryCompleteness(ft, wc.firstOcc, wc.hits, wc.miss);
+        }
+    }
+}
+
+/**
+ * Places a single occurrence of term 0 at position 'firstOcc' in field "foo"
+ * (field length 20) and expects queryCompleteness(foo,5,10) to report the
+ * given number of hits and misses.
+ */
+void
+Test::assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hits, uint32_t miss)
+{
+    const vespalib::string feature = "queryCompleteness(foo,5,10)";
+
+    MatchDataBuilder::UP matchData = ft.createMatchDataBuilder();
+    matchData->setFieldLength("foo", 20);
+    matchData->addOccurence("foo", 0, firstOcc);
+    ASSERT_TRUE(matchData->apply(1));
+
+    RankResult expected;
+    expected.addScore(feature + ".hit", hits);
+    expected.addScore(feature + ".miss", miss);
+
+    { // reset lazy evaluation
+        RankResult dummy;
+        ft.executeOnly(dummy, 0);
+    }
+    EXPECT_TRUE(ft.execute(expected));
+}
+
+/*
+ * BFI implementation: brute force and ignorance.
+ *
+ * Each of m1..m4 is a bit mask over positions 0..3; in the flowCompleteness
+ * executor test a set bit p means the corresponding term occurs at position p.
+ * Every way of picking one allowed position per term is enumerated, and the
+ * largest number of distinct positions used by any such pick is returned.
+ * Returns 0 when any mask has none of bits 0..3 set, because the innermost
+ * loop body is then never reached.
+ */
+int cntFlow(int m1, int m2, int m3, int m4)
+{
+ int flow = 0;
+
+ for (int p1p = 0; p1p < 4; p1p++) {
+ if (((1 << p1p) & m1) == 0) continue; // bit p1p is not set in m1
+ for (int p2p = 0; p2p < 4; p2p++) {
+ if (((1 << p2p) & m2) == 0) continue; // bit p2p is not set in m2
+ int f2 = 1; // the first term always contributes one distinct position
+ if (p2p != p1p) ++f2; // second term uses a new position
+ for (int p3p = 0; p3p < 4; p3p++) {
+ if (((1 << p3p) & m3) == 0) continue; // bit p3p is not set in m3
+ int f3 = f2;
+ if (p3p != p1p && p3p != p2p) ++f3; // third term uses a new position
+ for (int p4p = 0; p4p < 4; p4p++) {
+ if (((1 << p4p) & m4) == 0) continue; // bit p4p is not set in m4
+ int f4 = f3;
+ if (p4p != p1p && p4p != p2p && p4p != p3p) ++f4; // fourth term uses a new position
+ if (flow < f4) flow = f4; // keep the best pick seen so far
+ }
+ }
+ }
+ }
+ return flow;
+}
+
+/**
+ * Checks the flowCompleteness blueprint (setup and dump) and its executor.
+ *
+ * Pass 1 uses five query terms where term j matches only position j, for
+ * 0..5 matching terms. Pass 2 uses four query terms over a field of length 4
+ * and tries every combination of non-empty position masks 1..14 per term,
+ * comparing the reported flow with the brute-force result from cntFlow().
+ */
+void
+Test::testFlowCompleteness()
+{
+    { // Test blueprint.
+        TEST_STATE("test flow completeness blueprint");
+        FlowCompletenessBlueprint prototype;
+
+        EXPECT_TRUE(assertCreateInstance(prototype, "flowCompleteness"));
+
+        StringList params, in, out;
+        TEST_DO(FT_SETUP_FAIL(prototype, params));
+        TEST_DO(FT_SETUP_FAIL(prototype, params.add("foo")));
+        TEST_DO(FT_SETUP_FAIL(prototype, params.add("0")));
+
+        FtIndexEnvironment ie;
+        ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+
+        params.clear();
+        params.add("foo");
+
+        out.add("completeness").add("fieldCompleteness")
+            .add("queryCompleteness").add("elementWeight")
+            .add("weight").add("flow");
+
+        // Fully qualified output names; only consumed by the disabled dump check below.
+        StringList expDump;
+        for (size_t i = 0; i < out.size(); ++i) {
+            vespalib::string fn = "flowCompleteness(foo).";
+            fn.append(out[i]);
+            expDump.push_back(fn);
+        }
+
+        TEST_DO(FT_SETUP_OK(prototype, ie, params, in, out));
+        TEST_DO(FT_SETUP_FAIL(prototype, ie, params.add("2")));
+        TEST_DO(FT_DUMP_EMPTY(_factory, "flowCompleteness"));
+#ifdef notyet
+        TEST_DO(FT_DUMP(_factory, "flowCompleteness", ie, expDump));
+#endif
+    }
+
+    { // Test executor.
+        TEST_STATE("test flow completeness executor");
+
+        FtFeatureTest ft(_factory, "flowCompleteness(foo)");
+        ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        // add 5 term nodes
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ASSERT_TRUE(ft.setup());
+        // from 0 to 5 hits (5 to 0 misses)
+        for (uint32_t i = 0; i < 6; ++i) {
+            MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+            mdb->setFieldLength("foo", 10);
+            for (uint32_t j = 0; j < i; ++j) {
+                mdb->addOccurence("foo", j, j);
+            }
+            ASSERT_TRUE(mdb->apply(1));
+            RankResult exp;
+            exp.setEpsilon(0.000001);
+            exp.addScore("flowCompleteness(foo)", i * 0.15);
+            exp.addScore("flowCompleteness(foo).completeness", i * 0.15); // == 0.1*0.5 + 0.2*(1-0.5)
+            exp.addScore("flowCompleteness(foo).fieldCompleteness", i * 0.1);
+            exp.addScore("flowCompleteness(foo).queryCompleteness", i * 0.2);
+            exp.addScore("flowCompleteness(foo).elementWeight", i > 0 ? 1 : 0);
+            exp.addScore("flowCompleteness(foo).weight", 100.0);
+            exp.addScore("flowCompleteness(foo).flow", i);
+            TEST_STATE("run execute");
+            { // reset lazy evaluation
+                RankResult dummy;
+                ft.executeOnly(dummy, 0);
+            }
+            EXPECT_TRUE(ft.execute(exp));
+        }
+    }
+
+
+    { // Test executor, pass 2
+        TEST_STATE("test flow completeness executor (pass 2)");
+
+        FtFeatureTest ft(_factory, "flowCompleteness(foo)");
+        ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        // add 4 term nodes
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ASSERT_TRUE(ft.setup());
+
+        // each term will have 1 to 3 positions it matches,
+        // with various points of overlap
+
+        for (uint32_t t0m = 1; t0m < 15 ; ++t0m) {
+
+            for (uint32_t t1m = 1; t1m < 15 ; ++t1m) {
+
+                for (uint32_t t2m = 1; t2m < 15 ; ++t2m) {
+
+                    for (uint32_t t3m = 1; t3m < 15 ; ++t3m) {
+
+                        // Brute-force reference value for this combination of masks.
+                        int flow = cntFlow(t0m, t1m, t2m, t3m);
+
+                        MatchDataBuilder::UP mdb = ft.createMatchDataBuilder();
+                        mdb->setFieldLength("foo", 4);
+                        // Bit 'pos' set in a term's mask means that term occurs at position 'pos'.
+                        for (int pos = 0; pos < 4; ++pos) {
+                            if (((1 << pos) & t0m) != 0) mdb->addOccurence("foo", 0, pos);
+                            if (((1 << pos) & t1m) != 0) mdb->addOccurence("foo", 1, pos);
+                            if (((1 << pos) & t2m) != 0) mdb->addOccurence("foo", 2, pos);
+                            if (((1 << pos) & t3m) != 0) mdb->addOccurence("foo", 3, pos);
+                        }
+
+                        ASSERT_TRUE(mdb->apply(1));
+                        RankResult exp;
+                        exp.setEpsilon(0.0001);
+                        exp.addScore("flowCompleteness(foo)", flow * 0.25);
+                        exp.addScore("flowCompleteness(foo).completeness", flow * 0.25);
+                        exp.addScore("flowCompleteness(foo).fieldCompleteness", flow * 0.25);
+                        exp.addScore("flowCompleteness(foo).queryCompleteness", flow * 0.25);
+                        exp.addScore("flowCompleteness(foo).elementWeight", 1);
+                        exp.addScore("flowCompleteness(foo).weight", 100.0);
+                        exp.addScore("flowCompleteness(foo).flow", flow);
+                        // The masks are uint32_t (%u); flow is a signed int and is formatted with %d.
+                        TEST_STATE(vespalib::make_string("execute t0m=%u t1m=%u t2m=%u t3m=%u flow=%d",
+                                                         t0m, t1m, t2m, t3m, flow).c_str());
+                        { // reset lazy evaluation
+                            RankResult dummy;
+                            ft.executeOnly(dummy, 0);
+                        }
+                        ASSERT_TRUE(ft.execute(exp));
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+/**
+ * Checks the reverseProximity blueprint (setup and dump) and its executor.
+ *
+ * The executor checks use reverseProximity(foo,0,1) and verify the three
+ * outputs (.out, .posA, .posB) for no matches, for incrementally added
+ * occurrences, and for every (a, b) position pair in [0,10) x [0,10).
+ */
+void
+Test::testReverseProximity()
+{
+    { // Blueprint: setup and dump.
+        ReverseProximityBlueprint prototype;
+        {
+            EXPECT_TRUE(assertCreateInstance(prototype, "reverseProximity"));
+
+            // Note: params keeps accumulating entries across consecutive calls until cleared.
+            StringList params, in, out;
+            FT_SETUP_FAIL(prototype, params);
+            FT_SETUP_FAIL(prototype, params.add("foo"));
+            FT_SETUP_FAIL(prototype, params.add("0"));
+            FT_SETUP_FAIL(prototype, params.add("1"));
+            FT_SETUP_FAIL(prototype, params.add("2"));
+            params.clear();
+
+            FtIndexEnvironment indexEnv;
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("foo"));
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("0"));
+            FT_SETUP_OK  (prototype, indexEnv, params.add("1"), in, out.add("out").add("posA").add("posB"));
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("2"));
+        }
+
+        {
+            FT_DUMP_EMPTY(_factory, "reverseProximity");
+            FtIndexEnvironment indexEnv;
+            indexEnv.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+            FT_DUMP_EMPTY(_factory, "reverseProximity", indexEnv); // must be an index field
+
+            StringList expectedDump;
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+#ifdef VISIT_BETA_FEATURES
+            for (uint32_t a = 0; a < 5; ++a) {
+                for (uint32_t b = a + 1; b < 6; ++b) {
+                    vespalib::string bn = vespalib::make_string("reverseProximity(bar,%u,%u)", a, b);
+                    expectedDump.add(bn + ".out");
+                    expectedDump.add(bn + ".posA");
+                    expectedDump.add(bn + ".posB");
+                }
+            }
+#endif
+            FT_DUMP(_factory, "reverseProximity", indexEnv, expectedDump);
+        }
+    }
+
+
+    { // Executor: no query terms and no match data.
+        const vespalib::string feature = "reverseProximity(foo,0,1)";
+        FtFeatureTest ft(_factory, feature);
+        ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        ASSERT_TRUE(ft.setup());
+
+        RankResult expected;
+        expected.addScore(feature + ".out", util::FEATURE_MAX);
+        expected.addScore(feature + ".posA", util::FEATURE_MIN);
+        expected.addScore(feature + ".posB", util::FEATURE_MAX);
+        ASSERT_TRUE(ft.execute(expected, 1));
+    }
+    { // Executor: occurrences are added step by step to the same match data builder.
+        const vespalib::string feature = "reverseProximity(foo,0,1)";
+        FtFeatureTest ft(_factory, feature);
+        // Setup is asserted to fail at this point, before field "foo" has been added.
+        ASSERT_TRUE(!ft.setup());
+        ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ft.getQueryEnv().getBuilder().addAllFields();
+        ASSERT_TRUE(ft.setup());
+
+        MatchDataBuilder::UP matchData = ft.createMatchDataBuilder();
+        ASSERT_TRUE(matchData->setFieldLength("foo", 50));
+        // Only term 0 present (position 20).
+        ASSERT_TRUE(matchData->addOccurence("foo", 0, 20));
+        RankResult expected;
+        expected.addScore(feature + ".out", util::FEATURE_MAX);
+        expected.addScore(feature + ".posA", util::FEATURE_MIN);
+        expected.addScore(feature + ".posB", util::FEATURE_MAX);
+        ASSERT_TRUE(matchData->apply(1));
+        ASSERT_TRUE(ft.execute(expected, 1));
+
+        // Term 1 at position 30, i.e. after term 0: the same expectation still holds.
+        ASSERT_TRUE(matchData->addOccurence("foo", 1, 30));
+        ASSERT_TRUE(matchData->apply(2));
+        ASSERT_TRUE(ft.execute(expected, 2));
+
+        // Term 1 also at position 10: now (20, 10) is expected with distance 10.
+        ASSERT_TRUE(matchData->addOccurence("foo", 1, 10));
+        ASSERT_TRUE(matchData->apply(3));
+        expected.clear();
+        expected.addScore(feature + ".out", 10.0f);
+        expected.addScore(feature + ".posA", 20.0f);
+        expected.addScore(feature + ".posB", 10.0f);
+        ASSERT_TRUE(ft.execute(expected, 3));
+    }
+    { // Executor: term 0 at position a and term 1 at position b, for all pairs.
+        const vespalib::string feature = "reverseProximity(foo,0,1)";
+        for (int a = 0; a < 10; ++a) {
+            for (int b = 0; b < 10; ++b) {
+                FtFeatureTest ft(_factory, feature);
+                ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+                ft.getQueryEnv().getBuilder().addAllFields();
+                ft.getQueryEnv().getBuilder().addAllFields();
+                ASSERT_TRUE(ft.setup());
+
+                MatchDataBuilder::UP matchData = ft.createMatchDataBuilder();
+                ASSERT_TRUE(matchData->setFieldLength("foo", 10));
+                ASSERT_TRUE(matchData->addOccurence("foo", 0, a));
+                ASSERT_TRUE(matchData->addOccurence("foo", 1, b));
+                ASSERT_TRUE(matchData->apply(1));
+
+                // A distance is only expected when term 0 is at or after term 1.
+                RankResult expected;
+                expected.addScore(feature + ".out", a >= b ? a - b : util::FEATURE_MAX);
+                expected.addScore(feature + ".posA", a >= b ? a : util::FEATURE_MIN);
+                expected.addScore(feature + ".posB", a >= b ? b : util::FEATURE_MAX);
+                ASSERT_TRUE(ft.execute(expected));
+            }
+        }
+    }
+}
+
+/**
+ * Checks the termEditDistance blueprint (setup and dump) and then runs the
+ * executor on a list of query/field pairs with known numbers of deletions,
+ * insertions and substitutions.
+ */
+void
+Test::testTermEditDistance()
+{
+    { // Blueprint: setup and dump.
+        TermEditDistanceBlueprint prototype;
+        {
+            EXPECT_TRUE(assertCreateInstance(prototype, "termEditDistance"));
+
+            StringList params, in, out;
+            FT_SETUP_FAIL(prototype, params);
+            FT_SETUP_FAIL(prototype, params.add("foo"));
+            FT_SETUP_FAIL(prototype, params.add("0"));
+
+            FtIndexEnvironment indexEnv;
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "afoo");
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo");
+            // params is cleared inline here and then accumulates again across the calls below.
+            FT_SETUP_FAIL(prototype, indexEnv, params.clear());
+            FT_SETUP_OK  (prototype, indexEnv, params.add("foo"), in.add("fieldLength(foo)"),
+                          out.add("out").add("del").add("ins").add("sub"));
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("afoo"));
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("wfoo"));
+            FT_SETUP_FAIL(prototype, indexEnv, params.add("0"));
+        }
+
+        {
+            FT_DUMP_EMPTY(_factory, "termEditDistance");
+            FtIndexEnvironment indexEnv;
+            indexEnv.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo");
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar");
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar");
+            FT_DUMP_EMPTY(_factory, "termEditDistance", indexEnv); // must be a single-value index field
+
+            StringList expectedDump;
+#ifdef VISIT_BETA_FEATURES
+            indexEnv.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar");
+            vespalib::string bn = "termEditDistance(bar)";
+            expectedDump.add(bn + ".out");
+            expectedDump.add(bn + ".del");
+            expectedDump.add(bn + ".ins");
+            expectedDump.add(bn + ".sub");
+#endif
+            FT_DUMP(_factory, "termEditDistance", indexEnv, expectedDump);
+        }
+    }
+
+    { // Executor.
+        struct EditCase {
+            const char *query;
+            const char *field;
+            uint32_t    del;
+            uint32_t    ins;
+            uint32_t    sub;
+        };
+        const EditCase editCases[] = {
+            { "abcde", "abcde", 0, 0, 0 },
+            { "abcde", "abcd.", 0, 0, 1 },
+            { "abcde", ".bcd.", 0, 0, 2 },
+            { "abcde", ".bc..", 0, 0, 3 },
+            { "abcde", "..c..", 0, 0, 4 },
+            { "abcd",  "..c..", 0, 1, 3 },
+            { "abc",   "..c..", 0, 2, 2 },
+            { "ab",    "..b..", 0, 3, 1 },
+            { "a",     "..a..", 0, 4, 0 },
+        };
+        for (const EditCase &ec : editCases) {
+            assertTermEditDistance(ec.query, ec.field, ec.del, ec.ins, ec.sub);
+        }
+    }
+}
+
+/**
+ * Runs termEditDistance(foo) for the given query against a single-value index
+ * field "foo" holding 'field', and asserts the expected number of deletions,
+ * insertions and substitutions. The expected ".out" score is their sum.
+ */
+void
+Test::assertTermEditDistance(const vespalib::string &query, const vespalib::string &field,
+                             uint32_t expectedDel, uint32_t expectedIns, uint32_t expectedSub)
+{
+    // Set up the feature test with the field content indexed under "foo".
+    const vespalib::string featureName = "termEditDistance(foo)";
+    FtFeatureTest ft(_factory, featureName);
+    ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+    StringMap index;
+    index.add("foo", field);
+    FT_SETUP(ft, query, index, 1);
+
+    // Every operation counts once towards the total.
+    const uint32_t expectedTotal = expectedDel + expectedIns + expectedSub;
+
+    // Execute and compare results.
+    RankResult expected;
+    expected.addScore(featureName + ".out", static_cast<feature_t>(expectedTotal));
+    expected.addScore(featureName + ".del", static_cast<feature_t>(expectedDel));
+    expected.addScore(featureName + ".ins", static_cast<feature_t>(expectedIns));
+    expected.addScore(featureName + ".sub", static_cast<feature_t>(expectedSub));
+    ASSERT_TRUE(ft.execute(expected));
+}