Merge pull request #9264 from vespa-engine/geirst/experimental-posting-list-format-flag-in-backend

Geirst/experimental posting list format flag in backend
author: Geir Storli <geirst@verizonmedia.com> 2019-05-03 14:09:13 +0200
committer: GitHub <noreply@github.com> 2019-05-03 14:09:13 +0200
commit: 99f41741ca2784640ce1bec9e673355ab92e9d42 (patch)
tree: e18ba44053381584cdfdd96f8fe5db4b592259e2
parent: 25cb895e18e9dd1f74b32896e41fe01b76ddb48e (diff)
parent: 2b0d59378d4fc46bb4843ecfe897b808506a58c2 (diff)
12 files changed, 220 insertions, 290 deletions
diff --git a/searchcommon/src/tests/schema/CMakeLists.txt b/searchcommon/src/tests/schema/CMakeLists.txt
index a8e6ec311a3..aafe015d9a1 100644
--- a/searchcommon/src/tests/schema/CMakeLists.txt
+++ b/searchcommon/src/tests/schema/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchcommon_schema_test_app TEST
     schema_test.cpp
     DEPENDS
     searchcommon
+    gtest
 )
 vespa_add_test(NAME searchcommon_schema_test_app NO_VALGRIND COMMAND searchcommon_schema_test_app)
diff --git a/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg b/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg
index c0998bcf597..b6c547c52c9 100644
--- a/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg
+++ b/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg
@@ -5,9 +5,7 @@ indexfield[1].name b
 indexfield[1].datatype INT64
 indexfield[2].name c
 indexfield[2].datatype STRING
-indexfield[2].prefix true
-indexfield[2].phrases false
-indexfield[2].positions false
+indexfield[2].experimentalpostinglistformat true
 fieldset[1]
 fieldset[0].name default
 fieldset[0].field[2]
diff --git a/searchcommon/src/tests/schema/schema-without-index-field-properties.txt b/searchcommon/src/tests/schema/schema-without-index-field-properties.txt
new file mode 100644
index 00000000000..4491b1242e0
--- /dev/null
+++ b/searchcommon/src/tests/schema/schema-without-index-field-properties.txt
@@ -0,0 +1,7 @@
+attributefield[0]
+summaryfield[0]
+fieldset[0]
+indexfield[1]
+indexfield[0].name foo
+indexfield[0].datatype STRING
+indexfield[0].collectiontype SINGLE
diff --git a/searchcommon/src/tests/schema/schema_test.cpp b/searchcommon/src/tests/schema/schema_test.cpp
index e9997c2e70d..e360ee1ba7a 100644
--- a/searchcommon/src/tests/schema/schema_test.cpp
+++ b/searchcommon/src/tests/schema/schema_test.cpp
@@ -1,10 +1,11 @@
 // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/stllike/string.h>
-#include <fstream>
-#include <vespa/vespalib/testkit/test_kit.h>
 #include <vespa/config/common/configparser.h>
-#include <vespa/searchcommon/common/schemaconfigurer.h>
 #include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchcommon/common/schemaconfigurer.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <fstream>
+
 #include <vespa/log/log.h>
 LOG_SETUP("schema_test");
 
@@ -15,63 +16,70 @@ namespace search::index {
 using schema::DataType;
 using schema::CollectionType;
 using SIAF = Schema::ImportedAttributeField;
+using SIF = Schema::IndexField;
 
-void assertField(const Schema::Field & exp, const Schema::Field & act) {
-    EXPECT_EQUAL(exp.getName(), act.getName());
-    EXPECT_EQUAL(exp.getDataType(), act.getDataType());
-    EXPECT_EQUAL(exp.getCollectionType(), act.getCollectionType());
+void
+assertField(const Schema::Field& exp, const Schema::Field& act)
+{
+    EXPECT_EQ(exp.getName(), act.getName());
+    EXPECT_EQ(exp.getDataType(), act.getDataType());
+    EXPECT_EQ(exp.getCollectionType(), act.getCollectionType());
 }
 
-void assertIndexField(const Schema::IndexField & exp,
-                      const Schema::IndexField & act)
+void
+assertIndexField(const Schema::IndexField& exp,
+                 const Schema::IndexField& act)
 {
     assertField(exp, act);
-    EXPECT_EQUAL(exp.hasPrefix(), act.hasPrefix());
-    EXPECT_EQUAL(exp.hasPhrases(), act.hasPhrases());
-    EXPECT_EQUAL(exp.hasPositions(), act.hasPositions());
+    EXPECT_EQ(exp.getAvgElemLen(), act.getAvgElemLen());
+    EXPECT_EQ(exp.use_experimental_posting_list_format(), act.use_experimental_posting_list_format());
 }
 
-void assertSet(const Schema::FieldSet &exp,
-               const Schema::FieldSet &act)
+void
+assertSet(const Schema::FieldSet& exp,
+          const Schema::FieldSet& act)
 {
-    EXPECT_EQUAL(exp.getName(), act.getName());
-    ASSERT_EQUAL(exp.getFields().size(), act.getFields().size());
+    EXPECT_EQ(exp.getName(), act.getName());
+    ASSERT_EQ(exp.getFields().size(), act.getFields().size());
     for (size_t i = 0; i < exp.getFields().size(); ++i) {
-        EXPECT_EQUAL(exp.getFields()[i], act.getFields()[i]);
+        EXPECT_EQ(exp.getFields()[i], act.getFields()[i]);
     }
 }
 
-void assertSchema(const Schema & exp, const Schema & act) {
-    ASSERT_EQUAL(exp.getNumIndexFields(), act.getNumIndexFields());
+void
+assertSchema(const Schema& exp, const Schema& act)
+{
+    ASSERT_EQ(exp.getNumIndexFields(), act.getNumIndexFields());
     for (size_t i = 0; i < exp.getNumIndexFields(); ++i) {
         assertIndexField(exp.getIndexField(i), act.getIndexField(i));
     }
-    ASSERT_EQUAL(exp.getNumAttributeFields(), act.getNumAttributeFields());
+    ASSERT_EQ(exp.getNumAttributeFields(), act.getNumAttributeFields());
     for (size_t i = 0; i < exp.getNumAttributeFields(); ++i) {
         assertField(exp.getAttributeField(i), act.getAttributeField(i));
     }
-    ASSERT_EQUAL(exp.getNumSummaryFields(), act.getNumSummaryFields());
+    ASSERT_EQ(exp.getNumSummaryFields(), act.getNumSummaryFields());
     for (size_t i = 0; i < exp.getNumSummaryFields(); ++i) {
         assertField(exp.getSummaryField(i), act.getSummaryField(i));
     }
-    ASSERT_EQUAL(exp.getNumFieldSets(), act.getNumFieldSets());
+    ASSERT_EQ(exp.getNumFieldSets(), act.getNumFieldSets());
     for (size_t i = 0; i < exp.getNumFieldSets(); ++i) {
         assertSet(exp.getFieldSet(i), act.getFieldSet(i));
     }
     const auto &expImported = exp.getImportedAttributeFields();
     const auto &actImported = act.getImportedAttributeFields();
-    ASSERT_EQUAL(expImported.size(), actImported.size());
+    ASSERT_EQ(expImported.size(), actImported.size());
     for (size_t i = 0; i < expImported.size(); ++i) {
         assertField(expImported[i], actImported[i]);
     }
 }
 
-TEST("testBasic") {
+TEST(SchemaTest, test_basic)
+{
     Schema s;
-    EXPECT_EQUAL(0u, s.getNumIndexFields());
-    EXPECT_EQUAL(0u, s.getNumAttributeFields());
-    EXPECT_EQUAL(0u, s.getNumSummaryFields());
-    EXPECT_EQUAL(0u, s.getNumImportedAttributeFields());
+    EXPECT_EQ(0u, s.getNumIndexFields());
+    EXPECT_EQ(0u, s.getNumAttributeFields());
+    EXPECT_EQ(0u, s.getNumSummaryFields());
+    EXPECT_EQ(0u, s.getNumImportedAttributeFields());
 
     s.addIndexField(Schema::IndexField("foo", DataType::STRING));
     s.addIndexField(Schema::IndexField("bar", DataType::INT32));
@@ -89,100 +97,95 @@ TEST("testBasic") {
 
     s.addImportedAttributeField(SIAF("imported", DataType::INT32));
 
-    EXPECT_EQUAL(2u, s.getNumIndexFields());
+    ASSERT_EQ(2u, s.getNumIndexFields());
     {
-        EXPECT_EQUAL("foo", s.getIndexField(0).getName());
-        EXPECT_EQUAL(DataType::STRING, s.getIndexField(0).getDataType());
-        EXPECT_EQUAL(CollectionType::SINGLE, s.getIndexField(0).getCollectionType());
-        EXPECT_TRUE(!s.getIndexField(0).hasPrefix());
-        EXPECT_TRUE(!s.getIndexField(0).hasPhrases());
-        EXPECT_TRUE(s.getIndexField(0).hasPositions());
-
-        EXPECT_EQUAL("bar", s.getIndexField(1).getName());
-        EXPECT_EQUAL(DataType::INT32, s.getIndexField(1).getDataType());
-        EXPECT_EQUAL(CollectionType::SINGLE, s.getIndexField(1).getCollectionType());
-
-        EXPECT_EQUAL(0u, s.getIndexFieldId("foo"));
-        EXPECT_EQUAL(1u, s.getIndexFieldId("bar"));
-        EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("cox"));
+        EXPECT_EQ("foo", s.getIndexField(0).getName());
+        EXPECT_EQ(DataType::STRING, s.getIndexField(0).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getIndexField(0).getCollectionType());
+
+        EXPECT_EQ("bar", s.getIndexField(1).getName());
+        EXPECT_EQ(DataType::INT32, s.getIndexField(1).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getIndexField(1).getCollectionType());
+
+        EXPECT_EQ(0u, s.getIndexFieldId("foo"));
+        EXPECT_EQ(1u, s.getIndexFieldId("bar"));
+        EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("cox"));
     }
-    EXPECT_EQUAL(3u, s.getNumAttributeFields());
+    ASSERT_EQ(3u, s.getNumAttributeFields());
     {
-        EXPECT_EQUAL("foo", s.getAttributeField(0).getName());
-        EXPECT_EQUAL(DataType::STRING, s.getAttributeField(0).getDataType());
-        EXPECT_EQUAL(CollectionType::ARRAY, s.getAttributeField(0).getCollectionType());
-
-        EXPECT_EQUAL("bar", s.getAttributeField(1).getName());
-        EXPECT_EQUAL(DataType::INT32, s.getAttributeField(1).getDataType());
-        EXPECT_EQUAL(CollectionType::WEIGHTEDSET, s.getAttributeField(1).getCollectionType());
-
-        EXPECT_EQUAL("cox", s.getAttributeField(2).getName());
-        EXPECT_EQUAL(DataType::STRING, s.getAttributeField(2).getDataType());
-        EXPECT_EQUAL(CollectionType::SINGLE, s.getAttributeField(2).getCollectionType());
-
-        EXPECT_EQUAL(0u, s.getAttributeFieldId("foo"));
-        EXPECT_EQUAL(1u, s.getAttributeFieldId("bar"));
-        EXPECT_EQUAL(2u, s.getAttributeFieldId("cox"));
-        EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("fox"));
+        EXPECT_EQ("foo", s.getAttributeField(0).getName());
+        EXPECT_EQ(DataType::STRING, s.getAttributeField(0).getDataType());
+        EXPECT_EQ(CollectionType::ARRAY, s.getAttributeField(0).getCollectionType());
+
+        EXPECT_EQ("bar", s.getAttributeField(1).getName());
+        EXPECT_EQ(DataType::INT32, s.getAttributeField(1).getDataType());
+        EXPECT_EQ(CollectionType::WEIGHTEDSET, s.getAttributeField(1).getCollectionType());
+
+        EXPECT_EQ("cox", s.getAttributeField(2).getName());
+        EXPECT_EQ(DataType::STRING, s.getAttributeField(2).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getAttributeField(2).getCollectionType());
+
+        EXPECT_EQ(0u, s.getAttributeFieldId("foo"));
+        EXPECT_EQ(1u, s.getAttributeFieldId("bar"));
+        EXPECT_EQ(2u, s.getAttributeFieldId("cox"));
+        EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("fox"));
     }
-    EXPECT_EQUAL(4u, s.getNumSummaryFields());
+    ASSERT_EQ(4u, s.getNumSummaryFields());
     {
-        EXPECT_EQUAL("foo", s.getSummaryField(0).getName());
-        EXPECT_EQUAL(DataType::STRING, s.getSummaryField(0).getDataType());
-        EXPECT_EQUAL(CollectionType::ARRAY, s.getSummaryField(0).getCollectionType());
-
-        EXPECT_EQUAL("bar", s.getSummaryField(1).getName());
-        EXPECT_EQUAL(DataType::INT32, s.getSummaryField(1).getDataType());
-        EXPECT_EQUAL(CollectionType::WEIGHTEDSET, s.getSummaryField(1).getCollectionType());
-
-        EXPECT_EQUAL("cox", s.getSummaryField(2).getName());
-        EXPECT_EQUAL(DataType::STRING, s.getSummaryField(2).getDataType());
-        EXPECT_EQUAL(CollectionType::SINGLE, s.getSummaryField(2).getCollectionType());
-
-        EXPECT_EQUAL("fox", s.getSummaryField(3).getName());
-        EXPECT_EQUAL(DataType::RAW, s.getSummaryField(3).getDataType());
-        EXPECT_EQUAL(CollectionType::SINGLE, s.getSummaryField(3).getCollectionType());
-
-        EXPECT_EQUAL(0u, s.getSummaryFieldId("foo"));
-        EXPECT_EQUAL(1u, s.getSummaryFieldId("bar"));
-        EXPECT_EQUAL(2u, s.getSummaryFieldId("cox"));
-        EXPECT_EQUAL(3u, s.getSummaryFieldId("fox"));
-        EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getSummaryFieldId("not"));
+        EXPECT_EQ("foo", s.getSummaryField(0).getName());
+        EXPECT_EQ(DataType::STRING, s.getSummaryField(0).getDataType());
+        EXPECT_EQ(CollectionType::ARRAY, s.getSummaryField(0).getCollectionType());
+
+        EXPECT_EQ("bar", s.getSummaryField(1).getName());
+        EXPECT_EQ(DataType::INT32, s.getSummaryField(1).getDataType());
+        EXPECT_EQ(CollectionType::WEIGHTEDSET, s.getSummaryField(1).getCollectionType());
+
+        EXPECT_EQ("cox", s.getSummaryField(2).getName());
+        EXPECT_EQ(DataType::STRING, s.getSummaryField(2).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getSummaryField(2).getCollectionType());
+
+        EXPECT_EQ("fox", s.getSummaryField(3).getName());
+        EXPECT_EQ(DataType::RAW, s.getSummaryField(3).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getSummaryField(3).getCollectionType());
+
+        EXPECT_EQ(0u, s.getSummaryFieldId("foo"));
+        EXPECT_EQ(1u, s.getSummaryFieldId("bar"));
+        EXPECT_EQ(2u, s.getSummaryFieldId("cox"));
+        EXPECT_EQ(3u, s.getSummaryFieldId("fox"));
+        EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getSummaryFieldId("not"));
     }
-    EXPECT_EQUAL(1u, s.getNumFieldSets());
+    ASSERT_EQ(1u, s.getNumFieldSets());
     {
-        EXPECT_EQUAL("default", s.getFieldSet(0).getName());
-        EXPECT_EQUAL(2u, s.getFieldSet(0).getFields().size());
-        EXPECT_EQUAL("foo", s.getFieldSet(0).getFields()[0]);
-        EXPECT_EQUAL("bar", s.getFieldSet(0).getFields()[1]);
+        EXPECT_EQ("default", s.getFieldSet(0).getName());
+        EXPECT_EQ(2u, s.getFieldSet(0).getFields().size());
+        EXPECT_EQ("foo", s.getFieldSet(0).getFields()[0]);
+        EXPECT_EQ("bar", s.getFieldSet(0).getFields()[1]);
     }
-    EXPECT_EQUAL(1u, s.getNumImportedAttributeFields());
+    EXPECT_EQ(1u, s.getNumImportedAttributeFields());
     {
         const auto &imported = s.getImportedAttributeFields();
-        EXPECT_EQUAL(1u, imported.size());
-        TEST_DO(assertField(SIAF("imported", DataType::INT32, CollectionType::SINGLE), imported[0]));
+        EXPECT_EQ(1u, imported.size());
+        assertField(SIAF("imported", DataType::INT32, CollectionType::SINGLE), imported[0]);
     }
 }
 
-TEST("testLoadAndSave") {
-    using SIF = Schema::IndexField;
+TEST(SchemaTest, test_load_and_save)
+{
     using SAF = Schema::AttributeField;
     using SSF = Schema::SummaryField;
     using SDT = schema::DataType;
     using SCT = schema::CollectionType;
-    typedef Schema::FieldSet SFS;
+    using SFS = Schema::FieldSet;
 
     { // load from config -> save to file -> load from file
         Schema s;
-        SchemaConfigurer configurer(s, "dir:" + TEST_PATH("load-save-cfg"));
-        EXPECT_EQUAL(3u, s.getNumIndexFields());
+        SchemaConfigurer configurer(s, "dir:load-save-cfg");
+        EXPECT_EQ(3u, s.getNumIndexFields());
         assertIndexField(SIF("a", SDT::STRING), s.getIndexField(0));
         assertIndexField(SIF("b", SDT::INT64), s.getIndexField(1));
-        assertIndexField(SIF("c", SDT::STRING).setPrefix(true)
-                         .setPhrases(false).setPositions(false),
-                         s.getIndexField(2));
+        assertIndexField(SIF("c", SDT::STRING).set_experimental_posting_list_format(true), s.getIndexField(2));
 
-        EXPECT_EQUAL(9u, s.getNumAttributeFields());
+        EXPECT_EQ(9u, s.getNumAttributeFields());
         assertField(SAF("a", SDT::STRING, SCT::SINGLE),
                     s.getAttributeField(0));
         assertField(SAF("b", SDT::INT8, SCT::ARRAY), s.getAttributeField(1));
@@ -195,7 +198,7 @@ TEST("testLoadAndSave") {
         assertField(SAF("h", SDT::BOOLEANTREE), s.getAttributeField(7));
         assertField(SAF("i", SDT::TENSOR), s.getAttributeField(8));
 
-        EXPECT_EQUAL(12u, s.getNumSummaryFields());
+        EXPECT_EQ(12u, s.getNumSummaryFields());
         assertField(SSF("a", SDT::INT8),   s.getSummaryField(0));
         assertField(SSF("b", SDT::INT16),  s.getSummaryField(1));
         assertField(SSF("c", SDT::INT32),  s.getSummaryField(2));
@@ -209,7 +212,7 @@ TEST("testLoadAndSave") {
         assertField(SSF("k", SDT::RAW),    s.getSummaryField(10));
         assertField(SSF("l", SDT::RAW),    s.getSummaryField(11));
 
-        EXPECT_EQUAL(1u, s.getNumFieldSets());
+        EXPECT_EQ(1u, s.getNumFieldSets());
         assertSet(SFS("default").addField("a").addField("c"),
                   s.getFieldSet(0));
 
@@ -240,7 +243,8 @@ TEST("testLoadAndSave") {
     }
 }
 
-TEST("require that schema can save and load timestamps for fields") {
+TEST(SchemaTest, require_that_schema_can_save_and_load_timestamps_for_fields)
+{
     const fastos::TimeStamp timestamp(42);
     const std::string file_name = "schema-with-timestamps.txt";
     Schema s;
@@ -250,11 +254,12 @@ TEST("require that schema can save and load timestamps for fields") {
     ASSERT_TRUE(s.saveToFile(file_name));
     Schema s2;
     ASSERT_TRUE(s2.loadFromFile(file_name));
-    ASSERT_EQUAL(1u, s2.getNumIndexFields());
-    ASSERT_EQUAL(timestamp, s2.getIndexField(0).getTimestamp());
+    ASSERT_EQ(1u, s2.getNumIndexFields());
+    EXPECT_EQ(timestamp, s2.getIndexField(0).getTimestamp());
 }
 
-TEST("require that timestamps are omitted when 0.") {
+TEST(SchemaTest, require_that_timestamps_are_omitted_when_0)
+{
     const std::string file_name = "schema-without-timestamps.txt";
     Schema s;
     s.addIndexField(Schema::IndexField("foo", DataType::STRING));
@@ -265,16 +270,18 @@ TEST("require that timestamps are omitted when 0.") {
     while (file) {
         std::string line;
         getline(file, line);
-        EXPECT_NOT_EQUAL("indexfield[0].timestamp 0", line);
+        EXPECT_NE("indexfield[0].timestamp 0", line);
     }
 
     Schema s2;
     ASSERT_TRUE(s2.loadFromFile(file_name));
-    ASSERT_EQUAL(1u, s2.getNumIndexFields());
+    ASSERT_EQ(1u, s2.getNumIndexFields());
 }
 
-void addAllFieldTypes(const string &name, Schema &schema,
-                      fastos::TimeStamp timestamp) {
+void
+addAllFieldTypes(const string& name, Schema& schema,
+                 fastos::TimeStamp timestamp)
+{
     Schema::IndexField index_field(name, DataType::STRING);
     index_field.setTimestamp(timestamp);
     schema.addIndexField(index_field);
@@ -290,7 +297,8 @@ void addAllFieldTypes(const string &name, Schema &schema,
     schema.addFieldSet(Schema::FieldSet(name));
 }
 
-TEST("require that schemas can be added") {
+TEST(SchemaTest, require_that_schemas_can_be_added)
+{
     const string name1 = "foo";
     const string name2 = "bar";
     const fastos::TimeStamp timestamp1(42);
@@ -301,29 +309,30 @@ TEST("require that schemas can be added") {
     addAllFieldTypes(name2, s2, timestamp2);
 
     Schema::UP sum = Schema::make_union(s1, s2);
-    ASSERT_EQUAL(2u, sum->getNumIndexFields());
+    ASSERT_EQ(2u, sum->getNumIndexFields());
     EXPECT_TRUE(s1.getIndexField(0) ==
                 sum->getIndexField(sum->getIndexFieldId(name1)));
     EXPECT_TRUE(s2.getIndexField(0) ==
                 sum->getIndexField(sum->getIndexFieldId(name2)));
-    ASSERT_EQUAL(2u, sum->getNumAttributeFields());
+    ASSERT_EQ(2u, sum->getNumAttributeFields());
     EXPECT_TRUE(s1.getAttributeField(0) ==
                 sum->getAttributeField(sum->getAttributeFieldId(name1)));
     EXPECT_TRUE(s2.getAttributeField(0) ==
                 sum->getAttributeField(sum->getAttributeFieldId(name2)));
-    ASSERT_EQUAL(2u, sum->getNumSummaryFields());
+    ASSERT_EQ(2u, sum->getNumSummaryFields());
     EXPECT_TRUE(s1.getSummaryField(0) ==
                 sum->getSummaryField(sum->getSummaryFieldId(name1)));
     EXPECT_TRUE(s2.getSummaryField(0) ==
                 sum->getSummaryField(sum->getSummaryFieldId(name2)));
-    ASSERT_EQUAL(2u, sum->getNumFieldSets());
+    ASSERT_EQ(2u, sum->getNumFieldSets());
     EXPECT_TRUE(s1.getFieldSet(0) ==
                 sum->getFieldSet(sum->getFieldSetId(name1)));
     EXPECT_TRUE(s2.getFieldSet(0) ==
                 sum->getFieldSet(sum->getFieldSetId(name2)));
 }
 
-TEST("require that S union S = S for schema S") {
+TEST(SchemaTest, require_that_S_union_S_equals_S_for_schema_S)
+{
     Schema schema;
     addAllFieldTypes("foo", schema, 42);
 
@@ -331,7 +340,8 @@ TEST("require that S union S = S for schema S") {
     EXPECT_TRUE(schema == *sum);
 }
 
-TEST("require that schema can calculate set_difference") {
+TEST(SchemaTest, require_that_schema_can_calculate_set_difference)
+{
     const string name1 = "foo";
     const string name2 = "bar";
     const fastos::TimeStamp timestamp1(42);
@@ -349,7 +359,8 @@ TEST("require that schema can calculate set_difference") {
     EXPECT_TRUE(expected == *schema);
 }
 
-TEST("require that getOldFields returns a subset of a schema") {
+TEST(SchemaTest, require_that_get_old_fields_returns_a_subset_of_a_schema)
+{
     Schema schema;
     const int64_t limit_timestamp = 1000;
 
@@ -359,13 +370,14 @@ TEST("require that getOldFields returns a subset of a schema") {
     Schema::UP old_fields =
         schema.getOldFields(fastos::TimeStamp(limit_timestamp));
 
-    EXPECT_EQUAL(1u, old_fields->getNumIndexFields());
-    EXPECT_EQUAL("bar", old_fields->getIndexField(0).getName());
-    EXPECT_EQUAL(1u, old_fields->getNumAttributeFields());
-    EXPECT_EQUAL(1u, old_fields->getNumSummaryFields());
+    EXPECT_EQ(1u, old_fields->getNumIndexFields());
+    EXPECT_EQ("bar", old_fields->getIndexField(0).getName());
+    EXPECT_EQ(1u, old_fields->getNumAttributeFields());
+    EXPECT_EQ(1u, old_fields->getNumSummaryFields());
 }
 
-TEST("require that schema can calculate intersection") {
+TEST(SchemaTest, require_that_schema_can_calculate_intersection)
+{
     const string name1 = "foo";
     const string name2 = "bar";
     const string name3 = "baz";
@@ -385,18 +397,19 @@ TEST("require that schema can calculate intersection") {
     EXPECT_TRUE(expected == *schema);
 }
 
-TEST("require that incompatible fields are removed from intersection") {
+TEST(SchemaTest, require_that_incompatible_fields_are_removed_from_intersection)
+{
     const string name = "foo";
     Schema s1;
     s1.addIndexField(Schema::IndexField(name, DataType::STRING));
     Schema s2;
     s2.addIndexField(Schema::IndexField(name, DataType::INT32));
     Schema::UP schema = Schema::intersect(s1, s2);
-    EXPECT_EQUAL(0u, schema->getNumIndexFields());
+    EXPECT_EQ(0u, schema->getNumIndexFields());
     EXPECT_FALSE(schema->isIndexField(name));
 }
 
-TEST("require that imported attribute fields are not saved to disk")
+TEST(SchemaTest, require_that_imported_attribute_fields_are_not_saved_to_disk)
 {
     const vespalib::string fileName = "schema-no-imported-fields.txt";
     {
@@ -407,25 +420,39 @@ TEST("require that imported attribute fields are not saved to disk")
     {
         Schema s;
         s.loadFromFile(fileName);
-        EXPECT_EQUAL(0u, s.getNumImportedAttributeFields());
+        EXPECT_EQ(0u, s.getNumImportedAttributeFields());
     }
 }
 
-TEST("require that schema can be built with imported attribute fields")
+TEST(SchemaTest, require_that_schema_can_be_built_with_imported_attribute_fields)
 {
     Schema s;
-    SchemaConfigurer configurer(s, "dir:" + TEST_PATH("imported-fields-cfg"));
+    SchemaConfigurer configurer(s, "dir:imported-fields-cfg");
 
     const auto &imported = s.getImportedAttributeFields();
-    EXPECT_EQUAL(2u, imported.size());
-    TEST_DO(assertField(SIAF("imported_a", DataType::INT32, CollectionType::SINGLE), imported[0]));
-    TEST_DO(assertField(SIAF("imported_b", DataType::STRING, CollectionType::ARRAY), imported[1]));
+    ASSERT_EQ(2u, imported.size());
+    assertField(SIAF("imported_a", DataType::INT32, CollectionType::SINGLE), imported[0]);
+    assertField(SIAF("imported_b", DataType::STRING, CollectionType::ARRAY), imported[1]);
 
     const auto &regular = s.getAttributeFields();
-    EXPECT_EQUAL(1u, regular.size());
-    TEST_DO(assertField(SIAF("regular", DataType::INT32, CollectionType::SINGLE), regular[0]));
+    ASSERT_EQ(1u, regular.size());
+    assertField(SIAF("regular", DataType::INT32, CollectionType::SINGLE), regular[0]);
+}
+
+// A schema file that omits averageelementlen and experimentalpostinglistformat must load with the default values.
+TEST(SchemaTest, require_that_index_field_is_loaded_with_default_values_when_properties_are_not_set)
+{
+    Schema s;
+    // The fixture sets only name, datatype and collectiontype for its single index field; fail fast if it cannot be read.
+    ASSERT_TRUE(s.loadFromFile("schema-without-index-field-properties.txt"));
+    const auto& index_fields = s.getIndexFields();
+    ASSERT_EQ(1u, index_fields.size());
+    // Spell the expected defaults out explicitly, then check that a default-constructed field agrees with them.
+    assertIndexField(SIF("foo", DataType::STRING, CollectionType::SINGLE).
+                     setAvgElemLen(512).set_experimental_posting_list_format(false), index_fields[0]);
+    assertIndexField(SIF("foo", DataType::STRING, CollectionType::SINGLE), index_fields[0]);
 }
 
 }
 
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.cpp b/searchcommon/src/vespa/searchcommon/common/schema.cpp
index cef74409024..6d3bae31508 100644
--- a/searchcommon/src/vespa/searchcommon/common/schema.cpp
+++ b/searchcommon/src/vespa/searchcommon/common/schema.cpp
@@ -131,29 +131,23 @@ Schema::Field::operator!=(const Field &rhs) const
 
 Schema::IndexField::IndexField(vespalib::stringref name, DataType dt)
     : Field(name, dt),
-      _prefix(false),
-      _phrases(false),
-      _positions(true),
-      _avgElemLen(512)
+      _avgElemLen(512),
+      _experimental_posting_list_format(false)
 {
 }
 
 Schema::IndexField::IndexField(vespalib::stringref name, DataType dt,
                                CollectionType ct)
     : Field(name, dt, ct),
-      _prefix(false),
-      _phrases(false),
-      _positions(true),
-      _avgElemLen(512)
+      _avgElemLen(512),
+      _experimental_posting_list_format(false)
 {
 }
 
 Schema::IndexField::IndexField(const std::vector<vespalib::string> &lines)
     : Field(lines),
-      _prefix(ConfigParser::parse<bool>("prefix", lines)),
-      _phrases(ConfigParser::parse<bool>("phrases", lines)),
-      _positions(ConfigParser::parse<bool>("positions", lines)),
-      _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines))
+      _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines, 512)),
+      _experimental_posting_list_format(ConfigParser::parse<bool>("experimentalpostinglistformat", lines, false))
 {
 }
 
@@ -161,30 +155,29 @@ void
 Schema::IndexField::write(vespalib::asciistream & os, vespalib::stringref prefix) const
 {
     Field::write(os, prefix);
-    os << prefix << "prefix " << (_prefix ? "true" : "false") << "\n";
-    os << prefix << "phrases " << (_phrases ? "true" : "false") << "\n";
-    os << prefix << "positions " << (_positions ? "true" : "false") << "\n";
     os << prefix << "averageelementlen " << static_cast<int32_t>(_avgElemLen) << "\n";
+    os << prefix << "experimentalpostinglistformat " << (_experimental_posting_list_format ? "true" : "false") << "\n";
+
+    // TODO: Stop writing prefix, phrases and positions once downgrading to older versions that still read these keys is no longer supported.
+    os << prefix << "prefix false" << "\n";
+    os << prefix << "phrases false" << "\n";
+    os << prefix << "positions true" << "\n";
 }
 
 bool
 Schema::IndexField::operator==(const IndexField &rhs) const
 {
     return Field::operator==(rhs) &&
-                  _prefix == rhs._prefix &&
-                 _phrases == rhs._phrases &&
-               _positions == rhs._positions &&
-              _avgElemLen == rhs._avgElemLen;
+            _avgElemLen == rhs._avgElemLen &&
+            _experimental_posting_list_format == rhs._experimental_posting_list_format;
 }
 
 bool
 Schema::IndexField::operator!=(const IndexField &rhs) const
 {
     return Field::operator!=(rhs) ||
-                  _prefix != rhs._prefix ||
-                 _phrases != rhs._phrases ||
-               _positions != rhs._positions ||
-              _avgElemLen != rhs._avgElemLen;
+            _avgElemLen != rhs._avgElemLen ||
+            _experimental_posting_list_format != rhs._experimental_posting_list_format;
 }
 
 Schema::FieldSet::FieldSet(const std::vector<vespalib::string> & lines) :
@@ -337,9 +330,6 @@ cloneIndexField(const Schema::IndexField &field,
     return Schema::IndexField(field.getName() + suffix,
                               field.getDataType(),
                               field.getCollectionType()).
-        setPrefix(field.hasPrefix()).
-        setPhrases(field.hasPhrases()).
-        setPositions(field.hasPositions()).
         setAvgElemLen(field.getAvgElemLen());
 }
 
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.h b/searchcommon/src/vespa/searchcommon/common/schema.h
index 90cf099f2d8..bb2163e5577 100644
--- a/searchcommon/src/vespa/searchcommon/common/schema.h
+++ b/searchcommon/src/vespa/searchcommon/common/schema.h
@@ -74,12 +74,11 @@ public:
      * A representation of an index field with extra information on
      * how the index should be generated.
      **/
-    class IndexField : public Field
-    {
-        bool _prefix;
-        bool _phrases;
-        bool _positions;
+    class IndexField : public Field {
+    private:
         uint32_t _avgElemLen;
+        // TODO: Remove when experimental posting list format is made default
+        bool _experimental_posting_list_format;
 
     public:
         IndexField(vespalib::stringref name, DataType dt);
@@ -89,21 +88,17 @@ public:
          **/
         IndexField(const std::vector<vespalib::string> &lines);
 
-        IndexField &setPrefix(bool value) { _prefix = value; return *this; }
-        IndexField &setPhrases(bool value) { _phrases = value; return *this; }
-        IndexField &setPositions(bool value)
-        { _positions = value; return *this; }
-        IndexField &setAvgElemLen(uint32_t avgElemLen)
-        { _avgElemLen = avgElemLen; return *this; }
+        IndexField &setAvgElemLen(uint32_t avgElemLen) { _avgElemLen = avgElemLen; return *this; }
+        IndexField &set_experimental_posting_list_format(bool value) {
+            _experimental_posting_list_format = value;
+            return *this;
+        }
 
-        void
-        write(vespalib::asciistream &os,
-              vespalib::stringref prefix) const override;
+        void write(vespalib::asciistream &os,
+                   vespalib::stringref prefix) const override;
 
-        bool hasPrefix() const { return _prefix; }
-        bool hasPhrases() const { return _phrases; }
-        bool hasPositions() const { return _positions; }
         uint32_t getAvgElemLen() const { return _avgElemLen; }
+        bool use_experimental_posting_list_format() const { return _experimental_posting_list_format; }
 
         bool operator==(const IndexField &rhs) const;
         bool operator!=(const IndexField &rhs) const;
diff --git a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp
index d56f3c747c1..59ed15eefb0 100644
--- a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp
+++ b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp
@@ -144,10 +144,8 @@ SchemaBuilder::build(const IndexschemaConfig &cfg, Schema &schema)
         const IndexschemaConfig::Indexfield & f = cfg.indexfield[i];
         schema.addIndexField(Schema::IndexField(f.name, convertIndexDataType(f.datatype),
                                                 convertIndexCollectionType(f.collectiontype)).
-                setPrefix(f.prefix).
-                setPhrases(f.phrases).
-                setPositions(f.positions).
-                setAvgElemLen(f.averageelementlen));
+                setAvgElemLen(f.averageelementlen).
+                set_experimental_posting_list_format(f.experimentalpostinglistformat));
     }
     for (size_t i = 0; i < cfg.fieldset.size(); ++i) {
         const IndexschemaConfig::Fieldset &fs = cfg.fieldset[i];
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
index 64a54187254..b6d843e4e3c 100644
--- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
@@ -182,9 +182,8 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch,
         if (settings.hasError()) {
             return false;
         }
-        bool hasPhraseOcc = settings.hasPhrases();
         SchemaUtil::IndexIterator oItr(oldSchema, itr);
-        if (!itr.hasMatchingOldFields(oldSchema, hasPhraseOcc) || !oItr.isValid()) {
+        if (!itr.hasMatchingOldFields(oldSchema) || !oItr.isValid()) {
             if (!openField(fieldDir, tuneFileSearch)) {
                 return false;
             }
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
index a41f0412294..8da590654da 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
@@ -199,10 +199,10 @@ FieldReader::allocFieldReader(const SchemaUtil::IndexIterator &index,
                               const Schema &oldSchema)
 {
     assert(index.isValid());
-    if (index.hasMatchingOldFields(oldSchema, false)) {
+    if (index.hasMatchingOldFields(oldSchema)) {
         return std::make_unique<FieldReader>();      // The common case
     }
-    if (!index.hasOldFields(oldSchema, false)) {
+    if (!index.hasOldFields(oldSchema)) {
         return std::make_unique<FieldReaderEmpty>(index); // drop data
     }
     // field exists in old schema with different collection type setting
diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
index fc198e3b74e..ed311b682e6 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
@@ -102,7 +102,7 @@ Fusion::openInputWordReaders(const SchemaUtil::IndexIterator &index,
         vespalib::string fieldDir(oldindexpath + "/" + index.getName());
         vespalib::string dictName(fieldDir + "/dictionary");
         const Schema &oldSchema = oi.getSchema();
-        if (!index.hasOldFields(oldSchema, false)) {
+        if (!index.hasOldFields(oldSchema)) {
             continue; // drop data
         }
         bool res = reader->open(dictName,
@@ -296,7 +296,7 @@ Fusion::openInputFieldReaders(const SchemaUtil::IndexIterator &index,
     for (auto &i : _oldIndexes) {
         OldIndex &oi = *i;
         const Schema &oldSchema = oi.getSchema();
-        if (!index.hasOldFields(oldSchema, false)) {
+        if (!index.hasOldFields(oldSchema)) {
             continue; // drop data
         }
         auto reader = FieldReader::allocFieldReader(index, oldSchema);
@@ -413,7 +413,7 @@ Fusion::ReadMappingFiles(const SchemaUtil::IndexIterator *index)
             wordNumMapping.noMappingFile();
             continue;
         }
-        if (index && !index->hasOldFields(oldSchema, false)) {
+        if (index && !index->hasOldFields(oldSchema)) {
             continue; // drop data
         }
 
diff --git a/searchlib/src/vespa/searchlib/index/schemautil.cpp b/searchlib/src/vespa/searchlib/index/schemautil.cpp
index 62f6cd08510..7f3b7c8c2a9 100644
--- a/searchlib/src/vespa/searchlib/index/schemautil.cpp
+++ b/searchlib/src/vespa/searchlib/index/schemautil.cpp
@@ -15,47 +15,21 @@ SchemaUtil::IndexSettings
 SchemaUtil::getIndexSettings(const Schema &schema,
                              const uint32_t index)
 {
-    IndexSettings ret;
     Schema::DataType indexDataType(DataType::STRING);
     bool error = false;
-    bool somePrefixes = false;
-    bool someNotPrefixes = false;
-    bool somePhrases = false;
-    bool someNotPhrases = false;
-    bool somePositions = false;
-    bool someNotPositions = false;
 
     const Schema::IndexField &iField = schema.getIndexField(index);
-    if (iField.hasPhrases()) {
-        somePhrases = true;
-    } else {
-        someNotPhrases = true;
-    }
-    if (iField.hasPrefix()) {
-        somePrefixes = true;
-    } else {
-        someNotPrefixes = true;
-    }
-    if (iField.hasPositions()) {
-        somePositions = true;
-    } else {
-        someNotPositions = true;
-    }
     indexDataType = iField.getDataType();
     if (indexDataType != DataType::STRING) {
         error = true;
         LOG(error, "Field %s has bad data type", iField.getName().c_str());
     }
 
-    return IndexSettings(indexDataType, error,
-                         somePrefixes && !someNotPrefixes,
-                         somePhrases && !someNotPhrases,
-                         somePositions && !someNotPositions);
+    return IndexSettings(indexDataType, error);
 }
 
 bool
-SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema,
-                                        bool phrases) const
+SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema) const
 {
     assert(isValid());
     const Schema::IndexField &newField =
@@ -70,15 +44,11 @@ SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema,
     if (oldField.getDataType() != newField.getDataType()) {
         return false;   // wrong data type
     }
-    if (!phrases) {
-        return true;
-    }
-    return oldField.hasPhrases();
+    return true;
 }
 
 bool
-SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema,
-        bool phrases) const
+SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema) const
 {
     assert(isValid());
     const Schema::IndexField &newField =
@@ -88,18 +58,13 @@ SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema,
     if (oldFieldId == Schema::UNKNOWN_FIELD_ID) {
         return false;
     }
-    if (phrases) {
-        IndexIterator oldIterator(oldSchema, oldFieldId);
-        IndexSettings settings = oldIterator.getIndexSettings();
-        if (!settings.hasPhrases()) {
-            return false;
-        }
-    }
     const Schema::IndexField &oldField =
         oldSchema.getIndexField(oldFieldId);
     if (oldField.getDataType() != newField.getDataType() ||
         oldField.getCollectionType() != newField.getCollectionType())
+    {
         return false;
+    }
     return true;
 }
 
@@ -113,32 +78,6 @@ SchemaUtil::validateIndexField(const Schema::IndexField &field)
             field.getName().c_str());
         ok = false;
     }
-    if (field.getDataType() != DataType::STRING) {
-        if (field.hasPrefix()) {
-            LOG(error,
-                "Field %s is non-string but has prefix",
-                field.getName().c_str());
-            ok = false;
-        }
-        if (field.hasPhrases()) {
-            LOG(error,
-                "Field %s is non-string but has phrases",
-                field.getName().c_str());
-            ok = false;
-        }
-        if (field.hasPositions()) {
-            LOG(error,
-                "Field %s is non-string but has positions",
-                field.getName().c_str());
-            ok = false;
-        }
-    }
-    if (field.hasPhrases() && !field.hasPositions()) {
-        LOG(error,
-            "Field %s has phrases but not positions",
-            field.getName().c_str());
-        ok = false;
-    }
     return ok;
 }
 
diff --git a/searchlib/src/vespa/searchlib/index/schemautil.h b/searchlib/src/vespa/searchlib/index/schemautil.h
index a678e335ebb..c8fe8e4fe32 100644
--- a/searchlib/src/vespa/searchlib/index/schemautil.h
+++ b/searchlib/src/vespa/searchlib/index/schemautil.h
@@ -12,9 +12,6 @@ public:
     class IndexSettings {
         schema::DataType _dataType;
         bool _error;        // Schema is bad.
-        bool _prefix;
-        bool _phrases;
-        bool _positions;
 
     public:
         const schema::DataType & getDataType() const {
@@ -22,36 +19,21 @@ public:
         }
 
         bool hasError() const { return _error; }
-        bool hasPrefix() const { return _prefix; }
-        bool hasPhrases() const { return _phrases; }
-        bool hasPositions() const { return _positions; }
 
         IndexSettings()
             : _dataType(schema::DataType::STRING),
-              _error(false),
-              _prefix(false),
-              _phrases(false),
-              _positions(false)
+              _error(false)
         { }
 
         IndexSettings(const IndexSettings &rhs)
             : _dataType(rhs._dataType),
-              _error(rhs._error),
-              _prefix(rhs._prefix),
-              _phrases(rhs._phrases),
-              _positions(rhs._positions)
+              _error(rhs._error)
         { }
 
         IndexSettings(schema::DataType dataType,
-                      bool error,
-                      bool prefix,
-                      bool phrases,
-                      bool positions)
+                      bool error)
             : _dataType(dataType),
-              _error(error),
-              _prefix(prefix),
-              _phrases(phrases),
-              _positions(positions)
+              _error(error)
         { }
 
         IndexSettings & operator=(const IndexSettings &rhs) {
@@ -63,9 +45,6 @@ public:
         void swap(IndexSettings &rhs) {
             std::swap(_dataType, rhs._dataType);
             std::swap(_error, rhs._error);
-            std::swap(_prefix, rhs._prefix);
-            std::swap(_phrases, rhs._phrases);
-            std::swap(_positions, rhs._positions);
         }
     };
 
@@ -121,13 +100,11 @@ public:
 
         /**
          * Return if old schema has at least one usable input field
-         * with matching data type.  If we want phrases then all input
-         * fields usable for terms must also be usable for phrases.
+         * with matching data type.
          *
          * @param oldSchema old schema, present in an input index
-         * @param phrases   ask for phrase files
          */
-        bool hasOldFields(const Schema &oldSchema, bool phrases) const;
+        bool hasOldFields(const Schema &oldSchema) const;
 
         /**
          * Return if fields in old schema matches fields in new
@@ -136,9 +113,8 @@ public:
          * also match between new and old schema.
          *
          * @param oldSchema old schema, present in an input index
-         * @param phrases   ask for phrase files
          */
-        bool hasMatchingOldFields(const Schema &oldSchema, bool phrases) const;
+        bool hasMatchingOldFields(const Schema &oldSchema) const;
     };
 
     static IndexSettings getIndexSettings(const Schema &schema, const uint32_t index);
author	Geir Storli <geirst@verizonmedia.com>	2019-05-03 14:09:13 +0200
committer	GitHub <noreply@github.com>	2019-05-03 14:09:13 +0200
commit	99f41741ca2784640ce1bec9e673355ab92e9d42 (patch)
tree	e18ba44053381584cdfdd96f8fe5db4b592259e2
parent	25cb895e18e9dd1f74b32896e41fe01b76ddb48e (diff)
parent	2b0d59378d4fc46bb4843ecfe897b808506a58c2 (diff)