diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-05-03 14:09:13 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-03 14:09:13 +0200 |
commit | 99f41741ca2784640ce1bec9e673355ab92e9d42 (patch) | |
tree | e18ba44053381584cdfdd96f8fe5db4b592259e2 | |
parent | 25cb895e18e9dd1f74b32896e41fe01b76ddb48e (diff) | |
parent | 2b0d59378d4fc46bb4843ecfe897b808506a58c2 (diff) |
Merge pull request #9264 from vespa-engine/geirst/experimental-posting-list-format-flag-in-backend
Geirst/experimental posting list format flag in backend
12 files changed, 220 insertions, 290 deletions
diff --git a/searchcommon/src/tests/schema/CMakeLists.txt b/searchcommon/src/tests/schema/CMakeLists.txt index a8e6ec311a3..aafe015d9a1 100644 --- a/searchcommon/src/tests/schema/CMakeLists.txt +++ b/searchcommon/src/tests/schema/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchcommon_schema_test_app TEST schema_test.cpp DEPENDS searchcommon + gtest ) vespa_add_test(NAME searchcommon_schema_test_app NO_VALGRIND COMMAND searchcommon_schema_test_app) diff --git a/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg b/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg index c0998bcf597..b6c547c52c9 100644 --- a/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg +++ b/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg @@ -5,9 +5,7 @@ indexfield[1].name b indexfield[1].datatype INT64 indexfield[2].name c indexfield[2].datatype STRING -indexfield[2].prefix true -indexfield[2].phrases false -indexfield[2].positions false +indexfield[2].experimentalpostinglistformat true fieldset[1] fieldset[0].name default fieldset[0].field[2] diff --git a/searchcommon/src/tests/schema/schema-without-index-field-properties.txt b/searchcommon/src/tests/schema/schema-without-index-field-properties.txt new file mode 100644 index 00000000000..4491b1242e0 --- /dev/null +++ b/searchcommon/src/tests/schema/schema-without-index-field-properties.txt @@ -0,0 +1,7 @@ +attributefield[0] +summaryfield[0] +fieldset[0] +indexfield[1] +indexfield[0].name foo +indexfield[0].datatype STRING +indexfield[0].collectiontype SINGLE diff --git a/searchcommon/src/tests/schema/schema_test.cpp b/searchcommon/src/tests/schema/schema_test.cpp index e9997c2e70d..e360ee1ba7a 100644 --- a/searchcommon/src/tests/schema/schema_test.cpp +++ b/searchcommon/src/tests/schema/schema_test.cpp @@ -1,10 +1,11 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include <vespa/vespalib/stllike/string.h> -#include <fstream> -#include <vespa/vespalib/testkit/test_kit.h> #include <vespa/config/common/configparser.h> -#include <vespa/searchcommon/common/schemaconfigurer.h> #include <vespa/searchcommon/common/schema.h> +#include <vespa/searchcommon/common/schemaconfigurer.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/stllike/string.h> +#include <fstream> + #include <vespa/log/log.h> LOG_SETUP("schema_test"); @@ -15,63 +16,70 @@ namespace search::index { using schema::DataType; using schema::CollectionType; using SIAF = Schema::ImportedAttributeField; +using SIF = Schema::IndexField; -void assertField(const Schema::Field & exp, const Schema::Field & act) { - EXPECT_EQUAL(exp.getName(), act.getName()); - EXPECT_EQUAL(exp.getDataType(), act.getDataType()); - EXPECT_EQUAL(exp.getCollectionType(), act.getCollectionType()); +void +assertField(const Schema::Field& exp, const Schema::Field& act) +{ + EXPECT_EQ(exp.getName(), act.getName()); + EXPECT_EQ(exp.getDataType(), act.getDataType()); + EXPECT_EQ(exp.getCollectionType(), act.getCollectionType()); } -void assertIndexField(const Schema::IndexField & exp, - const Schema::IndexField & act) +void +assertIndexField(const Schema::IndexField& exp, + const Schema::IndexField& act) { assertField(exp, act); - EXPECT_EQUAL(exp.hasPrefix(), act.hasPrefix()); - EXPECT_EQUAL(exp.hasPhrases(), act.hasPhrases()); - EXPECT_EQUAL(exp.hasPositions(), act.hasPositions()); + EXPECT_EQ(exp.getAvgElemLen(), act.getAvgElemLen()); + EXPECT_EQ(exp.use_experimental_posting_list_format(), act.use_experimental_posting_list_format()); } -void assertSet(const Schema::FieldSet &exp, - const Schema::FieldSet &act) +void +assertSet(const Schema::FieldSet& exp, + const Schema::FieldSet& act) { - EXPECT_EQUAL(exp.getName(), act.getName()); - ASSERT_EQUAL(exp.getFields().size(), act.getFields().size()); + EXPECT_EQ(exp.getName(), act.getName()); + ASSERT_EQ(exp.getFields().size(), 
act.getFields().size()); for (size_t i = 0; i < exp.getFields().size(); ++i) { - EXPECT_EQUAL(exp.getFields()[i], act.getFields()[i]); + EXPECT_EQ(exp.getFields()[i], act.getFields()[i]); } } -void assertSchema(const Schema & exp, const Schema & act) { - ASSERT_EQUAL(exp.getNumIndexFields(), act.getNumIndexFields()); +void +assertSchema(const Schema& exp, const Schema& act) +{ + ASSERT_EQ(exp.getNumIndexFields(), act.getNumIndexFields()); for (size_t i = 0; i < exp.getNumIndexFields(); ++i) { assertIndexField(exp.getIndexField(i), act.getIndexField(i)); } - ASSERT_EQUAL(exp.getNumAttributeFields(), act.getNumAttributeFields()); + ASSERT_EQ(exp.getNumAttributeFields(), act.getNumAttributeFields()); for (size_t i = 0; i < exp.getNumAttributeFields(); ++i) { assertField(exp.getAttributeField(i), act.getAttributeField(i)); } - ASSERT_EQUAL(exp.getNumSummaryFields(), act.getNumSummaryFields()); + ASSERT_EQ(exp.getNumSummaryFields(), act.getNumSummaryFields()); for (size_t i = 0; i < exp.getNumSummaryFields(); ++i) { assertField(exp.getSummaryField(i), act.getSummaryField(i)); } - ASSERT_EQUAL(exp.getNumFieldSets(), act.getNumFieldSets()); + ASSERT_EQ(exp.getNumFieldSets(), act.getNumFieldSets()); for (size_t i = 0; i < exp.getNumFieldSets(); ++i) { assertSet(exp.getFieldSet(i), act.getFieldSet(i)); } const auto &expImported = exp.getImportedAttributeFields(); const auto &actImported = act.getImportedAttributeFields(); - ASSERT_EQUAL(expImported.size(), actImported.size()); + ASSERT_EQ(expImported.size(), actImported.size()); for (size_t i = 0; i < expImported.size(); ++i) { assertField(expImported[i], actImported[i]); } } -TEST("testBasic") { +TEST(SchemaTest, test_basic) +{ Schema s; - EXPECT_EQUAL(0u, s.getNumIndexFields()); - EXPECT_EQUAL(0u, s.getNumAttributeFields()); - EXPECT_EQUAL(0u, s.getNumSummaryFields()); - EXPECT_EQUAL(0u, s.getNumImportedAttributeFields()); + EXPECT_EQ(0u, s.getNumIndexFields()); + EXPECT_EQ(0u, s.getNumAttributeFields()); + EXPECT_EQ(0u, 
s.getNumSummaryFields()); + EXPECT_EQ(0u, s.getNumImportedAttributeFields()); s.addIndexField(Schema::IndexField("foo", DataType::STRING)); s.addIndexField(Schema::IndexField("bar", DataType::INT32)); @@ -89,100 +97,95 @@ TEST("testBasic") { s.addImportedAttributeField(SIAF("imported", DataType::INT32)); - EXPECT_EQUAL(2u, s.getNumIndexFields()); + ASSERT_EQ(2u, s.getNumIndexFields()); { - EXPECT_EQUAL("foo", s.getIndexField(0).getName()); - EXPECT_EQUAL(DataType::STRING, s.getIndexField(0).getDataType()); - EXPECT_EQUAL(CollectionType::SINGLE, s.getIndexField(0).getCollectionType()); - EXPECT_TRUE(!s.getIndexField(0).hasPrefix()); - EXPECT_TRUE(!s.getIndexField(0).hasPhrases()); - EXPECT_TRUE(s.getIndexField(0).hasPositions()); - - EXPECT_EQUAL("bar", s.getIndexField(1).getName()); - EXPECT_EQUAL(DataType::INT32, s.getIndexField(1).getDataType()); - EXPECT_EQUAL(CollectionType::SINGLE, s.getIndexField(1).getCollectionType()); - - EXPECT_EQUAL(0u, s.getIndexFieldId("foo")); - EXPECT_EQUAL(1u, s.getIndexFieldId("bar")); - EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("cox")); + EXPECT_EQ("foo", s.getIndexField(0).getName()); + EXPECT_EQ(DataType::STRING, s.getIndexField(0).getDataType()); + EXPECT_EQ(CollectionType::SINGLE, s.getIndexField(0).getCollectionType()); + + EXPECT_EQ("bar", s.getIndexField(1).getName()); + EXPECT_EQ(DataType::INT32, s.getIndexField(1).getDataType()); + EXPECT_EQ(CollectionType::SINGLE, s.getIndexField(1).getCollectionType()); + + EXPECT_EQ(0u, s.getIndexFieldId("foo")); + EXPECT_EQ(1u, s.getIndexFieldId("bar")); + EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("cox")); } - EXPECT_EQUAL(3u, s.getNumAttributeFields()); + ASSERT_EQ(3u, s.getNumAttributeFields()); { - EXPECT_EQUAL("foo", s.getAttributeField(0).getName()); - EXPECT_EQUAL(DataType::STRING, s.getAttributeField(0).getDataType()); - EXPECT_EQUAL(CollectionType::ARRAY, s.getAttributeField(0).getCollectionType()); - - EXPECT_EQUAL("bar", 
s.getAttributeField(1).getName()); - EXPECT_EQUAL(DataType::INT32, s.getAttributeField(1).getDataType()); - EXPECT_EQUAL(CollectionType::WEIGHTEDSET, s.getAttributeField(1).getCollectionType()); - - EXPECT_EQUAL("cox", s.getAttributeField(2).getName()); - EXPECT_EQUAL(DataType::STRING, s.getAttributeField(2).getDataType()); - EXPECT_EQUAL(CollectionType::SINGLE, s.getAttributeField(2).getCollectionType()); - - EXPECT_EQUAL(0u, s.getAttributeFieldId("foo")); - EXPECT_EQUAL(1u, s.getAttributeFieldId("bar")); - EXPECT_EQUAL(2u, s.getAttributeFieldId("cox")); - EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("fox")); + EXPECT_EQ("foo", s.getAttributeField(0).getName()); + EXPECT_EQ(DataType::STRING, s.getAttributeField(0).getDataType()); + EXPECT_EQ(CollectionType::ARRAY, s.getAttributeField(0).getCollectionType()); + + EXPECT_EQ("bar", s.getAttributeField(1).getName()); + EXPECT_EQ(DataType::INT32, s.getAttributeField(1).getDataType()); + EXPECT_EQ(CollectionType::WEIGHTEDSET, s.getAttributeField(1).getCollectionType()); + + EXPECT_EQ("cox", s.getAttributeField(2).getName()); + EXPECT_EQ(DataType::STRING, s.getAttributeField(2).getDataType()); + EXPECT_EQ(CollectionType::SINGLE, s.getAttributeField(2).getCollectionType()); + + EXPECT_EQ(0u, s.getAttributeFieldId("foo")); + EXPECT_EQ(1u, s.getAttributeFieldId("bar")); + EXPECT_EQ(2u, s.getAttributeFieldId("cox")); + EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("fox")); } - EXPECT_EQUAL(4u, s.getNumSummaryFields()); + ASSERT_EQ(4u, s.getNumSummaryFields()); { - EXPECT_EQUAL("foo", s.getSummaryField(0).getName()); - EXPECT_EQUAL(DataType::STRING, s.getSummaryField(0).getDataType()); - EXPECT_EQUAL(CollectionType::ARRAY, s.getSummaryField(0).getCollectionType()); - - EXPECT_EQUAL("bar", s.getSummaryField(1).getName()); - EXPECT_EQUAL(DataType::INT32, s.getSummaryField(1).getDataType()); - EXPECT_EQUAL(CollectionType::WEIGHTEDSET, s.getSummaryField(1).getCollectionType()); - - EXPECT_EQUAL("cox", 
s.getSummaryField(2).getName()); - EXPECT_EQUAL(DataType::STRING, s.getSummaryField(2).getDataType()); - EXPECT_EQUAL(CollectionType::SINGLE, s.getSummaryField(2).getCollectionType()); - - EXPECT_EQUAL("fox", s.getSummaryField(3).getName()); - EXPECT_EQUAL(DataType::RAW, s.getSummaryField(3).getDataType()); - EXPECT_EQUAL(CollectionType::SINGLE, s.getSummaryField(3).getCollectionType()); - - EXPECT_EQUAL(0u, s.getSummaryFieldId("foo")); - EXPECT_EQUAL(1u, s.getSummaryFieldId("bar")); - EXPECT_EQUAL(2u, s.getSummaryFieldId("cox")); - EXPECT_EQUAL(3u, s.getSummaryFieldId("fox")); - EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getSummaryFieldId("not")); + EXPECT_EQ("foo", s.getSummaryField(0).getName()); + EXPECT_EQ(DataType::STRING, s.getSummaryField(0).getDataType()); + EXPECT_EQ(CollectionType::ARRAY, s.getSummaryField(0).getCollectionType()); + + EXPECT_EQ("bar", s.getSummaryField(1).getName()); + EXPECT_EQ(DataType::INT32, s.getSummaryField(1).getDataType()); + EXPECT_EQ(CollectionType::WEIGHTEDSET, s.getSummaryField(1).getCollectionType()); + + EXPECT_EQ("cox", s.getSummaryField(2).getName()); + EXPECT_EQ(DataType::STRING, s.getSummaryField(2).getDataType()); + EXPECT_EQ(CollectionType::SINGLE, s.getSummaryField(2).getCollectionType()); + + EXPECT_EQ("fox", s.getSummaryField(3).getName()); + EXPECT_EQ(DataType::RAW, s.getSummaryField(3).getDataType()); + EXPECT_EQ(CollectionType::SINGLE, s.getSummaryField(3).getCollectionType()); + + EXPECT_EQ(0u, s.getSummaryFieldId("foo")); + EXPECT_EQ(1u, s.getSummaryFieldId("bar")); + EXPECT_EQ(2u, s.getSummaryFieldId("cox")); + EXPECT_EQ(3u, s.getSummaryFieldId("fox")); + EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getSummaryFieldId("not")); } - EXPECT_EQUAL(1u, s.getNumFieldSets()); + ASSERT_EQ(1u, s.getNumFieldSets()); { - EXPECT_EQUAL("default", s.getFieldSet(0).getName()); - EXPECT_EQUAL(2u, s.getFieldSet(0).getFields().size()); - EXPECT_EQUAL("foo", s.getFieldSet(0).getFields()[0]); - EXPECT_EQUAL("bar", 
s.getFieldSet(0).getFields()[1]); + EXPECT_EQ("default", s.getFieldSet(0).getName()); + EXPECT_EQ(2u, s.getFieldSet(0).getFields().size()); + EXPECT_EQ("foo", s.getFieldSet(0).getFields()[0]); + EXPECT_EQ("bar", s.getFieldSet(0).getFields()[1]); } - EXPECT_EQUAL(1u, s.getNumImportedAttributeFields()); + EXPECT_EQ(1u, s.getNumImportedAttributeFields()); { const auto &imported = s.getImportedAttributeFields(); - EXPECT_EQUAL(1u, imported.size()); - TEST_DO(assertField(SIAF("imported", DataType::INT32, CollectionType::SINGLE), imported[0])); + EXPECT_EQ(1u, imported.size()); + assertField(SIAF("imported", DataType::INT32, CollectionType::SINGLE), imported[0]); } } -TEST("testLoadAndSave") { - using SIF = Schema::IndexField; +TEST(SchemaTest, test_load_and_save) +{ using SAF = Schema::AttributeField; using SSF = Schema::SummaryField; using SDT = schema::DataType; using SCT = schema::CollectionType; - typedef Schema::FieldSet SFS; + using SFS = Schema::FieldSet; { // load from config -> save to file -> load from file Schema s; - SchemaConfigurer configurer(s, "dir:" + TEST_PATH("load-save-cfg")); - EXPECT_EQUAL(3u, s.getNumIndexFields()); + SchemaConfigurer configurer(s, "dir:load-save-cfg"); + EXPECT_EQ(3u, s.getNumIndexFields()); assertIndexField(SIF("a", SDT::STRING), s.getIndexField(0)); assertIndexField(SIF("b", SDT::INT64), s.getIndexField(1)); - assertIndexField(SIF("c", SDT::STRING).setPrefix(true) - .setPhrases(false).setPositions(false), - s.getIndexField(2)); + assertIndexField(SIF("c", SDT::STRING).set_experimental_posting_list_format(true), s.getIndexField(2)); - EXPECT_EQUAL(9u, s.getNumAttributeFields()); + EXPECT_EQ(9u, s.getNumAttributeFields()); assertField(SAF("a", SDT::STRING, SCT::SINGLE), s.getAttributeField(0)); assertField(SAF("b", SDT::INT8, SCT::ARRAY), s.getAttributeField(1)); @@ -195,7 +198,7 @@ TEST("testLoadAndSave") { assertField(SAF("h", SDT::BOOLEANTREE), s.getAttributeField(7)); assertField(SAF("i", SDT::TENSOR), 
s.getAttributeField(8)); - EXPECT_EQUAL(12u, s.getNumSummaryFields()); + EXPECT_EQ(12u, s.getNumSummaryFields()); assertField(SSF("a", SDT::INT8), s.getSummaryField(0)); assertField(SSF("b", SDT::INT16), s.getSummaryField(1)); assertField(SSF("c", SDT::INT32), s.getSummaryField(2)); @@ -209,7 +212,7 @@ TEST("testLoadAndSave") { assertField(SSF("k", SDT::RAW), s.getSummaryField(10)); assertField(SSF("l", SDT::RAW), s.getSummaryField(11)); - EXPECT_EQUAL(1u, s.getNumFieldSets()); + EXPECT_EQ(1u, s.getNumFieldSets()); assertSet(SFS("default").addField("a").addField("c"), s.getFieldSet(0)); @@ -240,7 +243,8 @@ TEST("testLoadAndSave") { } } -TEST("require that schema can save and load timestamps for fields") { +TEST(SchemaTest, require_that_schema_can_save_and_load_timestamps_for_fields) +{ const fastos::TimeStamp timestamp(42); const std::string file_name = "schema-with-timestamps.txt"; Schema s; @@ -250,11 +254,12 @@ TEST("require that schema can save and load timestamps for fields") { ASSERT_TRUE(s.saveToFile(file_name)); Schema s2; ASSERT_TRUE(s2.loadFromFile(file_name)); - ASSERT_EQUAL(1u, s2.getNumIndexFields()); - ASSERT_EQUAL(timestamp, s2.getIndexField(0).getTimestamp()); + ASSERT_EQ(1u, s2.getNumIndexFields()); + EXPECT_EQ(timestamp, s2.getIndexField(0).getTimestamp()); } -TEST("require that timestamps are omitted when 0.") { +TEST(SchemaTest, require_that_timestamps_are_omitted_when_0) +{ const std::string file_name = "schema-without-timestamps.txt"; Schema s; s.addIndexField(Schema::IndexField("foo", DataType::STRING)); @@ -265,16 +270,18 @@ TEST("require that timestamps are omitted when 0.") { while (file) { std::string line; getline(file, line); - EXPECT_NOT_EQUAL("indexfield[0].timestamp 0", line); + EXPECT_NE("indexfield[0].timestamp 0", line); } Schema s2; ASSERT_TRUE(s2.loadFromFile(file_name)); - ASSERT_EQUAL(1u, s2.getNumIndexFields()); + ASSERT_EQ(1u, s2.getNumIndexFields()); } -void addAllFieldTypes(const string &name, Schema &schema, - 
fastos::TimeStamp timestamp) { +void +addAllFieldTypes(const string& name, Schema& schema, + fastos::TimeStamp timestamp) +{ Schema::IndexField index_field(name, DataType::STRING); index_field.setTimestamp(timestamp); schema.addIndexField(index_field); @@ -290,7 +297,8 @@ void addAllFieldTypes(const string &name, Schema &schema, schema.addFieldSet(Schema::FieldSet(name)); } -TEST("require that schemas can be added") { +TEST(SchemaTest, require_that_schemas_can_be_added) +{ const string name1 = "foo"; const string name2 = "bar"; const fastos::TimeStamp timestamp1(42); @@ -301,29 +309,30 @@ TEST("require that schemas can be added") { addAllFieldTypes(name2, s2, timestamp2); Schema::UP sum = Schema::make_union(s1, s2); - ASSERT_EQUAL(2u, sum->getNumIndexFields()); + ASSERT_EQ(2u, sum->getNumIndexFields()); EXPECT_TRUE(s1.getIndexField(0) == sum->getIndexField(sum->getIndexFieldId(name1))); EXPECT_TRUE(s2.getIndexField(0) == sum->getIndexField(sum->getIndexFieldId(name2))); - ASSERT_EQUAL(2u, sum->getNumAttributeFields()); + ASSERT_EQ(2u, sum->getNumAttributeFields()); EXPECT_TRUE(s1.getAttributeField(0) == sum->getAttributeField(sum->getAttributeFieldId(name1))); EXPECT_TRUE(s2.getAttributeField(0) == sum->getAttributeField(sum->getAttributeFieldId(name2))); - ASSERT_EQUAL(2u, sum->getNumSummaryFields()); + ASSERT_EQ(2u, sum->getNumSummaryFields()); EXPECT_TRUE(s1.getSummaryField(0) == sum->getSummaryField(sum->getSummaryFieldId(name1))); EXPECT_TRUE(s2.getSummaryField(0) == sum->getSummaryField(sum->getSummaryFieldId(name2))); - ASSERT_EQUAL(2u, sum->getNumFieldSets()); + ASSERT_EQ(2u, sum->getNumFieldSets()); EXPECT_TRUE(s1.getFieldSet(0) == sum->getFieldSet(sum->getFieldSetId(name1))); EXPECT_TRUE(s2.getFieldSet(0) == sum->getFieldSet(sum->getFieldSetId(name2))); } -TEST("require that S union S = S for schema S") { +TEST(SchemaTest, require_that_S_union_S_equals_S_for_schema_S) +{ Schema schema; addAllFieldTypes("foo", schema, 42); @@ -331,7 +340,8 @@ TEST("require 
that S union S = S for schema S") { EXPECT_TRUE(schema == *sum); } -TEST("require that schema can calculate set_difference") { +TEST(SchemaTest, require_that_schema_can_calculate_set_difference) +{ const string name1 = "foo"; const string name2 = "bar"; const fastos::TimeStamp timestamp1(42); @@ -349,7 +359,8 @@ TEST("require that schema can calculate set_difference") { EXPECT_TRUE(expected == *schema); } -TEST("require that getOldFields returns a subset of a schema") { +TEST(SchemaTest, require_that_get_old_fields_returns_a_subset_of_a_schema) +{ Schema schema; const int64_t limit_timestamp = 1000; @@ -359,13 +370,14 @@ TEST("require that getOldFields returns a subset of a schema") { Schema::UP old_fields = schema.getOldFields(fastos::TimeStamp(limit_timestamp)); - EXPECT_EQUAL(1u, old_fields->getNumIndexFields()); - EXPECT_EQUAL("bar", old_fields->getIndexField(0).getName()); - EXPECT_EQUAL(1u, old_fields->getNumAttributeFields()); - EXPECT_EQUAL(1u, old_fields->getNumSummaryFields()); + EXPECT_EQ(1u, old_fields->getNumIndexFields()); + EXPECT_EQ("bar", old_fields->getIndexField(0).getName()); + EXPECT_EQ(1u, old_fields->getNumAttributeFields()); + EXPECT_EQ(1u, old_fields->getNumSummaryFields()); } -TEST("require that schema can calculate intersection") { +TEST(SchemaTest, require_that_schema_can_calculate_intersection) +{ const string name1 = "foo"; const string name2 = "bar"; const string name3 = "baz"; @@ -385,18 +397,19 @@ TEST("require that schema can calculate intersection") { EXPECT_TRUE(expected == *schema); } -TEST("require that incompatible fields are removed from intersection") { +TEST(SchemaTest, require_that_incompatible_fields_are_removed_from_intersection) +{ const string name = "foo"; Schema s1; s1.addIndexField(Schema::IndexField(name, DataType::STRING)); Schema s2; s2.addIndexField(Schema::IndexField(name, DataType::INT32)); Schema::UP schema = Schema::intersect(s1, s2); - EXPECT_EQUAL(0u, schema->getNumIndexFields()); + EXPECT_EQ(0u, 
schema->getNumIndexFields()); EXPECT_FALSE(schema->isIndexField(name)); } -TEST("require that imported attribute fields are not saved to disk") +TEST(SchemaTest, require_that_imported_attribute_fields_are_not_saved_to_disk) { const vespalib::string fileName = "schema-no-imported-fields.txt"; { @@ -407,25 +420,39 @@ TEST("require that imported attribute fields are not saved to disk") { Schema s; s.loadFromFile(fileName); - EXPECT_EQUAL(0u, s.getNumImportedAttributeFields()); + EXPECT_EQ(0u, s.getNumImportedAttributeFields()); } } -TEST("require that schema can be built with imported attribute fields") +TEST(SchemaTest, require_that_schema_can_be_built_with_imported_attribute_fields) { Schema s; - SchemaConfigurer configurer(s, "dir:" + TEST_PATH("imported-fields-cfg")); + SchemaConfigurer configurer(s, "dir:imported-fields-cfg"); const auto &imported = s.getImportedAttributeFields(); - EXPECT_EQUAL(2u, imported.size()); - TEST_DO(assertField(SIAF("imported_a", DataType::INT32, CollectionType::SINGLE), imported[0])); - TEST_DO(assertField(SIAF("imported_b", DataType::STRING, CollectionType::ARRAY), imported[1])); + ASSERT_EQ(2u, imported.size()); + assertField(SIAF("imported_a", DataType::INT32, CollectionType::SINGLE), imported[0]); + assertField(SIAF("imported_b", DataType::STRING, CollectionType::ARRAY), imported[1]); const auto &regular = s.getAttributeFields(); - EXPECT_EQUAL(1u, regular.size()); - TEST_DO(assertField(SIAF("regular", DataType::INT32, CollectionType::SINGLE), regular[0])); + ASSERT_EQ(1u, regular.size()); + assertField(SIAF("regular", DataType::INT32, CollectionType::SINGLE), regular[0]); +} + +TEST(SchemaTest, require_that_index_field_is_loaded_with_default_values_when_properties_are_not_set) +{ + Schema s; + s.loadFromFile("schema-without-index-field-properties.txt"); + + const auto& index_fields = s.getIndexFields(); + ASSERT_EQ(1, index_fields.size()); + assertIndexField(SIF("foo", DataType::STRING, CollectionType::SINGLE). + setAvgElemLen(512). 
+ set_experimental_posting_list_format(false), + index_fields[0]); + assertIndexField(SIF("foo", DataType::STRING, CollectionType::SINGLE), index_fields[0]); } } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcommon/src/vespa/searchcommon/common/schema.cpp b/searchcommon/src/vespa/searchcommon/common/schema.cpp index cef74409024..6d3bae31508 100644 --- a/searchcommon/src/vespa/searchcommon/common/schema.cpp +++ b/searchcommon/src/vespa/searchcommon/common/schema.cpp @@ -131,29 +131,23 @@ Schema::Field::operator!=(const Field &rhs) const Schema::IndexField::IndexField(vespalib::stringref name, DataType dt) : Field(name, dt), - _prefix(false), - _phrases(false), - _positions(true), - _avgElemLen(512) + _avgElemLen(512), + _experimental_posting_list_format(false) { } Schema::IndexField::IndexField(vespalib::stringref name, DataType dt, CollectionType ct) : Field(name, dt, ct), - _prefix(false), - _phrases(false), - _positions(true), - _avgElemLen(512) + _avgElemLen(512), + _experimental_posting_list_format(false) { } Schema::IndexField::IndexField(const std::vector<vespalib::string> &lines) : Field(lines), - _prefix(ConfigParser::parse<bool>("prefix", lines)), - _phrases(ConfigParser::parse<bool>("phrases", lines)), - _positions(ConfigParser::parse<bool>("positions", lines)), - _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines)) + _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines, 512)), + _experimental_posting_list_format(ConfigParser::parse<bool>("experimentalpostinglistformat", lines, false)) { } @@ -161,30 +155,29 @@ void Schema::IndexField::write(vespalib::asciistream & os, vespalib::stringref prefix) const { Field::write(os, prefix); - os << prefix << "prefix " << (_prefix ? "true" : "false") << "\n"; - os << prefix << "phrases " << (_phrases ? "true" : "false") << "\n"; - os << prefix << "positions " << (_positions ? 
"true" : "false") << "\n"; os << prefix << "averageelementlen " << static_cast<int32_t>(_avgElemLen) << "\n"; + os << prefix << "experimentalpostinglistformat " << (_experimental_posting_list_format ? "true" : "false") << "\n"; + + // TODO: Remove prefix, phrases and positions when breaking downgrade is no longer an issue. + os << prefix << "prefix false" << "\n"; + os << prefix << "phrases false" << "\n"; + os << prefix << "positions true" << "\n"; } bool Schema::IndexField::operator==(const IndexField &rhs) const { return Field::operator==(rhs) && - _prefix == rhs._prefix && - _phrases == rhs._phrases && - _positions == rhs._positions && - _avgElemLen == rhs._avgElemLen; + _avgElemLen == rhs._avgElemLen && + _experimental_posting_list_format == rhs._experimental_posting_list_format; } bool Schema::IndexField::operator!=(const IndexField &rhs) const { return Field::operator!=(rhs) || - _prefix != rhs._prefix || - _phrases != rhs._phrases || - _positions != rhs._positions || - _avgElemLen != rhs._avgElemLen; + _avgElemLen != rhs._avgElemLen || + _experimental_posting_list_format != rhs._experimental_posting_list_format; } Schema::FieldSet::FieldSet(const std::vector<vespalib::string> & lines) : @@ -337,9 +330,6 @@ cloneIndexField(const Schema::IndexField &field, return Schema::IndexField(field.getName() + suffix, field.getDataType(), field.getCollectionType()). - setPrefix(field.hasPrefix()). - setPhrases(field.hasPhrases()). - setPositions(field.hasPositions()). setAvgElemLen(field.getAvgElemLen()); } diff --git a/searchcommon/src/vespa/searchcommon/common/schema.h b/searchcommon/src/vespa/searchcommon/common/schema.h index 90cf099f2d8..bb2163e5577 100644 --- a/searchcommon/src/vespa/searchcommon/common/schema.h +++ b/searchcommon/src/vespa/searchcommon/common/schema.h @@ -74,12 +74,11 @@ public: * A representation of an index field with extra information on * how the index should be generated. 
**/ - class IndexField : public Field - { - bool _prefix; - bool _phrases; - bool _positions; + class IndexField : public Field { + private: uint32_t _avgElemLen; + // TODO: Remove when experimental posting list format is made default + bool _experimental_posting_list_format; public: IndexField(vespalib::stringref name, DataType dt); @@ -89,21 +88,17 @@ public: **/ IndexField(const std::vector<vespalib::string> &lines); - IndexField &setPrefix(bool value) { _prefix = value; return *this; } - IndexField &setPhrases(bool value) { _phrases = value; return *this; } - IndexField &setPositions(bool value) - { _positions = value; return *this; } - IndexField &setAvgElemLen(uint32_t avgElemLen) - { _avgElemLen = avgElemLen; return *this; } + IndexField &setAvgElemLen(uint32_t avgElemLen) { _avgElemLen = avgElemLen; return *this; } + IndexField &set_experimental_posting_list_format(bool value) { + _experimental_posting_list_format = value; + return *this; + } - void - write(vespalib::asciistream &os, - vespalib::stringref prefix) const override; + void write(vespalib::asciistream &os, + vespalib::stringref prefix) const override; - bool hasPrefix() const { return _prefix; } - bool hasPhrases() const { return _phrases; } - bool hasPositions() const { return _positions; } uint32_t getAvgElemLen() const { return _avgElemLen; } + bool use_experimental_posting_list_format() const { return _experimental_posting_list_format; } bool operator==(const IndexField &rhs) const; bool operator!=(const IndexField &rhs) const; diff --git a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp index d56f3c747c1..59ed15eefb0 100644 --- a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp +++ b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp @@ -144,10 +144,8 @@ SchemaBuilder::build(const IndexschemaConfig &cfg, Schema &schema) const IndexschemaConfig::Indexfield & f = cfg.indexfield[i]; 
schema.addIndexField(Schema::IndexField(f.name, convertIndexDataType(f.datatype), convertIndexCollectionType(f.collectiontype)). - setPrefix(f.prefix). - setPhrases(f.phrases). - setPositions(f.positions). - setAvgElemLen(f.averageelementlen)); + setAvgElemLen(f.averageelementlen). + set_experimental_posting_list_format(f.experimentalpostinglistformat)); } for (size_t i = 0; i < cfg.fieldset.size(); ++i) { const IndexschemaConfig::Fieldset &fs = cfg.fieldset[i]; diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index 64a54187254..b6d843e4e3c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -182,9 +182,8 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch, if (settings.hasError()) { return false; } - bool hasPhraseOcc = settings.hasPhrases(); SchemaUtil::IndexIterator oItr(oldSchema, itr); - if (!itr.hasMatchingOldFields(oldSchema, hasPhraseOcc) || !oItr.isValid()) { + if (!itr.hasMatchingOldFields(oldSchema) || !oItr.isValid()) { if (!openField(fieldDir, tuneFileSearch)) { return false; } diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp index a41f0412294..8da590654da 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp @@ -199,10 +199,10 @@ FieldReader::allocFieldReader(const SchemaUtil::IndexIterator &index, const Schema &oldSchema) { assert(index.isValid()); - if (index.hasMatchingOldFields(oldSchema, false)) { + if (index.hasMatchingOldFields(oldSchema)) { return std::make_unique<FieldReader>(); // The common case } - if (!index.hasOldFields(oldSchema, false)) { + if (!index.hasOldFields(oldSchema)) { return std::make_unique<FieldReaderEmpty>(index); // drop data } // field exists in old schema with different collection type setting diff --git 
a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp index fc198e3b74e..ed311b682e6 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp @@ -102,7 +102,7 @@ Fusion::openInputWordReaders(const SchemaUtil::IndexIterator &index, vespalib::string fieldDir(oldindexpath + "/" + index.getName()); vespalib::string dictName(fieldDir + "/dictionary"); const Schema &oldSchema = oi.getSchema(); - if (!index.hasOldFields(oldSchema, false)) { + if (!index.hasOldFields(oldSchema)) { continue; // drop data } bool res = reader->open(dictName, @@ -296,7 +296,7 @@ Fusion::openInputFieldReaders(const SchemaUtil::IndexIterator &index, for (auto &i : _oldIndexes) { OldIndex &oi = *i; const Schema &oldSchema = oi.getSchema(); - if (!index.hasOldFields(oldSchema, false)) { + if (!index.hasOldFields(oldSchema)) { continue; // drop data } auto reader = FieldReader::allocFieldReader(index, oldSchema); @@ -413,7 +413,7 @@ Fusion::ReadMappingFiles(const SchemaUtil::IndexIterator *index) wordNumMapping.noMappingFile(); continue; } - if (index && !index->hasOldFields(oldSchema, false)) { + if (index && !index->hasOldFields(oldSchema)) { continue; // drop data } diff --git a/searchlib/src/vespa/searchlib/index/schemautil.cpp b/searchlib/src/vespa/searchlib/index/schemautil.cpp index 62f6cd08510..7f3b7c8c2a9 100644 --- a/searchlib/src/vespa/searchlib/index/schemautil.cpp +++ b/searchlib/src/vespa/searchlib/index/schemautil.cpp @@ -15,47 +15,21 @@ SchemaUtil::IndexSettings SchemaUtil::getIndexSettings(const Schema &schema, const uint32_t index) { - IndexSettings ret; Schema::DataType indexDataType(DataType::STRING); bool error = false; - bool somePrefixes = false; - bool someNotPrefixes = false; - bool somePhrases = false; - bool someNotPhrases = false; - bool somePositions = false; - bool someNotPositions = false; const Schema::IndexField &iField = schema.getIndexField(index); - 
if (iField.hasPhrases()) { - somePhrases = true; - } else { - someNotPhrases = true; - } - if (iField.hasPrefix()) { - somePrefixes = true; - } else { - someNotPrefixes = true; - } - if (iField.hasPositions()) { - somePositions = true; - } else { - someNotPositions = true; - } indexDataType = iField.getDataType(); if (indexDataType != DataType::STRING) { error = true; LOG(error, "Field %s has bad data type", iField.getName().c_str()); } - return IndexSettings(indexDataType, error, - somePrefixes && !someNotPrefixes, - somePhrases && !someNotPhrases, - somePositions && !someNotPositions); + return IndexSettings(indexDataType, error); } bool -SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema, - bool phrases) const +SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema) const { assert(isValid()); const Schema::IndexField &newField = @@ -70,15 +44,11 @@ SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema, if (oldField.getDataType() != newField.getDataType()) { return false; // wrong data type } - if (!phrases) { - return true; - } - return oldField.hasPhrases(); + return true; } bool -SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema, - bool phrases) const +SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema) const { assert(isValid()); const Schema::IndexField &newField = @@ -88,18 +58,13 @@ SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema, if (oldFieldId == Schema::UNKNOWN_FIELD_ID) { return false; } - if (phrases) { - IndexIterator oldIterator(oldSchema, oldFieldId); - IndexSettings settings = oldIterator.getIndexSettings(); - if (!settings.hasPhrases()) { - return false; - } - } const Schema::IndexField &oldField = oldSchema.getIndexField(oldFieldId); if (oldField.getDataType() != newField.getDataType() || oldField.getCollectionType() != newField.getCollectionType()) + { return false; + } return true; } @@ -113,32 +78,6 @@ SchemaUtil::validateIndexField(const 
Schema::IndexField &field) field.getName().c_str()); ok = false; } - if (field.getDataType() != DataType::STRING) { - if (field.hasPrefix()) { - LOG(error, - "Field %s is non-string but has prefix", - field.getName().c_str()); - ok = false; - } - if (field.hasPhrases()) { - LOG(error, - "Field %s is non-string but has phrases", - field.getName().c_str()); - ok = false; - } - if (field.hasPositions()) { - LOG(error, - "Field %s is non-string but has positions", - field.getName().c_str()); - ok = false; - } - } - if (field.hasPhrases() && !field.hasPositions()) { - LOG(error, - "Field %s has phrases but not positions", - field.getName().c_str()); - ok = false; - } return ok; } diff --git a/searchlib/src/vespa/searchlib/index/schemautil.h b/searchlib/src/vespa/searchlib/index/schemautil.h index a678e335ebb..c8fe8e4fe32 100644 --- a/searchlib/src/vespa/searchlib/index/schemautil.h +++ b/searchlib/src/vespa/searchlib/index/schemautil.h @@ -12,9 +12,6 @@ public: class IndexSettings { schema::DataType _dataType; bool _error; // Schema is bad. 
- bool _prefix; - bool _phrases; - bool _positions; public: const schema::DataType & getDataType() const { @@ -22,36 +19,21 @@ public: } bool hasError() const { return _error; } - bool hasPrefix() const { return _prefix; } - bool hasPhrases() const { return _phrases; } - bool hasPositions() const { return _positions; } IndexSettings() : _dataType(schema::DataType::STRING), - _error(false), - _prefix(false), - _phrases(false), - _positions(false) + _error(false) { } IndexSettings(const IndexSettings &rhs) : _dataType(rhs._dataType), - _error(rhs._error), - _prefix(rhs._prefix), - _phrases(rhs._phrases), - _positions(rhs._positions) + _error(rhs._error) { } IndexSettings(schema::DataType dataType, - bool error, - bool prefix, - bool phrases, - bool positions) + bool error) : _dataType(dataType), - _error(error), - _prefix(prefix), - _phrases(phrases), - _positions(positions) + _error(error) { } IndexSettings & operator=(const IndexSettings &rhs) { @@ -63,9 +45,6 @@ public: void swap(IndexSettings &rhs) { std::swap(_dataType, rhs._dataType); std::swap(_error, rhs._error); - std::swap(_prefix, rhs._prefix); - std::swap(_phrases, rhs._phrases); - std::swap(_positions, rhs._positions); } }; @@ -121,13 +100,11 @@ public: /** * Return if old schema has at least one usable input field - * with matching data type. If we want phrases then all input - * fields usable for terms must also be usable for phrases. + * with matching data type. * * @param oldSchema old schema, present in an input index - * @param phrases ask for phrase files */ - bool hasOldFields(const Schema &oldSchema, bool phrases) const; + bool hasOldFields(const Schema &oldSchema) const; /** * Return if fields in old schema matches fields in new @@ -136,9 +113,8 @@ public: * also match between new and old schema. 
* * @param oldSchema old schema, present in an input index - * @param phrases ask for phrase files */ - bool hasMatchingOldFields(const Schema &oldSchema, bool phrases) const; + bool hasMatchingOldFields(const Schema &oldSchema) const; }; static IndexSettings getIndexSettings(const Schema &schema, const uint32_t index); |