summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2019-05-03 14:09:13 +0200
committer GitHub <noreply@github.com> 2019-05-03 14:09:13 +0200
commit 99f41741ca2784640ce1bec9e673355ab92e9d42 (patch)
tree e18ba44053381584cdfdd96f8fe5db4b592259e2
parent 25cb895e18e9dd1f74b32896e41fe01b76ddb48e (diff)
parent 2b0d59378d4fc46bb4843ecfe897b808506a58c2 (diff)
Merge pull request #9264 from vespa-engine/geirst/experimental-posting-list-format-flag-in-backend
Geirst/experimental posting list format flag in backend
-rw-r--r-- searchcommon/src/tests/schema/CMakeLists.txt 1
-rw-r--r-- searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg 4
-rw-r--r-- searchcommon/src/tests/schema/schema-without-index-field-properties.txt 7
-rw-r--r-- searchcommon/src/tests/schema/schema_test.cpp 297
-rw-r--r-- searchcommon/src/vespa/searchcommon/common/schema.cpp 42
-rw-r--r-- searchcommon/src/vespa/searchcommon/common/schema.h 29
-rw-r--r-- searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/diskindex.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fusion.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/index/schemautil.cpp 73
-rw-r--r-- searchlib/src/vespa/searchlib/index/schemautil.h 38
12 files changed, 220 insertions, 290 deletions
diff --git a/searchcommon/src/tests/schema/CMakeLists.txt b/searchcommon/src/tests/schema/CMakeLists.txt
index a8e6ec311a3..aafe015d9a1 100644
--- a/searchcommon/src/tests/schema/CMakeLists.txt
+++ b/searchcommon/src/tests/schema/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchcommon_schema_test_app TEST
schema_test.cpp
DEPENDS
searchcommon
+ gtest
)
vespa_add_test(NAME searchcommon_schema_test_app NO_VALGRIND COMMAND searchcommon_schema_test_app)
diff --git a/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg b/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg
index c0998bcf597..b6c547c52c9 100644
--- a/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg
+++ b/searchcommon/src/tests/schema/load-save-cfg/indexschema.cfg
@@ -5,9 +5,7 @@ indexfield[1].name b
indexfield[1].datatype INT64
indexfield[2].name c
indexfield[2].datatype STRING
-indexfield[2].prefix true
-indexfield[2].phrases false
-indexfield[2].positions false
+indexfield[2].experimentalpostinglistformat true
fieldset[1]
fieldset[0].name default
fieldset[0].field[2]
diff --git a/searchcommon/src/tests/schema/schema-without-index-field-properties.txt b/searchcommon/src/tests/schema/schema-without-index-field-properties.txt
new file mode 100644
index 00000000000..4491b1242e0
--- /dev/null
+++ b/searchcommon/src/tests/schema/schema-without-index-field-properties.txt
@@ -0,0 +1,7 @@
+attributefield[0]
+summaryfield[0]
+fieldset[0]
+indexfield[1]
+indexfield[0].name foo
+indexfield[0].datatype STRING
+indexfield[0].collectiontype SINGLE
diff --git a/searchcommon/src/tests/schema/schema_test.cpp b/searchcommon/src/tests/schema/schema_test.cpp
index e9997c2e70d..e360ee1ba7a 100644
--- a/searchcommon/src/tests/schema/schema_test.cpp
+++ b/searchcommon/src/tests/schema/schema_test.cpp
@@ -1,10 +1,11 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/stllike/string.h>
-#include <fstream>
-#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/config/common/configparser.h>
-#include <vespa/searchcommon/common/schemaconfigurer.h>
#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchcommon/common/schemaconfigurer.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <fstream>
+
#include <vespa/log/log.h>
LOG_SETUP("schema_test");
@@ -15,63 +16,70 @@ namespace search::index {
using schema::DataType;
using schema::CollectionType;
using SIAF = Schema::ImportedAttributeField;
+using SIF = Schema::IndexField;
-void assertField(const Schema::Field & exp, const Schema::Field & act) {
- EXPECT_EQUAL(exp.getName(), act.getName());
- EXPECT_EQUAL(exp.getDataType(), act.getDataType());
- EXPECT_EQUAL(exp.getCollectionType(), act.getCollectionType());
+void
+assertField(const Schema::Field& exp, const Schema::Field& act)
+{
+ EXPECT_EQ(exp.getName(), act.getName());
+ EXPECT_EQ(exp.getDataType(), act.getDataType());
+ EXPECT_EQ(exp.getCollectionType(), act.getCollectionType());
}
-void assertIndexField(const Schema::IndexField & exp,
- const Schema::IndexField & act)
+void
+assertIndexField(const Schema::IndexField& exp,
+ const Schema::IndexField& act)
{
assertField(exp, act);
- EXPECT_EQUAL(exp.hasPrefix(), act.hasPrefix());
- EXPECT_EQUAL(exp.hasPhrases(), act.hasPhrases());
- EXPECT_EQUAL(exp.hasPositions(), act.hasPositions());
+ EXPECT_EQ(exp.getAvgElemLen(), act.getAvgElemLen());
+ EXPECT_EQ(exp.use_experimental_posting_list_format(), act.use_experimental_posting_list_format());
}
-void assertSet(const Schema::FieldSet &exp,
- const Schema::FieldSet &act)
+void
+assertSet(const Schema::FieldSet& exp,
+ const Schema::FieldSet& act)
{
- EXPECT_EQUAL(exp.getName(), act.getName());
- ASSERT_EQUAL(exp.getFields().size(), act.getFields().size());
+ EXPECT_EQ(exp.getName(), act.getName());
+ ASSERT_EQ(exp.getFields().size(), act.getFields().size());
for (size_t i = 0; i < exp.getFields().size(); ++i) {
- EXPECT_EQUAL(exp.getFields()[i], act.getFields()[i]);
+ EXPECT_EQ(exp.getFields()[i], act.getFields()[i]);
}
}
-void assertSchema(const Schema & exp, const Schema & act) {
- ASSERT_EQUAL(exp.getNumIndexFields(), act.getNumIndexFields());
+void
+assertSchema(const Schema& exp, const Schema& act)
+{
+ ASSERT_EQ(exp.getNumIndexFields(), act.getNumIndexFields());
for (size_t i = 0; i < exp.getNumIndexFields(); ++i) {
assertIndexField(exp.getIndexField(i), act.getIndexField(i));
}
- ASSERT_EQUAL(exp.getNumAttributeFields(), act.getNumAttributeFields());
+ ASSERT_EQ(exp.getNumAttributeFields(), act.getNumAttributeFields());
for (size_t i = 0; i < exp.getNumAttributeFields(); ++i) {
assertField(exp.getAttributeField(i), act.getAttributeField(i));
}
- ASSERT_EQUAL(exp.getNumSummaryFields(), act.getNumSummaryFields());
+ ASSERT_EQ(exp.getNumSummaryFields(), act.getNumSummaryFields());
for (size_t i = 0; i < exp.getNumSummaryFields(); ++i) {
assertField(exp.getSummaryField(i), act.getSummaryField(i));
}
- ASSERT_EQUAL(exp.getNumFieldSets(), act.getNumFieldSets());
+ ASSERT_EQ(exp.getNumFieldSets(), act.getNumFieldSets());
for (size_t i = 0; i < exp.getNumFieldSets(); ++i) {
assertSet(exp.getFieldSet(i), act.getFieldSet(i));
}
const auto &expImported = exp.getImportedAttributeFields();
const auto &actImported = act.getImportedAttributeFields();
- ASSERT_EQUAL(expImported.size(), actImported.size());
+ ASSERT_EQ(expImported.size(), actImported.size());
for (size_t i = 0; i < expImported.size(); ++i) {
assertField(expImported[i], actImported[i]);
}
}
-TEST("testBasic") {
+TEST(SchemaTest, test_basic)
+{
Schema s;
- EXPECT_EQUAL(0u, s.getNumIndexFields());
- EXPECT_EQUAL(0u, s.getNumAttributeFields());
- EXPECT_EQUAL(0u, s.getNumSummaryFields());
- EXPECT_EQUAL(0u, s.getNumImportedAttributeFields());
+ EXPECT_EQ(0u, s.getNumIndexFields());
+ EXPECT_EQ(0u, s.getNumAttributeFields());
+ EXPECT_EQ(0u, s.getNumSummaryFields());
+ EXPECT_EQ(0u, s.getNumImportedAttributeFields());
s.addIndexField(Schema::IndexField("foo", DataType::STRING));
s.addIndexField(Schema::IndexField("bar", DataType::INT32));
@@ -89,100 +97,95 @@ TEST("testBasic") {
s.addImportedAttributeField(SIAF("imported", DataType::INT32));
- EXPECT_EQUAL(2u, s.getNumIndexFields());
+ ASSERT_EQ(2u, s.getNumIndexFields());
{
- EXPECT_EQUAL("foo", s.getIndexField(0).getName());
- EXPECT_EQUAL(DataType::STRING, s.getIndexField(0).getDataType());
- EXPECT_EQUAL(CollectionType::SINGLE, s.getIndexField(0).getCollectionType());
- EXPECT_TRUE(!s.getIndexField(0).hasPrefix());
- EXPECT_TRUE(!s.getIndexField(0).hasPhrases());
- EXPECT_TRUE(s.getIndexField(0).hasPositions());
-
- EXPECT_EQUAL("bar", s.getIndexField(1).getName());
- EXPECT_EQUAL(DataType::INT32, s.getIndexField(1).getDataType());
- EXPECT_EQUAL(CollectionType::SINGLE, s.getIndexField(1).getCollectionType());
-
- EXPECT_EQUAL(0u, s.getIndexFieldId("foo"));
- EXPECT_EQUAL(1u, s.getIndexFieldId("bar"));
- EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("cox"));
+ EXPECT_EQ("foo", s.getIndexField(0).getName());
+ EXPECT_EQ(DataType::STRING, s.getIndexField(0).getDataType());
+ EXPECT_EQ(CollectionType::SINGLE, s.getIndexField(0).getCollectionType());
+
+ EXPECT_EQ("bar", s.getIndexField(1).getName());
+ EXPECT_EQ(DataType::INT32, s.getIndexField(1).getDataType());
+ EXPECT_EQ(CollectionType::SINGLE, s.getIndexField(1).getCollectionType());
+
+ EXPECT_EQ(0u, s.getIndexFieldId("foo"));
+ EXPECT_EQ(1u, s.getIndexFieldId("bar"));
+ EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("cox"));
}
- EXPECT_EQUAL(3u, s.getNumAttributeFields());
+ ASSERT_EQ(3u, s.getNumAttributeFields());
{
- EXPECT_EQUAL("foo", s.getAttributeField(0).getName());
- EXPECT_EQUAL(DataType::STRING, s.getAttributeField(0).getDataType());
- EXPECT_EQUAL(CollectionType::ARRAY, s.getAttributeField(0).getCollectionType());
-
- EXPECT_EQUAL("bar", s.getAttributeField(1).getName());
- EXPECT_EQUAL(DataType::INT32, s.getAttributeField(1).getDataType());
- EXPECT_EQUAL(CollectionType::WEIGHTEDSET, s.getAttributeField(1).getCollectionType());
-
- EXPECT_EQUAL("cox", s.getAttributeField(2).getName());
- EXPECT_EQUAL(DataType::STRING, s.getAttributeField(2).getDataType());
- EXPECT_EQUAL(CollectionType::SINGLE, s.getAttributeField(2).getCollectionType());
-
- EXPECT_EQUAL(0u, s.getAttributeFieldId("foo"));
- EXPECT_EQUAL(1u, s.getAttributeFieldId("bar"));
- EXPECT_EQUAL(2u, s.getAttributeFieldId("cox"));
- EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("fox"));
+ EXPECT_EQ("foo", s.getAttributeField(0).getName());
+ EXPECT_EQ(DataType::STRING, s.getAttributeField(0).getDataType());
+ EXPECT_EQ(CollectionType::ARRAY, s.getAttributeField(0).getCollectionType());
+
+ EXPECT_EQ("bar", s.getAttributeField(1).getName());
+ EXPECT_EQ(DataType::INT32, s.getAttributeField(1).getDataType());
+ EXPECT_EQ(CollectionType::WEIGHTEDSET, s.getAttributeField(1).getCollectionType());
+
+ EXPECT_EQ("cox", s.getAttributeField(2).getName());
+ EXPECT_EQ(DataType::STRING, s.getAttributeField(2).getDataType());
+ EXPECT_EQ(CollectionType::SINGLE, s.getAttributeField(2).getCollectionType());
+
+ EXPECT_EQ(0u, s.getAttributeFieldId("foo"));
+ EXPECT_EQ(1u, s.getAttributeFieldId("bar"));
+ EXPECT_EQ(2u, s.getAttributeFieldId("cox"));
+ EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("fox"));
}
- EXPECT_EQUAL(4u, s.getNumSummaryFields());
+ ASSERT_EQ(4u, s.getNumSummaryFields());
{
- EXPECT_EQUAL("foo", s.getSummaryField(0).getName());
- EXPECT_EQUAL(DataType::STRING, s.getSummaryField(0).getDataType());
- EXPECT_EQUAL(CollectionType::ARRAY, s.getSummaryField(0).getCollectionType());
-
- EXPECT_EQUAL("bar", s.getSummaryField(1).getName());
- EXPECT_EQUAL(DataType::INT32, s.getSummaryField(1).getDataType());
- EXPECT_EQUAL(CollectionType::WEIGHTEDSET, s.getSummaryField(1).getCollectionType());
-
- EXPECT_EQUAL("cox", s.getSummaryField(2).getName());
- EXPECT_EQUAL(DataType::STRING, s.getSummaryField(2).getDataType());
- EXPECT_EQUAL(CollectionType::SINGLE, s.getSummaryField(2).getCollectionType());
-
- EXPECT_EQUAL("fox", s.getSummaryField(3).getName());
- EXPECT_EQUAL(DataType::RAW, s.getSummaryField(3).getDataType());
- EXPECT_EQUAL(CollectionType::SINGLE, s.getSummaryField(3).getCollectionType());
-
- EXPECT_EQUAL(0u, s.getSummaryFieldId("foo"));
- EXPECT_EQUAL(1u, s.getSummaryFieldId("bar"));
- EXPECT_EQUAL(2u, s.getSummaryFieldId("cox"));
- EXPECT_EQUAL(3u, s.getSummaryFieldId("fox"));
- EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getSummaryFieldId("not"));
+ EXPECT_EQ("foo", s.getSummaryField(0).getName());
+ EXPECT_EQ(DataType::STRING, s.getSummaryField(0).getDataType());
+ EXPECT_EQ(CollectionType::ARRAY, s.getSummaryField(0).getCollectionType());
+
+ EXPECT_EQ("bar", s.getSummaryField(1).getName());
+ EXPECT_EQ(DataType::INT32, s.getSummaryField(1).getDataType());
+ EXPECT_EQ(CollectionType::WEIGHTEDSET, s.getSummaryField(1).getCollectionType());
+
+ EXPECT_EQ("cox", s.getSummaryField(2).getName());
+ EXPECT_EQ(DataType::STRING, s.getSummaryField(2).getDataType());
+ EXPECT_EQ(CollectionType::SINGLE, s.getSummaryField(2).getCollectionType());
+
+ EXPECT_EQ("fox", s.getSummaryField(3).getName());
+ EXPECT_EQ(DataType::RAW, s.getSummaryField(3).getDataType());
+ EXPECT_EQ(CollectionType::SINGLE, s.getSummaryField(3).getCollectionType());
+
+ EXPECT_EQ(0u, s.getSummaryFieldId("foo"));
+ EXPECT_EQ(1u, s.getSummaryFieldId("bar"));
+ EXPECT_EQ(2u, s.getSummaryFieldId("cox"));
+ EXPECT_EQ(3u, s.getSummaryFieldId("fox"));
+ EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getSummaryFieldId("not"));
}
- EXPECT_EQUAL(1u, s.getNumFieldSets());
+ ASSERT_EQ(1u, s.getNumFieldSets());
{
- EXPECT_EQUAL("default", s.getFieldSet(0).getName());
- EXPECT_EQUAL(2u, s.getFieldSet(0).getFields().size());
- EXPECT_EQUAL("foo", s.getFieldSet(0).getFields()[0]);
- EXPECT_EQUAL("bar", s.getFieldSet(0).getFields()[1]);
+ EXPECT_EQ("default", s.getFieldSet(0).getName());
+ EXPECT_EQ(2u, s.getFieldSet(0).getFields().size());
+ EXPECT_EQ("foo", s.getFieldSet(0).getFields()[0]);
+ EXPECT_EQ("bar", s.getFieldSet(0).getFields()[1]);
}
- EXPECT_EQUAL(1u, s.getNumImportedAttributeFields());
+ EXPECT_EQ(1u, s.getNumImportedAttributeFields());
{
const auto &imported = s.getImportedAttributeFields();
- EXPECT_EQUAL(1u, imported.size());
- TEST_DO(assertField(SIAF("imported", DataType::INT32, CollectionType::SINGLE), imported[0]));
+ EXPECT_EQ(1u, imported.size());
+ assertField(SIAF("imported", DataType::INT32, CollectionType::SINGLE), imported[0]);
}
}
-TEST("testLoadAndSave") {
- using SIF = Schema::IndexField;
+TEST(SchemaTest, test_load_and_save)
+{
using SAF = Schema::AttributeField;
using SSF = Schema::SummaryField;
using SDT = schema::DataType;
using SCT = schema::CollectionType;
- typedef Schema::FieldSet SFS;
+ using SFS = Schema::FieldSet;
{ // load from config -> save to file -> load from file
Schema s;
- SchemaConfigurer configurer(s, "dir:" + TEST_PATH("load-save-cfg"));
- EXPECT_EQUAL(3u, s.getNumIndexFields());
+ SchemaConfigurer configurer(s, "dir:load-save-cfg");
+ EXPECT_EQ(3u, s.getNumIndexFields());
assertIndexField(SIF("a", SDT::STRING), s.getIndexField(0));
assertIndexField(SIF("b", SDT::INT64), s.getIndexField(1));
- assertIndexField(SIF("c", SDT::STRING).setPrefix(true)
- .setPhrases(false).setPositions(false),
- s.getIndexField(2));
+ assertIndexField(SIF("c", SDT::STRING).set_experimental_posting_list_format(true), s.getIndexField(2));
- EXPECT_EQUAL(9u, s.getNumAttributeFields());
+ EXPECT_EQ(9u, s.getNumAttributeFields());
assertField(SAF("a", SDT::STRING, SCT::SINGLE),
s.getAttributeField(0));
assertField(SAF("b", SDT::INT8, SCT::ARRAY), s.getAttributeField(1));
@@ -195,7 +198,7 @@ TEST("testLoadAndSave") {
assertField(SAF("h", SDT::BOOLEANTREE), s.getAttributeField(7));
assertField(SAF("i", SDT::TENSOR), s.getAttributeField(8));
- EXPECT_EQUAL(12u, s.getNumSummaryFields());
+ EXPECT_EQ(12u, s.getNumSummaryFields());
assertField(SSF("a", SDT::INT8), s.getSummaryField(0));
assertField(SSF("b", SDT::INT16), s.getSummaryField(1));
assertField(SSF("c", SDT::INT32), s.getSummaryField(2));
@@ -209,7 +212,7 @@ TEST("testLoadAndSave") {
assertField(SSF("k", SDT::RAW), s.getSummaryField(10));
assertField(SSF("l", SDT::RAW), s.getSummaryField(11));
- EXPECT_EQUAL(1u, s.getNumFieldSets());
+ EXPECT_EQ(1u, s.getNumFieldSets());
assertSet(SFS("default").addField("a").addField("c"),
s.getFieldSet(0));
@@ -240,7 +243,8 @@ TEST("testLoadAndSave") {
}
}
-TEST("require that schema can save and load timestamps for fields") {
+TEST(SchemaTest, require_that_schema_can_save_and_load_timestamps_for_fields)
+{
const fastos::TimeStamp timestamp(42);
const std::string file_name = "schema-with-timestamps.txt";
Schema s;
@@ -250,11 +254,12 @@ TEST("require that schema can save and load timestamps for fields") {
ASSERT_TRUE(s.saveToFile(file_name));
Schema s2;
ASSERT_TRUE(s2.loadFromFile(file_name));
- ASSERT_EQUAL(1u, s2.getNumIndexFields());
- ASSERT_EQUAL(timestamp, s2.getIndexField(0).getTimestamp());
+ ASSERT_EQ(1u, s2.getNumIndexFields());
+ EXPECT_EQ(timestamp, s2.getIndexField(0).getTimestamp());
}
-TEST("require that timestamps are omitted when 0.") {
+TEST(SchemaTest, require_that_timestamps_are_omitted_when_0)
+{
const std::string file_name = "schema-without-timestamps.txt";
Schema s;
s.addIndexField(Schema::IndexField("foo", DataType::STRING));
@@ -265,16 +270,18 @@ TEST("require that timestamps are omitted when 0.") {
while (file) {
std::string line;
getline(file, line);
- EXPECT_NOT_EQUAL("indexfield[0].timestamp 0", line);
+ EXPECT_NE("indexfield[0].timestamp 0", line);
}
Schema s2;
ASSERT_TRUE(s2.loadFromFile(file_name));
- ASSERT_EQUAL(1u, s2.getNumIndexFields());
+ ASSERT_EQ(1u, s2.getNumIndexFields());
}
-void addAllFieldTypes(const string &name, Schema &schema,
- fastos::TimeStamp timestamp) {
+void
+addAllFieldTypes(const string& name, Schema& schema,
+ fastos::TimeStamp timestamp)
+{
Schema::IndexField index_field(name, DataType::STRING);
index_field.setTimestamp(timestamp);
schema.addIndexField(index_field);
@@ -290,7 +297,8 @@ void addAllFieldTypes(const string &name, Schema &schema,
schema.addFieldSet(Schema::FieldSet(name));
}
-TEST("require that schemas can be added") {
+TEST(SchemaTest, require_that_schemas_can_be_added)
+{
const string name1 = "foo";
const string name2 = "bar";
const fastos::TimeStamp timestamp1(42);
@@ -301,29 +309,30 @@ TEST("require that schemas can be added") {
addAllFieldTypes(name2, s2, timestamp2);
Schema::UP sum = Schema::make_union(s1, s2);
- ASSERT_EQUAL(2u, sum->getNumIndexFields());
+ ASSERT_EQ(2u, sum->getNumIndexFields());
EXPECT_TRUE(s1.getIndexField(0) ==
sum->getIndexField(sum->getIndexFieldId(name1)));
EXPECT_TRUE(s2.getIndexField(0) ==
sum->getIndexField(sum->getIndexFieldId(name2)));
- ASSERT_EQUAL(2u, sum->getNumAttributeFields());
+ ASSERT_EQ(2u, sum->getNumAttributeFields());
EXPECT_TRUE(s1.getAttributeField(0) ==
sum->getAttributeField(sum->getAttributeFieldId(name1)));
EXPECT_TRUE(s2.getAttributeField(0) ==
sum->getAttributeField(sum->getAttributeFieldId(name2)));
- ASSERT_EQUAL(2u, sum->getNumSummaryFields());
+ ASSERT_EQ(2u, sum->getNumSummaryFields());
EXPECT_TRUE(s1.getSummaryField(0) ==
sum->getSummaryField(sum->getSummaryFieldId(name1)));
EXPECT_TRUE(s2.getSummaryField(0) ==
sum->getSummaryField(sum->getSummaryFieldId(name2)));
- ASSERT_EQUAL(2u, sum->getNumFieldSets());
+ ASSERT_EQ(2u, sum->getNumFieldSets());
EXPECT_TRUE(s1.getFieldSet(0) ==
sum->getFieldSet(sum->getFieldSetId(name1)));
EXPECT_TRUE(s2.getFieldSet(0) ==
sum->getFieldSet(sum->getFieldSetId(name2)));
}
-TEST("require that S union S = S for schema S") {
+TEST(SchemaTest, require_that_S_union_S_equals_S_for_schema_S)
+{
Schema schema;
addAllFieldTypes("foo", schema, 42);
@@ -331,7 +340,8 @@ TEST("require that S union S = S for schema S") {
EXPECT_TRUE(schema == *sum);
}
-TEST("require that schema can calculate set_difference") {
+TEST(SchemaTest, require_that_schema_can_calculate_set_difference)
+{
const string name1 = "foo";
const string name2 = "bar";
const fastos::TimeStamp timestamp1(42);
@@ -349,7 +359,8 @@ TEST("require that schema can calculate set_difference") {
EXPECT_TRUE(expected == *schema);
}
-TEST("require that getOldFields returns a subset of a schema") {
+TEST(SchemaTest, require_that_get_old_fields_returns_a_subset_of_a_schema)
+{
Schema schema;
const int64_t limit_timestamp = 1000;
@@ -359,13 +370,14 @@ TEST("require that getOldFields returns a subset of a schema") {
Schema::UP old_fields =
schema.getOldFields(fastos::TimeStamp(limit_timestamp));
- EXPECT_EQUAL(1u, old_fields->getNumIndexFields());
- EXPECT_EQUAL("bar", old_fields->getIndexField(0).getName());
- EXPECT_EQUAL(1u, old_fields->getNumAttributeFields());
- EXPECT_EQUAL(1u, old_fields->getNumSummaryFields());
+ EXPECT_EQ(1u, old_fields->getNumIndexFields());
+ EXPECT_EQ("bar", old_fields->getIndexField(0).getName());
+ EXPECT_EQ(1u, old_fields->getNumAttributeFields());
+ EXPECT_EQ(1u, old_fields->getNumSummaryFields());
}
-TEST("require that schema can calculate intersection") {
+TEST(SchemaTest, require_that_schema_can_calculate_intersection)
+{
const string name1 = "foo";
const string name2 = "bar";
const string name3 = "baz";
@@ -385,18 +397,19 @@ TEST("require that schema can calculate intersection") {
EXPECT_TRUE(expected == *schema);
}
-TEST("require that incompatible fields are removed from intersection") {
+TEST(SchemaTest, require_that_incompatible_fields_are_removed_from_intersection)
+{
const string name = "foo";
Schema s1;
s1.addIndexField(Schema::IndexField(name, DataType::STRING));
Schema s2;
s2.addIndexField(Schema::IndexField(name, DataType::INT32));
Schema::UP schema = Schema::intersect(s1, s2);
- EXPECT_EQUAL(0u, schema->getNumIndexFields());
+ EXPECT_EQ(0u, schema->getNumIndexFields());
EXPECT_FALSE(schema->isIndexField(name));
}
-TEST("require that imported attribute fields are not saved to disk")
+TEST(SchemaTest, require_that_imported_attribute_fields_are_not_saved_to_disk)
{
const vespalib::string fileName = "schema-no-imported-fields.txt";
{
@@ -407,25 +420,39 @@ TEST("require that imported attribute fields are not saved to disk")
{
Schema s;
s.loadFromFile(fileName);
- EXPECT_EQUAL(0u, s.getNumImportedAttributeFields());
+ EXPECT_EQ(0u, s.getNumImportedAttributeFields());
}
}
-TEST("require that schema can be built with imported attribute fields")
+TEST(SchemaTest, require_that_schema_can_be_built_with_imported_attribute_fields)
{
Schema s;
- SchemaConfigurer configurer(s, "dir:" + TEST_PATH("imported-fields-cfg"));
+ SchemaConfigurer configurer(s, "dir:imported-fields-cfg");
const auto &imported = s.getImportedAttributeFields();
- EXPECT_EQUAL(2u, imported.size());
- TEST_DO(assertField(SIAF("imported_a", DataType::INT32, CollectionType::SINGLE), imported[0]));
- TEST_DO(assertField(SIAF("imported_b", DataType::STRING, CollectionType::ARRAY), imported[1]));
+ ASSERT_EQ(2u, imported.size());
+ assertField(SIAF("imported_a", DataType::INT32, CollectionType::SINGLE), imported[0]);
+ assertField(SIAF("imported_b", DataType::STRING, CollectionType::ARRAY), imported[1]);
const auto &regular = s.getAttributeFields();
- EXPECT_EQUAL(1u, regular.size());
- TEST_DO(assertField(SIAF("regular", DataType::INT32, CollectionType::SINGLE), regular[0]));
+ ASSERT_EQ(1u, regular.size());
+ assertField(SIAF("regular", DataType::INT32, CollectionType::SINGLE), regular[0]);
+}
+
+TEST(SchemaTest, require_that_index_field_is_loaded_with_default_values_when_properties_are_not_set)
+{
+ Schema s;
+ s.loadFromFile("schema-without-index-field-properties.txt");
+
+ const auto& index_fields = s.getIndexFields();
+ ASSERT_EQ(1, index_fields.size());
+ assertIndexField(SIF("foo", DataType::STRING, CollectionType::SINGLE).
+ setAvgElemLen(512).
+ set_experimental_posting_list_format(false),
+ index_fields[0]);
+ assertIndexField(SIF("foo", DataType::STRING, CollectionType::SINGLE), index_fields[0]);
}
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.cpp b/searchcommon/src/vespa/searchcommon/common/schema.cpp
index cef74409024..6d3bae31508 100644
--- a/searchcommon/src/vespa/searchcommon/common/schema.cpp
+++ b/searchcommon/src/vespa/searchcommon/common/schema.cpp
@@ -131,29 +131,23 @@ Schema::Field::operator!=(const Field &rhs) const
Schema::IndexField::IndexField(vespalib::stringref name, DataType dt)
: Field(name, dt),
- _prefix(false),
- _phrases(false),
- _positions(true),
- _avgElemLen(512)
+ _avgElemLen(512),
+ _experimental_posting_list_format(false)
{
}
Schema::IndexField::IndexField(vespalib::stringref name, DataType dt,
CollectionType ct)
: Field(name, dt, ct),
- _prefix(false),
- _phrases(false),
- _positions(true),
- _avgElemLen(512)
+ _avgElemLen(512),
+ _experimental_posting_list_format(false)
{
}
Schema::IndexField::IndexField(const std::vector<vespalib::string> &lines)
: Field(lines),
- _prefix(ConfigParser::parse<bool>("prefix", lines)),
- _phrases(ConfigParser::parse<bool>("phrases", lines)),
- _positions(ConfigParser::parse<bool>("positions", lines)),
- _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines))
+ _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines, 512)),
+ _experimental_posting_list_format(ConfigParser::parse<bool>("experimentalpostinglistformat", lines, false))
{
}
@@ -161,30 +155,29 @@ void
Schema::IndexField::write(vespalib::asciistream & os, vespalib::stringref prefix) const
{
Field::write(os, prefix);
- os << prefix << "prefix " << (_prefix ? "true" : "false") << "\n";
- os << prefix << "phrases " << (_phrases ? "true" : "false") << "\n";
- os << prefix << "positions " << (_positions ? "true" : "false") << "\n";
os << prefix << "averageelementlen " << static_cast<int32_t>(_avgElemLen) << "\n";
+ os << prefix << "experimentalpostinglistformat " << (_experimental_posting_list_format ? "true" : "false") << "\n";
+
+ // TODO: Remove prefix, phrases and positions when breaking downgrade is no longer an issue.
+ os << prefix << "prefix false" << "\n";
+ os << prefix << "phrases false" << "\n";
+ os << prefix << "positions true" << "\n";
}
bool
Schema::IndexField::operator==(const IndexField &rhs) const
{
return Field::operator==(rhs) &&
- _prefix == rhs._prefix &&
- _phrases == rhs._phrases &&
- _positions == rhs._positions &&
- _avgElemLen == rhs._avgElemLen;
+ _avgElemLen == rhs._avgElemLen &&
+ _experimental_posting_list_format == rhs._experimental_posting_list_format;
}
bool
Schema::IndexField::operator!=(const IndexField &rhs) const
{
return Field::operator!=(rhs) ||
- _prefix != rhs._prefix ||
- _phrases != rhs._phrases ||
- _positions != rhs._positions ||
- _avgElemLen != rhs._avgElemLen;
+ _avgElemLen != rhs._avgElemLen ||
+ _experimental_posting_list_format != rhs._experimental_posting_list_format;
}
Schema::FieldSet::FieldSet(const std::vector<vespalib::string> & lines) :
@@ -337,9 +330,6 @@ cloneIndexField(const Schema::IndexField &field,
return Schema::IndexField(field.getName() + suffix,
field.getDataType(),
field.getCollectionType()).
- setPrefix(field.hasPrefix()).
- setPhrases(field.hasPhrases()).
- setPositions(field.hasPositions()).
setAvgElemLen(field.getAvgElemLen());
}
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.h b/searchcommon/src/vespa/searchcommon/common/schema.h
index 90cf099f2d8..bb2163e5577 100644
--- a/searchcommon/src/vespa/searchcommon/common/schema.h
+++ b/searchcommon/src/vespa/searchcommon/common/schema.h
@@ -74,12 +74,11 @@ public:
* A representation of an index field with extra information on
* how the index should be generated.
**/
- class IndexField : public Field
- {
- bool _prefix;
- bool _phrases;
- bool _positions;
+ class IndexField : public Field {
+ private:
uint32_t _avgElemLen;
+ // TODO: Remove when experimental posting list format is made default
+ bool _experimental_posting_list_format;
public:
IndexField(vespalib::stringref name, DataType dt);
@@ -89,21 +88,17 @@ public:
**/
IndexField(const std::vector<vespalib::string> &lines);
- IndexField &setPrefix(bool value) { _prefix = value; return *this; }
- IndexField &setPhrases(bool value) { _phrases = value; return *this; }
- IndexField &setPositions(bool value)
- { _positions = value; return *this; }
- IndexField &setAvgElemLen(uint32_t avgElemLen)
- { _avgElemLen = avgElemLen; return *this; }
+ IndexField &setAvgElemLen(uint32_t avgElemLen) { _avgElemLen = avgElemLen; return *this; }
+ IndexField &set_experimental_posting_list_format(bool value) {
+ _experimental_posting_list_format = value;
+ return *this;
+ }
- void
- write(vespalib::asciistream &os,
- vespalib::stringref prefix) const override;
+ void write(vespalib::asciistream &os,
+ vespalib::stringref prefix) const override;
- bool hasPrefix() const { return _prefix; }
- bool hasPhrases() const { return _phrases; }
- bool hasPositions() const { return _positions; }
uint32_t getAvgElemLen() const { return _avgElemLen; }
+ bool use_experimental_posting_list_format() const { return _experimental_posting_list_format; }
bool operator==(const IndexField &rhs) const;
bool operator!=(const IndexField &rhs) const;
diff --git a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp
index d56f3c747c1..59ed15eefb0 100644
--- a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp
+++ b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp
@@ -144,10 +144,8 @@ SchemaBuilder::build(const IndexschemaConfig &cfg, Schema &schema)
const IndexschemaConfig::Indexfield & f = cfg.indexfield[i];
schema.addIndexField(Schema::IndexField(f.name, convertIndexDataType(f.datatype),
convertIndexCollectionType(f.collectiontype)).
- setPrefix(f.prefix).
- setPhrases(f.phrases).
- setPositions(f.positions).
- setAvgElemLen(f.averageelementlen));
+ setAvgElemLen(f.averageelementlen).
+ set_experimental_posting_list_format(f.experimentalpostinglistformat));
}
for (size_t i = 0; i < cfg.fieldset.size(); ++i) {
const IndexschemaConfig::Fieldset &fs = cfg.fieldset[i];
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
index 64a54187254..b6d843e4e3c 100644
--- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
@@ -182,9 +182,8 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch,
if (settings.hasError()) {
return false;
}
- bool hasPhraseOcc = settings.hasPhrases();
SchemaUtil::IndexIterator oItr(oldSchema, itr);
- if (!itr.hasMatchingOldFields(oldSchema, hasPhraseOcc) || !oItr.isValid()) {
+ if (!itr.hasMatchingOldFields(oldSchema) || !oItr.isValid()) {
if (!openField(fieldDir, tuneFileSearch)) {
return false;
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
index a41f0412294..8da590654da 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp
@@ -199,10 +199,10 @@ FieldReader::allocFieldReader(const SchemaUtil::IndexIterator &index,
const Schema &oldSchema)
{
assert(index.isValid());
- if (index.hasMatchingOldFields(oldSchema, false)) {
+ if (index.hasMatchingOldFields(oldSchema)) {
return std::make_unique<FieldReader>(); // The common case
}
- if (!index.hasOldFields(oldSchema, false)) {
+ if (!index.hasOldFields(oldSchema)) {
return std::make_unique<FieldReaderEmpty>(index); // drop data
}
// field exists in old schema with different collection type setting
diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
index fc198e3b74e..ed311b682e6 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp
@@ -102,7 +102,7 @@ Fusion::openInputWordReaders(const SchemaUtil::IndexIterator &index,
vespalib::string fieldDir(oldindexpath + "/" + index.getName());
vespalib::string dictName(fieldDir + "/dictionary");
const Schema &oldSchema = oi.getSchema();
- if (!index.hasOldFields(oldSchema, false)) {
+ if (!index.hasOldFields(oldSchema)) {
continue; // drop data
}
bool res = reader->open(dictName,
@@ -296,7 +296,7 @@ Fusion::openInputFieldReaders(const SchemaUtil::IndexIterator &index,
for (auto &i : _oldIndexes) {
OldIndex &oi = *i;
const Schema &oldSchema = oi.getSchema();
- if (!index.hasOldFields(oldSchema, false)) {
+ if (!index.hasOldFields(oldSchema)) {
continue; // drop data
}
auto reader = FieldReader::allocFieldReader(index, oldSchema);
@@ -413,7 +413,7 @@ Fusion::ReadMappingFiles(const SchemaUtil::IndexIterator *index)
wordNumMapping.noMappingFile();
continue;
}
- if (index && !index->hasOldFields(oldSchema, false)) {
+ if (index && !index->hasOldFields(oldSchema)) {
continue; // drop data
}
diff --git a/searchlib/src/vespa/searchlib/index/schemautil.cpp b/searchlib/src/vespa/searchlib/index/schemautil.cpp
index 62f6cd08510..7f3b7c8c2a9 100644
--- a/searchlib/src/vespa/searchlib/index/schemautil.cpp
+++ b/searchlib/src/vespa/searchlib/index/schemautil.cpp
@@ -15,47 +15,21 @@ SchemaUtil::IndexSettings
SchemaUtil::getIndexSettings(const Schema &schema,
const uint32_t index)
{
- IndexSettings ret;
Schema::DataType indexDataType(DataType::STRING);
bool error = false;
- bool somePrefixes = false;
- bool someNotPrefixes = false;
- bool somePhrases = false;
- bool someNotPhrases = false;
- bool somePositions = false;
- bool someNotPositions = false;
const Schema::IndexField &iField = schema.getIndexField(index);
- if (iField.hasPhrases()) {
- somePhrases = true;
- } else {
- someNotPhrases = true;
- }
- if (iField.hasPrefix()) {
- somePrefixes = true;
- } else {
- someNotPrefixes = true;
- }
- if (iField.hasPositions()) {
- somePositions = true;
- } else {
- someNotPositions = true;
- }
indexDataType = iField.getDataType();
if (indexDataType != DataType::STRING) {
error = true;
LOG(error, "Field %s has bad data type", iField.getName().c_str());
}
- return IndexSettings(indexDataType, error,
- somePrefixes && !someNotPrefixes,
- somePhrases && !someNotPhrases,
- somePositions && !someNotPositions);
+ return IndexSettings(indexDataType, error);
}
bool
-SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema,
- bool phrases) const
+SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema) const
{
assert(isValid());
const Schema::IndexField &newField =
@@ -70,15 +44,11 @@ SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema,
if (oldField.getDataType() != newField.getDataType()) {
return false; // wrong data type
}
- if (!phrases) {
- return true;
- }
- return oldField.hasPhrases();
+ return true;
}
bool
-SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema,
- bool phrases) const
+SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema) const
{
assert(isValid());
const Schema::IndexField &newField =
@@ -88,18 +58,13 @@ SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema,
if (oldFieldId == Schema::UNKNOWN_FIELD_ID) {
return false;
}
- if (phrases) {
- IndexIterator oldIterator(oldSchema, oldFieldId);
- IndexSettings settings = oldIterator.getIndexSettings();
- if (!settings.hasPhrases()) {
- return false;
- }
- }
const Schema::IndexField &oldField =
oldSchema.getIndexField(oldFieldId);
if (oldField.getDataType() != newField.getDataType() ||
oldField.getCollectionType() != newField.getCollectionType())
+ {
return false;
+ }
return true;
}
@@ -113,32 +78,6 @@ SchemaUtil::validateIndexField(const Schema::IndexField &field)
field.getName().c_str());
ok = false;
}
- if (field.getDataType() != DataType::STRING) {
- if (field.hasPrefix()) {
- LOG(error,
- "Field %s is non-string but has prefix",
- field.getName().c_str());
- ok = false;
- }
- if (field.hasPhrases()) {
- LOG(error,
- "Field %s is non-string but has phrases",
- field.getName().c_str());
- ok = false;
- }
- if (field.hasPositions()) {
- LOG(error,
- "Field %s is non-string but has positions",
- field.getName().c_str());
- ok = false;
- }
- }
- if (field.hasPhrases() && !field.hasPositions()) {
- LOG(error,
- "Field %s has phrases but not positions",
- field.getName().c_str());
- ok = false;
- }
return ok;
}
diff --git a/searchlib/src/vespa/searchlib/index/schemautil.h b/searchlib/src/vespa/searchlib/index/schemautil.h
index a678e335ebb..c8fe8e4fe32 100644
--- a/searchlib/src/vespa/searchlib/index/schemautil.h
+++ b/searchlib/src/vespa/searchlib/index/schemautil.h
@@ -12,9 +12,6 @@ public:
class IndexSettings {
schema::DataType _dataType;
bool _error; // Schema is bad.
- bool _prefix;
- bool _phrases;
- bool _positions;
public:
const schema::DataType & getDataType() const {
@@ -22,36 +19,21 @@ public:
}
bool hasError() const { return _error; }
- bool hasPrefix() const { return _prefix; }
- bool hasPhrases() const { return _phrases; }
- bool hasPositions() const { return _positions; }
IndexSettings()
: _dataType(schema::DataType::STRING),
- _error(false),
- _prefix(false),
- _phrases(false),
- _positions(false)
+ _error(false)
{ }
IndexSettings(const IndexSettings &rhs)
: _dataType(rhs._dataType),
- _error(rhs._error),
- _prefix(rhs._prefix),
- _phrases(rhs._phrases),
- _positions(rhs._positions)
+ _error(rhs._error)
{ }
IndexSettings(schema::DataType dataType,
- bool error,
- bool prefix,
- bool phrases,
- bool positions)
+ bool error)
: _dataType(dataType),
- _error(error),
- _prefix(prefix),
- _phrases(phrases),
- _positions(positions)
+ _error(error)
{ }
IndexSettings & operator=(const IndexSettings &rhs) {
@@ -63,9 +45,6 @@ public:
void swap(IndexSettings &rhs) {
std::swap(_dataType, rhs._dataType);
std::swap(_error, rhs._error);
- std::swap(_prefix, rhs._prefix);
- std::swap(_phrases, rhs._phrases);
- std::swap(_positions, rhs._positions);
}
};
@@ -121,13 +100,11 @@ public:
/**
* Return if old schema has at least one usable input field
- * with matching data type. If we want phrases then all input
- * fields usable for terms must also be usable for phrases.
+ * with matching data type.
*
* @param oldSchema old schema, present in an input index
- * @param phrases ask for phrase files
*/
- bool hasOldFields(const Schema &oldSchema, bool phrases) const;
+ bool hasOldFields(const Schema &oldSchema) const;
/**
* Return if fields in old schema matches fields in new
@@ -136,9 +113,8 @@ public:
* also match between new and old schema.
*
* @param oldSchema old schema, present in an input index
- * @param phrases ask for phrase files
*/
- bool hasMatchingOldFields(const Schema &oldSchema, bool phrases) const;
+ bool hasMatchingOldFields(const Schema &oldSchema) const;
};
static IndexSettings getIndexSettings(const Schema &schema, const uint32_t index);