summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt6
-rw-r--r--searchlib/src/tests/searchcommon/.gitignore3
-rw-r--r--searchlib/src/tests/searchcommon/attribute/config/.gitignore1
-rw-r--r--searchlib/src/tests/searchcommon/attribute/config/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp142
-rw-r--r--searchlib/src/tests/searchcommon/schema/.gitignore9
-rw-r--r--searchlib/src/tests/searchcommon/schema/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/searchcommon/schema/imported-fields-cfg/attributes.cfg12
-rw-r--r--searchlib/src/tests/searchcommon/schema/load-save-cfg/attributes.cfg22
-rw-r--r--searchlib/src/tests/searchcommon/schema/load-save-cfg/indexschema.cfg13
-rw-r--r--searchlib/src/tests/searchcommon/schema/load-save-cfg/summary.cfg29
-rw-r--r--searchlib/src/tests/searchcommon/schema/schema-without-index-field-properties.txt7
-rw-r--r--searchlib/src/tests/searchcommon/schema/schema_test.cpp396
-rw-r--r--searchlib/src/vespa/searchcommon/.gitignore3
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/.gitignore2
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/CMakeLists.txt11
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/attribute_utils.cpp23
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/attribute_utils.h30
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/attributecontent.h166
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/basictype.cpp37
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/basictype.h63
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/collectiontype.cpp26
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/collectiontype.h75
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/config.cpp72
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/config.h158
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/distance_metric.h9
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/hnsw_index_params.h45
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/i_attribute_functor.h37
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/i_multi_value_attribute.h55
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/i_multi_value_read_view.h46
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/i_search_context.h74
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/iattributecontext.h54
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/iattributevector.h457
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/multi_value_traits.h35
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/multivalue.h65
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h37
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/predicate_params.h30
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/search_context_params.cpp9
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/search_context_params.h53
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/status.cpp86
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/status.h61
-rw-r--r--searchlib/src/vespa/searchcommon/common/.gitignore2
-rw-r--r--searchlib/src/vespa/searchcommon/common/CMakeLists.txt10
-rw-r--r--searchlib/src/vespa/searchcommon/common/datatype.cpp99
-rw-r--r--searchlib/src/vespa/searchcommon/common/datatype.h47
-rw-r--r--searchlib/src/vespa/searchcommon/common/dictionary_config.cpp39
-rw-r--r--searchlib/src/vespa/searchcommon/common/dictionary_config.h31
-rw-r--r--searchlib/src/vespa/searchcommon/common/growstrategy.cpp18
-rw-r--r--searchlib/src/vespa/searchcommon/common/growstrategy.h61
-rw-r--r--searchlib/src/vespa/searchcommon/common/iblobconverter.h22
-rw-r--r--searchlib/src/vespa/searchcommon/common/range.h29
-rw-r--r--searchlib/src/vespa/searchcommon/common/schema.cpp581
-rw-r--r--searchlib/src/vespa/searchcommon/common/schema.h411
-rw-r--r--searchlib/src/vespa/searchcommon/common/schemaconfigurer.cpp239
-rw-r--r--searchlib/src/vespa/searchcommon/common/schemaconfigurer.h68
-rw-r--r--searchlib/src/vespa/searchcommon/common/subscriptionproxyng.h61
-rw-r--r--searchlib/src/vespa/searchcommon/common/undefinedvalues.h69
-rw-r--r--searchlib/src/vespa/searchlib/CMakeLists.txt4
-rw-r--r--searchlib/src/vespa/searchlib/docstore/logdatastore.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/index/field_length_calculator.h12
60 files changed, 4277 insertions, 12 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 412d00a1c6a..7c67508a196 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -4,7 +4,6 @@ vespa_define_module(
fastos
vespalog
vespalib
- staging_vespalib
vespaeval
fnet
configdefinitions
@@ -12,7 +11,6 @@ vespa_define_module(
fastlib_fast
document
config_cloudconfig
- searchcommon
EXTERNAL_DEPENDS
${VESPA_GLIBC_RT_LIB}
@@ -53,6 +51,8 @@ vespa_define_module(
src/vespa/searchlib/transactionlog
src/vespa/searchlib/uca
src/vespa/searchlib/util
+ src/vespa/searchcommon/attribute
+ src/vespa/searchcommon/common
APPS
src/apps/docstore
@@ -213,6 +213,8 @@ vespa_define_module(
src/tests/rankingexpression/intrinsic_blueprint_adapter
src/tests/ranksetup
src/tests/ranksetup/verify_feature
+ src/tests/searchcommon/attribute/config
+ src/tests/searchcommon/schema
src/tests/sort
src/tests/sortresults
src/tests/sortspec
diff --git a/searchlib/src/tests/searchcommon/.gitignore b/searchlib/src/tests/searchcommon/.gitignore
new file mode 100644
index 00000000000..a3e9c375723
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/.gitignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+*_test
diff --git a/searchlib/src/tests/searchcommon/attribute/config/.gitignore b/searchlib/src/tests/searchcommon/attribute/config/.gitignore
new file mode 100644
index 00000000000..ffdb7b1e933
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/attribute/config/.gitignore
@@ -0,0 +1 @@
+searchcommon_attribute_config_test_app
diff --git a/searchlib/src/tests/searchcommon/attribute/config/CMakeLists.txt b/searchlib/src/tests/searchcommon/attribute/config/CMakeLists.txt
new file mode 100644
index 00000000000..f61138c5d73
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/attribute/config/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchcommon_attribute_config_test_app TEST
+ SOURCES
+ attribute_config_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchcommon_attribute_config_test_app NO_VALGRIND COMMAND searchcommon_attribute_config_test_app)
diff --git a/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp b/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp
new file mode 100644
index 00000000000..918e14546e6
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp
@@ -0,0 +1,142 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchcommon/attribute/config.h>
+
+using search::attribute::Config;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using vespalib::eval::ValueType;
+using search::GrowStrategy;
+using search::DictionaryConfig;
+
+
+/**
+ * Test fixture that owns the attribute Config instance under test.
+ */
+struct Fixture
+{
+    Config _config;
+
+    // Fixture around a default-constructed Config.
+    Fixture() : _config() {}
+
+    // Fixture around a Config built from the given basic type; the collection
+    // type and the two boolean flags are optional and forwarded as given.
+    Fixture(BasicType bt,
+            CollectionType ct = CollectionType::SINGLE,
+            bool fastSearch_ = false,
+            bool huge_ = false)
+        : _config(bt, ct, fastSearch_, huge_) {}
+};
+
+// A default-constructed Config (held by the default Fixture) is expected to
+// report basic type NONE, collection type SINGLE, every boolean option off,
+// and a tensor type for which is_error() is true.
+TEST_F("test default attribute config", Fixture)
+{
+ EXPECT_EQUAL(BasicType::Type::NONE, f._config.basicType().type());
+ EXPECT_EQUAL(CollectionType::Type::SINGLE,
+ f._config.collectionType().type());
+ EXPECT_TRUE(!f._config.fastSearch());
+ EXPECT_TRUE(!f._config.huge());
+ EXPECT_TRUE(!f._config.getEnableBitVectors());
+ EXPECT_TRUE(!f._config.getEnableOnlyBitVector());
+ EXPECT_TRUE(!f._config.getIsFilter());
+ EXPECT_TRUE(!f._config.fastAccess());
+ EXPECT_TRUE(f._config.tensorType().is_error());
+}
+
+// A Config built from only a basic type (INT32) and a collection type (WSET)
+// must report exactly those two types, keep every boolean option off, and
+// keep a tensor type for which is_error() is true.
+TEST_F("test integer weightedset attribute config",
+ Fixture(BasicType::Type::INT32,
+ CollectionType::Type::WSET))
+{
+ EXPECT_EQUAL(BasicType::Type::INT32, f._config.basicType().type());
+ EXPECT_EQUAL(CollectionType::Type::WSET,
+ f._config.collectionType().type());
+ EXPECT_TRUE(!f._config.fastSearch());
+ EXPECT_TRUE(!f._config.huge());
+ EXPECT_TRUE(!f._config.getEnableBitVectors());
+ EXPECT_TRUE(!f._config.getEnableOnlyBitVector());
+ EXPECT_TRUE(!f._config.getIsFilter());
+ EXPECT_TRUE(!f._config.fastAccess());
+ EXPECT_TRUE(f._config.tensorType().is_error());
+}
+
+
+// Configs that differ only in collection type (WSET vs ARRAY) must compare
+// unequal; configs built from identical arguments must compare equal.
+TEST("test operator== on attribute config")
+{
+ Config cfg1(BasicType::Type::INT32, CollectionType::Type::WSET);
+ Config cfg2(BasicType::Type::INT32, CollectionType::Type::ARRAY);
+ Config cfg3(BasicType::Type::INT32, CollectionType::Type::WSET);
+
+ EXPECT_TRUE(cfg1 != cfg2);
+ EXPECT_TRUE(cfg2 != cfg3);
+ EXPECT_TRUE(cfg1 == cfg3);
+}
+
+
+// The tensor type set on a Config must take part in equality comparison.
+TEST("test operator== on attribute config for tensor type")
+{
+ Config cfg1(BasicType::Type::TENSOR);
+ Config cfg2(BasicType::Type::TENSOR);
+ Config cfg3(BasicType::Type::TENSOR);
+
+ ValueType dense_x = ValueType::from_spec("tensor(x[10])");
+ ValueType sparse_x = ValueType::from_spec("tensor(x{})");
+
+ // Before any tensor type is set, all three configs are equal.
+ EXPECT_TRUE(cfg1 == cfg2);
+ EXPECT_TRUE(cfg2 == cfg3);
+ EXPECT_TRUE(cfg1 == cfg3);
+
+ // cfg1 and cfg3 get the same dense type; cfg2 keeps its error tensor type.
+ cfg1.setTensorType(dense_x);
+ cfg3.setTensorType(dense_x);
+ EXPECT_EQUAL(dense_x, cfg1.tensorType());
+ EXPECT_EQUAL(dense_x, cfg3.tensorType());
+ EXPECT_TRUE(!cfg1.tensorType().is_error());
+ EXPECT_TRUE(cfg2.tensorType().is_error());
+ EXPECT_TRUE(!cfg3.tensorType().is_error());
+
+ EXPECT_TRUE(cfg1 != cfg2);
+ EXPECT_TRUE(cfg2 != cfg3);
+ EXPECT_TRUE(cfg1 == cfg3);
+
+ // Switching cfg3 to a different (sparse) type makes it differ from cfg1.
+ cfg3.setTensorType(sparse_x);
+ EXPECT_EQUAL(sparse_x, cfg3.tensorType());
+ EXPECT_TRUE(!cfg3.tensorType().is_error());
+ EXPECT_TRUE(cfg1 != cfg3);
+}
+
+// The getters must mirror the values passed to the GrowStrategy constructor;
+// the value 0.5 is expected back both as a grow factor (0.5) and as a grow
+// percentage (50).
+TEST("Test GrowStrategy consistency") {
+ GrowStrategy g(1024, 0.5, 17, 0.4f);
+ EXPECT_EQUAL(1024u, g.getDocsInitialCapacity());
+ EXPECT_EQUAL(50u, g.getDocsGrowPercent());
+ EXPECT_EQUAL(0.5, g.getDocsGrowFactor());
+ EXPECT_EQUAL(17u, g.getDocsGrowDelta());
+ EXPECT_EQUAL(0.4f, g.getMultiValueAllocGrowFactor());
+}
+
+// Checks DictionaryConfig defaults, accessors and equality, and that the
+// dictionary config set on an attribute Config takes part in Config equality.
+TEST("DictionaryConfig") {
+ using Type = DictionaryConfig::Type;
+ using Match = DictionaryConfig::Match;
+ // Defaults: BTREE dictionary with UNCASED matching.
+ EXPECT_EQUAL(Type::BTREE, DictionaryConfig().getType());
+ EXPECT_EQUAL(Match::UNCASED, DictionaryConfig().getMatch());
+
+ // Match mode stays UNCASED unless CASED is passed explicitly.
+ EXPECT_EQUAL(Type::BTREE, DictionaryConfig(Type::BTREE).getType());
+ EXPECT_EQUAL(Match::UNCASED, DictionaryConfig(Type::BTREE).getMatch());
+ EXPECT_EQUAL(Match::UNCASED, DictionaryConfig(Type::BTREE, Match::UNCASED).getMatch());
+ EXPECT_EQUAL(Match::CASED, DictionaryConfig(Type::BTREE, Match::CASED).getMatch());
+
+ EXPECT_EQUAL(Type::HASH, DictionaryConfig(Type::HASH).getType());
+ EXPECT_EQUAL(Type::BTREE_AND_HASH, DictionaryConfig(Type::BTREE_AND_HASH).getType());
+
+ // Equality between DictionaryConfig values follows the dictionary type.
+ EXPECT_EQUAL(DictionaryConfig(Type::BTREE), DictionaryConfig(Type::BTREE));
+ EXPECT_EQUAL(DictionaryConfig(Type::HASH), DictionaryConfig(Type::HASH));
+ EXPECT_EQUAL(DictionaryConfig(Type::BTREE_AND_HASH), DictionaryConfig(Type::BTREE_AND_HASH));
+ EXPECT_NOT_EQUAL(DictionaryConfig(Type::HASH), DictionaryConfig(Type::BTREE));
+ EXPECT_NOT_EQUAL(DictionaryConfig(Type::BTREE), DictionaryConfig(Type::HASH));
+ // Both operator== and operator!= on Config must reflect the dictionary config.
+ EXPECT_TRUE(Config().set_dictionary_config(DictionaryConfig(Type::HASH)) ==
+ Config().set_dictionary_config(DictionaryConfig(Type::HASH)));
+ EXPECT_FALSE(Config().set_dictionary_config(DictionaryConfig(Type::HASH)) ==
+ Config().set_dictionary_config(DictionaryConfig(Type::BTREE)));
+ EXPECT_FALSE(Config().set_dictionary_config(DictionaryConfig(Type::HASH)) !=
+ Config().set_dictionary_config(DictionaryConfig(Type::HASH)));
+ EXPECT_TRUE(Config().set_dictionary_config(DictionaryConfig(Type::HASH)) !=
+ Config().set_dictionary_config(DictionaryConfig(Type::BTREE)));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/searchcommon/schema/.gitignore b/searchlib/src/tests/searchcommon/schema/.gitignore
new file mode 100644
index 00000000000..e000f0ca2c8
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/schema/.gitignore
@@ -0,0 +1,9 @@
+/.depend
+/Makefile
+/schema_test
+searchcommon_schema_test_app
+/schema-no-imported-fields.txt
+/schema-with-timestamps.txt
+/schema-without-timestamps.txt
+/schema.txt
+/schema2.txt
diff --git a/searchlib/src/tests/searchcommon/schema/CMakeLists.txt b/searchlib/src/tests/searchcommon/schema/CMakeLists.txt
new file mode 100644
index 00000000000..2304c319dea
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/schema/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchcommon_schema_test_app TEST
+ SOURCES
+ schema_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchcommon_schema_test_app NO_VALGRIND COMMAND searchcommon_schema_test_app)
diff --git a/searchlib/src/tests/searchcommon/schema/imported-fields-cfg/attributes.cfg b/searchlib/src/tests/searchcommon/schema/imported-fields-cfg/attributes.cfg
new file mode 100644
index 00000000000..9a08f7e2324
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/schema/imported-fields-cfg/attributes.cfg
@@ -0,0 +1,12 @@
+attribute[3]
+attribute[0].name imported_a
+attribute[0].imported true
+attribute[0].datatype INT32
+attribute[0].collectiontype SINGLE
+attribute[1].name imported_b
+attribute[1].imported true
+attribute[1].datatype STRING
+attribute[1].collectiontype ARRAY
+attribute[2].name regular
+attribute[2].datatype INT32
+attribute[2].collectiontype SINGLE
diff --git a/searchlib/src/tests/searchcommon/schema/load-save-cfg/attributes.cfg b/searchlib/src/tests/searchcommon/schema/load-save-cfg/attributes.cfg
new file mode 100644
index 00000000000..09f711b6a65
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/schema/load-save-cfg/attributes.cfg
@@ -0,0 +1,22 @@
+attribute[9]
+attribute[0].name a
+attribute[0].datatype STRING
+attribute[0].collectiontype SINGLE
+attribute[1].name b
+attribute[1].datatype INT8
+attribute[1].collectiontype ARRAY
+attribute[2].name c
+attribute[2].datatype INT16
+attribute[2].collectiontype WEIGHTEDSET
+attribute[3].name d
+attribute[3].datatype INT32
+attribute[4].name e
+attribute[4].datatype INT64
+attribute[5].name f
+attribute[5].datatype FLOAT
+attribute[6].name g
+attribute[6].datatype DOUBLE
+attribute[7].name h
+attribute[7].datatype PREDICATE
+attribute[8].name i
+attribute[8].datatype TENSOR
diff --git a/searchlib/src/tests/searchcommon/schema/load-save-cfg/indexschema.cfg b/searchlib/src/tests/searchcommon/schema/load-save-cfg/indexschema.cfg
new file mode 100644
index 00000000000..b9d82b9b569
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/schema/load-save-cfg/indexschema.cfg
@@ -0,0 +1,13 @@
+indexfield[3]
+indexfield[0].name a
+indexfield[0].datatype STRING
+indexfield[1].name b
+indexfield[1].datatype INT64
+indexfield[2].name c
+indexfield[2].datatype STRING
+indexfield[2].interleavedfeatures true
+fieldset[1]
+fieldset[0].name default
+fieldset[0].field[2]
+fieldset[0].field[0].name a
+fieldset[0].field[1].name c
diff --git a/searchlib/src/tests/searchcommon/schema/load-save-cfg/summary.cfg b/searchlib/src/tests/searchcommon/schema/load-save-cfg/summary.cfg
new file mode 100644
index 00000000000..0c2de33d076
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/schema/load-save-cfg/summary.cfg
@@ -0,0 +1,29 @@
+defaultsummaryid 0
+classes[1]
+classes[0].id 0
+classes[0].name test
+classes[0].fields[12]
+classes[0].fields[0].name a
+classes[0].fields[0].type byte
+classes[0].fields[1].name b
+classes[0].fields[1].type short
+classes[0].fields[2].name c
+classes[0].fields[2].type integer
+classes[0].fields[3].name d
+classes[0].fields[3].type int64
+classes[0].fields[4].name e
+classes[0].fields[4].type float
+classes[0].fields[5].name f
+classes[0].fields[5].type double
+classes[0].fields[6].name g
+classes[0].fields[6].type string
+classes[0].fields[7].name h
+classes[0].fields[7].type longstring
+classes[0].fields[8].name i
+classes[0].fields[8].type xmlstring
+classes[0].fields[9].name j
+classes[0].fields[9].type jsonstring
+classes[0].fields[10].name k
+classes[0].fields[10].type data
+classes[0].fields[11].name l
+classes[0].fields[11].type longdata
diff --git a/searchlib/src/tests/searchcommon/schema/schema-without-index-field-properties.txt b/searchlib/src/tests/searchcommon/schema/schema-without-index-field-properties.txt
new file mode 100644
index 00000000000..4491b1242e0
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/schema/schema-without-index-field-properties.txt
@@ -0,0 +1,7 @@
+attributefield[0]
+summaryfield[0]
+fieldset[0]
+indexfield[1]
+indexfield[0].name foo
+indexfield[0].datatype STRING
+indexfield[0].collectiontype SINGLE
diff --git a/searchlib/src/tests/searchcommon/schema/schema_test.cpp b/searchlib/src/tests/searchcommon/schema/schema_test.cpp
new file mode 100644
index 00000000000..09a7359bac7
--- /dev/null
+++ b/searchlib/src/tests/searchcommon/schema/schema_test.cpp
@@ -0,0 +1,396 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/config/common/configparser.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchcommon/common/schemaconfigurer.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <fstream>
+
+#include <vespa/log/log.h>
+LOG_SETUP("schema_test");
+
+using vespalib::string;
+
+namespace search::index {
+
+using schema::DataType;
+using schema::CollectionType;
+using SIAF = Schema::ImportedAttributeField;
+using SIF = Schema::IndexField;
+
+/**
+ * Expects that the actual field agrees with the expected one on name,
+ * data type and collection type.
+ */
+void
+assertField(const Schema::Field& exp, const Schema::Field& act)
+{
+    EXPECT_EQ(exp.getName(), act.getName());
+    EXPECT_EQ(exp.getDataType(), act.getDataType());
+    EXPECT_EQ(exp.getCollectionType(), act.getCollectionType());
+}
+
+/**
+ * Expects that two index fields agree on the generic field properties
+ * (via assertField) and on the index-specific ones: average element length
+ * and the interleaved-features flag.
+ */
+void
+assertIndexField(const Schema::IndexField& exp, const Schema::IndexField& act)
+{
+    assertField(exp, act);
+    EXPECT_EQ(exp.getAvgElemLen(), act.getAvgElemLen());
+    EXPECT_EQ(exp.use_interleaved_features(), act.use_interleaved_features());
+}
+
+/**
+ * Expects that two field sets have the same name and the same member
+ * fields in the same order. Returns early if the member counts differ.
+ */
+void
+assertSet(const Schema::FieldSet& exp, const Schema::FieldSet& act)
+{
+    EXPECT_EQ(exp.getName(), act.getName());
+    // Sizes must match before comparing element by element.
+    ASSERT_EQ(exp.getFields().size(), act.getFields().size());
+    const size_t fieldCount = exp.getFields().size();
+    for (size_t idx = 0; idx < fieldCount; ++idx) {
+        EXPECT_EQ(exp.getFields()[idx], act.getFields()[idx]);
+    }
+}
+
+/**
+ * Expects that two schemas contain the same index, attribute, summary and
+ * imported attribute fields and the same field sets, compared position by
+ * position. Returns at the first category whose element count differs.
+ */
+void
+assertSchema(const Schema& exp, const Schema& act)
+{
+    // Index fields.
+    ASSERT_EQ(exp.getNumIndexFields(), act.getNumIndexFields());
+    for (size_t idx = 0; idx < exp.getNumIndexFields(); ++idx) {
+        assertIndexField(exp.getIndexField(idx), act.getIndexField(idx));
+    }
+
+    // Attribute fields.
+    ASSERT_EQ(exp.getNumAttributeFields(), act.getNumAttributeFields());
+    for (size_t idx = 0; idx < exp.getNumAttributeFields(); ++idx) {
+        assertField(exp.getAttributeField(idx), act.getAttributeField(idx));
+    }
+
+    // Summary fields.
+    ASSERT_EQ(exp.getNumSummaryFields(), act.getNumSummaryFields());
+    for (size_t idx = 0; idx < exp.getNumSummaryFields(); ++idx) {
+        assertField(exp.getSummaryField(idx), act.getSummaryField(idx));
+    }
+
+    // Field sets.
+    ASSERT_EQ(exp.getNumFieldSets(), act.getNumFieldSets());
+    for (size_t idx = 0; idx < exp.getNumFieldSets(); ++idx) {
+        assertSet(exp.getFieldSet(idx), act.getFieldSet(idx));
+    }
+
+    // Imported attribute fields.
+    const auto &importedExp = exp.getImportedAttributeFields();
+    const auto &importedAct = act.getImportedAttributeFields();
+    ASSERT_EQ(importedExp.size(), importedAct.size());
+    for (size_t idx = 0; idx < importedExp.size(); ++idx) {
+        assertField(importedExp[idx], importedAct[idx]);
+    }
+}
+
+// Builds a schema by hand and verifies that every kind of field (index,
+// attribute, summary, field set, imported attribute) can be read back by
+// position and, where a lookup exists, by name.
+TEST(SchemaTest, test_basic)
+{
+    Schema s;
+    // A fresh schema is empty.
+    EXPECT_EQ(0u, s.getNumIndexFields());
+    EXPECT_EQ(0u, s.getNumAttributeFields());
+    EXPECT_EQ(0u, s.getNumSummaryFields());
+    EXPECT_EQ(0u, s.getNumImportedAttributeFields());
+
+    s.addIndexField(Schema::IndexField("foo", DataType::STRING));
+    s.addIndexField(Schema::IndexField("bar", DataType::INT32));
+
+    s.addAttributeField(Schema::AttributeField("foo", DataType::STRING, CollectionType::ARRAY));
+    s.addAttributeField(Schema::AttributeField("bar", DataType::INT32, CollectionType::WEIGHTEDSET));
+    s.addAttributeField(Schema::AttributeField("cox", DataType::STRING));
+
+    s.addSummaryField(Schema::SummaryField("foo", DataType::STRING, CollectionType::ARRAY));
+    s.addSummaryField(Schema::SummaryField("bar", DataType::INT32, CollectionType::WEIGHTEDSET));
+    s.addSummaryField(Schema::SummaryField("cox", DataType::STRING));
+    s.addSummaryField(Schema::SummaryField("fox", DataType::RAW));
+
+    s.addFieldSet(Schema::FieldSet("default").addField("foo").addField("bar"));
+
+    s.addImportedAttributeField(SIAF("imported", DataType::INT32));
+
+    ASSERT_EQ(2u, s.getNumIndexFields());
+    {
+        EXPECT_EQ("foo", s.getIndexField(0).getName());
+        EXPECT_EQ(DataType::STRING, s.getIndexField(0).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getIndexField(0).getCollectionType());
+
+        EXPECT_EQ("bar", s.getIndexField(1).getName());
+        EXPECT_EQ(DataType::INT32, s.getIndexField(1).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getIndexField(1).getCollectionType());
+
+        EXPECT_EQ(0u, s.getIndexFieldId("foo"));
+        EXPECT_EQ(1u, s.getIndexFieldId("bar"));
+        EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("cox"));
+    }
+    ASSERT_EQ(3u, s.getNumAttributeFields());
+    {
+        EXPECT_EQ("foo", s.getAttributeField(0).getName());
+        EXPECT_EQ(DataType::STRING, s.getAttributeField(0).getDataType());
+        EXPECT_EQ(CollectionType::ARRAY, s.getAttributeField(0).getCollectionType());
+
+        EXPECT_EQ("bar", s.getAttributeField(1).getName());
+        EXPECT_EQ(DataType::INT32, s.getAttributeField(1).getDataType());
+        EXPECT_EQ(CollectionType::WEIGHTEDSET, s.getAttributeField(1).getCollectionType());
+
+        EXPECT_EQ("cox", s.getAttributeField(2).getName());
+        EXPECT_EQ(DataType::STRING, s.getAttributeField(2).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getAttributeField(2).getCollectionType());
+
+        EXPECT_EQ(0u, s.getAttributeFieldId("foo"));
+        EXPECT_EQ(1u, s.getAttributeFieldId("bar"));
+        EXPECT_EQ(2u, s.getAttributeFieldId("cox"));
+        // "fox" was only added as a summary field, so the attribute lookup
+        // (not the index lookup) must report it as unknown.
+        EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getAttributeFieldId("fox"));
+    }
+    ASSERT_EQ(4u, s.getNumSummaryFields());
+    {
+        EXPECT_EQ("foo", s.getSummaryField(0).getName());
+        EXPECT_EQ(DataType::STRING, s.getSummaryField(0).getDataType());
+        EXPECT_EQ(CollectionType::ARRAY, s.getSummaryField(0).getCollectionType());
+
+        EXPECT_EQ("bar", s.getSummaryField(1).getName());
+        EXPECT_EQ(DataType::INT32, s.getSummaryField(1).getDataType());
+        EXPECT_EQ(CollectionType::WEIGHTEDSET, s.getSummaryField(1).getCollectionType());
+
+        EXPECT_EQ("cox", s.getSummaryField(2).getName());
+        EXPECT_EQ(DataType::STRING, s.getSummaryField(2).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getSummaryField(2).getCollectionType());
+
+        EXPECT_EQ("fox", s.getSummaryField(3).getName());
+        EXPECT_EQ(DataType::RAW, s.getSummaryField(3).getDataType());
+        EXPECT_EQ(CollectionType::SINGLE, s.getSummaryField(3).getCollectionType());
+
+        EXPECT_EQ(0u, s.getSummaryFieldId("foo"));
+        EXPECT_EQ(1u, s.getSummaryFieldId("bar"));
+        EXPECT_EQ(2u, s.getSummaryFieldId("cox"));
+        EXPECT_EQ(3u, s.getSummaryFieldId("fox"));
+        EXPECT_EQ(Schema::UNKNOWN_FIELD_ID, s.getSummaryFieldId("not"));
+    }
+    ASSERT_EQ(1u, s.getNumFieldSets());
+    {
+        EXPECT_EQ("default", s.getFieldSet(0).getName());
+        EXPECT_EQ(2u, s.getFieldSet(0).getFields().size());
+        EXPECT_EQ("foo", s.getFieldSet(0).getFields()[0]);
+        EXPECT_EQ("bar", s.getFieldSet(0).getFields()[1]);
+    }
+    EXPECT_EQ(1u, s.getNumImportedAttributeFields());
+    {
+        const auto &imported = s.getImportedAttributeFields();
+        EXPECT_EQ(1u, imported.size());
+        assertField(SIAF("imported", DataType::INT32, CollectionType::SINGLE), imported[0]);
+    }
+}
+
+// Round-trips a schema: builds it from the config files in load-save-cfg,
+// saves it to a file and loads it back. Also covers saving/loading an empty
+// schema and the failure results for unreadable/unwritable paths.
+TEST(SchemaTest, test_load_and_save)
+{
+ using SAF = Schema::AttributeField;
+ using SSF = Schema::SummaryField;
+ using SDT = schema::DataType;
+ using SCT = schema::CollectionType;
+ using SFS = Schema::FieldSet;
+
+ { // load from config -> save to file -> load from file
+ Schema s;
+ SchemaConfigurer configurer(s, "dir:load-save-cfg");
+ EXPECT_EQ(3u, s.getNumIndexFields());
+ assertIndexField(SIF("a", SDT::STRING), s.getIndexField(0));
+ assertIndexField(SIF("b", SDT::INT64), s.getIndexField(1));
+ assertIndexField(SIF("c", SDT::STRING).set_interleaved_features(true), s.getIndexField(2));
+
+ EXPECT_EQ(9u, s.getNumAttributeFields());
+ assertField(SAF("a", SDT::STRING, SCT::SINGLE),
+ s.getAttributeField(0));
+ assertField(SAF("b", SDT::INT8, SCT::ARRAY), s.getAttributeField(1));
+ assertField(SAF("c", SDT::INT16, SCT::WEIGHTEDSET),
+ s.getAttributeField(2));
+ assertField(SAF("d", SDT::INT32), s.getAttributeField(3));
+ assertField(SAF("e", SDT::INT64), s.getAttributeField(4));
+ assertField(SAF("f", SDT::FLOAT), s.getAttributeField(5));
+ assertField(SAF("g", SDT::DOUBLE), s.getAttributeField(6));
+ assertField(SAF("h", SDT::BOOLEANTREE), s.getAttributeField(7));
+ assertField(SAF("i", SDT::TENSOR), s.getAttributeField(8));
+
+ EXPECT_EQ(12u, s.getNumSummaryFields());
+ assertField(SSF("a", SDT::INT8), s.getSummaryField(0));
+ assertField(SSF("b", SDT::INT16), s.getSummaryField(1));
+ assertField(SSF("c", SDT::INT32), s.getSummaryField(2));
+ assertField(SSF("d", SDT::INT64), s.getSummaryField(3));
+ assertField(SSF("e", SDT::FLOAT), s.getSummaryField(4));
+ assertField(SSF("f", SDT::DOUBLE), s.getSummaryField(5));
+ assertField(SSF("g", SDT::STRING), s.getSummaryField(6));
+ assertField(SSF("h", SDT::STRING), s.getSummaryField(7));
+ assertField(SSF("i", SDT::STRING), s.getSummaryField(8));
+ assertField(SSF("j", SDT::STRING), s.getSummaryField(9));
+ assertField(SSF("k", SDT::RAW), s.getSummaryField(10));
+ assertField(SSF("l", SDT::RAW), s.getSummaryField(11));
+
+ EXPECT_EQ(1u, s.getNumFieldSets());
+ assertSet(SFS("default").addField("a").addField("c"),
+ s.getFieldSet(0));
+
+ Schema s2 = s;
+ EXPECT_TRUE(s.saveToFile("schema.txt"));
+ assertSchema(s, s2); // test copy constructor
+ Schema s3;
+ EXPECT_TRUE(s3.loadFromFile("schema.txt"));
+ assertSchema(s, s3); // test that saved file is loaded correctly
+ s3.addIndexField(SIF("foo", SDT::STRING));
+ s3.addImportedAttributeField(SIAF("imported", DataType::INT32));
+ EXPECT_TRUE(s3.loadFromFile("schema.txt")); // load should clear the current content
+ assertSchema(s, s3);
+ }
+ { // empty schema
+ Schema s;
+ EXPECT_TRUE(s.saveToFile("schema2.txt"));
+ Schema s2;
+ s2.addIndexField(SIF("foo", SDT::STRING));
+ s2.addImportedAttributeField(SIAF("imported", DataType::INT32));
+ EXPECT_TRUE(s2.loadFromFile("schema2.txt"));
+ assertSchema(s, s2);
+ }
+ { // load with error
+ Schema s;
+ EXPECT_TRUE(!s.loadFromFile("not.txt"));
+ EXPECT_TRUE(!s.saveToFile("not/not.txt"));
+ }
+}
+
+/**
+ * Adds one STRING index field, one STRING attribute field, one STRING
+ * summary field and one (empty) field set to the schema, all named `name`.
+ */
+void
+addAllFieldTypes(const string& name, Schema& schema)
+{
+    Schema::IndexField indexEntry(name, DataType::STRING);
+    Schema::AttributeField attributeEntry(name, DataType::STRING);
+    Schema::SummaryField summaryEntry(name, DataType::STRING);
+
+    schema.addIndexField(indexEntry);
+    schema.addAttributeField(attributeEntry);
+    schema.addSummaryField(summaryEntry);
+    schema.addFieldSet(Schema::FieldSet(name));
+}
+
+// The union of two schemas with disjoint field names must contain the fields
+// and field sets of both, each retrievable by its original name.
+TEST(SchemaTest, require_that_schemas_can_be_added)
+{
+ const string name1 = "foo";
+ const string name2 = "bar";
+ Schema s1;
+ addAllFieldTypes(name1, s1);
+ Schema s2;
+ addAllFieldTypes(name2, s2);
+
+ Schema::UP sum = Schema::make_union(s1, s2);
+ ASSERT_EQ(2u, sum->getNumIndexFields());
+ EXPECT_TRUE(s1.getIndexField(0) ==
+ sum->getIndexField(sum->getIndexFieldId(name1)));
+ EXPECT_TRUE(s2.getIndexField(0) ==
+ sum->getIndexField(sum->getIndexFieldId(name2)));
+ ASSERT_EQ(2u, sum->getNumAttributeFields());
+ EXPECT_TRUE(s1.getAttributeField(0) ==
+ sum->getAttributeField(sum->getAttributeFieldId(name1)));
+ EXPECT_TRUE(s2.getAttributeField(0) ==
+ sum->getAttributeField(sum->getAttributeFieldId(name2)));
+ ASSERT_EQ(2u, sum->getNumSummaryFields());
+ EXPECT_TRUE(s1.getSummaryField(0) ==
+ sum->getSummaryField(sum->getSummaryFieldId(name1)));
+ EXPECT_TRUE(s2.getSummaryField(0) ==
+ sum->getSummaryField(sum->getSummaryFieldId(name2)));
+ ASSERT_EQ(2u, sum->getNumFieldSets());
+ EXPECT_TRUE(s1.getFieldSet(0) ==
+ sum->getFieldSet(sum->getFieldSetId(name1)));
+ EXPECT_TRUE(s2.getFieldSet(0) ==
+ sum->getFieldSet(sum->getFieldSetId(name2)));
+}
+
+// The union of a schema with itself must compare equal to that schema.
+TEST(SchemaTest, require_that_S_union_S_equals_S_for_schema_S)
+{
+ Schema schema;
+ addAllFieldTypes("foo", schema);
+
+ Schema::UP sum = Schema::make_union(schema, schema);
+ EXPECT_TRUE(schema == *sum);
+}
+
+// set_difference(s1, s2) must keep only the entries of s1 that are not in s2:
+// here {foo, bar} minus {bar} is expected to equal {foo}.
+TEST(SchemaTest, require_that_schema_can_calculate_set_difference)
+{
+ const string name1 = "foo";
+ const string name2 = "bar";
+ Schema s1;
+ addAllFieldTypes(name1, s1);
+ addAllFieldTypes(name2, s1);
+ Schema s2;
+ addAllFieldTypes(name2, s2);
+
+ Schema::UP schema = Schema::set_difference(s1, s2);
+
+ Schema expected;
+ addAllFieldTypes(name1, expected);
+ EXPECT_TRUE(expected == *schema);
+}
+
+// intersect(s1, s2) must keep only the entries present in both schemas:
+// here {foo, bar} intersected with {bar, baz} is expected to equal {bar}.
+TEST(SchemaTest, require_that_schema_can_calculate_intersection)
+{
+ const string name1 = "foo";
+ const string name2 = "bar";
+ const string name3 = "baz";
+ Schema s1;
+ addAllFieldTypes(name1, s1);
+ addAllFieldTypes(name2, s1);
+ Schema s2;
+ addAllFieldTypes(name2, s2);
+ addAllFieldTypes(name3, s2);
+
+ Schema::UP schema = Schema::intersect(s1, s2);
+
+ Schema expected;
+ addAllFieldTypes(name2, expected);
+ EXPECT_TRUE(expected == *schema);
+}
+
+// Index fields that share a name but differ in data type (STRING vs INT32)
+// must not appear in the intersection.
+TEST(SchemaTest, require_that_incompatible_fields_are_removed_from_intersection)
+{
+ const string name = "foo";
+ Schema s1;
+ s1.addIndexField(Schema::IndexField(name, DataType::STRING));
+ Schema s2;
+ s2.addIndexField(Schema::IndexField(name, DataType::INT32));
+ Schema::UP schema = Schema::intersect(s1, s2);
+ EXPECT_EQ(0u, schema->getNumIndexFields());
+ EXPECT_FALSE(schema->isIndexField(name));
+}
+
+// Imported attribute fields live only in memory: a schema holding one is
+// saved to file, and the schema loaded back from that file must contain none.
+TEST(SchemaTest, require_that_imported_attribute_fields_are_not_saved_to_disk)
+{
+    const vespalib::string fileName = "schema-no-imported-fields.txt";
+    {
+        Schema s;
+        s.addImportedAttributeField(Schema::ImportedAttributeField("imported", DataType::INT32));
+        // A failed save would otherwise go unnoticed and let the check below
+        // pass for the wrong reason.
+        ASSERT_TRUE(s.saveToFile(fileName));
+    }
+    {
+        Schema s;
+        // Likewise, the zero count below must come from a schema that was
+        // actually loaded, not from a failed load.
+        ASSERT_TRUE(s.loadFromFile(fileName));
+        EXPECT_EQ(0u, s.getNumImportedAttributeFields());
+    }
+}
+
+// A schema configured from imported-fields-cfg must keep the two attributes
+// marked as imported apart from the single regular attribute.
+TEST(SchemaTest, require_that_schema_can_be_built_with_imported_attribute_fields)
+{
+    Schema s;
+    SchemaConfigurer configurer(s, "dir:imported-fields-cfg");
+
+    const auto &imported = s.getImportedAttributeFields();
+    ASSERT_EQ(2u, imported.size());
+    assertField(SIAF("imported_a", DataType::INT32, CollectionType::SINGLE), imported[0]);
+    assertField(SIAF("imported_b", DataType::STRING, CollectionType::ARRAY), imported[1]);
+
+    const auto &regular = s.getAttributeFields();
+    ASSERT_EQ(1u, regular.size());
+    // The expected value for a regular attribute is built with the regular
+    // attribute field type rather than the imported-field alias.
+    assertField(Schema::AttributeField("regular", DataType::INT32, CollectionType::SINGLE), regular[0]);
+}
+
+// An index field loaded from a file that sets only name, data type and
+// collection type must equal both an index field with avgElemLen 512 and
+// interleaved features off, and a plainly constructed index field.
+TEST(SchemaTest, require_that_index_field_is_loaded_with_default_values_when_properties_are_not_set)
+{
+    Schema s;
+    // Stop here if the fixture file cannot be loaded, so the failure points
+    // at the load rather than at the size check below.
+    ASSERT_TRUE(s.loadFromFile("schema-without-index-field-properties.txt"));
+
+    const auto& index_fields = s.getIndexFields();
+    // Unsigned literal, matching the unsigned size() and the rest of the file.
+    ASSERT_EQ(1u, index_fields.size());
+    assertIndexField(SIF("foo", DataType::STRING, CollectionType::SINGLE).
+                         setAvgElemLen(512).
+                         set_interleaved_features(false),
+                     index_fields[0]);
+    assertIndexField(SIF("foo", DataType::STRING, CollectionType::SINGLE), index_fields[0]);
+}
+
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchcommon/.gitignore b/searchlib/src/vespa/searchcommon/.gitignore
new file mode 100644
index 00000000000..f76a9d84bed
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/.gitignore
@@ -0,0 +1,3 @@
+/.depend
+/Makefile
+/libsearchcommon.so.5.1
diff --git a/searchlib/src/vespa/searchcommon/attribute/.gitignore b/searchlib/src/vespa/searchcommon/attribute/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/searchlib/src/vespa/searchcommon/attribute/CMakeLists.txt b/searchlib/src/vespa/searchcommon/attribute/CMakeLists.txt
new file mode 100644
index 00000000000..704fe238ed5
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchcommon_searchcommon_attribute OBJECT
+ SOURCES
+ attribute_utils.cpp
+ basictype.cpp
+ collectiontype.cpp
+ config.cpp
+ search_context_params.cpp
+ status.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchcommon/attribute/attribute_utils.cpp b/searchlib/src/vespa/searchcommon/attribute/attribute_utils.cpp
new file mode 100644
index 00000000000..cd5cc58c75c
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/attribute_utils.cpp
@@ -0,0 +1,23 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "attribute_utils.h"
+#include <vespa/searchcommon/attribute/config.h>
+
+namespace search::attribute {
+
+bool
+isUpdateableInMemoryOnly(const vespalib::string &attrName, const Config &cfg)
+{
+    // Predicate and reference attributes are never updateable in-memory only.
+    switch (cfg.basicType().type()) {
+    case BasicType::Type::PREDICATE:
+    case BasicType::Type::REFERENCE:
+        return false;
+    default:
+        break;
+    }
+    // Any other type qualifies unless the name denotes a struct field attribute.
+    return !isStructFieldAttribute(attrName);
+}
+
+bool
+isStructFieldAttribute(const vespalib::string &attrName)
+{
+    // A '.' anywhere in the name identifies a struct field attribute.
+    const auto dotPos = attrName.find('.');
+    return dotPos != vespalib::string::npos;
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/attribute_utils.h b/searchlib/src/vespa/searchcommon/attribute/attribute_utils.h
new file mode 100644
index 00000000000..e4c2a8e4727
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/attribute_utils.h
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search::attribute {
+
+class Config;
+
+/**
+ * Returns whether the given attribute vector is updateable only in-memory.
+ *
+ * For most attributes this is true.
+ * The data stored in the attribute is equal to the data stored in the field value in the document.
+ *
+ * For predicate and reference attributes this is false.
+ * The original data is transformed (lossy) before it is stored in the attribute.
+ * During update we also need to update the field value in the document.
+ *
+ * For struct field attributes this is false.
+ * A struct field attribute typically represents a sub-field of a more complex field (e.g. map of struct or array of struct).
+ * During update the complex field is first updated in the document,
+ * then the struct field attribute is updated based on the new content of the complex field.
+ *
+ * @param attrName name of the attribute; decides whether it is a struct field attribute
+ * @param cfg      config of the attribute; only its basic type is inspected
+ */
+bool isUpdateableInMemoryOnly(const vespalib::string &attrName, const Config &cfg);
+
+bool isStructFieldAttribute(const vespalib::string &attrName); // true when attrName contains a '.'
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/attributecontent.h b/searchlib/src/vespa/searchcommon/attribute/attributecontent.h
new file mode 100644
index 00000000000..f5960ce358b
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/attributecontent.h
@@ -0,0 +1,166 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributevector.h"
+#include <cstdint>
+
+namespace search::attribute {
+
+
+/**
+ * Holds the values an attribute vector stores for a single document.
+ *
+ * The values are copied into an array owned by this object. Up to 16
+ * elements fit in a buffer that is part of the object itself; larger results
+ * use a heap allocated array that is released by the destructor.
+ * Instances cannot be copied.
+ *
+ * @param T the type of the data stored in this object
+ **/
+template <typename T>
+class AttributeContent
+{
+private:
+    // Number of elements in the built-in buffer; also the initial capacity.
+    static constexpr uint32_t STATIC_CAPACITY = 16;
+
+    T        _staticBuf[STATIC_CAPACITY];
+    T *      _dynamicBuf;  // nullptr while the built-in buffer is in use
+    uint32_t _size;
+    uint32_t _capacity;
+
+public:
+    /**
+     * Creates a new object with an initial capacity of 16 without dynamic allocation.
+     **/
+    AttributeContent()
+        : _dynamicBuf(nullptr),
+          _size(0),
+          _capacity(STATIC_CAPACITY)
+    {
+    }
+
+    // Copying is not supported: the object owns a raw heap array.
+    AttributeContent(const AttributeContent & rhs) = delete;
+    AttributeContent & operator=(const AttributeContent & rhs) = delete;
+
+    /**
+     * Releases the heap allocated array, if any.
+     **/
+    ~AttributeContent() {
+        delete [] _dynamicBuf;  // deleting nullptr is a no-op
+    }
+
+    /**
+     * Returns a read-only iterator to the beginning of the underlying data array.
+     *
+     * @return iterator
+     **/
+    const T * begin() const {
+        return (_dynamicBuf != nullptr) ? _dynamicBuf : _staticBuf;
+    }
+
+    /**
+     * Returns a read-only iterator to the end of the underlying data array,
+     * i.e. size() elements past begin().
+     *
+     * @return iterator
+     **/
+    const T * end() const {
+        return begin() + _size;
+    }
+
+    /**
+     * Returns the element at the given position in the underlying data array.
+     * The position is not range checked.
+     *
+     * @return read-only reference to the element
+     * @param idx position into the underlying data
+     **/
+    const T & operator[](uint32_t idx) const {
+        return *(begin() + idx);
+    }
+
+    /**
+     * Returns the number of elements used in the underlying data array.
+     *
+     * @return number of elements used
+     **/
+    uint32_t size() const {
+        return _size;
+    }
+
+    /**
+     * Returns the number of elements allocated in the underlying data array.
+     *
+     * @return number of elements allocated
+     **/
+    uint32_t capacity() const {
+        return _capacity;
+    }
+
+    /**
+     * Returns a read/write pointer to the underlying data array.
+     *
+     * @return read/write pointer.
+     **/
+    T * data() {
+        return (_dynamicBuf != nullptr) ? _dynamicBuf : _staticBuf;
+    }
+
+    /**
+     * Sets the number of elements used in the underlying data array.
+     * The value is stored as is; it is not checked against capacity().
+     *
+     * @param n number of elements used
+     **/
+    void setSize(uint32_t n) {
+        _size = n;
+    }
+
+    /**
+     * Makes sure the underlying data array can hold the given number of
+     * elements. A new data array is only allocated if n > capacity(); in
+     * that case the previous content is discarded and the size is set to 0.
+     * Otherwise nothing changes.
+     *
+     * If the allocation throws, the object falls back to the built-in
+     * buffer (capacity 16, size 0) and stays safe to use and destroy.
+     *
+     * @param n wanted number of elements
+     **/
+    void allocate(uint32_t n) {
+        if (n > _capacity) {
+            // Drop the old array and reset to the built-in buffer before
+            // allocating, so a throwing new[] cannot leave a dangling pointer
+            // behind for the destructor to delete a second time.
+            delete [] _dynamicBuf;
+            _dynamicBuf = nullptr;
+            _capacity = STATIC_CAPACITY;
+            _size = 0;
+            _dynamicBuf = new T[n];
+            _capacity = n;
+        }
+    }
+
+    /**
+     * Fill this buffer with the content of the given attribute vector for the given docId.
+     * When the count reported by the attribute exceeds the current capacity,
+     * the buffer is grown to that count and the read is repeated.
+     *
+     * @param attribute the attribute vector
+     * @param docId the docId
+     **/
+    void fill(const IAttributeVector & attribute, IAttributeVector::DocId docId)
+    {
+        uint32_t count = attribute.get(docId, data(), capacity());
+        while (count > capacity()) {
+            allocate(count);
+            count = attribute.get(docId, data(), capacity());
+        }
+        setSize(count);
+    }
+};
+
+// Convenience aliases for the element types commonly read from attribute vectors.
+using FloatContent             = AttributeContent<double>;
+using ConstCharContent         = AttributeContent<const char *>;
+using IntegerContent           = AttributeContent<IAttributeVector::largeint_t>;
+using EnumContent              = AttributeContent<IAttributeVector::EnumHandle>;
+using WeightedIntegerContent   = AttributeContent<IAttributeVector::WeightedInt>;
+using WeightedFloatContent     = AttributeContent<IAttributeVector::WeightedFloat>;
+using WeightedConstCharContent = AttributeContent<IAttributeVector::WeightedConstChar>;
+using WeightedStringContent    = AttributeContent<IAttributeVector::WeightedString>;
+using WeightedEnumContent      = AttributeContent<IAttributeVector::WeightedEnum>;
+using EnumHandle               = IAttributeVector::EnumHandle;
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.cpp b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp
new file mode 100644
index 00000000000..5bab2fc06d2
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/basictype.cpp
@@ -0,0 +1,37 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/attribute/basictype.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+namespace search::attribute {
+
+const BasicType::TypeInfo BasicType::_typeTable[BasicType::MAX_TYPE] = { // one row per type: { type, fixed size, name }
+ { BasicType::NONE, 0, "none" },
+ { BasicType::STRING, 0, "string" },
+ { BasicType::BOOL, sizeof(int8_t), "bool" },
+ { BasicType::UINT2, sizeof(int8_t), "uint2" },
+ { BasicType::UINT4, sizeof(int8_t), "uint4" },
+ { BasicType::INT8, sizeof(int8_t), "int8" },
+ { BasicType::INT16, sizeof(int16_t), "int16" },
+ { BasicType::INT32, sizeof(int32_t), "int32" },
+ { BasicType::INT64, sizeof(int64_t), "int64" },
+ { BasicType::FLOAT, sizeof(float), "float" },
+ { BasicType::DOUBLE, sizeof(double), "double" },
+ { BasicType::PREDICATE, 0, "predicate" },
+ { BasicType::TENSOR, 0, "tensor" },
+ { BasicType::REFERENCE, 12, "reference" } // NOTE(review): 12 is a bare literal, not a sizeof expression - confirm what it measures
+};
+
+/**
+ * Maps a type name to its enum value by searching the type table.
+ *
+ * @param t name of the type, compared exactly against the table names
+ * @return the matching type
+ * @throws vespalib::IllegalStateException if no table entry has that name
+ */
+BasicType::Type
+BasicType::asType(const vespalib::string &t)
+{
+    for (const TypeInfo &info : _typeTable) {
+        if (t == info._name) {
+            return info._type;
+        }
+    }
+    throw vespalib::IllegalStateException(t + " not recognized as valid attribute data type");
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.h b/searchlib/src/vespa/searchcommon/attribute/basictype.h
new file mode 100644
index 00000000000..bd7b4a2b4bc
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/basictype.h
@@ -0,0 +1,63 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search::attribute {
+
+class BasicType // value type wrapping the Type enum, with name and fixed-size lookup
+{
+ public:
+ enum Type {
+ NONE = 0,
+ STRING = 1,
+ BOOL = 2,
+ UINT2 = 3,
+ UINT4 = 4,
+ INT8 = 5,
+ INT16 = 6,
+ INT32 = 7,
+ INT64 = 8,
+ FLOAT = 9,
+ DOUBLE = 10,
+ PREDICATE = 11,
+ TENSOR = 12,
+ REFERENCE = 13,
+ MAX_TYPE // one past the last type; dimension of _typeTable
+ };
+
+ explicit BasicType(int t) : _type(Type(t)) { } // t is cast to Type without a range check
+ explicit BasicType(unsigned int t) : _type(Type(t)) { } // t is cast to Type without a range check
+ BasicType(Type t) : _type(t) { } // implicit on purpose: lets a Type be used where a BasicType is expected
+ explicit BasicType(const vespalib::string & t) : _type(asType(t)) { } // resolves the name through asType()
+
+ Type type() const { return _type; }
+ const char * asString() const { return asString(_type); } // name stored in _typeTable for this type
+ size_t fixedSize() const { return fixedSize(_type); } // fixed size stored in _typeTable for this type
+ static BasicType fromType(bool) { return BOOL; } // the fromType overloads pick the BasicType from the C++ argument type; the value is ignored
+ static BasicType fromType(int8_t) { return INT8; }
+ static BasicType fromType(int16_t) { return INT16; }
+ static BasicType fromType(int32_t) { return INT32; }
+ static BasicType fromType(int64_t) { return INT64; }
+ static BasicType fromType(float) { return FLOAT; }
+ static BasicType fromType(double) { return DOUBLE; }
+ bool operator==(const BasicType &b) const { return _type == b._type; }
+ bool operator!=(const BasicType &b) const { return _type != b._type; }
+
+ private:
+ static const char * asString(Type t) { return _typeTable[t]._name; } // indexes the table directly with t; no bounds check
+ static size_t fixedSize(Type t) { return _typeTable[t]._fixedSize; } // indexes the table directly with t; no bounds check
+ static Type asType(const vespalib::string & t);
+
+ Type _type;
+
+ struct TypeInfo { // one row of _typeTable
+ Type _type;
+ unsigned int _fixedSize;
+ const char * _name;
+ };
+ static const TypeInfo _typeTable[MAX_TYPE];
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/collectiontype.cpp b/searchlib/src/vespa/searchcommon/attribute/collectiontype.cpp
new file mode 100644
index 00000000000..b77382f6126
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/collectiontype.cpp
@@ -0,0 +1,26 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/attribute/collectiontype.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+namespace search::attribute {
+
+const CollectionType::TypeInfo CollectionType::_typeTable[CollectionType::MAX_TYPE] = { // one row per type: { type, name }
+ { CollectionType::SINGLE, "single" },
+ { CollectionType::ARRAY, "array" },
+ { CollectionType::WSET, "weightedset" }
+};
+
+/**
+ * Maps a collection type name to its enum value by searching the type table.
+ *
+ * @param t name of the collection type, compared exactly against the table names
+ * @return the matching collection type
+ * @throws vespalib::IllegalStateException if no table entry has that name
+ */
+CollectionType::Type
+CollectionType::asType(const vespalib::string &t)
+{
+    for (const TypeInfo &info : _typeTable) {
+        if (t == info._name) {
+            return info._type;
+        }
+    }
+    throw vespalib::IllegalStateException(t + " not recognized as valid attribute collection type");
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/collectiontype.h b/searchlib/src/vespa/searchcommon/attribute/collectiontype.h
new file mode 100644
index 00000000000..35cb7612ed0
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/collectiontype.h
@@ -0,0 +1,75 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search::attribute {
+
+class CollectionType // collection kind of an attribute plus the removeIfZero / createIfNonExistant flags
+{
+ public:
+ enum Type {
+ /**
+ * Single value type with one value stored for each document.
+ **/
+ SINGLE = 0,
+ /**
+ * Array type with zero to n values stored for each document.
+ **/
+ ARRAY = 1,
+ /**
+ * Weighted set type with zero to n unique values stored for each document.
+ * In addition each unique value is associated with a weight.
+ **/
+ WSET = 2,
+ MAX_TYPE // one past the last type; dimension of _typeTable
+ };
+
+ CollectionType(Type t = SINGLE, bool remove = false, bool create = false) : // implicit on purpose: a bare Type converts to a CollectionType
+ _type(t),
+ _removeIfZero(remove),
+ _createIfNonExistant(create)
+ {
+ }
+
+ explicit
+ CollectionType(const vespalib::string & t, bool remove = false, bool create = false) : // resolves the name through asType()
+ _type(asType(t)),
+ _removeIfZero(remove),
+ _createIfNonExistant(create)
+ {
+ }
+
+ Type type() const { return _type; }
+ bool isMultiValue() const { return _type != SINGLE; } // true for both ARRAY and WSET
+ bool isWeightedSet() const { return _type == WSET; }
+ bool isArray() const { return _type == ARRAY; }
+ bool removeIfZero() const { return _removeIfZero; }
+ bool createIfNonExistant() const { return _createIfNonExistant; }
+ const char * asString() const { return asString(_type); } // name stored in _typeTable for this type
+ void removeIfZero(bool newValue) { _removeIfZero = newValue; }
+ void createIfNonExistant(bool newValue) { _createIfNonExistant = newValue; }
+ bool operator!=(const CollectionType &b) const { return !(operator==(b)); }
+ bool operator==(const CollectionType &b) const { // equal only when the type and both flags match
+ return _type == b._type &&
+ _removeIfZero == b._removeIfZero &&
+ _createIfNonExistant == b._createIfNonExistant;
+ }
+
+ private:
+ struct TypeInfo { // one row of _typeTable
+ Type _type;
+ const char * _name;
+ };
+
+ static const char * asString(Type t) { return _typeTable[t]._name; } // indexes the table directly with t; no bounds check
+ static Type asType(const vespalib::string &t);
+
+ Type _type;
+ bool _removeIfZero;
+ bool _createIfNonExistant;
+ static const TypeInfo _typeTable[MAX_TYPE];
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/config.cpp b/searchlib/src/vespa/searchcommon/attribute/config.cpp
new file mode 100644
index 00000000000..0a50faa04c0
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/config.cpp
@@ -0,0 +1,72 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "config.h"
+
+namespace search::attribute {
+
+namespace {
+
+// Initial value of Config::_maxUnCommittedMemory set by the constructors.
+// The anonymous namespace already gives internal linkage, so no 'static'.
+constexpr uint64_t MAX_UNCOMMITTED_MEMORY = 8000;
+
+}
+
+Config::Config() noexcept
+ : Config(BasicType::NONE, CollectionType::SINGLE, false, false) // delegates: no basic type, single value, fastSearch and huge both off
+{
+}
+
+Config::Config(BasicType bt, CollectionType ct, bool fastSearch_, bool huge_) noexcept
+ : _basicType(bt),
+ _type(ct),
+ _fastSearch(fastSearch_),
+ _huge(huge_),
+ _enableBitVectors(false), // from here on every member gets a fixed initial value, independent of the arguments
+ _enableOnlyBitVector(false),
+ _isFilter(false),
+ _fastAccess(false),
+ _mutable(false),
+ _paged(false),
+ _maxUnCommittedMemory(MAX_UNCOMMITTED_MEMORY),
+ _match(Match::UNCASED),
+ _dictionary(),
+ _growStrategy(),
+ _compactionStrategy(),
+ _predicateParams(),
+ _tensorType(vespalib::eval::ValueType::error_type()), // starts out as the error type
+ _distance_metric(DistanceMetric::Euclidean),
+ _hnsw_index_params() // empty optional: no hnsw index params set
+{
+}
+
+Config::Config(const Config &) = default; // the copy/move operations and destructor are defaulted here, out of line, not in the class body
+Config & Config::operator = (const Config &) = default;
+Config::Config(Config &&) noexcept = default;
+Config & Config::operator = (Config &&) noexcept = default;
+Config::~Config() = default;
+
+/**
+ * Member-wise equality. The tensor type only takes part in the comparison
+ * when this config's basic type is TENSOR.
+ */
+bool
+Config::operator==(const Config &b) const
+{
+    // Basic and collection type.
+    if (!(_basicType == b._basicType) || !(_type == b._type)) {
+        return false;
+    }
+    // Boolean switches.
+    if (_huge != b._huge ||
+        _fastSearch != b._fastSearch ||
+        _enableBitVectors != b._enableBitVectors ||
+        _enableOnlyBitVector != b._enableOnlyBitVector ||
+        _isFilter != b._isFilter ||
+        _fastAccess != b._fastAccess ||
+        _mutable != b._mutable ||
+        _paged != b._paged)
+    {
+        return false;
+    }
+    // Scalar settings.
+    if (_maxUnCommittedMemory != b._maxUnCommittedMemory || _match != b._match) {
+        return false;
+    }
+    // Composite settings; compared with == only, as in the chained form.
+    if (!(_dictionary == b._dictionary) ||
+        !(_growStrategy == b._growStrategy) ||
+        !(_compactionStrategy == b._compactionStrategy) ||
+        !(_predicateParams == b._predicateParams))
+    {
+        return false;
+    }
+    // The tensor type is only relevant for tensor attributes.
+    if (_basicType.type() == BasicType::Type::TENSOR && !(_tensorType == b._tensorType)) {
+        return false;
+    }
+    if (_distance_metric != b._distance_metric) {
+        return false;
+    }
+    return _hnsw_index_params == b._hnsw_index_params;
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/config.h b/searchlib/src/vespa/searchcommon/attribute/config.h
new file mode 100644
index 00000000000..f572f5038fc
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/config.h
@@ -0,0 +1,158 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "basictype.h"
+#include "collectiontype.h"
+#include "hnsw_index_params.h"
+#include "predicate_params.h"
+#include <vespa/searchcommon/common/growstrategy.h>
+#include <vespa/searchcommon/common/dictionary_config.h>
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/vespalib/datastore/compaction_strategy.h>
+#include <cassert>
+#include <optional>
+
+namespace search::attribute {
+
+/**
+ * Configuration for an attribute vector.
+ *
+ * Used to determine which implementation to instantiate.
+ *
+ * All setters return a reference to this object, so calls can be chained.
+ */
+class Config {
+public:
+ enum class Match { CASED, UNCASED };
+ using CompactionStrategy = vespalib::datastore::CompactionStrategy;
+ Config() noexcept;
+ Config(BasicType bt) noexcept : Config(bt, CollectionType::SINGLE) { } // not explicit: a BasicType converts implicitly to a Config
+ Config(BasicType bt, CollectionType ct) noexcept : Config(bt, ct, false) { } // fastSearch off
+ Config(BasicType bt, CollectionType ct, bool fastSearch_) noexcept
+ : Config(bt, ct, fastSearch_, false) // huge off
+ {}
+ Config(BasicType bt, CollectionType ct, bool fastSearch_, bool huge_) noexcept;
+ Config(const Config &);
+ Config & operator = (const Config &);
+ Config(Config &&) noexcept;
+ Config & operator = (Config &&) noexcept;
+ ~Config();
+
+ BasicType basicType() const { return _basicType; }
+ CollectionType collectionType() const { return _type; }
+ bool fastSearch() const { return _fastSearch; }
+ bool huge() const { return _huge; }
+ bool paged() const { return _paged; }
+ const PredicateParams &predicateParams() const { return _predicateParams; }
+ const vespalib::eval::ValueType & tensorType() const { return _tensorType; }
+ DistanceMetric distance_metric() const { return _distance_metric; }
+ const std::optional<HnswIndexParams>& hnsw_index_params() const { return _hnsw_index_params; } // empty when no params are set
+
+ /**
+ * Check if attribute posting list can consist of a bitvector in
+ * addition to (or instead of) a btree.
+ */
+ bool getEnableBitVectors() const { return _enableBitVectors; }
+
+ /**
+ * Check if attribute posting list can consist of only a bitvector with
+ * no corresponding btree.
+ */
+ bool getEnableOnlyBitVector() const { return _enableOnlyBitVector; }
+
+ bool getIsFilter() const { return _isFilter; }
+ bool isMutable() const { return _mutable; }
+
+ /**
+ * Check if this attribute should be fast accessible at all times.
+ * If so, attribute is kept in memory also for non-searchable documents.
+ */
+ bool fastAccess() const { return _fastAccess; }
+
+ const GrowStrategy & getGrowStrategy() const { return _growStrategy; }
+ const CompactionStrategy &getCompactionStrategy() const { return _compactionStrategy; }
+ const DictionaryConfig & get_dictionary_config() const { return _dictionary; }
+ Match get_match() const { return _match; }
+ Config & setHuge(bool v) { _huge = v; return *this;}
+ Config & setFastSearch(bool v) { _fastSearch = v; return *this; }
+ Config & setPredicateParams(const PredicateParams &v) { _predicateParams = v; return *this; }
+ Config & setTensorType(const vespalib::eval::ValueType &tensorType_in) {
+ _tensorType = tensorType_in;
+ return *this;
+ }
+ Config& set_distance_metric(DistanceMetric value) {
+ _distance_metric = value;
+ return *this;
+ }
+ Config& set_hnsw_index_params(const HnswIndexParams& params) {
+ assert(_distance_metric == params.distance_metric()); // params must carry the metric already stored in this config
+ _hnsw_index_params = params;
+ return *this;
+ }
+ Config& clear_hnsw_index_params() {
+ _hnsw_index_params.reset();
+ return *this;
+ }
+
+ /**
+ * Enable attribute posting list to consist of a bitvector in
+ * addition to (or instead of) a btree.
+ */
+ Config & setEnableBitVectors(bool enableBitVectors) {
+ _enableBitVectors = enableBitVectors;
+ return *this;
+ }
+
+ /**
+ * Enable attribute posting list to consist of only a bitvector with
+ * no corresponding btree. Some information degradation might occur when
+ * document frequency goes down, since recreated btree representation
+ * will then have lost weight information.
+ */
+ Config & setEnableOnlyBitVector(bool enableOnlyBitVector) {
+ _enableOnlyBitVector = enableOnlyBitVector;
+ return *this;
+ }
+
+ /**
+ * Hide weight information when searching in attributes.
+ */
+ Config & setIsFilter(bool isFilter) { _isFilter = isFilter; return *this; }
+ Config & setMutable(bool isMutable) { _mutable = isMutable; return *this; }
+ Config & setPaged(bool paged_in) { _paged = paged_in; return *this; }
+ Config & setFastAccess(bool v) { _fastAccess = v; return *this; }
+ Config & setGrowStrategy(const GrowStrategy &gs) { _growStrategy = gs; return *this; }
+ Config & setCompactionStrategy(const CompactionStrategy &compactionStrategy) {
+ _compactionStrategy = compactionStrategy;
+ return *this;
+ }
+ Config & set_dictionary_config(const DictionaryConfig & cfg) { _dictionary = cfg; return *this; }
+ Config & set_match(Match match) { _match = match; return *this; }
+ bool operator!=(const Config &b) const { return !(operator==(b)); }
+ bool operator==(const Config &b) const;
+
+ uint64_t getMaxUnCommittedMemory() const { return _maxUnCommittedMemory; }
+ Config & setMaxUnCommittedMemory(uint64_t value) { _maxUnCommittedMemory = value; return *this; }
+
+private:
+ BasicType _basicType;
+ CollectionType _type;
+ bool _fastSearch;
+ bool _huge;
+ bool _enableBitVectors;
+ bool _enableOnlyBitVector;
+ bool _isFilter;
+ bool _fastAccess;
+ bool _mutable;
+ bool _paged;
+ uint64_t _maxUnCommittedMemory;
+ Match _match;
+ DictionaryConfig _dictionary;
+ GrowStrategy _growStrategy;
+ CompactionStrategy _compactionStrategy;
+ PredicateParams _predicateParams;
+ vespalib::eval::ValueType _tensorType;
+ DistanceMetric _distance_metric;
+ std::optional<HnswIndexParams> _hnsw_index_params; // empty until set_hnsw_index_params() is called
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h
new file mode 100644
index 00000000000..26efa30bba4
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h
@@ -0,0 +1,9 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::attribute {
+
+enum class DistanceMetric { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming }; // the distance metrics an attribute config can name
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/hnsw_index_params.h b/searchlib/src/vespa/searchcommon/attribute/hnsw_index_params.h
new file mode 100644
index 00000000000..4f9d3c5593c
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/hnsw_index_params.h
@@ -0,0 +1,45 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "distance_metric.h"
+#include <cstdint>
+
+namespace search::attribute {
+
+/**
+ * Configuration parameters for a hnsw index used together with a 1-dimensional indexed tensor
+ * for approximate nearest neighbor search.
+ *
+ * Instances are immutable after construction and compare member by member.
+ */
+class HnswIndexParams {
+public:
+    HnswIndexParams(uint32_t max_links_per_node_in,
+                    uint32_t neighbors_to_explore_at_insert_in,
+                    DistanceMetric distance_metric_in,
+                    bool multi_threaded_indexing_in = false) noexcept
+        : _max_links_per_node(max_links_per_node_in),
+          _neighbors_to_explore_at_insert(neighbors_to_explore_at_insert_in),
+          _distance_metric(distance_metric_in),
+          _multi_threaded_indexing(multi_threaded_indexing_in)
+    {}
+
+    uint32_t max_links_per_node() const { return _max_links_per_node; }
+    uint32_t neighbors_to_explore_at_insert() const { return _neighbors_to_explore_at_insert; }
+    DistanceMetric distance_metric() const { return _distance_metric; }
+    bool multi_threaded_indexing() const { return _multi_threaded_indexing; }
+
+    // Equal when all four parameters are equal.
+    bool operator==(const HnswIndexParams& rhs) const {
+        if (_max_links_per_node != rhs._max_links_per_node) {
+            return false;
+        }
+        if (_neighbors_to_explore_at_insert != rhs._neighbors_to_explore_at_insert) {
+            return false;
+        }
+        if (_distance_metric != rhs._distance_metric) {
+            return false;
+        }
+        return _multi_threaded_indexing == rhs._multi_threaded_indexing;
+    }
+
+private:
+    uint32_t _max_links_per_node;
+    uint32_t _neighbors_to_explore_at_insert;
+    // This is always the same as in the attribute config, and is duplicated here to simplify usage.
+    DistanceMetric _distance_metric;
+    bool _multi_threaded_indexing;
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/i_attribute_functor.h b/searchlib/src/vespa/searchcommon/attribute/i_attribute_functor.h
new file mode 100644
index 00000000000..da5127de8ee
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/i_attribute_functor.h
@@ -0,0 +1,37 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <memory>
+
+namespace search::attribute {
+
+class IAttributeVector;
+
+/*
+ * Interface class for accessing an attribute in the correct attribute write
+ * thread, as an async callback from an asyncForEachAttribute() call on the
+ * attribute manager.
+ *
+ * This variant receives the attribute vector as a const reference.
+ */
+class IConstAttributeFunctor
+{
+public:
+ virtual void operator()(const IAttributeVector &attributeVector) = 0;
+ virtual ~IConstAttributeFunctor() = default;
+};
+
+class IAttributeFunctor // functor interface receiving the attribute vector as a non-const reference
+{
+public:
+ virtual void operator()(IAttributeVector &attributeVector) = 0;
+ virtual ~IAttributeFunctor() = default;
+};
+
+class IAttributeExecutor { // interface taking ownership of a functor together with an attribute name
+public:
+ virtual ~IAttributeExecutor() = default;
+ virtual void asyncForAttribute(const vespalib::string &name, std::unique_ptr<IAttributeFunctor> func) const = 0; // NOTE(review): presumably runs func asynchronously on the attribute called name - confirm against implementations
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/i_multi_value_attribute.h b/searchlib/src/vespa/searchcommon/attribute/i_multi_value_attribute.h
new file mode 100644
index 00000000000..ea1fbe0b2b4
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/i_multi_value_attribute.h
@@ -0,0 +1,55 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "i_multi_value_read_view.h"
+
+namespace vespalib { class Stash; }
+
+namespace search::attribute {
+
+/**
+ * Interface that provides read views for different multi-value attribute types.
+ *
+ * The type-safe down-cast functions only return a valid pointer when that particular type is supported.
+ * Otherwise a nullptr is returned.
+ * The returned read view is owned by the supplied stash.
+ *
+ * Every make_read_view() overload defaults to returning nullptr, so an
+ * implementation only overrides the overloads for the types it supports.
+ * The tag argument carries no data; it only selects the overload.
+ */
+class IMultiValueAttribute {
+public:
+    template<typename MultiValueType>
+    class MultiValueTag {};
+
+    template<typename T>
+    using ArrayTag = MultiValueTag<T>;
+
+    using ArrayEnumTag = ArrayTag<vespalib::datastore::AtomicEntryRef>;
+
+    template<typename T>
+    using WeightedSetTag = MultiValueTag<search::multivalue::WeightedValue<T>>;
+
+    using WeightedSetEnumTag = WeightedSetTag<vespalib::datastore::AtomicEntryRef>;
+
+    // Defaulted, matching the other interface classes in this directory.
+    virtual ~IMultiValueAttribute() = default;
+
+    // Array read views.
+    virtual const IArrayReadView<int8_t>* make_read_view(ArrayTag<int8_t>, vespalib::Stash&) const { return nullptr; }
+    virtual const IArrayReadView<int16_t>* make_read_view(ArrayTag<int16_t>, vespalib::Stash&) const { return nullptr; }
+    virtual const IArrayReadView<int32_t>* make_read_view(ArrayTag<int32_t>, vespalib::Stash&) const { return nullptr; }
+    virtual const IArrayReadView<int64_t>* make_read_view(ArrayTag<int64_t>, vespalib::Stash&) const { return nullptr; }
+    virtual const IArrayReadView<float>* make_read_view(ArrayTag<float>, vespalib::Stash&) const { return nullptr; }
+    virtual const IArrayReadView<double>* make_read_view(ArrayTag<double>, vespalib::Stash&) const { return nullptr; }
+    virtual const IArrayReadView<const char*>* make_read_view(ArrayTag<const char*>, vespalib::Stash&) const { return nullptr; }
+
+    // Weighted set read views.
+    virtual const IWeightedSetReadView<int8_t>* make_read_view(WeightedSetTag<int8_t>, vespalib::Stash&) const { return nullptr; }
+    virtual const IWeightedSetReadView<int16_t>* make_read_view(WeightedSetTag<int16_t>, vespalib::Stash&) const { return nullptr; }
+    virtual const IWeightedSetReadView<int32_t>* make_read_view(WeightedSetTag<int32_t>, vespalib::Stash&) const { return nullptr; }
+    virtual const IWeightedSetReadView<int64_t>* make_read_view(WeightedSetTag<int64_t>, vespalib::Stash&) const { return nullptr; }
+    virtual const IWeightedSetReadView<float>* make_read_view(WeightedSetTag<float>, vespalib::Stash&) const { return nullptr; }
+    virtual const IWeightedSetReadView<double>* make_read_view(WeightedSetTag<double>, vespalib::Stash&) const { return nullptr; }
+    virtual const IWeightedSetReadView<const char*>* make_read_view(WeightedSetTag<const char*>, vespalib::Stash&) const { return nullptr; }
+
+    // Enumerated read views.
+    virtual const IArrayEnumReadView* make_read_view(ArrayEnumTag, vespalib::Stash&) const { return nullptr; }
+    virtual const IWeightedSetEnumReadView* make_read_view(WeightedSetEnumTag, vespalib::Stash&) const { return nullptr; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/i_multi_value_read_view.h b/searchlib/src/vespa/searchcommon/attribute/i_multi_value_read_view.h
new file mode 100644
index 00000000000..8e5005eae8d
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/i_multi_value_read_view.h
@@ -0,0 +1,46 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multivalue.h"
+#include <vespa/vespalib/datastore/atomic_entry_ref.h>
+#include <vespa/vespalib/util/arrayref.h>
+
+namespace search::attribute {
+
+/**
+ * Read view for the data stored in a multi-value attribute.
+ * @tparam MultiValueType The multi-value type of the data to access.
+ */
+template <typename MultiValueType>
+class IMultiValueReadView {
+public:
+    // Defaulted, matching the other interface classes in this directory.
+    virtual ~IMultiValueReadView() = default;
+    // Returns the values stored for the given document id.
+    virtual vespalib::ConstArrayRef<MultiValueType> get_values(uint32_t docid) const = 0;
+};
+
+/**
+ * Read view for the raw data stored in an array attribute.
+ * @tparam T The value type of the raw data to access.
+ */
+template <typename T>
+using IArrayReadView = IMultiValueReadView<T>;
+
+/**
+ * Read view for the raw data stored in a weighted set attribute.
+ * @tparam T The value type of the raw data to access.
+ */
+template <typename T>
+using IWeightedSetReadView = IMultiValueReadView<multivalue::WeightedValue<T>>;
+
+/**
+ * Read view for the raw data stored in an enumerated array attribute.
+ */
+using IArrayEnumReadView = IArrayReadView<vespalib::datastore::AtomicEntryRef>;
+
+/**
+ * Read view for the raw data stored in an enumerated weighted set attribute.
+ */
+using IWeightedSetEnumReadView = IWeightedSetReadView<vespalib::datastore::AtomicEntryRef>;
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/i_search_context.h b/searchlib/src/vespa/searchcommon/attribute/i_search_context.h
new file mode 100644
index 00000000000..ff62c535e7f
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/i_search_context.h
@@ -0,0 +1,74 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/common/range.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <memory>
+
+namespace search::fef { class TermFieldMatchData; }
+namespace search::queryeval {
+ class SearchIterator;
+ class ExecuteInfo;
+}
+namespace search { class QueryTermUCS4; }
+
+namespace search::attribute {
+
+/**
+ * Interface for a search context created for a single query term against
+ * an attribute.
+ **/
+class ISearchContext {
+public:
+    using UP = std::unique_ptr<ISearchContext>;
+    using DocId = uint32_t;
+
+private:
+    // Implementation hooks behind the public find() overloads below.
+    virtual int32_t onFind(DocId docId, int32_t elementId, int32_t &weight) const = 0;
+    virtual int32_t onFind(DocId docId, int32_t elementId) const = 0;
+
+public:
+    virtual ~ISearchContext() = default;
+
+    virtual unsigned int approximateHits() const = 0;
+
+    /**
+     * Creates an attribute search iterator associated with this
+     * search context.
+     *
+     * @param matchData the attribute match data used when
+     *                  unpacking data for a hit
+     * @param strict    whether the iterator should be strict or not
+     * @return attribute search iterator
+     **/
+    virtual std::unique_ptr<queryeval::SearchIterator>
+    createIterator(fef::TermFieldMatchData *matchData, bool strict) = 0;
+
+    /**
+     * Create temporary posting lists.
+     * Should be called before createIterator() is called.
+     */
+    virtual void fetchPostings(const queryeval::ExecuteInfo &execInfo) = 0;
+
+    virtual bool valid() const = 0;
+    virtual Int64Range getAsIntegerTerm() const = 0;
+    virtual const QueryTermUCS4 * queryTerm() const = 0;
+    virtual const vespalib::string &attributeName() const = 0;
+
+    /**
+     * Forwards to onFind(). matches() treats a negative return value as
+     * "no (further) match" and resumes the scan at the returned id + 1.
+     */
+    int32_t find(DocId docId, int32_t elementId, int32_t &weight) const {
+        return onFind(docId, elementId, weight);
+    }
+    int32_t find(DocId docId, int32_t elementId) const {
+        return onFind(docId, elementId);
+    }
+
+    /**
+     * Walks all matching elements of the document through sc.find() and
+     * accumulates their weights.
+     *
+     * @param sc     search context providing find(docId, elementId, weight)
+     * @param docId  document identifier
+     * @param weight out: sum of the weights reported for the matching elements,
+     *               0 when nothing matched
+     * @return true if at least one element matched
+     */
+    template<typename SC>
+    static bool matches(const SC & sc, DocId docId, int32_t &weight) {
+        weight = 0;
+        int32_t elementWeight = 0;
+        const int32_t firstMatch = sc.find(docId, 0, elementWeight);
+        int32_t current = firstMatch;
+        while (current >= 0) {
+            weight += elementWeight;
+            current = sc.find(docId, current + 1, elementWeight);
+        }
+        return firstMatch >= 0;
+    }
+    bool matches(DocId docId, int32_t &weight) const {
+        return matches(*this, docId, weight);
+    }
+    bool matches(DocId doc) const {
+        const int32_t firstMatch = find(doc, 0);
+        return firstMatch >= 0;
+    }
+
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/iattributecontext.h b/searchlib/src/vespa/searchcommon/attribute/iattributecontext.h
new file mode 100644
index 00000000000..bb349057ca9
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/iattributecontext.h
@@ -0,0 +1,54 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "i_attribute_functor.h"
+#include "iattributevector.h"
+
+namespace search::attribute {
+
+/**
+ * Interface giving access to all registered attribute vectors.
+ **/
+class IAttributeContext : public IAttributeExecutor {
+public:
+    using string = vespalib::string;
+    /** Convenience alias **/
+    using UP = std::unique_ptr<IAttributeContext>;
+
+    /**
+     * Looks up the attribute vector with the given name.
+     *
+     * @param name the name of the attribute vector.
+     * @return const view of the attribute vector or NULL if the attribute vector does not exist.
+     **/
+    virtual const IAttributeVector * getAttribute(const string & name) const = 0;
+
+    /**
+     * Looks up the attribute vector with the given name.
+     * Makes sure that the underlying enum values are stable during the use of this attribute.
+     *
+     * @param name the name of the attribute vector
+     * @return const view of the attribute vector or NULL if the attribute vector does not exist.
+     **/
+    virtual const IAttributeVector * getAttributeStableEnum(const string & name) const = 0;
+
+    /**
+     * Fills the given list with all registered attribute vectors.
+     *
+     * @param list the list to fill in attribute vectors.
+     **/
+    virtual void getAttributeList(std::vector<const IAttributeVector *> & list) const = 0;
+
+    /**
+     * Releases all cached attribute guards.
+     * The default implementation does nothing.
+     **/
+    virtual void releaseEnumGuards() { }
+
+    /**
+     * Virtual destructor to allow safe subclassing.
+     **/
+    virtual ~IAttributeContext() = default;
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
new file mode 100644
index 00000000000..fa91f301b92
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
@@ -0,0 +1,457 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "collectiontype.h"
+#include "basictype.h"
+#include <vespa/searchcommon/common/iblobconverter.h>
+#include <ostream>
+#include <vector>
+
+namespace search {
+ struct IDocumentWeightAttribute;
+ class QueryTermSimple;
+}
+
+namespace search::tensor {
+ class ITensorAttribute;
+}
+
+namespace search::attribute {
+
+class IMultiValueAttribute;
+class ISearchContext;
+class SearchContextParams;
+
+/**
+ * Holds a value together with a weight.
+ * It is used when getting content from a weighted set attribute vector.
+ *
+ * A default constructed object holds a value-initialized T and weight 1.
+ *
+ * @tparam T the type of the value stored in this object
+ **/
+template <typename T>
+class WeightedType
+{
+private:
+    T       _value;
+    int32_t _weight;
+
+public:
+    WeightedType() noexcept
+        : _value(),
+          _weight(1)
+    {
+    }
+    WeightedType(T value_, int32_t weight_ = 1) noexcept
+        : _value(value_),
+          _weight(weight_)
+    {
+    }
+
+    // Two spellings exist for each accessor; both return the same member.
+    const T & getValue() const { return _value; }
+    const T & value() const { return _value; }
+    void setValue(const T & v) { _value = v; }
+
+    int32_t getWeight() const { return _weight; }
+    int32_t weight() const { return _weight; }
+    void setWeight(int32_t w) { _weight = w; }
+
+    /** Equal only when both the value and the weight are equal. */
+    bool operator==(const WeightedType & rhs) const {
+        if (!(_value == rhs._value)) {
+            return false;
+        }
+        return _weight == rhs._weight;
+    }
+};
+
+/**
+ * Streams a WeightedType as "{value,weight}".
+ */
+template <typename T>
+std::ostream&
+operator<<(std::ostream& os, const WeightedType<T>& value)
+{
+    return os << "{" << value.value() << "," << value.weight() << "}";
+}
+
+/**
+ * This is a read interface used to access the content of an attribute vector.
+ **/
+class IAttributeVector
+{
+public:
+    using SP = std::shared_ptr<IAttributeVector>;
+    using DocId = uint32_t;
+    using EnumHandle = uint32_t;
+    using largeint_t = int64_t;
+    using WeightedFloat = WeightedType<double>;
+    using WeightedInt = WeightedType<largeint_t>;
+    using WeightedEnum = WeightedType<EnumHandle>;
+    using WeightedConstChar = WeightedType<const char *>;
+    using WeightedString = WeightedType<vespalib::string>;
+
+    /**
+     * Returns the name of this attribute vector.
+     *
+     * @return attribute name
+     **/
+    virtual const vespalib::string & getName() const = 0;
+
+    /**
+     * Returns the part of the attribute name that precedes the first '.',
+     * or the whole name if it contains no '.'.
+     *
+     * @return view into the string returned by getName()
+     **/
+    vespalib::stringref getNamePrefix() const {
+        vespalib::stringref name = getName();
+        return name.substr(0, name.find('.'));
+    }
+
+    /**
+     * Returns the number of documents stored in this attribute vector.
+     *
+     * @return number of documents
+     **/
+    virtual uint32_t getNumDocs() const = 0;
+
+    /**
+     * Returns the number of values stored for the given document.
+     *
+     * @param doc document identifier
+     * @return number of values
+     **/
+    virtual uint32_t getValueCount(uint32_t doc) const = 0;
+
+    /**
+     * Returns the maximum number of values stored for any document.
+     *
+     * @return maximum number of values
+     **/
+    virtual uint32_t getMaxValueCount() const = 0;
+
+    /**
+     * Returns the first value stored for the given document as an integer.
+     *
+     * @param doc document identifier
+     * @return the integer value
+     **/
+    virtual largeint_t getInt(DocId doc) const = 0;
+
+    /**
+     * Returns the first value stored for the given document as a floating point number.
+     *
+     * @param doc document identifier
+     * @return the floating point value
+     **/
+    virtual double getFloat(DocId doc) const = 0;
+
+    /**
+     * Returns the first value stored for the given document as a string.
+     * Uses the given buffer to store the actual string if no underlying
+     * string storage is used for this attribute vector.
+     *
+     * @param doc document identifier
+     * @param buffer content buffer to optionally store the string
+     * @param sz the size of the buffer
+     * @return the string value
+     **/
+    virtual const char * getString(DocId doc, char * buffer, size_t sz) const = 0;
+
+    /**
+     * Returns the first value stored for the given document as an enum value.
+     *
+     * @param doc document identifier
+     * @return the enum value
+     **/
+    virtual EnumHandle getEnum(DocId doc) const = 0;
+
+    /**
+     * Copies the values stored for the given document into the given buffer.
+     *
+     * @param docId document identifier
+     * @param buffer content buffer to copy integer values into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, largeint_t * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the values stored for the given document into the given buffer.
+     *
+     * @param docId document identifier
+     * @param buffer content buffer to copy floating point values into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, double * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the values stored for the given document into the given buffer.
+     *
+     * @param docId document identifier
+     * @param buffer content buffer to copy string values into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+//    virtual uint32_t get(DocId docId, vespalib::string * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the values stored for the given document into the given buffer.
+     *
+     * @param docId document identifier
+     * @param buffer content buffer to copy const char values into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, const char ** buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the enum values stored for the given document into the given buffer.
+     *
+     * @param docId document identifier
+     * @param buffer content object to copy enum into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, EnumHandle * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the values and weights stored for the given document into the given buffer.
+     * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+     *
+     * @param docId document identifier
+     * @param buffer content object to copy integer values and weights into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, WeightedInt * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the values and weights stored for the given document into the given buffer.
+     * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+     *
+     * @param docId document identifier
+     * @param buffer content object to copy floating point values and weights into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, WeightedFloat * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the values and weights stored for the given document into the given buffer.
+     * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+     *
+     * @param docId document identifier
+     * @param buffer content object to copy string values and weights into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, WeightedString * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the values and weights stored for the given document into the given buffer.
+     * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+     *
+     * @param docId document identifier
+     * @param buffer content object to copy const char values and weights into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, WeightedConstChar * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Copies the enum values and weights stored for the given document into the given buffer.
+     * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+     *
+     * @param docId document identifier
+     * @param buffer content object to copy enum values and weights into
+     * @param sz the size of the buffer
+     * @return the number of values for this document
+     **/
+    virtual uint32_t get(DocId docId, WeightedEnum * buffer, uint32_t sz) const = 0;
+
+    /**
+     * Finds the enum value for the given string value.
+     * This method will only have effect if @ref getBasicType() returns BasicType::STRING and
+     * @ref hasEnum() returns true.
+     *
+     * @param value the string value to lookup.
+     * @param e the handle in which to store the enum value.
+     * @return true if found.
+     **/
+    virtual bool findEnum(const char * value, EnumHandle & e) const = 0;
+
+    /**
+     * Finds all enum values matching the given string value.
+     * This method will only have effect if @ref getBasicType() returns BasicType::STRING and
+     * @ref hasEnum() returns true.
+     *
+     * @param value the string value to lookup.
+     * @return vector of EnumHandles, size 0 if no match found.
+     **/
+    virtual std::vector<EnumHandle> findFoldedEnums(const char * value) const = 0;
+
+    /**
+     * Given an enum handle, returns the string it refers to.
+     * This method will only have effect if @ref getBasicType() returns BasicType::STRING and
+     * @ref hasEnum() returns true.
+     *
+     * Effectively functions as the inverse of @ref findEnum(value, handle)
+     *
+     * @param e a valid enum handle
+     * @return enum string value, or nullptr if attribute type does
+     *         not support enum handle lookups.
+     */
+    virtual const char * getStringFromEnum(EnumHandle e) const = 0;
+
+    /**
+     * Creates a context for searching this attribute with the given term.
+     * The search context is used to create the actual search iterator.
+     *
+     * @param term the term to search for.
+     * @param params optional bitvector and diversity settings for the search.
+     * @return the search context.
+     **/
+    virtual std::unique_ptr<ISearchContext> createSearchContext(std::unique_ptr<QueryTermSimple> term,
+                                                                const SearchContextParams &params) const = 0;
+
+    /**
+     * Type-safe down-cast to an attribute supporting direct document weight iterators.
+     *
+     * @return document weight attribute or nullptr if not supported.
+     */
+    virtual const IDocumentWeightAttribute *asDocumentWeightAttribute() const = 0;
+
+    /**
+     * Type-safe down-cast to a tensor attribute.
+     *
+     * @return tensor attribute or nullptr if not supported.
+     */
+    virtual const tensor::ITensorAttribute *asTensorAttribute() const = 0;
+
+    /**
+     * Type-safe down-cast to a multi-value attribute.
+     *
+     * @return multi-value attribute or nullptr if not supported.
+     */
+    virtual const IMultiValueAttribute* as_multi_value_attribute() const = 0;
+
+    /**
+     * Returns the basic type of this attribute vector.
+     *
+     * @return basic type
+     **/
+    virtual BasicType::Type getBasicType() const = 0;
+
+    /**
+     * Returns the number of bytes a single value in this attribute occupies.
+     **/
+    virtual size_t getFixedWidth() const = 0;
+
+    /**
+     * Returns the collection type of this attribute vector.
+     *
+     * @return collection type
+     **/
+    virtual CollectionType::Type getCollectionType() const = 0;
+
+    /**
+     * Returns whether this is an integer attribute, i.e. the basic type is
+     * BOOL, UINT2, UINT4, INT8, INT16, INT32 or INT64.
+     **/
+    virtual bool isIntegerType() const {
+        BasicType::Type t = getBasicType();
+        return t == BasicType::BOOL ||
+               t == BasicType::UINT2 ||
+               t == BasicType::UINT4 ||
+               t == BasicType::INT8 ||
+               t == BasicType::INT16 ||
+               t == BasicType::INT32 ||
+               t == BasicType::INT64;
+    }
+
+    /**
+     * Returns whether this is a floating point attribute (FLOAT or DOUBLE).
+     **/
+    virtual bool isFloatingPointType() const {
+        BasicType::Type t = getBasicType();
+        return t == BasicType::FLOAT || t == BasicType::DOUBLE;
+    }
+
+    /**
+     * Returns whether this is a string attribute.
+     **/
+    virtual bool isStringType() const {
+        return getBasicType() == BasicType::STRING;
+    }
+
+    /**
+     * Returns whether this is a multi value attribute, i.e. the collection
+     * type is anything but CollectionType::SINGLE.
+     **/
+    virtual bool hasMultiValue() const {
+        return getCollectionType() != CollectionType::SINGLE;
+    }
+
+    /**
+     * Returns whether this is a weighted set attribute (CollectionType::WSET).
+     **/
+    virtual bool hasWeightedSetType() const {
+        return getCollectionType() == CollectionType::WSET;
+    }
+
+    /**
+     * Returns whether this attribute vector has underlying enum values.
+     *
+     * @return true if it has enum values.
+     **/
+    virtual bool hasEnum() const = 0;
+
+    /**
+     * Returns whether the attribute vector is a filter attribute.
+     *
+     * @return true if attribute vector is a filter attribute.
+     */
+    virtual bool getIsFilter() const = 0;
+
+    /**
+     * Returns whether the attribute vector is marked as fast search.
+     *
+     * @return true if attribute vector is marked as fast search.
+     */
+    virtual bool getIsFastSearch() const = 0;
+
+    /**
+     * Returns the committed docid limit for the attribute.
+     *
+     * @return committed docid limit for the attribute.
+     */
+    virtual uint32_t getCommittedDocIdLimit() const = 0;
+
+    /**
+     * Returns whether the current attribute vector is an imported attribute
+     * vector.
+     */
+    virtual bool isImported() const = 0;
+
+    /**
+     * Will serialize the values for the document id in ascending order. The serialized form can be used by memcmp and
+     * sort order will be preserved.
+     * Forwards to onSerializeForAscendingSort().
+     *
+     * @param doc The document id to serialize for.
+     * @param serTo The buffer to serialize into.
+     * @param available Number of bytes available in the serialization buffer.
+     * @param bc An optional converter to use.
+     * @return The number of bytes serialized, -1 if not enough space.
+     */
+    long serializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc=nullptr) const {
+        return onSerializeForAscendingSort(doc, serTo, available, bc);
+    }
+    /**
+     * Will serialize the values for the document id in descending order. The serialized form can be used by memcmp and
+     * sort order will be preserved.
+     * Forwards to onSerializeForDescendingSort().
+     *
+     * @param doc The document id to serialize for.
+     * @param serTo The buffer to serialize into.
+     * @param available Number of bytes available in the serialization buffer.
+     * @param bc An optional converter to use.
+     * @return The number of bytes serialized, -1 if not enough space.
+     */
+    long serializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc=nullptr) const {
+        return onSerializeForDescendingSort(doc, serTo, available, bc);
+    }
+
+    /**
+     * Virtual destructor to allow safe subclassing.
+     **/
+    virtual ~IAttributeVector() = default;
+
+    /**
+     * This method is used to simulate sparseness in the single value attributes.
+     * The default implementation reports every document as defined.
+     *
+     * @param doc The document id to verify if attribute has an undefined value for this document.
+     * @return true if value is undefined.
+     */
+    virtual bool isUndefined(DocId doc) const { (void) doc; return false; }
+
+private:
+    // Implementation hooks behind serializeForAscendingSort() / serializeForDescendingSort().
+    virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const = 0;
+    virtual long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const = 0;
+
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/multi_value_traits.h b/searchlib/src/vespa/searchcommon/attribute/multi_value_traits.h
new file mode 100644
index 00000000000..f03b031f991
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/multi_value_traits.h
@@ -0,0 +1,35 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <type_traits>
+
+namespace search::multivalue {
+
+template <typename T> class WeightedValue;
+
+/**
+ * Trait that is true exactly when the type is an instantiation of
+ * WeightedValue, i.e. when the multi-value type carries a weight.
+ */
+template <typename T>
+struct is_WeightedValue : std::false_type
+{
+};
+
+template <typename V>
+struct is_WeightedValue<WeightedValue<V>> : std::true_type
+{
+};
+
+/** Shorthand for is_WeightedValue<T>::value. */
+template <typename T>
+inline constexpr bool is_WeightedValue_v = is_WeightedValue<T>::value;
+
+/**
+ * Maps a multi-value type to its inner value type:
+ * WeightedValue<V> yields V, any other type yields itself.
+ */
+template <typename T>
+struct ValueType
+{
+    using type = T;
+};
+
+template <typename V>
+struct ValueType<WeightedValue<V>>
+{
+    using type = V;
+};
+
+/** Shorthand for typename ValueType<T>::type. */
+template <typename T>
+using ValueType_t = typename ValueType<T>::type;
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/multivalue.h b/searchlib/src/vespa/searchcommon/attribute/multivalue.h
new file mode 100644
index 00000000000..2ed8309188e
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/multivalue.h
@@ -0,0 +1,65 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+
+namespace search::multivalue {
+
+/**
+ * A value paired with a weight.
+ *
+ * The comparison operators look at the value only; the weight does not
+ * take part in ==, < or >. A default constructed object holds a
+ * value-initialized T and weight 1.
+ */
+template <typename T>
+class WeightedValue {
+public:
+    WeightedValue() noexcept
+        : _value(),
+          _weight(1)
+    {
+    }
+    WeightedValue(T v, int32_t w) noexcept
+        : _value(v),
+          _weight(w)
+    {
+    }
+
+    T value() const noexcept { return _value; }
+    const T& value_ref() const noexcept { return _value; }
+    T& value_ref() noexcept { return _value; }
+
+    // Implicit conversions to the wrapped value.
+    operator T () const noexcept { return _value; }
+    operator T & () noexcept { return _value; }
+
+    int32_t weight() const noexcept { return _weight; }
+
+    bool operator==(const WeightedValue & rhs) const { return _value == rhs._value; }
+    bool operator <(const WeightedValue & rhs) const { return _value < rhs._value; }
+    bool operator >(const WeightedValue & rhs) const { return _value > rhs._value; }
+
+private:
+    T       _value;
+    int32_t _weight;
+};
+
+/** Weight of a plain (unweighted) value is always 1. */
+template <typename T>
+inline int32_t get_weight(const T&) noexcept
+{
+    return 1;
+}
+
+/** Weight of a weighted value is the stored weight. */
+template <typename T>
+inline int32_t get_weight(const WeightedValue<T>& value) noexcept
+{
+    return value.weight();
+}
+
+/** Copy of a plain value. */
+template <typename T>
+inline T get_value(const T& value) noexcept
+{
+    return value;
+}
+
+/** Copy of the value wrapped by a weighted value. */
+template <typename T>
+inline T get_value(const WeightedValue<T>& value) noexcept
+{
+    return value.value();
+}
+
+/** Const reference to a plain value. */
+template <typename T>
+inline const T& get_value_ref(const T& value) noexcept
+{
+    return value;
+}
+
+/** Const reference to the value wrapped by a weighted value. */
+template <typename T>
+inline const T& get_value_ref(const WeightedValue<T>& value) noexcept
+{
+    return value.value_ref();
+}
+
+/** Mutable reference to a plain value. */
+template <typename T>
+inline T& get_value_ref(T& value) noexcept
+{
+    return value;
+}
+
+/** Mutable reference to the value wrapped by a weighted value. */
+template <typename T>
+inline T& get_value_ref(WeightedValue<T>& value) noexcept
+{
+    return value.value_ref();
+}
+
+/**
+ * Builds a multi-value element of type M from a value and a weight.
+ * For plain types the weight is dropped.
+ */
+template <typename M>
+struct ValueBuilder
+{
+    static M build(M value, int32_t) noexcept
+    {
+        return value;
+    }
+};
+
+/** For weighted types both the value and the weight are kept. */
+template <typename T>
+struct ValueBuilder<WeightedValue<T>>
+{
+    static WeightedValue<T> build(T value, int32_t weight) noexcept
+    {
+        return {value, weight};
+    }
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h b/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h
new file mode 100644
index 00000000000..d81eb9c5d3c
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h
@@ -0,0 +1,37 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+
+namespace search::attribute {
+
+/*
+ * Persistent parameters for predicate attributes: the arity and the
+ * lower/upper bounds.
+ *
+ * Defaults are arity 8 and bounds covering the full int64_t range.
+ */
+class PersistentPredicateParams {
+    uint32_t _arity;
+    int64_t  _lower_bound;
+    int64_t  _upper_bound;
+
+public:
+    PersistentPredicateParams()
+        : _arity{8},
+          _lower_bound{std::numeric_limits<int64_t>::min()},
+          _upper_bound{std::numeric_limits<int64_t>::max()}
+    {
+    }
+
+    uint32_t arity() const { return _arity; }
+    int64_t lower_bound() const { return _lower_bound; }
+    int64_t upper_bound() const { return _upper_bound; }
+
+    void setArity(uint32_t v) { _arity = v; }
+    void setBounds(int64_t lower, int64_t upper) {
+        _lower_bound = lower;
+        _upper_bound = upper;
+    }
+
+    // Equal when the arity and both bounds are equal.
+    bool operator==(const PersistentPredicateParams &rhs) const {
+        if (_arity != rhs._arity) {
+            return false;
+        }
+        if (_lower_bound != rhs._lower_bound) {
+            return false;
+        }
+        return _upper_bound == rhs._upper_bound;
+    }
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/predicate_params.h b/searchlib/src/vespa/searchcommon/attribute/predicate_params.h
new file mode 100644
index 00000000000..133b7331689
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/predicate_params.h
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "persistent_predicate_params.h"
+
+namespace search::attribute {
+
+/*
+ * Parameters for predicate attributes: the persistent parameters plus the
+ * dense posting list threshold (default 0.4).
+ */
+class PredicateParams : public PersistentPredicateParams
+{
+    double _dense_posting_list_threshold;
+public:
+    PredicateParams()
+        : PersistentPredicateParams(),
+          _dense_posting_list_threshold{0.4}
+    {
+    }
+
+    double dense_posting_list_threshold() const { return _dense_posting_list_threshold; }
+    void setDensePostingListThreshold(double v) { _dense_posting_list_threshold = v; }
+
+    // Equal when the persistent part and the threshold are both equal.
+    bool operator==(const PredicateParams &rhs) const {
+        if (!PersistentPredicateParams::operator==(rhs)) {
+            return false;
+        }
+        return _dense_posting_list_threshold == rhs._dense_posting_list_threshold;
+    }
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/search_context_params.cpp b/searchlib/src/vespa/searchcommon/attribute/search_context_params.cpp
new file mode 100644
index 00000000000..2e8aba6f5f8
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/search_context_params.cpp
@@ -0,0 +1,9 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "search_context_params.h"
+#include <cstdint>
+#include <limits>
+
+namespace search::attribute {
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/search_context_params.h b/searchlib/src/vespa/searchcommon/attribute/search_context_params.h
new file mode 100644
index 00000000000..168f4215ef6
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/search_context_params.h
@@ -0,0 +1,53 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstddef>
+#include <limits>
+#include <cstdint>
+
+namespace search::attribute {
+
+class IAttributeVector;
+
+/**
+ * Params used to specify diversity and bitvector settings when creating a search context.
+ *
+ * Defaults: no diversity attribute, the maximum uint32_t value as cutoff
+ * group count, bitvector use off and non-strict cutoff. Every setter
+ * returns *this so calls can be chained.
+ */
+class SearchContextParams {
+private:
+    const IAttributeVector * _diversityAttribute;
+    uint32_t                 _diversityCutoffGroups;
+    bool                     _useBitVector;
+    bool                     _diversityCutoffStrict;
+
+public:
+    SearchContextParams()
+        : _diversityAttribute{nullptr},
+          _diversityCutoffGroups{std::numeric_limits<uint32_t>::max()},
+          _useBitVector{false},
+          _diversityCutoffStrict{false}
+    {
+    }
+
+    // Getters.
+    bool useBitVector() const { return _useBitVector; }
+    const IAttributeVector * diversityAttribute() const { return _diversityAttribute; }
+    uint32_t diversityCutoffGroups() const { return _diversityCutoffGroups; }
+    bool diversityCutoffStrict() const { return _diversityCutoffStrict; }
+
+    // Chainable setters.
+    SearchContextParams &useBitVector(bool value) {
+        _useBitVector = value;
+        return *this;
+    }
+
+    SearchContextParams &diversityAttribute(const IAttributeVector *value) {
+        _diversityAttribute = value;
+        return *this;
+    }
+
+    SearchContextParams &diversityCutoffGroups(uint32_t groups) {
+        _diversityCutoffGroups = groups;
+        return *this;
+    }
+
+    SearchContextParams &diversityCutoffStrict(bool strict) {
+        _diversityCutoffStrict = strict;
+        return *this;
+    }
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/status.cpp b/searchlib/src/vespa/searchcommon/attribute/status.cpp
new file mode 100644
index 00000000000..a7d1f5b3d38
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/status.cpp
@@ -0,0 +1,86 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "status.h"
+#include <vespa/vespalib/util/atomic.h>
+
+using namespace vespalib::atomic;
+
+namespace search::attribute {
+
+// Default construction: every counter starts at zero.
+Status::Status()
+    : _numDocs(0),
+      _numValues(0),
+      _numUniqueValues(0),
+      _allocated(0),
+      _used(0),
+      _dead(0),
+      _unused(0),
+      _onHold(0),
+      _onHoldMax(0),
+      _lastSyncToken(0),
+      _updates(0),
+      _nonIdempotentUpdates(0),
+      _bitVectors(0)
+{
+}
+
+// Copy construction: the statistics counters are read from rhs with
+// load_relaxed(), the sync token through its getter, and the update and
+// bit vector counters are copied directly.
+Status::Status(const Status& rhs)
+    : _numDocs(load_relaxed(rhs._numDocs)),
+      _numValues(load_relaxed(rhs._numValues)),
+      _numUniqueValues(load_relaxed(rhs._numUniqueValues)),
+      _allocated(load_relaxed(rhs._allocated)),
+      _used(load_relaxed(rhs._used)),
+      _dead(load_relaxed(rhs._dead)),
+      _unused(load_relaxed(rhs._unused)),
+      _onHold(load_relaxed(rhs._onHold)),
+      _onHoldMax(load_relaxed(rhs._onHoldMax)),
+      _lastSyncToken(rhs.getLastSyncToken()),
+      _updates(rhs._updates),
+      _nonIdempotentUpdates(rhs._nonIdempotentUpdates),
+      _bitVectors(rhs._bitVectors)
+{
+}
+
+// Copy assignment: field-by-field copy in the same order as the copy
+// constructor initializes them.
+Status&
+Status::operator=(const Status& rhs)
+{
+    // Relaxed load from the source field, relaxed store into our own field.
+    auto copy_relaxed = [](auto &dst, const auto &src) {
+        store_relaxed(dst, load_relaxed(src));
+    };
+    copy_relaxed(_numDocs, rhs._numDocs);
+    copy_relaxed(_numValues, rhs._numValues);
+    copy_relaxed(_numUniqueValues, rhs._numUniqueValues);
+    copy_relaxed(_allocated, rhs._allocated);
+    copy_relaxed(_used, rhs._used);
+    copy_relaxed(_dead, rhs._dead);
+    copy_relaxed(_unused, rhs._unused);
+    copy_relaxed(_onHold, rhs._onHold);
+    copy_relaxed(_onHoldMax, rhs._onHoldMax);
+    setLastSyncToken(rhs.getLastSyncToken());
+    // The remaining counters are assigned directly.
+    _updates = rhs._updates;
+    _nonIdempotentUpdates = rhs._nonIdempotentUpdates;
+    _bitVectors = rhs._bitVectors;
+    return *this;
+}
+
+// Builds the name "<index>.attribute.<attr>".
+vespalib::string
+Status::createName(vespalib::stringref index, vespalib::stringref attr)
+{
+    vespalib::string fullName(index);
+    fullName += ".attribute.";
+    fullName += attr;
+    return fullName;
+}
+
+// Stores the given statistics with relaxed stores. The unused count is
+// derived as allocated - used (unsigned arithmetic), and the on-hold
+// maximum is raised to onHold when onHold exceeds the stored maximum.
+void
+Status::updateStatistics(uint64_t numValues, uint64_t numUniqueValue, uint64_t allocated,
+                         uint64_t used, uint64_t dead, uint64_t onHold)
+{
+    const uint64_t unused = allocated - used;
+    store_relaxed(_numValues, numValues);
+    store_relaxed(_numUniqueValues, numUniqueValue);
+    store_relaxed(_allocated, allocated);
+    store_relaxed(_used, used);
+    store_relaxed(_dead, dead);
+    store_relaxed(_unused, unused);
+    store_relaxed(_onHold, onHold);
+    const uint64_t previousHoldMax = load_relaxed(_onHoldMax);
+    store_relaxed(_onHoldMax, std::max(previousHoldMax, onHold));
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/attribute/status.h b/searchlib/src/vespa/searchcommon/attribute/status.h
new file mode 100644
index 00000000000..f2212d4c76a
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/status.h
@@ -0,0 +1,61 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <atomic>
+
+namespace search::attribute {
+
+// Collection of counters describing an attribute (document/value counts,
+// memory figures, sync token and update counters).
+// Most members are std::atomic<uint64_t> accessed with relaxed ordering;
+// _updates, _nonIdempotentUpdates and _bitVectors are plain integers.
+class Status
+{
+public:
+ Status();
+ Status(const Status& rhs);
+ Status& operator=(const Status& rhs);
+
+ // Stores the given figures; see the definition for the derived members.
+ void updateStatistics(uint64_t numValues, uint64_t numUniqueValue, uint64_t allocated,
+ uint64_t used, uint64_t dead, uint64_t onHold);
+
+ // Relaxed-load getters for the atomic counters (no getter exists for _unused).
+ uint64_t getNumDocs() const { return _numDocs.load(std::memory_order_relaxed); }
+ uint64_t getNumValues() const { return _numValues.load(std::memory_order_relaxed); }
+ uint64_t getNumUniqueValues() const { return _numUniqueValues.load(std::memory_order_relaxed); }
+ uint64_t getAllocated() const { return _allocated.load(std::memory_order_relaxed); }
+ uint64_t getUsed() const { return _used.load(std::memory_order_relaxed); }
+ uint64_t getDead() const { return _dead.load(std::memory_order_relaxed); }
+ uint64_t getOnHold() const { return _onHold.load(std::memory_order_relaxed); }
+ uint64_t getOnHoldMax() const { return _onHoldMax.load(std::memory_order_relaxed); }
+ // This might be accessed from other threads than the writer thread.
+ uint64_t getLastSyncToken() const { return _lastSyncToken.load(std::memory_order_relaxed); }
+ // Plain (non-atomic) reads.
+ uint64_t getUpdateCount() const { return _updates; }
+ uint64_t getNonIdempotentUpdateCount() const { return _nonIdempotentUpdates; }
+ uint32_t getBitVectors() const { return _bitVectors; }
+
+ void setNumDocs(uint64_t v) { _numDocs.store(v, std::memory_order_relaxed); }
+ // Separate relaxed load and store, i.e. not a single atomic read-modify-write.
+ // NOTE(review): presumably only one writer thread calls this - confirm.
+ void incNumDocs() { _numDocs.store(_numDocs.load(std::memory_order_relaxed) + 1u,
+ std::memory_order_relaxed); }
+ void setLastSyncToken(uint64_t v) { _lastSyncToken.store(v, std::memory_order_relaxed); }
+ void incUpdates(uint64_t v=1) { _updates += v; }
+ void incNonIdempotentUpdates(uint64_t v = 1) { _nonIdempotentUpdates += v; }
+ void incBitVectors() { ++_bitVectors; }
+ void decBitVectors() { --_bitVectors; }
+
+ // Returns "<index>.attribute.<attr>".
+ static vespalib::string
+ createName(vespalib::stringref index, vespalib::stringref attr);
+private:
+ std::atomic<uint64_t> _numDocs;
+ std::atomic<uint64_t> _numValues;
+ std::atomic<uint64_t> _numUniqueValues;
+ std::atomic<uint64_t> _allocated;
+ std::atomic<uint64_t> _used;
+ std::atomic<uint64_t> _dead;
+ std::atomic<uint64_t> _unused;     // set to allocated - used by updateStatistics()
+ std::atomic<uint64_t> _onHold;
+ std::atomic<uint64_t> _onHoldMax;  // running maximum of _onHold
+ std::atomic<uint64_t> _lastSyncToken;
+ uint64_t _updates;
+ uint64_t _nonIdempotentUpdates;
+ uint32_t _bitVectors;
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/.gitignore b/searchlib/src/vespa/searchcommon/common/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/searchlib/src/vespa/searchcommon/common/CMakeLists.txt b/searchlib/src/vespa/searchcommon/common/CMakeLists.txt
new file mode 100644
index 00000000000..6cc02ae7884
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchcommon_searchcommon_common OBJECT
+ SOURCES
+ datatype.cpp
+ dictionary_config.cpp
+ growstrategy.cpp
+ schema.cpp
+ schemaconfigurer.cpp
+ DEPENDS
+)
diff --git a/searchlib/src/vespa/searchcommon/common/datatype.cpp b/searchlib/src/vespa/searchcommon/common/datatype.cpp
new file mode 100644
index 00000000000..1fe3a488aac
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/datatype.cpp
@@ -0,0 +1,99 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "datatype.h"
+#include <vespa/config/common/exceptions.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/arraysize.h>
+
+namespace search::index::schema {
+
+using config::InvalidConfigException;
+
+/**
+ * Maps the textual name of a data type to its DataType value.
+ * Throws InvalidConfigException when the name is not one of the accepted spellings.
+ */
+DataType
+dataTypeFromName(vespalib::stringref name) {
+    // Accepted spellings paired with the enum value each one selects.
+    static const struct { const char *text; DataType value; } known[] = {
+        { "BOOL",        DataType::BOOL },
+        { "UINT2",       DataType::UINT2 },
+        { "UINT4",       DataType::UINT4 },
+        { "INT8",        DataType::INT8 },
+        { "INT16",       DataType::INT16 },
+        { "INT32",       DataType::INT32 },
+        { "INT64",       DataType::INT64 },
+        { "FLOAT",       DataType::FLOAT },
+        { "DOUBLE",      DataType::DOUBLE },
+        { "STRING",      DataType::STRING },
+        { "RAW",         DataType::RAW },
+        { "BOOLEANTREE", DataType::BOOLEANTREE },
+        { "TENSOR",      DataType::TENSOR },
+        { "REFERENCE",   DataType::REFERENCE },
+    };
+    for (const auto &entry : known) {
+        if (name == entry.text) {
+            return entry.value;
+        }
+    }
+    throw InvalidConfigException("Illegal enum value '" + name + "'");
+}
+
+const char *datatype_str[] = { "BOOL",
+ "UINT2",
+ "UINT4",
+ "INT8",
+ "INT16",
+ "INT32",
+ "INT64",
+ "FLOAT",
+ "DOUBLE",
+ "STRING",
+ "RAW",
+ "FEATURE_NOTUSED",
+ "BOOLEANTREE",
+ "TENSOR",
+ "REFERENCE"};
+
+/**
+ * Returns the textual name of the given data type, looked up in datatype_str
+ * by numeric value. Values outside the table yield "UNKNOWN(<number>)".
+ */
+vespalib::string
+getTypeName(DataType type) {
+    const size_t typeAsNum = static_cast<size_t>(type);
+    // Valid indexes are [0, arraysize); use '>=' so that a value equal to the
+    // table size is rejected instead of reading one element past the end.
+    if (typeAsNum >= vespalib::arraysize(datatype_str)) {
+        vespalib::asciistream ost;
+        ost << "UNKNOWN(" << typeAsNum << ")";
+        return ost.str();
+    }
+    return datatype_str[typeAsNum];
+}
+
+// Streams the textual name of the data type.
+std::ostream &
+operator<<(std::ostream &os, const DataType &type)
+{
+    return os << getTypeName(type);
+}
+
+/**
+ * Maps "SINGLE", "ARRAY" or "WEIGHTEDSET" to the matching CollectionType.
+ * Throws InvalidConfigException for any other name.
+ */
+CollectionType
+collectionTypeFromName(vespalib::stringref name) {
+    if (name == "SINGLE") {
+        return CollectionType::SINGLE;
+    }
+    if (name == "ARRAY") {
+        return CollectionType::ARRAY;
+    }
+    if (name == "WEIGHTEDSET") {
+        return CollectionType::WEIGHTEDSET;
+    }
+    throw InvalidConfigException("Illegal enum value '" + name + "'");
+}
+
+const char *collectiontype_str[] = { "SINGLE",
+ "ARRAY",
+ "WEIGHTEDSET" };
+
+/**
+ * Returns the textual name of the given collection type, looked up in
+ * collectiontype_str by numeric value. Values outside the table yield
+ * "UNKNOWN(<number>)".
+ */
+vespalib::string
+getTypeName(CollectionType type) {
+    const size_t typeAsNum = static_cast<size_t>(type);
+    // Valid indexes are [0, arraysize); use '>=' so that a value equal to the
+    // table size is rejected instead of reading one element past the end.
+    if (typeAsNum >= vespalib::arraysize(collectiontype_str)) {
+        vespalib::asciistream ost;
+        ost << "UNKNOWN(" << typeAsNum << ")";
+        return ost.str();
+    }
+    return collectiontype_str[typeAsNum];
+}
+
+// Streams the textual name of the collection type.
+std::ostream &
+operator<<(std::ostream &os, const CollectionType &type)
+{
+    return os << getTypeName(type);
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/datatype.h b/searchlib/src/vespa/searchcommon/common/datatype.h
new file mode 100644
index 00000000000..e1c6a44b620
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/datatype.h
@@ -0,0 +1,47 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search::index::schema {
+
+/**
+ * Basic data type for a field.
+ *
+ * The numeric values are assigned explicitly. Value 11 is left unassigned
+ * (see the commented-out FEATURE entry); the name table in datatype.cpp has
+ * a "FEATURE_NOTUSED" placeholder at that index.
+ **/
+enum class DataType {
+ BOOL = 0,
+ UINT2 = 1,
+ UINT4 = 2,
+ INT8 = 3,
+ INT16 = 4,
+ INT32 = 5,
+ INT64 = 6,
+ FLOAT = 7,
+ DOUBLE = 8,
+ STRING = 9,
+ RAW = 10,
+ //FEATURE = 11,
+ BOOLEANTREE = 12,
+ TENSOR = 13,
+ REFERENCE = 14
+};
+
+/**
+ * Collection type for a field.
+ *
+ * The numeric values are assigned explicitly and contiguously from 0.
+ **/
+enum class CollectionType { SINGLE = 0,
+ ARRAY = 1,
+ WEIGHTEDSET = 2
+};
+
+DataType dataTypeFromName(vespalib::stringref name);
+vespalib::string getTypeName(DataType type);
+std::ostream &operator<<(std::ostream &os, const DataType &type);
+
+CollectionType collectionTypeFromName(vespalib::stringref n);
+vespalib::string getTypeName(CollectionType type);
+std::ostream &operator<<(std::ostream &os, const CollectionType &type);
+
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/dictionary_config.cpp b/searchlib/src/vespa/searchcommon/common/dictionary_config.cpp
new file mode 100644
index 00000000000..e1b990e5660
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/dictionary_config.cpp
@@ -0,0 +1,39 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "dictionary_config.h"
+#include <ostream>
+#include <cassert>
+
+namespace search {
+
+// Streams the config as "<type>,<match>".
+std::ostream&
+operator<<(std::ostream& os, const DictionaryConfig & cfg) {
+    os << cfg.getType();
+    os << ",";
+    os << cfg.getMatch();
+    return os;
+}
+
+// Streams the symbolic name of a dictionary type.
+// An out-of-range value trips the assert in debug builds; the trailing return
+// keeps the function well-defined when asserts are compiled out (NDEBUG).
+std::ostream&
+operator<<(std::ostream& os, DictionaryConfig::Type type) {
+
+    switch (type) {
+    case DictionaryConfig::Type::BTREE:
+        return os << "BTREE";
+    case DictionaryConfig::Type::HASH:
+        return os << "HASH";
+    case DictionaryConfig::Type::BTREE_AND_HASH:
+        return os << "BTREE_AND_HASH";
+    }
+    assert(false);
+    return os;
+}
+
+// Streams the symbolic name of a dictionary match mode.
+// NOTE(review): the emitted strings are misspelled ("SENSTITIVE"); they are
+// kept byte-for-byte because other code or tests may compare against them.
+// An out-of-range value trips the assert in debug builds; the trailing return
+// keeps the function well-defined when asserts are compiled out (NDEBUG).
+std::ostream&
+operator<<(std::ostream& os, DictionaryConfig::Match match) {
+    switch(match) {
+    case DictionaryConfig::Match::CASED:
+        return os << "CASE_SENSTITIVE";
+    case DictionaryConfig::Match::UNCASED:
+        return os << "CASE_INSENSTITIVE";
+    }
+    assert(false);
+    return os;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchcommon/common/dictionary_config.h b/searchlib/src/vespa/searchcommon/common/dictionary_config.h
new file mode 100644
index 00000000000..f51341ad799
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/dictionary_config.h
@@ -0,0 +1,31 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <iosfwd>
+
+namespace search {
+
+/**
+ * Contains the config required for setting up a suitable dictionary.
+ */
+class DictionaryConfig {
+public:
+    enum class Type { BTREE, HASH, BTREE_AND_HASH };
+    enum class Match { CASED, UNCASED };
+
+    // Defaults: BTREE dictionary with UNCASED matching.
+    DictionaryConfig() noexcept : DictionaryConfig(Type::BTREE, Match::UNCASED) {}
+    // Given type with UNCASED matching.
+    DictionaryConfig(Type type) noexcept : DictionaryConfig(type, Match::UNCASED) {}
+    DictionaryConfig(Type type, Match match) noexcept
+        : _type(type),
+          _match(match)
+    {}
+
+    Type getType() const { return _type; }
+    Match getMatch() const { return _match; }
+
+    // Equal when both type and match mode agree.
+    bool operator == (const DictionaryConfig & b) const {
+        if (_type != b._type) {
+            return false;
+        }
+        return _match == b._match;
+    }
+private:
+    Type _type;
+    Match _match;
+};
+
+std::ostream& operator<<(std::ostream& os, const DictionaryConfig & cfg);
+std::ostream& operator<<(std::ostream& os, DictionaryConfig::Type type);
+std::ostream& operator<<(std::ostream& os, DictionaryConfig::Match match);
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchcommon/common/growstrategy.cpp b/searchlib/src/vespa/searchcommon/common/growstrategy.cpp
new file mode 100644
index 00000000000..f35cdbaa640
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/growstrategy.cpp
@@ -0,0 +1,18 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "growstrategy.h"
+#include <iostream>
+
+namespace search {
+
+// Streams the strategy as "{docsInitialCapacity=..., docsGrowFactor=..., docsGrowDelta=..., multiValueAllocGrowFactor=...}".
+std::ostream& operator<<(std::ostream& os, const GrowStrategy& grow_strategy)
+{
+    os << "{docsInitialCapacity=" << grow_strategy.getDocsInitialCapacity();
+    os << ", docsGrowFactor=" << grow_strategy.getDocsGrowFactor();
+    os << ", docsGrowDelta=" << grow_strategy.getDocsGrowDelta();
+    os << ", multiValueAllocGrowFactor=" << grow_strategy.getMultiValueAllocGrowFactor();
+    os << "}";
+    return os;
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/growstrategy.h b/searchlib/src/vespa/searchcommon/common/growstrategy.h
new file mode 100644
index 00000000000..b9b4a42cf72
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/growstrategy.h
@@ -0,0 +1,61 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/growstrategy.h>
+#include <cstdint>
+#include <iosfwd>
+
+namespace search {
+
+// Value type holding the growth parameters: initial document capacity,
+// document grow factor, document grow delta and multi-value alloc grow factor.
+class GrowStrategy
+{
+private:
+    uint32_t _docsInitialCapacity;
+    float    _docsGrowFactor;
+    uint32_t _docsGrowDelta;
+    float    _multiValueAllocGrowFactor;
+public:
+    // Defaults: capacity 1024, grow factor 0.5, grow delta 0, multi-value factor 0.2.
+    GrowStrategy() noexcept
+        : GrowStrategy(1024, 0.5, 0, 0.2)
+    {}
+    GrowStrategy(uint32_t docsInitialCapacity, float docsGrowFactor,
+                 uint32_t docsGrowDelta, float multiValueAllocGrowFactor) noexcept
+        : _docsInitialCapacity(docsInitialCapacity),
+          _docsGrowFactor(docsGrowFactor),
+          _docsGrowDelta(docsGrowDelta),
+          _multiValueAllocGrowFactor(multiValueAllocGrowFactor)
+    {}
+
+    // Like the four-argument constructor, with the multi-value factor fixed at 0.2.
+    static GrowStrategy make(uint32_t docsInitialCapacity, float docsGrowFactor, uint32_t docsGrowDelta) {
+        return GrowStrategy(docsInitialCapacity, docsGrowFactor, docsGrowDelta, 0.2);
+    }
+
+    uint32_t getDocsInitialCapacity() const { return _docsInitialCapacity; }
+    // Grow factor scaled by 100; the float result is converted to uint32_t on return.
+    uint32_t getDocsGrowPercent() const { return _docsGrowFactor*100; }
+    float getDocsGrowFactor() const { return _docsGrowFactor; }
+    uint32_t getDocsGrowDelta() const { return _docsGrowDelta; }
+    float getMultiValueAllocGrowFactor() const { return _multiValueAllocGrowFactor; }
+
+    void setDocsInitialCapacity(uint32_t v) { _docsInitialCapacity = v; }
+    void setDocsGrowDelta(uint32_t v) { _docsGrowDelta = v; }
+
+    // Converts to the generic strategy; the multi-value factor is not passed along.
+    vespalib::GrowStrategy to_generic_strategy() const {
+        return vespalib::GrowStrategy(_docsInitialCapacity, _docsGrowFactor, _docsGrowDelta);
+    }
+
+    // Member-wise comparison (floats compared with ==).
+    bool operator==(const GrowStrategy & rhs) const {
+        if (_docsInitialCapacity != rhs._docsInitialCapacity) {
+            return false;
+        }
+        if (!(_docsGrowFactor == rhs._docsGrowFactor)) {
+            return false;
+        }
+        if (_docsGrowDelta != rhs._docsGrowDelta) {
+            return false;
+        }
+        return _multiValueAllocGrowFactor == rhs._multiValueAllocGrowFactor;
+    }
+    bool operator!=(const GrowStrategy & rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+std::ostream& operator<<(std::ostream& os, const GrowStrategy& grow_strategy);
+
+}
+
diff --git a/searchlib/src/vespa/searchcommon/common/iblobconverter.h b/searchlib/src/vespa/searchcommon/common/iblobconverter.h
new file mode 100644
index 00000000000..6581c3e5ccb
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/iblobconverter.h
@@ -0,0 +1,22 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/buffer.h>
+#include <memory>
+
+namespace search::common {
+
+// Interface for converting one buffer reference into another.
+// The public convert() forwards to the private virtual onConvert().
+class BlobConverter
+{
+public:
+    using ConstBufferRef = vespalib::ConstBufferRef;
+    using UP = std::unique_ptr<BlobConverter>;
+    using SP = std::shared_ptr<BlobConverter>;
+
+    virtual ~BlobConverter() = default;
+
+    ConstBufferRef convert(const ConstBufferRef & src) const {
+        return onConvert(src);
+    }
+private:
+    // Implemented by subclasses to perform the actual conversion.
+    virtual ConstBufferRef onConvert(const ConstBufferRef & src) const = 0;
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/range.h b/searchlib/src/vespa/searchcommon/common/range.h
new file mode 100644
index 00000000000..ea2553c129b
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/range.h
@@ -0,0 +1,29 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <limits>
+#include <cstdint>
+
+namespace search {
+
+/**
+ * Closed interval [lower, upper] over an arithmetic type T.
+ *
+ * A default-constructed range is invalid (lower > upper): lower is the
+ * largest representable value and upper the lowest one.
+ */
+template <typename T>
+class Range {
+public:
+    // Uses lowest() rather than min(): for floating-point types min() is the
+    // smallest positive value, not the most negative one. For integer types
+    // the two are identical.
+    Range() :
+        _lower(std::numeric_limits<T>::max()),
+        _upper(std::numeric_limits<T>::lowest()) { }
+    // Single-point range [v, v].
+    Range(T v) : _lower(v), _upper(v) { }
+    Range(T low, T high) : _lower(low), _upper(high) { }
+    T lower() const { return _lower; }
+    T upper() const { return _upper; }
+    // True when the interval is non-empty.
+    bool valid() const { return _lower <= _upper; }
+    // True when the interval contains exactly one value.
+    bool isPoint() const { return _lower == _upper; }
+private:
+    T _lower;
+    T _upper;
+};
+
+using Int64Range = Range<int64_t>;
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchcommon/common/schema.cpp b/searchlib/src/vespa/searchcommon/common/schema.cpp
new file mode 100644
index 00000000000..c6a2a4532a3
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/schema.cpp
@@ -0,0 +1,581 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "schema.h"
+#include <fstream>
+#include <vespa/config/common/configparser.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/stllike/hashtable.hpp>
+#include <vespa/fastos/file.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".index.schema");
+
+using namespace config;
+using namespace search::index;
+
+namespace {
+
+/**
+ * Writes "<prefix>[<count>]" followed by every field, each written with the
+ * per-element prefix "<prefix>[<index>].".
+ */
+template <typename T>
+void
+writeFields(vespalib::asciistream & os,
+            vespalib::stringref prefix,
+            const std::vector<T> & fields)
+{
+    os << prefix << "[" << fields.size() << "]\n";
+    for (size_t i = 0; i < fields.size(); ++i) {
+        // A stringref is not guaranteed to be NUL-terminated, so pass its
+        // length explicitly instead of relying on "%s" finding a terminator.
+        fields[i].write(os, vespalib::make_string("%.*s[%zu].",
+                                                  static_cast<int>(prefix.size()), prefix.data(), i));
+    }
+}
+
+/**
+ * Writes the field sets as "<name>[<count>]", then for each set its name,
+ * the number of member fields and one "name" line per member field.
+ */
+void
+writeFieldSets(vespalib::asciistream &os,
+               const vespalib::string &name,
+               const std::vector<Schema::FieldSet> &fss)
+{
+    vespalib::string open(name);
+    open += "[";
+    os << open << fss.size() << "]\n";
+    for (size_t setIdx = 0; setIdx < fss.size(); ++setIdx) {
+        const Schema::FieldSet &fieldSet = fss[setIdx];
+        os << open << setIdx << "].name " << fieldSet.getName() << "\n";
+        os << open << setIdx << "].field[" << fieldSet.getFields().size() << "]\n";
+        // Common prefix of every member line of this set.
+        vespalib::asciistream memberPrefix;
+        memberPrefix << open << setIdx << "].field[";
+        for (size_t memberIdx = 0; memberIdx < fieldSet.getFields().size(); ++memberIdx) {
+            os << memberPrefix.str() << memberIdx << "].name " << fieldSet.getFields()[memberIdx] << "\n";
+        }
+    }
+}
+
+// Holds the "name" value parsed from a set of config lines.
+// Used as the element type when parsing the "field" array of a field set.
+struct FieldName {
+ vespalib::string name;
+ // Parses the "name" entry from the given lines.
+ FieldName(const config::StringVector & lines)
+ : name(ConfigParser::parse<vespalib::string>("name", lines))
+ {
+ }
+};
+
+// Looks up name in the name-to-id map; returns Schema::UNKNOWN_FIELD_ID when absent.
+template <typename T>
+uint32_t
+getFieldId(vespalib::stringref name, const T &map)
+{
+    auto found = map.find(name);
+    if (found == map.end()) {
+        return Schema::UNKNOWN_FIELD_ID;
+    }
+    return found->second;
+}
+
+} // namespace
+
+namespace search::index {
+
+const uint32_t Schema::UNKNOWN_FIELD_ID(std::numeric_limits<uint32_t>::max());
+
+// Name and data type only: collection type SINGLE, empty tensor spec.
+Schema::Field::Field(vespalib::stringref n, DataType dt) noexcept
+ : Field(n, dt, schema::CollectionType::SINGLE, "")
+{
+}
+
+// Name, data type and collection type: empty tensor spec.
+Schema::Field::Field(vespalib::stringref n, DataType dt, CollectionType ct) noexcept
+ : Field(n, dt, ct, "")
+{
+}
+
+// Full constructor; the two overloads above delegate here.
+Schema::Field::Field(vespalib::stringref n, DataType dt, CollectionType ct, vespalib::stringref tensor_spec) noexcept
+ : _name(n),
+ _dataType(dt),
+ _collectionType(ct),
+ _tensor_spec(tensor_spec)
+{
+}
+
+// XXX: Resource leak if exception is thrown.
+// Builds a field from config lines: parses "name", "datatype" and
+// "collectiontype". dataTypeFromName()/collectionTypeFromName() throw
+// InvalidConfigException for unknown names. _tensor_spec is not parsed here
+// and is left default-constructed.
+Schema::Field::Field(const config::StringVector & lines)
+ : _name(ConfigParser::parse<vespalib::string>("name", lines)),
+ _dataType(schema::dataTypeFromName(ConfigParser::parse<vespalib::string>("datatype", lines))),
+ _collectionType(schema::collectionTypeFromName(ConfigParser::parse<vespalib::string>("collectiontype", lines)))
+{
+}
+
+Schema::Field::Field(const Field &) noexcept = default;
+Schema::Field & Schema::Field::operator = (const Field &) noexcept = default;
+Schema::Field::Field(Field &&) noexcept = default;
+Schema::Field & Schema::Field::operator = (Field &&) noexcept = default;
+
+Schema::Field::~Field() = default;
+
+// Writes the "name", "datatype" and "collectiontype" lines, each prepended
+// with prefix. The tensor spec is not written.
+void
+Schema::Field::write(vespalib::asciistream & os, vespalib::stringref prefix) const
+{
+ os << prefix << "name " << _name << "\n";
+ os << prefix << "datatype " << getTypeName(_dataType) << "\n";
+ os << prefix << "collectiontype " << getTypeName(_collectionType) << "\n";
+}
+
+// Fields are equal when name, data type, collection type and tensor spec all match.
+bool
+Schema::Field::operator==(const Field &rhs) const
+{
+    if (!(_name == rhs._name)) {
+        return false;
+    }
+    if (!(_dataType == rhs._dataType)) {
+        return false;
+    }
+    if (!(_collectionType == rhs._collectionType)) {
+        return false;
+    }
+    return _tensor_spec == rhs._tensor_spec;
+}
+
+// Negation of operator==.
+bool
+Schema::Field::operator!=(const Field &rhs) const
+{
+    const bool same = (*this == rhs);
+    return !same;
+}
+
+// Name and data type; average element length 512, interleaved features off.
+Schema::IndexField::IndexField(vespalib::stringref name, DataType dt) noexcept
+ : Field(name, dt),
+ _avgElemLen(512),
+ _interleaved_features(false)
+{
+}
+
+// Name, data type and collection type; same defaults as above.
+Schema::IndexField::IndexField(vespalib::stringref name, DataType dt,
+ CollectionType ct) noexcept
+ : Field(name, dt, ct),
+ _avgElemLen(512),
+ _interleaved_features(false)
+{
+}
+
+// Builds an index field from config lines. "averageelementlen" and
+// "interleavedfeatures" are parsed with defaults 512 and false.
+Schema::IndexField::IndexField(const config::StringVector &lines)
+ : Field(lines),
+ _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines, 512)),
+ _interleaved_features(ConfigParser::parse<bool>("interleavedfeatures", lines, false))
+{
+}
+
+Schema::IndexField::IndexField(const IndexField &) noexcept = default;
+Schema::IndexField & Schema::IndexField::operator = (const IndexField &) noexcept = default;
+Schema::IndexField::IndexField(IndexField &&) noexcept = default;
+Schema::IndexField & Schema::IndexField::operator = (IndexField &&) noexcept = default;
+
+// Writes the base Field lines followed by "averageelementlen" and
+// "interleavedfeatures", then three fixed legacy lines (see TODO below).
+void
+Schema::IndexField::write(vespalib::asciistream & os, vespalib::stringref prefix) const
+{
+ Field::write(os, prefix);
+ os << prefix << "averageelementlen " << static_cast<int32_t>(_avgElemLen) << "\n";
+ os << prefix << "interleavedfeatures " << (_interleaved_features ? "true" : "false") << "\n";
+
+ // TODO: Remove prefix, phrases and positions when breaking downgrade is no longer an issue.
+ // These values are constants; they are not backed by any member of this class.
+ os << prefix << "prefix false" << "\n";
+ os << prefix << "phrases false" << "\n";
+ os << prefix << "positions true" << "\n";
+}
+
+// Equal when the base Field parts match and both index-specific settings agree.
+bool
+Schema::IndexField::operator==(const IndexField &rhs) const
+{
+    if (!Field::operator==(rhs)) {
+        return false;
+    }
+    if (!(_avgElemLen == rhs._avgElemLen)) {
+        return false;
+    }
+    return _interleaved_features == rhs._interleaved_features;
+}
+
+// Different when the base Field parts or any index-specific setting differs.
+bool
+Schema::IndexField::operator!=(const IndexField &rhs) const
+{
+    if (Field::operator!=(rhs)) {
+        return true;
+    }
+    if (_avgElemLen != rhs._avgElemLen) {
+        return true;
+    }
+    return _interleaved_features != rhs._interleaved_features;
+}
+
+// Builds a field set from config lines: parses "name" and the names in the
+// "field" array, in the order they are parsed.
+Schema::FieldSet::FieldSet(const config::StringVector & lines) :
+    _name(ConfigParser::parse<vespalib::string>("name", lines)),
+    _fields()
+{
+    std::vector<FieldName> parsedNames = ConfigParser::parseArray<std::vector<FieldName>>("field", lines);
+    for (const FieldName &entry : parsedNames) {
+        _fields.push_back(entry.name);
+    }
+}
+
+Schema::FieldSet::FieldSet(const FieldSet &) = default;
+Schema::FieldSet & Schema::FieldSet::operator = (const FieldSet &) = default;
+
+Schema::FieldSet::~FieldSet() = default;
+
+// Equal when the set name and the ordered list of field names both match.
+bool
+Schema::FieldSet::operator==(const FieldSet &rhs) const
+{
+    if (!(_name == rhs._name)) {
+        return false;
+    }
+    return _fields == rhs._fields;
+}
+
+// Different when the set name or the list of field names differs.
+bool
+Schema::FieldSet::operator!=(const FieldSet &rhs) const
+{
+    if (_name != rhs._name) {
+        return true;
+    }
+    return _fields != rhs._fields;
+}
+
+// Serializes the schema: attribute fields, summary fields, field sets and
+// index fields, in that order. Imported attribute fields are appended only
+// when saveToDisk is false.
+void
+Schema::writeToStream(vespalib::asciistream &os, bool saveToDisk) const
+{
+    writeFields(os, "attributefield", _attributeFields);
+    writeFields(os, "summaryfield", _summaryFields);
+    writeFieldSets(os, "fieldset", _fieldSets);
+    writeFields(os, "indexfield", _indexFields);
+    if (saveToDisk) {
+        return;
+    }
+    writeFields(os, "importedattributefields", _importedAttributeFields);
+}
+
+Schema::Schema() = default;
+
+Schema::Schema(const Schema & rhs) = default;
+Schema & Schema::operator=(const Schema & rhs) = default;
+Schema::Schema(Schema && rhs) = default;
+Schema & Schema::operator=(Schema && rhs) = default;
+Schema::~Schema() = default;
+
+/**
+ * Replaces the content of this schema with what is parsed from the given file.
+ *
+ * Returns false (after logging a warning) if the file cannot be opened,
+ * true otherwise. Imported attribute fields are not read from the file and
+ * are cleared. Exceptions thrown while parsing propagate to the caller.
+ */
+bool
+Schema::loadFromFile(const vespalib::string & fileName)
+{
+    std::ifstream file(fileName.c_str());
+    if (!file) {
+        LOG(warning, "Could not open input file '%s' as part of loadFromFile()", fileName.c_str());
+        return false;
+    }
+    config::StringVector lines;
+    std::string tmpLine;
+    // Test the result of getline itself so that the failed read at end of
+    // file does not append a spurious empty line.
+    while (std::getline(file, tmpLine)) {
+        lines.push_back(tmpLine);
+    }
+    _indexFields = ConfigParser::parseArray<std::vector<IndexField>>("indexfield", lines);
+    _attributeFields = ConfigParser::parseArray<std::vector<AttributeField>>("attributefield", lines);
+    _summaryFields = ConfigParser::parseArray<std::vector<SummaryField>>("summaryfield", lines);
+    _fieldSets = ConfigParser::parseArray<std::vector<FieldSet>>("fieldset", lines);
+    _importedAttributeFields.clear(); // NOTE: these are not persisted to disk
+    // Rebuild every name -> position map from the freshly parsed vectors.
+    _indexIds.clear();
+    for (size_t i(0), m(_indexFields.size()); i < m; i++) {
+        _indexIds[_indexFields[i].getName()] = i;
+    }
+    _attributeIds.clear();
+    for (size_t i(0), m(_attributeFields.size()); i < m; i++) {
+        _attributeIds[_attributeFields[i].getName()] = i;
+    }
+    _summaryIds.clear();
+    for (size_t i(0), m(_summaryFields.size()); i < m; i++) {
+        _summaryIds[_summaryFields[i].getName()] = i;
+    }
+    _fieldSetIds.clear();
+    for (size_t i(0), m(_fieldSets.size()); i < m; i++) {
+        _fieldSetIds[_fieldSets[i].getName()] = i;
+    }
+    _importedAttributeIds.clear();
+    return true;
+}
+
+/**
+ * Writes the schema (disk format, i.e. without imported attribute fields) to
+ * the given file and then syncs the file. Returns false, after logging a
+ * warning, if opening, writing, re-opening or syncing fails.
+ */
+bool
+Schema::saveToFile(const vespalib::string & fileName) const
+{
+    vespalib::asciistream serialized;
+    writeToStream(serialized, true);
+    std::ofstream out(fileName.c_str());
+    if (!out) {
+        LOG(warning, "Could not open output file '%s' as part of saveToFile()", fileName.c_str());
+        return false;
+    }
+    out << serialized.str();
+    out.close();
+    if (out.fail()) {
+        LOG(warning,
+            "Could not write to output file '%s' as part of saveToFile()",
+            fileName.c_str());
+        return false;
+    }
+    // Re-open the written file through FastOS_File in order to sync it.
+    FastOS_File syncHandle;
+    syncHandle.OpenReadWrite(fileName.c_str());
+    if (!syncHandle.IsOpened()) {
+        LOG(warning, "Could not open schema file '%s' for fsync", fileName.c_str());
+        return false;
+    }
+    if (!syncHandle.Sync()) {
+        LOG(warning, "Could not fsync schema file '%s'", fileName.c_str());
+        return false;
+    }
+    return true;
+}
+
+// Returns the full textual form of the schema, including imported attribute fields.
+vespalib::string
+Schema::toString() const
+{
+    vespalib::asciistream buffer;
+    writeToStream(buffer, false);
+    return buffer.str();
+}
+
+namespace {
+// Creates a new index field named "<field name><suffix>" with the same data
+// type, collection type and average element length as field.
+// NOTE(review): the interleaved-features setting is not copied; the clone
+// gets the constructor default (false) - confirm this is intended.
+Schema::IndexField
+cloneIndexField(const Schema::IndexField &field,
+ const vespalib::string &suffix)
+{
+ return Schema::IndexField(field.getName() + suffix,
+ field.getDataType(),
+ field.getCollectionType()).
+ setAvgElemLen(field.getAvgElemLen());
+}
+
+// Appends field to fields and records its position under its name in
+// name2id_map (an existing entry with the same name is overwritten).
+// Returns self to allow chaining.
+template <typename T, typename M>
+Schema &
+addField(const T &field, Schema &self,
+         std::vector<T> &fields, M &name2id_map)
+{
+    const auto position = fields.size();
+    name2id_map[field.getName()] = position;
+    fields.push_back(field);
+    return self;
+}
+} // namespace
+
+// Appends an index field and registers its name; returns *this.
+Schema &
+Schema::addIndexField(const IndexField &field)
+{
+ return addField(field, *this, _indexFields, _indexIds);
+}
+
+// Adds the given index field plus one derived index field per URI component
+// suffix, in the order listed below. Returns *this.
+Schema &
+Schema::addUriIndexFields(const IndexField &field)
+{
+    static const char * const uriSuffixes[] = {
+        ".scheme", ".host", ".port", ".path", ".query", ".fragment", ".hostname"
+    };
+    addIndexField(field);
+    for (const char *suffix : uriSuffixes) {
+        addIndexField(cloneIndexField(field, suffix));
+    }
+    return *this;
+}
+
+// Appends an attribute field and registers its name; returns *this.
+Schema &
+Schema::addAttributeField(const AttributeField &field)
+{
+ return addField(field, *this, _attributeFields, _attributeIds);
+}
+
+// Appends a summary field and registers its name; returns *this.
+Schema &
+Schema::addSummaryField(const SummaryField &field)
+{
+ return addField(field, *this, _summaryFields, _summaryIds);
+}
+
+// Appends an imported attribute field and registers its name; returns *this.
+Schema &
+Schema::addImportedAttributeField(const ImportedAttributeField &field)
+{
+ return addField(field, *this, _importedAttributeFields, _importedAttributeIds);
+}
+
+// Appends a field set and registers its name; returns *this.
+Schema &
+Schema::addFieldSet(const FieldSet &fieldSet)
+{
+ return addField(fieldSet, *this, _fieldSets, _fieldSetIds);
+}
+
+// Returns the id of the named index field, or UNKNOWN_FIELD_ID if absent.
+uint32_t
+Schema::getIndexFieldId(vespalib::stringref name) const
+{
+ return getFieldId(name, _indexIds);
+}
+
+// Returns the id of the named attribute field, or UNKNOWN_FIELD_ID if absent.
+uint32_t
+Schema::getAttributeFieldId(vespalib::stringref name) const
+{
+ return getFieldId(name, _attributeIds);
+}
+
+// Returns the id of the named summary field, or UNKNOWN_FIELD_ID if absent.
+uint32_t
+Schema::getSummaryFieldId(vespalib::stringref name) const
+{
+ return getFieldId(name, _summaryIds);
+}
+
+// Returns the id of the named field set, or UNKNOWN_FIELD_ID if absent.
+uint32_t
+Schema::getFieldSetId(vespalib::stringref name) const
+{
+ return getFieldId(name, _fieldSetIds);
+}
+
+// True if an index field with the given name is registered.
+bool
+Schema::isIndexField(vespalib::stringref name) const
+{
+ return _indexIds.find(name) != _indexIds.end();
+}
+
+// True if a summary field with the given name is registered.
+bool
+Schema::isSummaryField(vespalib::stringref name) const
+{
+ return _summaryIds.find(name) != _summaryIds.end();
+}
+
+// True if an attribute field with the given name is registered.
+bool
+Schema::isAttributeField(vespalib::stringref name) const
+{
+ return _attributeIds.find(name) != _attributeIds.end();
+}
+
+
+// Exchanges all field vectors and all name-to-id maps with rhs.
+void
+Schema::swap(Schema &rhs)
+{
+ _indexFields.swap(rhs._indexFields);
+ _attributeFields.swap(rhs._attributeFields);
+ _summaryFields.swap(rhs._summaryFields);
+ _fieldSets.swap(rhs._fieldSets);
+ _importedAttributeFields.swap(rhs._importedAttributeFields);
+ _indexIds.swap(rhs._indexIds);
+ _attributeIds.swap(rhs._attributeIds);
+ _summaryIds.swap(rhs._summaryIds);
+ _fieldSetIds.swap(rhs._fieldSetIds);
+ _importedAttributeIds.swap(rhs._importedAttributeIds);
+}
+
+// Empties all field vectors and all name-to-id maps.
+void
+Schema::clear()
+{
+ _indexFields.clear();
+ _attributeFields.clear();
+ _summaryFields.clear();
+ _fieldSets.clear();
+ _importedAttributeFields.clear();
+ _indexIds.clear();
+ _attributeIds.clear();
+ _summaryIds.clear();
+ _fieldSetIds.clear();
+ _importedAttributeIds.clear();
+}
+
+namespace {
+// Helper class allowing the is_matching specialization to access the schema.
+// Owns the schema being built by Schema::intersect, so that the FieldSet
+// specialization of is_matching can consult it.
+struct IntersectHelper {
+    Schema::UP schema;
+    IntersectHelper() : schema(std::make_unique<Schema>()) {}
+
+    // Generic case: two entries match when their types match.
+    template <typename T>
+    bool is_matching(const T &t1, const T &t2) { return t1.matchingTypes(t2); }
+
+    // Copies into intersection every entry of set1 that has a same-named,
+    // matching entry in set2, recording each copied entry's position in
+    // intersection_map.
+    template <typename T, typename Map>
+    void intersect(const std::vector<T> &set1, const std::vector<T> &set2,
+                   const Map &set2_map,
+                   std::vector<T> &intersection, Map &intersection_map) {
+        for (const T &candidate : set1) {
+            auto counterpart = set2_map.find(candidate.getName());
+            if (counterpart == set2_map.end()) {
+                continue;
+            }
+            if (!is_matching(candidate, set2[counterpart->second])) {
+                continue;
+            }
+            intersection_map[candidate.getName()] = intersection.size();
+            intersection.push_back(candidate);
+        }
+    }
+};
+
+// Field sets match when they list the same fields and every listed field is
+// an index field of the schema built so far.
+template <>
+bool IntersectHelper::is_matching(const Schema::FieldSet &f1, const Schema::FieldSet &f2) {
+    if (f1.getFields() != f2.getFields()) {
+        return false;
+    }
+    for (const vespalib::string & member : f1.getFields()) {
+        const bool known = (schema->getIndexFieldId(member) != Schema::UNKNOWN_FIELD_ID);
+        if (!known) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Appends to v every entry whose name is not yet present in name2id_map,
+// recording the position of each appended entry.
+template <typename T, typename Map>
+void addEntries(const std::vector<T> &entries, std::vector<T> &v, Map &name2id_map) {
+    for (const T & entry : entries) {
+        if (name2id_map.find(entry.getName()) != name2id_map.end()) {
+            continue;
+        }
+        name2id_map[entry.getName()] = v.size();
+        v.push_back(entry);
+    }
+}
+
+// Copies into diff every entry of minuend whose name is absent from
+// subtrahend_map, recording the position of each copied entry in diff_map.
+template <typename T, typename Map>
+void difference(const std::vector<T> &minuend, const Map &subtrahend_map,
+                std::vector<T> &diff, Map &diff_map) {
+    for (const T & entry : minuend) {
+        if (subtrahend_map.find(entry.getName()) != subtrahend_map.end()) {
+            continue;
+        }
+        diff_map[entry.getName()] = diff.size();
+        diff.push_back(entry);
+    }
+}
+} // namespace
+
+// Returns a new schema holding the index, attribute and summary fields and
+// the field sets that are present, and matching, in both lhs and rhs.
+// Index fields are intersected first; the field-set matching consults them.
+Schema::UP
+Schema::intersect(const Schema &lhs, const Schema &rhs)
+{
+    IntersectHelper helper;
+    Schema &result = *helper.schema;
+    helper.intersect(lhs._indexFields, rhs._indexFields, rhs._indexIds,
+                     result._indexFields, result._indexIds);
+    helper.intersect(lhs._attributeFields, rhs._attributeFields, rhs._attributeIds,
+                     result._attributeFields, result._attributeIds);
+    helper.intersect(lhs._summaryFields, rhs._summaryFields, rhs._summaryIds,
+                     result._summaryFields, result._summaryIds);
+    helper.intersect(lhs._fieldSets, rhs._fieldSets, rhs._fieldSetIds,
+                     result._fieldSets, result._fieldSetIds);
+    return std::move(helper.schema);
+}
+
+// Returns a copy of lhs extended with every index, attribute and summary
+// field and every field set of rhs whose name is not already present.
+Schema::UP
+Schema::make_union(const Schema &lhs, const Schema &rhs)
+{
+    auto merged = std::make_unique<Schema>(lhs);
+    addEntries(rhs._indexFields, merged->_indexFields, merged->_indexIds);
+    addEntries(rhs._attributeFields, merged->_attributeFields, merged->_attributeIds);
+    addEntries(rhs._summaryFields, merged->_summaryFields, merged->_summaryIds);
+    addEntries(rhs._fieldSets, merged->_fieldSets, merged->_fieldSetIds);
+    return merged;
+}
+
+// Returns a new schema holding the index, attribute and summary fields and
+// the field sets of lhs whose names do not occur in rhs.
+Schema::UP
+Schema::set_difference(const Schema &lhs, const Schema &rhs)
+{
+    auto remaining = std::make_unique<Schema>();
+    difference(lhs._indexFields, rhs._indexIds,
+               remaining->_indexFields, remaining->_indexIds);
+    difference(lhs._attributeFields, rhs._attributeIds,
+               remaining->_attributeFields, remaining->_attributeIds);
+    difference(lhs._summaryFields, rhs._summaryIds,
+               remaining->_summaryFields, remaining->_summaryIds);
+    difference(lhs._fieldSets, rhs._fieldSetIds,
+               remaining->_fieldSets, remaining->_fieldSetIds);
+    return remaining;
+}
+
+// Schemas are equal when all five field collections compare equal.
+// The name-to-id maps are not compared.
+bool
+Schema::operator==(const Schema &rhs) const
+{
+    if (!(_indexFields == rhs._indexFields)) {
+        return false;
+    }
+    if (!(_attributeFields == rhs._attributeFields)) {
+        return false;
+    }
+    if (!(_summaryFields == rhs._summaryFields)) {
+        return false;
+    }
+    if (!(_fieldSets == rhs._fieldSets)) {
+        return false;
+    }
+    return _importedAttributeFields == rhs._importedAttributeFields;
+}
+
+// Schemas differ when any of the five field collections differs.
+bool
+Schema::operator!=(const Schema &rhs) const
+{
+    if (_indexFields != rhs._indexFields) {
+        return true;
+    }
+    if (_attributeFields != rhs._attributeFields) {
+        return true;
+    }
+    if (_summaryFields != rhs._summaryFields) {
+        return true;
+    }
+    if (_fieldSets != rhs._fieldSets) {
+        return true;
+    }
+    return _importedAttributeFields != rhs._importedAttributeFields;
+}
+
+// True when the schema holds no fields and no field sets of any kind.
+bool
+Schema::empty() const
+{
+    if (!_indexFields.empty()) {
+        return false;
+    }
+    if (!_attributeFields.empty()) {
+        return false;
+    }
+    if (!_summaryFields.empty()) {
+        return false;
+    }
+    if (!_fieldSets.empty()) {
+        return false;
+    }
+    return _importedAttributeFields.empty();
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/schema.h b/searchlib/src/vespa/searchcommon/common/schema.h
new file mode 100644
index 00000000000..3a9bcbdd904
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/schema.h
@@ -0,0 +1,411 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "datatype.h"
+#include <vespa/config/common/types.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/util/ptrholder.h>
+
+namespace vespalib { class asciistream; }
+namespace search::index {
+
+/**
+ * Schema class used to give a high-level description of the content
+ * of an index.
+ **/
+class Schema
+{
+public:
+ using UP = std::unique_ptr<Schema>;
+ using SP = std::shared_ptr<Schema>;
+
+ using DataType = schema::DataType;
+ using CollectionType = schema::CollectionType;
+
+ /**
+ * A single field has a name, data type and collection
+ * type. Various aspects (index/attribute/summary) may have
+ * limitations on what types are supported in the back-end.
+ **/
+ class Field
+ {
+ vespalib::string _name;
+ DataType _dataType;
+ CollectionType _collectionType;
+ vespalib::string _tensor_spec;
+
+ public:
+ Field(vespalib::stringref n, DataType dt) noexcept;
+ Field(vespalib::stringref n, DataType dt, CollectionType ct) noexcept;
+ Field(vespalib::stringref n, DataType dt, CollectionType ct, vespalib::stringref tensor_spec) noexcept;
+
+ /**
+ * Create this field based on the given config lines.
+ **/
+ Field(const config::StringVector & lines);
+ Field(const Field &) noexcept;
+ Field & operator = (const Field &) noexcept;
+ Field(Field &&) noexcept;
+ Field & operator = (Field &&) noexcept;
+
+ virtual ~Field(); // virtual: IndexField derives from Field and overrides write()
+
+ virtual void
+ write(vespalib::asciistream & os,
+ vespalib::stringref prefix) const;
+
+ const vespalib::string &getName() const { return _name; }
+ DataType getDataType() const { return _dataType; }
+ CollectionType getCollectionType() const { return _collectionType; }
+ const vespalib::string& get_tensor_spec() const { return _tensor_spec; }
+
+ bool matchingTypes(const Field &rhs) const { // compares data type and collection type only; name and tensor spec are not considered
+ return getDataType() == rhs.getDataType() &&
+ getCollectionType() == rhs.getCollectionType();
+ }
+
+ bool operator==(const Field &rhs) const;
+ bool operator!=(const Field &rhs) const;
+ };
+
+
+ /**
+ * A representation of an index field with extra information on
+ * how the index should be generated.
+ **/
+ class IndexField : public Field {
+ private:
+ uint32_t _avgElemLen;
+ // TODO: Remove when posting list format with interleaved features is made default
+ bool _interleaved_features;
+
+ public:
+ IndexField(vespalib::stringref name, DataType dt) noexcept;
+ IndexField(vespalib::stringref name, DataType dt, CollectionType ct) noexcept;
+ IndexField(const IndexField &) noexcept;
+ IndexField & operator = (const IndexField &) noexcept;
+ IndexField(IndexField &&) noexcept;
+ IndexField & operator = (IndexField &&) noexcept;
+ /**
+ * Create this index field based on the given config lines.
+ **/
+ IndexField(const config::StringVector &lines);
+
+ IndexField &setAvgElemLen(uint32_t avgElemLen) { _avgElemLen = avgElemLen; return *this; } // returns *this so setters can be chained
+ IndexField &set_interleaved_features(bool value) {
+ _interleaved_features = value;
+ return *this;
+ }
+
+ void write(vespalib::asciistream &os,
+ vespalib::stringref prefix) const override;
+
+ uint32_t getAvgElemLen() const { return _avgElemLen; }
+ bool use_interleaved_features() const { return _interleaved_features; }
+
+ bool operator==(const IndexField &rhs) const;
+ bool operator!=(const IndexField &rhs) const;
+ };
+
+ using AttributeField = Field; // attribute, summary and imported attribute fields all use the plain Field type
+ using SummaryField = Field;
+ using ImportedAttributeField = Field;
+
+ /**
+ * A field collection has a name and a list of index field names,
+ * and is a named physical view over the list of index fields.
+ **/
+ class FieldSet
+ {
+ vespalib::string _name;
+ std::vector<vespalib::string> _fields;
+
+ public:
+ FieldSet(vespalib::stringref n) : _name(n), _fields() {} // starts with an empty list of field names
+ FieldSet(const FieldSet &);
+ FieldSet & operator =(const FieldSet &);
+ FieldSet(FieldSet &&) noexcept = default;
+ FieldSet & operator =(FieldSet &&) noexcept = default;
+
+ /**
+ * Create this field collection based on the given config lines.
+ **/
+ FieldSet(const config::StringVector & lines);
+
+ ~FieldSet();
+
+ FieldSet &addField(vespalib::stringref fieldName) { // appends without checking for duplicates; returns *this for chaining
+ _fields.push_back(fieldName);
+ return *this;
+ }
+
+ const vespalib::string &getName() const { return _name; }
+ const std::vector<vespalib::string> &getFields() const {
+ return _fields;
+ }
+
+ bool operator==(const FieldSet &rhs) const;
+ bool operator!=(const FieldSet &rhs) const;
+ };
+
+ static const uint32_t UNKNOWN_FIELD_ID; // documented below as the result of the get*Id() lookups when a name is not found
+
+private:
+ std::vector<IndexField> _indexFields;
+ std::vector<AttributeField> _attributeFields;
+ std::vector<SummaryField> _summaryFields;
+ std::vector<FieldSet> _fieldSets;
+ std::vector<ImportedAttributeField> _importedAttributeFields;
+ using Name2IdMap = vespalib::hash_map<vespalib::string, uint32_t>; // name -> uint32_t id; one map per vector above
+ Name2IdMap _indexIds;
+ Name2IdMap _attributeIds;
+ Name2IdMap _summaryIds;
+ Name2IdMap _fieldSetIds;
+ Name2IdMap _importedAttributeIds;
+
+ void writeToStream(vespalib::asciistream &os, bool saveToDisk) const;
+
+public:
+ /**
+ * Create an initially empty schema
+ **/
+ Schema();
+ Schema(const Schema & rhs);
+ Schema & operator=(const Schema & rhs);
+ Schema(Schema && rhs);
+ Schema & operator=(Schema && rhs);
+ ~Schema();
+
+ /**
+ * Load this schema from the file with the given name.
+ *
+ * @param fileName the name of the file.
+ * @return true if the schema could be loaded.
+ **/
+ bool
+ loadFromFile(const vespalib::string & fileName);
+
+ /**
+ * Save this schema to the file with the given name.
+ *
+ * @param fileName the name of the file.
+ * @return true if the schema could be saved.
+ **/
+ bool
+ saveToFile(const vespalib::string & fileName) const;
+
+ vespalib::string toString() const;
+
+ /**
+ * Add an index field to this schema
+ *
+ * @param field the field to add
+ **/
+ Schema &
+ addIndexField(const IndexField &field);
+
+ // Only used by tests.
+ Schema &
+ addUriIndexFields(const IndexField &field);
+
+ /**
+ * Add an attribute field to this schema
+ *
+ * @param field the field to add
+ **/
+ Schema &
+ addAttributeField(const AttributeField &field);
+
+ /**
+ * Add a summary field to this schema
+ *
+ * @param field the field to add
+ **/
+ Schema &
+ addSummaryField(const SummaryField &field);
+
+ /**
+ * Add a field set to this schema.
+ *
+ * @param collection the field set to add.
+ **/
+ Schema &
+ addFieldSet(const FieldSet &collection);
+
+ Schema &addImportedAttributeField(const ImportedAttributeField &field);
+
+ /**
+ * Obtain the number of index fields in this schema.
+ *
+ * @return number of fields
+ **/
+ uint32_t getNumIndexFields() const { return _indexFields.size(); }
+
+ /**
+ * Obtain the number of attribute fields in this schema.
+ *
+ * @return number of fields
+ **/
+ uint32_t getNumAttributeFields() const { return _attributeFields.size(); }
+
+ /**
+ * Obtain the number of summary fields in this schema.
+ *
+ * @return number of fields
+ **/
+ uint32_t getNumSummaryFields() const { return _summaryFields.size(); }
+
+ /**
+ * Obtain the number of field sets in this schema.
+ *
+ * @return number of field sets.
+ **/
+ uint32_t getNumFieldSets() const { return _fieldSets.size(); }
+
+ size_t getNumImportedAttributeFields() const { return _importedAttributeFields.size(); } // note: size_t, unlike the uint32_t getNum*() accessors above
+
+ /**
+ * Get information about a specific index field using the given fieldId.
+ *
+ * @return the field
+ * @param fieldId an index in the range [0, size - 1].
+ **/
+ const IndexField &
+ getIndexField(uint32_t fieldId) const
+ {
+ return _indexFields[fieldId]; // unchecked access: fieldId must be a valid index
+ }
+
+ /**
+ * Returns const view of the index fields.
+ */
+ const std::vector<IndexField> &getIndexFields() const {
+ return _indexFields;
+ }
+
+ /**
+ * Get the field id for the index field with the given name.
+ *
+ * @return the field id or UNKNOWN_FIELD_ID if not found.
+ * @param name the name of the field.
+ **/
+ uint32_t getIndexFieldId(vespalib::stringref name) const;
+
+ /**
+ * Check if a field is an index field
+ *
+ * @return true if field is an index field.
+ * @param name the name of the field.
+ **/
+ bool isIndexField(vespalib::stringref name) const;
+
+ /**
+ * Check if a field is a summary field
+ *
+ * @return true if field is a summary field.
+ * @param name the name of the field.
+ **/
+ bool isSummaryField(vespalib::stringref name) const;
+
+ /**
+ * Check if a field is an attribute field
+ *
+ * @return true if field is an attribute field.
+ * @param name the name of the field.
+ **/
+ bool isAttributeField(vespalib::stringref name) const;
+
+ /**
+ * Get information about a specific attribute field using the given fieldId.
+ *
+ * @return the field
+ * @param fieldId an index in the range [0, size - 1].
+ **/
+ const AttributeField &
+ getAttributeField(uint32_t fieldId) const
+ {
+ return _attributeFields[fieldId]; // unchecked access: fieldId must be a valid index
+ }
+
+ /**
+ * Returns const view of the attribute fields.
+ */
+ const std::vector<AttributeField> &getAttributeFields() const {
+ return _attributeFields;
+ }
+
+ /**
+ * Get the field id for the attribute field with the given name.
+ *
+ * @return the field id or UNKNOWN_FIELD_ID if not found.
+ * @param name the name of the field.
+ **/
+ uint32_t getAttributeFieldId(vespalib::stringref name) const;
+
+ /**
+ * Get information about a specific summary field using the given fieldId.
+ *
+ * @return the field
+ * @param fieldId an index in the range [0, size - 1]
+ **/
+ const SummaryField &
+ getSummaryField(uint32_t fieldId) const
+ {
+ return _summaryFields[fieldId]; // unchecked access: fieldId must be a valid index
+ }
+
+ /**
+ * Returns const view of the summary fields.
+ */
+ const std::vector<SummaryField> &getSummaryFields() const {
+ return _summaryFields;
+ }
+
+ /**
+ * Get the field id for the summary field with the given name.
+ *
+ * @return the field id or UNKNOWN_FIELD_ID if not found.
+ * @param name the name of the field.
+ **/
+ uint32_t getSummaryFieldId(vespalib::stringref name) const;
+
+ /**
+ * Get information about a specific field set
+ *
+ * @return the field set.
+ * @param idx an index in the range [0, size - 1].
+ **/
+ const FieldSet &
+ getFieldSet(uint32_t idx) const
+ {
+ return _fieldSets[idx]; // unchecked access: idx must be a valid index
+ }
+
+ /**
+ * Get the field set id for the field set with the given name.
+ *
+ * @return the field set id or UNKNOWN_FIELD_ID if not found.
+ * @param name the name of the field set.
+ **/
+ uint32_t
+ getFieldSetId(vespalib::stringref name) const;
+
+ const std::vector<ImportedAttributeField> &getImportedAttributeFields() const {
+ return _importedAttributeFields;
+ }
+
+ void swap(Schema &rhs);
+ void clear();
+
+ static Schema::UP intersect(const Schema &lhs, const Schema &rhs);
+ static Schema::UP make_union(const Schema &lhs, const Schema &rhs);
+ static Schema::UP set_difference(const Schema &lhs, const Schema &rhs);
+
+ bool operator==(const Schema &rhs) const;
+ bool operator!=(const Schema &rhs) const;
+
+ bool empty() const;
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/schemaconfigurer.cpp b/searchlib/src/vespa/searchcommon/common/schemaconfigurer.cpp
new file mode 100644
index 00000000000..8fbebe80b4b
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/schemaconfigurer.cpp
@@ -0,0 +1,239 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "schemaconfigurer.h"
+#include "subscriptionproxyng.h"
+#include <vespa/config-attributes.h>
+#include <vespa/config-imported-fields.h>
+#include <vespa/config-indexschema.h>
+#include <vespa/config-summary.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchcommon/attribute/collectiontype.h>
+#include <vespa/searchcommon/attribute/basictype.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".index.schemaconfigurer");
+
+using namespace config;
+using namespace vespa::config::search;
+
+namespace search::index {
+
+using schema::DataType;
+using schema::CollectionType;
+
+namespace {
+
+Schema::DataType
+convertIndexDataType(const IndexschemaConfig::Indexfield::Datatype &type)
+{
+    using Cfg = IndexschemaConfig::Indexfield::Datatype;
+    DataType result = DataType::STRING; // also the fallback for values not matched below
+    switch (type) {
+    case Cfg::STRING: result = DataType::STRING; break;
+    case Cfg::INT64:  result = DataType::INT64;  break;
+    }
+    return result;
+}
+
+
+Schema::CollectionType
+convertIndexCollectionType(const IndexschemaConfig::Indexfield::Collectiontype &type)
+{
+    using Cfg = IndexschemaConfig::Indexfield::Collectiontype;
+    // Values not matched below fall back to SINGLE.
+    CollectionType result = CollectionType::SINGLE;
+    switch (type) {
+    case Cfg::SINGLE:      result = CollectionType::SINGLE;      break;
+    case Cfg::ARRAY:       result = CollectionType::ARRAY;       break;
+    case Cfg::WEIGHTEDSET: result = CollectionType::WEIGHTEDSET; break;
+    }
+    return result;
+}
+
+template <typename ConfigType>
+Schema::DataType
+convertDataType(const ConfigType &type)
+{
+    DataType result = DataType::STRING; // fallback for unmatched values. TODO: exception?
+    switch (type) {
+    case ConfigType::STRING:
+        result = DataType::STRING; break;
+    case ConfigType::BOOL:
+        result = DataType::BOOL; break;
+    case ConfigType::UINT2:
+        result = DataType::UINT2; break;
+    case ConfigType::UINT4:
+        result = DataType::UINT4; break;
+    case ConfigType::INT8:
+        result = DataType::INT8; break;
+    case ConfigType::INT16:
+        result = DataType::INT16; break;
+    case ConfigType::INT32:
+        result = DataType::INT32; break;
+    case ConfigType::INT64:
+        result = DataType::INT64; break;
+    case ConfigType::FLOAT:
+        result = DataType::FLOAT; break;
+    case ConfigType::DOUBLE:
+        result = DataType::DOUBLE; break;
+    case ConfigType::PREDICATE:
+        result = DataType::BOOLEANTREE; break;
+    case ConfigType::TENSOR:
+        result = DataType::TENSOR; break;
+    case ConfigType::REFERENCE:
+        result = DataType::REFERENCE; break;
+    default:
+        break;
+    }
+    return result;
+}
+
+template <typename ConfigType>
+Schema::CollectionType
+convertCollectionType(const ConfigType &type)
+{
+    // Generic over the collection type enum of the config struct passed in;
+    // values not matched below fall back to SINGLE.
+    CollectionType result = CollectionType::SINGLE;
+    switch (type) {
+    case ConfigType::SINGLE:      result = CollectionType::SINGLE;      break;
+    case ConfigType::ARRAY:       result = CollectionType::ARRAY;       break;
+    case ConfigType::WEIGHTEDSET: result = CollectionType::WEIGHTEDSET; break;
+    }
+    return result;
+}
+
+
+Schema::DataType
+convertSummaryType(const vespalib::string &type)
+{
+    // Summary type names paired with the schema data type they map to.
+    // "data" and "longdata" are listed explicitly even though RAW is also the fallback.
+    struct Mapping { const char *name; DataType dataType; };
+    static const Mapping mappings[] = {
+        { "byte",        DataType::INT8 },
+        { "short",       DataType::INT16 },
+        { "integer",     DataType::INT32 },
+        { "int64",       DataType::INT64 },
+        { "float",       DataType::FLOAT },
+        { "double",      DataType::DOUBLE },
+        { "string",      DataType::STRING },
+        { "longstring",  DataType::STRING },
+        { "xmlstring",   DataType::STRING },
+        { "featuredata", DataType::STRING },
+        { "jsonstring",  DataType::STRING },
+        { "data",        DataType::RAW },
+        { "longdata",    DataType::RAW },
+    };
+    for (const Mapping &mapping : mappings) {
+        if (type == mapping.name) {
+            return mapping.dataType;
+        }
+    }
+    // Any other type name is treated as raw data.
+    return DataType::RAW;
+}
+
+}
+
+void
+SchemaBuilder::build(const IndexschemaConfig &cfg, Schema &schema)
+{
+    // First the index fields with their per-field settings, then the field sets.
+    for (const auto &fieldCfg : cfg.indexfield) {
+        Schema::IndexField indexField(fieldCfg.name, convertIndexDataType(fieldCfg.datatype),
+                                      convertIndexCollectionType(fieldCfg.collectiontype));
+        indexField.setAvgElemLen(fieldCfg.averageelementlen);
+        indexField.set_interleaved_features(fieldCfg.interleavedfeatures);
+        schema.addIndexField(indexField);
+    }
+    for (const auto &setCfg : cfg.fieldset) {
+        Schema::FieldSet fieldSet(setCfg.name);
+        for (const auto &member : setCfg.field) {
+            fieldSet.addField(member.name);
+        }
+        schema.addFieldSet(fieldSet);
+    }
+}
+
+
+void
+SchemaBuilder::build(const AttributesConfig &cfg, Schema &schema)
+{
+    for (const auto &attr : cfg.attribute) {
+        const auto dataType = convertDataType(attr.datatype);
+        const auto collectionType = convertCollectionType(attr.collectiontype);
+        if (!attr.imported) {
+            schema.addAttributeField(Schema::Field(attr.name, dataType, collectionType));
+            continue;
+        }
+        // Imported attributes are registered separately from regular ones.
+        schema.addImportedAttributeField(
+                Schema::ImportedAttributeField(attr.name, dataType, collectionType));
+    }
+}
+
+
+void
+SchemaBuilder::build(const SummaryConfig &cfg, Schema &schema)
+{
+    // Trace all configured class ids; %zu is the correct specifier for the size_t index.
+    for (size_t i = 0; i < cfg.classes.size(); ++i) {
+        LOG(debug, "class with index %zu has id %d (default has id %d)",
+            i, cfg.classes[i].id, cfg.defaultsummaryid);
+    }
+    for (const auto &summaryClass : cfg.classes) {
+        // use the default summary class that has all fields
+        if (summaryClass.id == cfg.defaultsummaryid) {
+            for (const auto &f : summaryClass.fields) {
+                schema.addSummaryField(Schema::Field(f.name,
+                                                     convertSummaryType(f.type)));
+            }
+            // Only the first class matching the default id contributes fields.
+            return;
+        }
+    }
+    if (cfg.classes.empty()) {
+        LOG(debug,
+            "No summary class configured that match the default summary id %d",
+            cfg.defaultsummaryid);
+    } else {
+        LOG(warning,
+            "No summary class configured that match the default summary id %d",
+            cfg.defaultsummaryid);
+    }
+}
+
+void
+SchemaConfigurer::configure(const IndexschemaConfig &cfg)
+{
+ SchemaBuilder::build(cfg, _schema); // adds the index fields and field sets from cfg to the referenced schema
+}
+
+void
+SchemaConfigurer::configure(const AttributesConfig &cfg)
+{
+ SchemaBuilder::build(cfg, _schema); // adds the attribute and imported attribute fields from cfg to the referenced schema
+}
+
+void
+SchemaConfigurer::configure(const SummaryConfig & cfg)
+{
+ SchemaBuilder::build(cfg, _schema); // adds the summary fields of the default summary class to the referenced schema
+}
+
+SchemaConfigurer::SchemaConfigurer(Schema &schema, const vespalib::string &configId)
+    : _schema(schema)
+{
+    // The proxies are locals: each one is destroyed, and thereby unsubscribed,
+    // when this constructor returns.
+    const char *id = configId.c_str();
+    search::SubscriptionProxyNg<SchemaConfigurer, IndexschemaConfig> indexSchemaProxy(*this, &SchemaConfigurer::configure);
+    search::SubscriptionProxyNg<SchemaConfigurer, AttributesConfig> attributesProxy(*this, &SchemaConfigurer::configure);
+    search::SubscriptionProxyNg<SchemaConfigurer, SummaryConfig> summaryProxy(*this, &SchemaConfigurer::configure);
+    indexSchemaProxy.subscribe(id);
+    attributesProxy.subscribe(id);
+    summaryProxy.subscribe(id);
+}
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/schemaconfigurer.h b/searchlib/src/vespa/searchcommon/common/schemaconfigurer.h
new file mode 100644
index 00000000000..925aefcfa25
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/schemaconfigurer.h
@@ -0,0 +1,68 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace vespa::config::search::internal {
+ class InternalIndexschemaType;
+ class InternalAttributesType;
+ class InternalSummaryType;
+}
+
+namespace search::index {
+
+class Schema;
+
+/**
+ * Helper with static functions that populate a Schema from
+ * indexschema, attributes and summary config.
+ **/
+class SchemaBuilder
+{
+public:
+ using IndexschemaConfig = const vespa::config::search::internal::InternalIndexschemaType; // note: the aliases are already const-qualified
+ using AttributesConfig = const vespa::config::search::internal::InternalAttributesType;
+ using SummaryConfig = const vespa::config::search::internal::InternalSummaryType;
+ /**
+ * Build from indexschema config.
+ * @param cfg IndexschemaConfig to use
+ * @param schema Schema that the result is added to
+ */
+ static void build(const IndexschemaConfig &cfg, Schema &schema);
+ /**
+ * Build from attribute config.
+ * @param cfg AttributesConfig to use
+ * @param schema Schema that the result is added to
+ **/
+ static void build(const AttributesConfig &cfg, Schema &schema);
+ /**
+ * Build from summary config.
+ * @param cfg SummaryConfig to use
+ * @param schema Schema that the result is added to
+ **/
+ static void build(const SummaryConfig &cfg, Schema &schema);
+
+};
+
+class SchemaConfigurer
+{
+private:
+ using IndexschemaConfig = SchemaBuilder::IndexschemaConfig;
+ using AttributesConfig = SchemaBuilder::AttributesConfig;
+ using SummaryConfig = SchemaBuilder::SummaryConfig;
+ Schema & _schema; // schema being populated; held by reference, not owned
+ void configure(const IndexschemaConfig & cfg); // one configure() overload per config type
+ void configure(const AttributesConfig & cfg);
+ void configure(const SummaryConfig & cfg);
+
+public:
+ /**
+ * Load this schema from config using the given config id.
+ * @param schema the schema to populate; it is stored by reference.
+ * @param configId the config id used to retrieve the relevant config.
+ **/
+ SchemaConfigurer(Schema & schema, const vespalib::string &configId);
+};
+
+}
diff --git a/searchlib/src/vespa/searchcommon/common/subscriptionproxyng.h b/searchlib/src/vespa/searchcommon/common/subscriptionproxyng.h
new file mode 100644
index 00000000000..dd24480f689
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/subscriptionproxyng.h
@@ -0,0 +1,61 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/config/helper/legacysubscriber.hpp>
+
+namespace search {
+
+template <typename ME, typename CFG>
+class SubscriptionProxyNg : public config::IFetcherCallback<CFG>
+{
+    using Method = void (ME::*)(const CFG &cfg); // member function of ME that receives the config
+
+private:
+    ME &_target;     // object the config is forwarded to; held by reference
+    Method _method;  // invoked on _target from configure()
+    std::unique_ptr<config::LegacySubscriber> _subscriber; // non-null only while subscribed
+    vespalib::string _cfgId; // id of the current subscription; empty when there is none
+
+public:
+    SubscriptionProxyNg(const SubscriptionProxyNg&) = delete; // not copyable
+    SubscriptionProxyNg &operator=(const SubscriptionProxyNg&) = delete;
+
+    SubscriptionProxyNg(ME &target, Method method)
+        : _target(target),
+          _method(method),
+          _subscriber(),
+          _cfgId("")
+    {
+    }
+    virtual ~SubscriptionProxyNg() {
+        unsubscribe(); // drop the subscriber before the rest of the object goes away
+    }
+    const char *getConfigId() const {
+        return _cfgId.c_str();
+    }
+    void subscribe(const char *configId) { // a null or empty configId only drops any current subscription
+        if (_subscriber) {
+            if (configId != nullptr && strcmp(configId, _subscriber->id().c_str()) == 0)
+            {
+                return; // same id; ignore
+            } else {
+                unsubscribe(); // different (or no) id: drop the old subscription first
+            }
+        }
+        if (configId != nullptr && configId[0] != '\0') {
+            _cfgId = configId;
+            _subscriber = std::make_unique<config::LegacySubscriber>();
+            _subscriber->subscribe<CFG>(configId, this);
+        }
+    }
+    void unsubscribe() {
+        _subscriber.reset();
+        _cfgId = "";
+    }
+    void configure(std::unique_ptr<CFG> cfg) override {
+        (_target.*_method)(*cfg); // forward the received config to the target's member function
+    }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchcommon/common/undefinedvalues.h b/searchlib/src/vespa/searchcommon/common/undefinedvalues.h
new file mode 100644
index 00000000000..bbe3198a8dc
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/common/undefinedvalues.h
@@ -0,0 +1,69 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cmath>
+#include <limits>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search::attribute {
+
+// for all integers: the undefined marker is the minimum value of the type
+template <typename T>
+constexpr T getUndefined() {
+ return std::numeric_limits<T>::min();
+}
+
+template <>
+inline constexpr float getUndefined<float>() {
+ return -std::numeric_limits<float>::quiet_NaN(); // negated quiet NaN
+}
+
+template <>
+inline constexpr double getUndefined<double>() {
+ return -std::numeric_limits<double>::quiet_NaN(); // negated quiet NaN
+}
+
+
+// generic case, used for all signed integers: compare against getUndefined<T>()
+template <typename T>
+bool isUndefined(const T & value) {
+ return value == getUndefined<T>();
+}
+
+template <>
+inline bool isUndefined<uint8_t>(const uint8_t &) {
+ return false; // unsigned values are never reported as undefined
+}
+
+template <>
+inline bool isUndefined<uint16_t>(const uint16_t &) {
+ return false; // unsigned values are never reported as undefined
+}
+
+template <>
+inline bool isUndefined<uint32_t>(const uint32_t &) {
+ return false; // unsigned values are never reported as undefined
+}
+
+template <>
+inline bool isUndefined<uint64_t>(const uint64_t &) {
+ return false; // unsigned values are never reported as undefined
+}
+
+template <>
+inline bool isUndefined<float>(const float & value) {
+ return std::isnan(value); // any NaN counts, not only the value returned by getUndefined<float>()
+}
+
+template <>
+inline bool isUndefined<double>(const double & value) {
+ return std::isnan(value); // any NaN counts, not only the value returned by getUndefined<double>()
+}
+
+template <>
+inline bool isUndefined<vespalib::string>(const vespalib::string & value) {
+ return value.empty(); // the empty string is the undefined value
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/CMakeLists.txt b/searchlib/src/vespa/searchlib/CMakeLists.txt
index dac40e0ab5f..91813a17379 100644
--- a/searchlib/src/vespa/searchlib/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/CMakeLists.txt
@@ -29,10 +29,12 @@ vespa_add_library(searchlib
$<TARGET_OBJECTS:searchlib_tensor>
$<TARGET_OBJECTS:searchlib_transactionlog>
$<TARGET_OBJECTS:searchlib_util>
+ $<TARGET_OBJECTS:searchcommon_searchcommon_common>
+ $<TARGET_OBJECTS:searchcommon_searchcommon_attribute>
INSTALL lib64
DEPENDS
- staging_vespalib
+ vespalib
${VESPA_ATOMIC_LIB}
)
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
index b991773c50f..51a4d392839 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
@@ -450,7 +450,10 @@ void LogDataStore::compactFile(FileId fileId)
IWriteData::UP compacter;
FileId destinationFileId = FileId::active();
if (_bucketizer) {
- if ( ! shouldCompactToActiveFile(fc->getDiskFootprint() - fc->getDiskBloat())) {
+ size_t disk_footprint = fc->getDiskFootprint();
+ size_t disk_bloat = fc->getDiskBloat();
+ size_t compacted_size = (disk_footprint <= disk_bloat) ? 0u : (disk_footprint - disk_bloat);
+ if ( ! shouldCompactToActiveFile(compacted_size)) {
MonitorGuard guard(_updateLock);
destinationFileId = allocateFileId(guard);
setNewFileChunk(guard, createWritableFile(destinationFileId, fc->getLastPersistedSerialNum(), fc->getNameId().next()));
@@ -464,9 +467,8 @@ void LogDataStore::compactFile(FileId fileId)
fc->appendTo(_executor, *this, *compacter, fc->getNumChunks(), nullptr, CpuCategory::COMPACT);
- if (destinationFileId.isActive()) {
- flushActiveAndWait(0);
- } else {
+ flushActiveAndWait(0);
+ if (!destinationFileId.isActive()) {
MonitorGuard guard(_updateLock);
auto & compactTo = dynamic_cast<WriteableFileChunk &>(*_fileChunks[destinationFileId.getId()]);
flushFileAndWait(std::move(guard), compactTo, 0);
diff --git a/searchlib/src/vespa/searchlib/index/field_length_calculator.h b/searchlib/src/vespa/searchlib/index/field_length_calculator.h
index 35a18b432f9..15d4c5ec285 100644
--- a/searchlib/src/vespa/searchlib/index/field_length_calculator.h
+++ b/searchlib/src/vespa/searchlib/index/field_length_calculator.h
@@ -15,7 +15,7 @@ namespace search::index {
*/
class FieldLengthCalculator {
std::atomic<double> _average_field_length;
- uint32_t _num_samples; // Capped by _max_num_samples
+ std::atomic<uint32_t> _num_samples; // Capped by _max_num_samples
uint32_t _max_num_samples;
public:
@@ -39,7 +39,7 @@ public:
}
double get_average_field_length() const { return _average_field_length.load(std::memory_order_relaxed); }
- uint32_t get_num_samples() const { return _num_samples; }
+ uint32_t get_num_samples() const { return _num_samples.load(std::memory_order_relaxed); }
uint32_t get_max_num_samples() const { return _max_num_samples; }
FieldLengthInfo get_info() const {
@@ -47,10 +47,12 @@ public:
}
void add_field_length(uint32_t field_length) {
- if (_num_samples < _max_num_samples) {
- ++_num_samples;
+ auto num_samples = get_num_samples();
+ if (num_samples < _max_num_samples) {
+ ++num_samples;
+ _num_samples.store(num_samples, std::memory_order_relaxed);
}
- _average_field_length.store((_average_field_length.load(std::memory_order_relaxed) * (_num_samples - 1) + field_length) / _num_samples, std::memory_order_relaxed);
+ _average_field_length.store((_average_field_length.load(std::memory_order_relaxed) * (num_samples - 1) + field_length) / num_samples, std::memory_order_relaxed);
}
};