summary refs log tree commit diff stats
path: root/searchcommon
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
committer Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
commit 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchcommon
Publish
Diffstat (limited to 'searchcommon')
-rw-r--r--searchcommon/.gitignore2
-rw-r--r--searchcommon/CMakeLists.txt19
-rw-r--r--searchcommon/OWNERS3
-rw-r--r--searchcommon/src/.gitignore4
-rw-r--r--searchcommon/src/testlist.txt2
-rw-r--r--searchcommon/src/tests/.gitignore3
-rw-r--r--searchcommon/src/tests/attribute/config/.gitignore1
-rw-r--r--searchcommon/src/tests/attribute/config/CMakeLists.txt8
-rw-r--r--searchcommon/src/tests/attribute/config/DESC1
-rw-r--r--searchcommon/src/tests/attribute/config/FILES1
-rw-r--r--searchcommon/src/tests/attribute/config/attribute_config_test.cpp106
-rw-r--r--searchcommon/src/tests/schema/.gitignore4
-rw-r--r--searchcommon/src/tests/schema/CMakeLists.txt8
-rw-r--r--searchcommon/src/tests/schema/DESC1
-rw-r--r--searchcommon/src/tests/schema/FILES1
-rw-r--r--searchcommon/src/tests/schema/attributes.cfg22
-rw-r--r--searchcommon/src/tests/schema/indexschema.cfg26
-rw-r--r--searchcommon/src/tests/schema/schema_test.cpp387
-rw-r--r--searchcommon/src/tests/schema/summary.cfg29
-rw-r--r--searchcommon/src/vespa/searchcommon/.gitignore3
-rw-r--r--searchcommon/src/vespa/searchcommon/CMakeLists.txt8
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/.gitignore2
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/CMakeLists.txt9
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/attributecontent.h172
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/basictype.cpp40
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/basictype.h69
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/collectiontype.cpp30
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/collectiontype.h78
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/config.cpp50
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/config.h155
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/iattributecontext.h58
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/iattributevector.h352
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/status.cpp74
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/status.h80
-rw-r--r--searchcommon/src/vespa/searchcommon/common/.gitignore2
-rw-r--r--searchcommon/src/vespa/searchcommon/common/CMakeLists.txt7
-rw-r--r--searchcommon/src/vespa/searchcommon/common/growstrategy.h43
-rw-r--r--searchcommon/src/vespa/searchcommon/common/iblobconverter.h24
-rw-r--r--searchcommon/src/vespa/searchcommon/common/schema.cpp670
-rw-r--r--searchcommon/src/vespa/searchcommon/common/schema.h429
-rw-r--r--searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp241
-rw-r--r--searchcommon/src/vespa/searchcommon/common/schemaconfigurer.h78
-rw-r--r--searchcommon/src/vespa/searchcommon/common/undefinedvalues.h72
-rw-r--r--searchcommon/src/vespa/searchcommon/config/.gitignore2
-rw-r--r--searchcommon/src/vespa/searchcommon/config/CMakeLists.txt6
-rw-r--r--searchcommon/src/vespa/searchcommon/config/subscriptionproxyng.h63
-rw-r--r--searchcommon/testrun/.gitignore12
47 files changed, 3457 insertions, 0 deletions
diff --git a/searchcommon/.gitignore b/searchcommon/.gitignore
new file mode 100644
index 00000000000..a9b20e8992d
--- /dev/null
+++ b/searchcommon/.gitignore
@@ -0,0 +1,2 @@
+Makefile
+Testing
diff --git a/searchcommon/CMakeLists.txt b/searchcommon/CMakeLists.txt
new file mode 100644
index 00000000000..8418f32e736
--- /dev/null
+++ b/searchcommon/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_define_module(
+ DEPENDS
+ fastos
+ vespalog
+ vespalib
+ config_cloudconfig
+ configdefinitions
+
+ TESTS
+ src/tests/attribute/config
+ src/tests/schema
+
+ LIBS
+ src/vespa/searchcommon
+ src/vespa/searchcommon/attribute
+ src/vespa/searchcommon/common
+ src/vespa/searchcommon/config
+)
diff --git a/searchcommon/OWNERS b/searchcommon/OWNERS
new file mode 100644
index 00000000000..6b6bfc6e2ac
--- /dev/null
+++ b/searchcommon/OWNERS
@@ -0,0 +1,3 @@
+geirst
+balder
+tegge
diff --git a/searchcommon/src/.gitignore b/searchcommon/src/.gitignore
new file mode 100644
index 00000000000..8b68901f2ce
--- /dev/null
+++ b/searchcommon/src/.gitignore
@@ -0,0 +1,4 @@
+/Makefile.ini
+/config_command.sh
+/project.dsw
+/searchcommon.mak
diff --git a/searchcommon/src/testlist.txt b/searchcommon/src/testlist.txt
new file mode 100644
index 00000000000..46279ceb830
--- /dev/null
+++ b/searchcommon/src/testlist.txt
@@ -0,0 +1,2 @@
+tests/attribute/config
+tests/schema
diff --git a/searchcommon/src/tests/.gitignore b/searchcommon/src/tests/.gitignore
new file mode 100644
index 00000000000..a3e9c375723
--- /dev/null
+++ b/searchcommon/src/tests/.gitignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+*_test
diff --git a/searchcommon/src/tests/attribute/config/.gitignore b/searchcommon/src/tests/attribute/config/.gitignore
new file mode 100644
index 00000000000..ffdb7b1e933
--- /dev/null
+++ b/searchcommon/src/tests/attribute/config/.gitignore
@@ -0,0 +1 @@
+searchcommon_attribute_config_test_app
diff --git a/searchcommon/src/tests/attribute/config/CMakeLists.txt b/searchcommon/src/tests/attribute/config/CMakeLists.txt
new file mode 100644
index 00000000000..d0864c68240
--- /dev/null
+++ b/searchcommon/src/tests/attribute/config/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchcommon_attribute_config_test_app
+ SOURCES
+ attribute_config_test.cpp
+ DEPENDS
+ searchcommon
+)
+vespa_add_test(NAME searchcommon_attribute_config_test_app NO_VALGRIND COMMAND searchcommon_attribute_config_test_app)
diff --git a/searchcommon/src/tests/attribute/config/DESC b/searchcommon/src/tests/attribute/config/DESC
new file mode 100644
index 00000000000..b98a0b64649
--- /dev/null
+++ b/searchcommon/src/tests/attribute/config/DESC
@@ -0,0 +1 @@
+search::attribute::Config test. Take a look at attribute_config_test.cpp for details.
diff --git a/searchcommon/src/tests/attribute/config/FILES b/searchcommon/src/tests/attribute/config/FILES
new file mode 100644
index 00000000000..90f22156a0a
--- /dev/null
+++ b/searchcommon/src/tests/attribute/config/FILES
@@ -0,0 +1 @@
+attribute_config_test.cpp
diff --git a/searchcommon/src/tests/attribute/config/attribute_config_test.cpp b/searchcommon/src/tests/attribute/config/attribute_config_test.cpp
new file mode 100644
index 00000000000..3a7994ee39b
--- /dev/null
+++ b/searchcommon/src/tests/attribute/config/attribute_config_test.cpp
@@ -0,0 +1,106 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchcommon/attribute/config.h>
+
+using search::attribute::Config;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using vespalib::tensor::TensorType;
+
+
+struct Fixture
+{
+ Config _config;
+ Fixture()
+ : _config()
+ {
+ }
+
+ Fixture(BasicType bt,
+ CollectionType ct = CollectionType::SINGLE,
+ bool fastSearch_ = false,
+ bool huge_ = false)
+ : _config(bt, ct, fastSearch_, huge_)
+ {
+ }
+};
+
+TEST_F("test default attribute config", Fixture)
+{
+ EXPECT_EQUAL(BasicType::Type::NONE, f._config.basicType().type());
+ EXPECT_EQUAL(CollectionType::Type::SINGLE,
+ f._config.collectionType().type());
+ EXPECT_TRUE(!f._config.fastSearch());
+ EXPECT_TRUE(!f._config.huge());
+ EXPECT_TRUE(!f._config.getEnableBitVectors());
+ EXPECT_TRUE(!f._config.getEnableOnlyBitVector());
+ EXPECT_TRUE(!f._config.getIsFilter());
+ EXPECT_TRUE(!f._config.fastAccess());
+ EXPECT_TRUE(!f._config.tensorType().is_valid());
+}
+
+TEST_F("test integer weightedset attribute config",
+ Fixture(BasicType::Type::INT32,
+ CollectionType::Type::WSET))
+{
+ EXPECT_EQUAL(BasicType::Type::INT32, f._config.basicType().type());
+ EXPECT_EQUAL(CollectionType::Type::WSET,
+ f._config.collectionType().type());
+ EXPECT_TRUE(!f._config.fastSearch());
+ EXPECT_TRUE(!f._config.huge());
+ EXPECT_TRUE(!f._config.getEnableBitVectors());
+ EXPECT_TRUE(!f._config.getEnableOnlyBitVector());
+ EXPECT_TRUE(!f._config.getIsFilter());
+ EXPECT_TRUE(!f._config.fastAccess());
+ EXPECT_TRUE(!f._config.tensorType().is_valid());
+}
+
+
+TEST("test operator== on attribute config")
+{
+ Config cfg1(BasicType::Type::INT32, CollectionType::Type::WSET);
+ Config cfg2(BasicType::Type::INT32, CollectionType::Type::ARRAY);
+ Config cfg3(BasicType::Type::INT32, CollectionType::Type::WSET);
+
+ EXPECT_TRUE(cfg1 != cfg2);
+ EXPECT_TRUE(cfg2 != cfg3);
+ EXPECT_TRUE(cfg1 == cfg3);
+}
+
+
+TEST("test operator== on attribute config for tensor type")
+{
+ Config cfg1(BasicType::Type::TENSOR);
+ Config cfg2(BasicType::Type::TENSOR);
+ Config cfg3(BasicType::Type::TENSOR);
+
+ TensorType dense_x = TensorType::fromSpec("tensor(x[10])");
+ TensorType sparse_x = TensorType::fromSpec("tensor(x{})");
+
+ // invalid tensors are not equal
+ EXPECT_TRUE(cfg1 != cfg2);
+ EXPECT_TRUE(cfg2 != cfg3);
+ EXPECT_TRUE(cfg1 != cfg3);
+
+ cfg1.setTensorType(dense_x);
+ cfg3.setTensorType(dense_x);
+ EXPECT_EQUAL(dense_x, cfg1.tensorType());
+ EXPECT_EQUAL(dense_x, cfg3.tensorType());
+ EXPECT_TRUE(cfg1.tensorType().is_valid());
+ EXPECT_TRUE(!cfg2.tensorType().is_valid());
+ EXPECT_TRUE(cfg3.tensorType().is_valid());
+
+ EXPECT_TRUE(cfg1 != cfg2);
+ EXPECT_TRUE(cfg2 != cfg3);
+ EXPECT_TRUE(cfg1 == cfg3);
+
+ cfg3.setTensorType(sparse_x);
+ EXPECT_EQUAL(sparse_x, cfg3.tensorType());
+ EXPECT_TRUE(cfg3.tensorType().is_valid());
+ EXPECT_TRUE(cfg1 != cfg3);
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchcommon/src/tests/schema/.gitignore b/searchcommon/src/tests/schema/.gitignore
new file mode 100644
index 00000000000..79e714aa5a2
--- /dev/null
+++ b/searchcommon/src/tests/schema/.gitignore
@@ -0,0 +1,4 @@
+/.depend
+/Makefile
+/schema_test
+searchcommon_schema_test_app
diff --git a/searchcommon/src/tests/schema/CMakeLists.txt b/searchcommon/src/tests/schema/CMakeLists.txt
new file mode 100644
index 00000000000..0a600a55e7b
--- /dev/null
+++ b/searchcommon/src/tests/schema/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchcommon_schema_test_app
+ SOURCES
+ schema_test.cpp
+ DEPENDS
+ searchcommon
+)
+vespa_add_test(NAME searchcommon_schema_test_app NO_VALGRIND COMMAND searchcommon_schema_test_app)
diff --git a/searchcommon/src/tests/schema/DESC b/searchcommon/src/tests/schema/DESC
new file mode 100644
index 00000000000..e357c31742a
--- /dev/null
+++ b/searchcommon/src/tests/schema/DESC
@@ -0,0 +1 @@
+schema test. Take a look at schema_test.cpp for details.
diff --git a/searchcommon/src/tests/schema/FILES b/searchcommon/src/tests/schema/FILES
new file mode 100644
index 00000000000..4688fde12ce
--- /dev/null
+++ b/searchcommon/src/tests/schema/FILES
@@ -0,0 +1 @@
+schema_test.cpp
diff --git a/searchcommon/src/tests/schema/attributes.cfg b/searchcommon/src/tests/schema/attributes.cfg
new file mode 100644
index 00000000000..09f711b6a65
--- /dev/null
+++ b/searchcommon/src/tests/schema/attributes.cfg
@@ -0,0 +1,22 @@
+attribute[9]
+attribute[0].name a
+attribute[0].datatype STRING
+attribute[0].collectiontype SINGLE
+attribute[1].name b
+attribute[1].datatype INT8
+attribute[1].collectiontype ARRAY
+attribute[2].name c
+attribute[2].datatype INT16
+attribute[2].collectiontype WEIGHTEDSET
+attribute[3].name d
+attribute[3].datatype INT32
+attribute[4].name e
+attribute[4].datatype INT64
+attribute[5].name f
+attribute[5].datatype FLOAT
+attribute[6].name g
+attribute[6].datatype DOUBLE
+attribute[7].name h
+attribute[7].datatype PREDICATE
+attribute[8].name i
+attribute[8].datatype TENSOR
diff --git a/searchcommon/src/tests/schema/indexschema.cfg b/searchcommon/src/tests/schema/indexschema.cfg
new file mode 100644
index 00000000000..989f30f7499
--- /dev/null
+++ b/searchcommon/src/tests/schema/indexschema.cfg
@@ -0,0 +1,26 @@
+indexfield[6]
+indexfield[0].name a
+indexfield[0].datatype STRING
+indexfield[1].name b
+indexfield[1].datatype INT64
+indexfield[2].name c
+indexfield[2].datatype STRING
+indexfield[2].prefix true
+indexfield[2].phrases false
+indexfield[2].positions false
+indexfield[3].name e
+indexfield[3].datatype BOOLEANTREE
+indexfield[3].collectiontype SINGLE
+indexfield[4].name f
+indexfield[4].indextype RISE
+indexfield[4].datatype STRING
+indexfield[4].collectiontype WEIGHTEDSET
+indexfield[5].name g
+indexfield[5].indextype RISE
+indexfield[5].datatype INT64
+indexfield[5].collectiontype WEIGHTEDSET
+fieldset[1]
+fieldset[0].name default
+fieldset[0].field[2]
+fieldset[0].field[0].name a
+fieldset[0].field[1].name c
diff --git a/searchcommon/src/tests/schema/schema_test.cpp b/searchcommon/src/tests/schema/schema_test.cpp
new file mode 100644
index 00000000000..56154c7a7d4
--- /dev/null
+++ b/searchcommon/src/tests/schema/schema_test.cpp
@@ -0,0 +1,387 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <fstream>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/config/common/configparser.h>
+#include <vespa/searchcommon/common/schemaconfigurer.h>
+LOG_SETUP("schema_test");
+
+using vespalib::string;
+
+namespace search {
+namespace index {
+
+void assertField(const Schema::Field & exp, const Schema::Field & act) {
+ EXPECT_EQUAL(exp.getName(), act.getName());
+ EXPECT_EQUAL(exp.getDataType(), act.getDataType());
+ EXPECT_EQUAL(exp.getCollectionType(), act.getCollectionType());
+}
+
+void assertIndexField(const Schema::IndexField & exp,
+ const Schema::IndexField & act)
+{
+ assertField(exp, act);
+ EXPECT_EQUAL(exp.hasPrefix(), act.hasPrefix());
+ EXPECT_EQUAL(exp.hasPhrases(), act.hasPhrases());
+ EXPECT_EQUAL(exp.hasPositions(), act.hasPositions());
+}
+
+void assertSet(const Schema::FieldSet &exp,
+ const Schema::FieldSet &act)
+{
+ EXPECT_EQUAL(exp.getName(), act.getName());
+ ASSERT_EQUAL(exp.getFields().size(), act.getFields().size());
+ for (size_t i = 0; i < exp.getFields().size(); ++i) {
+ EXPECT_EQUAL(exp.getFields()[i], act.getFields()[i]);
+ }
+}
+
+void assertSchema(const Schema & exp, const Schema & act) {
+ ASSERT_EQUAL(exp.getNumIndexFields(), act.getNumIndexFields());
+ for (size_t i = 0; i < exp.getNumIndexFields(); ++i) {
+ assertIndexField(exp.getIndexField(i), act.getIndexField(i));
+ }
+ ASSERT_EQUAL(exp.getNumAttributeFields(), act.getNumAttributeFields());
+ for (size_t i = 0; i < exp.getNumAttributeFields(); ++i) {
+ assertField(exp.getAttributeField(i), act.getAttributeField(i));
+ }
+ ASSERT_EQUAL(exp.getNumSummaryFields(), act.getNumSummaryFields());
+ for (size_t i = 0; i < exp.getNumSummaryFields(); ++i) {
+ assertField(exp.getSummaryField(i), act.getSummaryField(i));
+ }
+ ASSERT_EQUAL(exp.getNumFieldSets(), act.getNumFieldSets());
+ for (size_t i = 0; i < exp.getNumFieldSets(); ++i) {
+ assertSet(exp.getFieldSet(i), act.getFieldSet(i));
+ }
+}
+
+TEST("testBasic") { // Adds index/attribute/summary fields and a field set, then checks accessors and name->id lookups.
+ Schema s;
+ EXPECT_EQUAL(0u, s.getNumIndexFields());
+ EXPECT_EQUAL(0u, s.getNumAttributeFields());
+ EXPECT_EQUAL(0u, s.getNumSummaryFields());
+
+ s.addIndexField(Schema::IndexField("foo", Schema::STRING));
+ s.addIndexField(Schema::IndexField("bar", Schema::INT32));
+
+ s.addAttributeField(Schema::AttributeField("foo", Schema::STRING, Schema::ARRAY));
+ s.addAttributeField(Schema::AttributeField("bar", Schema::INT32, Schema::WEIGHTEDSET));
+ s.addAttributeField(Schema::AttributeField("cox", Schema::STRING));
+
+ s.addSummaryField(Schema::SummaryField("foo", Schema::STRING, Schema::ARRAY));
+ s.addSummaryField(Schema::SummaryField("bar", Schema::INT32, Schema::WEIGHTEDSET));
+ s.addSummaryField(Schema::SummaryField("cox", Schema::STRING));
+ s.addSummaryField(Schema::SummaryField("fox", Schema::RAW));
+
+ s.addFieldSet(Schema::FieldSet("default").
+ addField("foo").addField("bar"));
+
+ EXPECT_EQUAL(2u, s.getNumIndexFields()); // index fields: "foo", "bar"
+ {
+ EXPECT_EQUAL("foo", s.getIndexField(0).getName());
+ EXPECT_EQUAL(Schema::STRING, s.getIndexField(0).getDataType());
+ EXPECT_EQUAL(Schema::SINGLE, s.getIndexField(0).getCollectionType());
+ EXPECT_TRUE(!s.getIndexField(0).hasPrefix());
+ EXPECT_TRUE(!s.getIndexField(0).hasPhrases());
+ EXPECT_TRUE(s.getIndexField(0).hasPositions());
+
+ EXPECT_EQUAL("bar", s.getIndexField(1).getName());
+ EXPECT_EQUAL(Schema::INT32, s.getIndexField(1).getDataType());
+ EXPECT_EQUAL(Schema::SINGLE, s.getIndexField(1).getCollectionType());
+
+ EXPECT_EQUAL(0u, s.getIndexFieldId("foo"));
+ EXPECT_EQUAL(1u, s.getIndexFieldId("bar"));
+ EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getIndexFieldId("cox")); // "cox" was never added as an index field
+ }
+ EXPECT_EQUAL(3u, s.getNumAttributeFields()); // attribute fields: "foo", "bar", "cox"
+ {
+ EXPECT_EQUAL("foo", s.getAttributeField(0).getName());
+ EXPECT_EQUAL(Schema::STRING, s.getAttributeField(0).getDataType());
+ EXPECT_EQUAL(Schema::ARRAY,
+ s.getAttributeField(0).getCollectionType());
+
+ EXPECT_EQUAL("bar", s.getAttributeField(1).getName());
+ EXPECT_EQUAL(Schema::INT32, s.getAttributeField(1).getDataType());
+ EXPECT_EQUAL(Schema::WEIGHTEDSET,
+ s.getAttributeField(1).getCollectionType());
+
+ EXPECT_EQUAL("cox", s.getAttributeField(2).getName());
+ EXPECT_EQUAL(Schema::STRING, s.getAttributeField(2).getDataType());
+ EXPECT_EQUAL(Schema::SINGLE,
+ s.getAttributeField(2).getCollectionType());
+
+ EXPECT_EQUAL(0u, s.getAttributeFieldId("foo"));
+ EXPECT_EQUAL(1u, s.getAttributeFieldId("bar"));
+ EXPECT_EQUAL(2u, s.getAttributeFieldId("cox"));
+ EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getAttributeFieldId("fox")); // "fox" was only added as a summary field
+ }
+ EXPECT_EQUAL(4u, s.getNumSummaryFields()); // summary fields: "foo", "bar", "cox", "fox"
+ {
+ EXPECT_EQUAL("foo", s.getSummaryField(0).getName());
+ EXPECT_EQUAL(Schema::STRING, s.getSummaryField(0).getDataType());
+ EXPECT_EQUAL(Schema::ARRAY, s.getSummaryField(0).getCollectionType());
+
+ EXPECT_EQUAL("bar", s.getSummaryField(1).getName());
+ EXPECT_EQUAL(Schema::INT32, s.getSummaryField(1).getDataType());
+ EXPECT_EQUAL(Schema::WEIGHTEDSET,
+ s.getSummaryField(1).getCollectionType());
+
+ EXPECT_EQUAL("cox", s.getSummaryField(2).getName());
+ EXPECT_EQUAL(Schema::STRING, s.getSummaryField(2).getDataType());
+ EXPECT_EQUAL(Schema::SINGLE, s.getSummaryField(2).getCollectionType());
+
+ EXPECT_EQUAL("fox", s.getSummaryField(3).getName());
+ EXPECT_EQUAL(Schema::RAW, s.getSummaryField(3).getDataType());
+ EXPECT_EQUAL(Schema::SINGLE, s.getSummaryField(3).getCollectionType());
+
+ EXPECT_EQUAL(0u, s.getSummaryFieldId("foo"));
+ EXPECT_EQUAL(1u, s.getSummaryFieldId("bar"));
+ EXPECT_EQUAL(2u, s.getSummaryFieldId("cox"));
+ EXPECT_EQUAL(3u, s.getSummaryFieldId("fox"));
+ EXPECT_EQUAL(Schema::UNKNOWN_FIELD_ID, s.getSummaryFieldId("not"));
+ }
+ EXPECT_EQUAL(1u, s.getNumFieldSets()); // the single field set "default" with members "foo" and "bar"
+ {
+ EXPECT_EQUAL("default", s.getFieldSet(0).getName());
+ EXPECT_EQUAL(2u, s.getFieldSet(0).getFields().size());
+ EXPECT_EQUAL("foo", s.getFieldSet(0).getFields()[0]);
+ EXPECT_EQUAL("bar", s.getFieldSet(0).getFields()[1]);
+ }
+}
+
+TEST("testLoadAndSave") {
+ typedef Schema::IndexField SIF;
+ typedef Schema::AttributeField SAF;
+ typedef Schema::SummaryField SSF;
+ typedef Schema SDT; // alias used below to scope data-type enumerators (SDT::STRING, SDT::INT64, ...)
+ typedef Schema SCT; // alias used below to scope collection-type enumerators (SCT::SINGLE, SCT::ARRAY, ...)
+ typedef Schema::FieldSet SFS;
+
+ { // load from config -> save to file -> load from file
+ Schema s;
+ SchemaConfigurer configurer(s, "dir:.");
+ EXPECT_EQUAL(3u, s.getNumIndexFields());
+ assertIndexField(SIF("a", SDT::STRING), s.getIndexField(0));
+ assertIndexField(SIF("b", SDT::INT64), s.getIndexField(1));
+ assertIndexField(SIF("c", SDT::STRING).setPrefix(true)
+ .setPhrases(false).setPositions(false),
+ s.getIndexField(2));
+
+ EXPECT_EQUAL(9u, s.getNumAttributeFields());
+ assertField(SAF("a", SDT::STRING, SCT::SINGLE),
+ s.getAttributeField(0));
+ assertField(SAF("b", SDT::INT8, SCT::ARRAY), s.getAttributeField(1));
+ assertField(SAF("c", SDT::INT16, SCT::WEIGHTEDSET),
+ s.getAttributeField(2));
+ assertField(SAF("d", SDT::INT32), s.getAttributeField(3));
+ assertField(SAF("e", SDT::INT64), s.getAttributeField(4));
+ assertField(SAF("f", SDT::FLOAT), s.getAttributeField(5));
+ assertField(SAF("g", SDT::DOUBLE), s.getAttributeField(6));
+ assertField(SAF("h", SDT::BOOLEANTREE), s.getAttributeField(7));
+ assertField(SAF("i", SDT::TENSOR), s.getAttributeField(8));
+
+ EXPECT_EQUAL(12u, s.getNumSummaryFields());
+ assertField(SSF("a", SDT::INT8), s.getSummaryField(0));
+ assertField(SSF("b", SDT::INT16), s.getSummaryField(1));
+ assertField(SSF("c", SDT::INT32), s.getSummaryField(2));
+ assertField(SSF("d", SDT::INT64), s.getSummaryField(3));
+ assertField(SSF("e", SDT::FLOAT), s.getSummaryField(4));
+ assertField(SSF("f", SDT::DOUBLE), s.getSummaryField(5));
+ assertField(SSF("g", SDT::STRING), s.getSummaryField(6));
+ assertField(SSF("h", SDT::STRING), s.getSummaryField(7));
+ assertField(SSF("i", SDT::STRING), s.getSummaryField(8));
+ assertField(SSF("j", SDT::STRING), s.getSummaryField(9));
+ assertField(SSF("k", SDT::RAW), s.getSummaryField(10));
+ assertField(SSF("l", SDT::RAW), s.getSummaryField(11));
+
+ EXPECT_EQUAL(1u, s.getNumFieldSets());
+ assertSet(SFS("default").addField("a").addField("c"),
+ s.getFieldSet(0));
+
+ Schema s2 = s;
+ EXPECT_TRUE(s.saveToFile("schema.txt"));
+ assertSchema(s, s2); // test copy constructor
+ Schema s3;
+ EXPECT_TRUE(s3.loadFromFile("schema.txt"));
+ assertSchema(s, s3); // test that saved file is loaded correctly
+ s3.addIndexField(SIF("foo", SDT::STRING));
+ EXPECT_TRUE(s3.loadFromFile("schema.txt")); // load should clear the current content
+ assertSchema(s, s3);
+ }
+ { // empty schema
+ Schema s;
+ EXPECT_TRUE(s.saveToFile("schema2.txt"));
+ Schema s2;
+ s2.addIndexField(SIF("foo", SDT::STRING));
+ EXPECT_TRUE(s2.loadFromFile("schema2.txt"));
+ assertSchema(s, s2);
+ }
+ { // load with error
+ Schema s;
+ EXPECT_TRUE(!s.loadFromFile("not.txt")); // presumably fails because no such file exists - TODO confirm
+ EXPECT_TRUE(!s.saveToFile("not/not.txt")); // presumably fails because directory "not" does not exist - TODO confirm
+ }
+}
+
+TEST("require that schema can save and load timestamps for fields") {
+ const fastos::TimeStamp timestamp(42);
+ const std::string file_name = "schema-with-timestamps.txt";
+ Schema s;
+ Schema::IndexField f("foo", Schema::STRING);
+ f.setTimestamp(timestamp);
+ s.addIndexField(f);
+ ASSERT_TRUE(s.saveToFile(file_name));
+ Schema s2;
+ ASSERT_TRUE(s2.loadFromFile(file_name));
+ ASSERT_EQUAL(1u, s2.getNumIndexFields());
+ ASSERT_EQUAL(timestamp, s2.getIndexField(0).getTimestamp());
+}
+
+TEST("require that timestamps are omitted when 0.") {
+ const std::string file_name = "schema-without-timestamps.txt";
+ Schema s;
+ s.addIndexField(Schema::IndexField("foo", Schema::STRING));
+ ASSERT_TRUE(s.saveToFile(file_name));
+
+ std::ifstream file(file_name.c_str());
+ ASSERT_TRUE(file.good());
+ // The saved file must not contain a timestamp entry for a field whose timestamp was never set.
+ std::string line;
+ while (std::getline(file, line)) { // inspect only lines that were actually read
+ EXPECT_NOT_EQUAL("indexfield[0].timestamp 0", line);
+ }
+
+ Schema s2;
+ ASSERT_TRUE(s2.loadFromFile(file_name));
+ ASSERT_EQUAL(1u, s2.getNumIndexFields());
+}
+
+void addAllFieldTypes(const string &name, Schema &schema,
+ fastos::TimeStamp timestamp) {
+ Schema::IndexField index_field(name, Schema::STRING);
+ index_field.setTimestamp(timestamp);
+ schema.addIndexField(index_field);
+
+ Schema::AttributeField attribute_field(name, Schema::STRING);
+ attribute_field.setTimestamp(timestamp);
+ schema.addAttributeField(attribute_field);
+
+ Schema::SummaryField summary_field(name, Schema::STRING);
+ summary_field.setTimestamp(timestamp);
+ schema.addSummaryField(summary_field);
+
+ schema.addFieldSet(Schema::FieldSet(name));
+}
+
+TEST("require that schemas can be added") {
+ const string name1 = "foo";
+ const string name2 = "bar";
+ const fastos::TimeStamp timestamp1(42);
+ const fastos::TimeStamp timestamp2(84);
+ Schema s1;
+ addAllFieldTypes(name1, s1, timestamp1);
+ Schema s2;
+ addAllFieldTypes(name2, s2, timestamp2);
+
+ Schema::UP sum = Schema::make_union(s1, s2);
+ ASSERT_EQUAL(2u, sum->getNumIndexFields());
+ EXPECT_TRUE(s1.getIndexField(0) ==
+ sum->getIndexField(sum->getIndexFieldId(name1)));
+ EXPECT_TRUE(s2.getIndexField(0) ==
+ sum->getIndexField(sum->getIndexFieldId(name2)));
+ ASSERT_EQUAL(2u, sum->getNumAttributeFields());
+ EXPECT_TRUE(s1.getAttributeField(0) ==
+ sum->getAttributeField(sum->getAttributeFieldId(name1)));
+ EXPECT_TRUE(s2.getAttributeField(0) ==
+ sum->getAttributeField(sum->getAttributeFieldId(name2)));
+ ASSERT_EQUAL(2u, sum->getNumSummaryFields());
+ EXPECT_TRUE(s1.getSummaryField(0) ==
+ sum->getSummaryField(sum->getSummaryFieldId(name1)));
+ EXPECT_TRUE(s2.getSummaryField(0) ==
+ sum->getSummaryField(sum->getSummaryFieldId(name2)));
+ ASSERT_EQUAL(2u, sum->getNumFieldSets());
+ EXPECT_TRUE(s1.getFieldSet(0) ==
+ sum->getFieldSet(sum->getFieldSetId(name1)));
+ EXPECT_TRUE(s2.getFieldSet(0) ==
+ sum->getFieldSet(sum->getFieldSetId(name2)));
+}
+
+TEST("require that S union S = S for schema S") { // the union of a schema with itself must equal the schema
+ Schema schema;
+ addAllFieldTypes("foo", schema, fastos::TimeStamp(42)); // explicit TimeStamp, as at the other call sites
+
+ Schema::UP sum = Schema::make_union(schema, schema);
+ EXPECT_TRUE(schema == *sum);
+}
+
+TEST("require that schema can calculate set_difference") {
+ const string name1 = "foo";
+ const string name2 = "bar";
+ const fastos::TimeStamp timestamp1(42);
+ const fastos::TimeStamp timestamp2(84);
+ Schema s1;
+ addAllFieldTypes(name1, s1, timestamp1);
+ addAllFieldTypes(name2, s1, timestamp2);
+ Schema s2;
+ addAllFieldTypes(name2, s2, timestamp2);
+
+ Schema::UP schema = Schema::set_difference(s1, s2);
+
+ Schema expected;
+ addAllFieldTypes(name1, expected, timestamp1);
+ EXPECT_TRUE(expected == *schema);
+}
+
+TEST("require that getOldFields returns a subset of a schema") {
+ Schema schema;
+ const int64_t limit_timestamp = 1000;
+
+ addAllFieldTypes("bar", schema, fastos::TimeStamp(limit_timestamp - 1));
+ addAllFieldTypes("foo", schema, fastos::TimeStamp(limit_timestamp + 1));
+
+ Schema::UP old_fields =
+ schema.getOldFields(fastos::TimeStamp(limit_timestamp));
+
+ EXPECT_EQUAL(1u, old_fields->getNumIndexFields());
+ EXPECT_EQUAL("bar", old_fields->getIndexField(0).getName());
+ EXPECT_EQUAL(1u, old_fields->getNumAttributeFields());
+ EXPECT_EQUAL(1u, old_fields->getNumSummaryFields());
+}
+
+TEST("require that schema can calculate intersection") {
+ const string name1 = "foo";
+ const string name2 = "bar";
+ const string name3 = "baz";
+ const fastos::TimeStamp timestamp1(42);
+ const fastos::TimeStamp timestamp2(84);
+ Schema s1;
+ addAllFieldTypes(name1, s1, timestamp1);
+ addAllFieldTypes(name2, s1, timestamp2);
+ Schema s2;
+ addAllFieldTypes(name2, s2, timestamp2);
+ addAllFieldTypes(name3, s2, timestamp2);
+
+ Schema::UP schema = Schema::intersect(s1, s2);
+
+ Schema expected;
+ addAllFieldTypes(name2, expected, timestamp2);
+ EXPECT_TRUE(expected == *schema);
+}
+
+TEST("require that incompatible fields are removed from intersection") {
+ const string name = "foo";
+ Schema s1;
+ s1.addIndexField(Schema::IndexField(name, Schema::STRING));
+ Schema s2;
+ s2.addIndexField(Schema::IndexField(name, Schema::INT32));
+ Schema::UP schema = Schema::intersect(s1, s2);
+ EXPECT_EQUAL(0u, schema->getNumIndexFields());
+ EXPECT_FALSE(schema->isIndexField(name));
+}
+
+} // namespace index
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchcommon/src/tests/schema/summary.cfg b/searchcommon/src/tests/schema/summary.cfg
new file mode 100644
index 00000000000..0c2de33d076
--- /dev/null
+++ b/searchcommon/src/tests/schema/summary.cfg
@@ -0,0 +1,29 @@
+defaultsummaryid 0
+classes[1]
+classes[0].id 0
+classes[0].name test
+classes[0].fields[12]
+classes[0].fields[0].name a
+classes[0].fields[0].type byte
+classes[0].fields[1].name b
+classes[0].fields[1].type short
+classes[0].fields[2].name c
+classes[0].fields[2].type integer
+classes[0].fields[3].name d
+classes[0].fields[3].type int64
+classes[0].fields[4].name e
+classes[0].fields[4].type float
+classes[0].fields[5].name f
+classes[0].fields[5].type double
+classes[0].fields[6].name g
+classes[0].fields[6].type string
+classes[0].fields[7].name h
+classes[0].fields[7].type longstring
+classes[0].fields[8].name i
+classes[0].fields[8].type xmlstring
+classes[0].fields[9].name j
+classes[0].fields[9].type jsonstring
+classes[0].fields[10].name k
+classes[0].fields[10].type data
+classes[0].fields[11].name l
+classes[0].fields[11].type longdata
diff --git a/searchcommon/src/vespa/searchcommon/.gitignore b/searchcommon/src/vespa/searchcommon/.gitignore
new file mode 100644
index 00000000000..f76a9d84bed
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/.gitignore
@@ -0,0 +1,3 @@
+/.depend
+/Makefile
+/libsearchcommon.so.5.1
diff --git a/searchcommon/src/vespa/searchcommon/CMakeLists.txt b/searchcommon/src/vespa/searchcommon/CMakeLists.txt
new file mode 100644
index 00000000000..fa17af628ef
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchcommon
+ SOURCES
+ $<TARGET_OBJECTS:searchcommon_searchcommon_common>
+ $<TARGET_OBJECTS:searchcommon_searchcommon_attribute>
+ INSTALL lib64
+ DEPENDS
+)
diff --git a/searchcommon/src/vespa/searchcommon/attribute/.gitignore b/searchcommon/src/vespa/searchcommon/attribute/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/searchcommon/src/vespa/searchcommon/attribute/CMakeLists.txt b/searchcommon/src/vespa/searchcommon/attribute/CMakeLists.txt
new file mode 100644
index 00000000000..5343a9eac69
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchcommon_searchcommon_attribute OBJECT
+ SOURCES
+ basictype.cpp
+ collectiontype.cpp
+ config.cpp
+ status.cpp
+ DEPENDS
+)
diff --git a/searchcommon/src/vespa/searchcommon/attribute/attributecontent.h b/searchcommon/src/vespa/searchcommon/attribute/attributecontent.h
new file mode 100644
index 00000000000..60471b77608
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/attributecontent.h
@@ -0,0 +1,172 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributevector.h"
+#include <stdint.h>
+
+namespace search {
+namespace attribute {
+
+
+/**
+ * This class is wrapping an array of type T and is used to hold the
+ * attribute vector content for a given document. The values stored for the
+ * given document in the attribute vector is copied into the array wrapped
+ * in an instance of this class.
+ *
+ * @param T the type of the data stored in this object
+ **/
+template <typename T>
+class AttributeContent
+{
+private:
+ T _staticBuf[16];
+ T * _dynamicBuf;
+ uint32_t _size;
+ uint32_t _capacity;
+
+ AttributeContent(const AttributeContent & rhs);
+ AttributeContent & operator=(const AttributeContent & rhs);
+
+public:
+ /**
+ * Creates a new object with an initial capacity of 16 without dynamic allocation.
+ **/
+ AttributeContent() :
+ _dynamicBuf(NULL),
+ _size(0),
+ _capacity(16)
+ {
+ }
+ /**
+ * Destructs the object.
+ **/
+ ~AttributeContent() {
+ if (_dynamicBuf != NULL) {
+ delete [] _dynamicBuf;
+ }
+ }
+
+ /**
+ * Returns a read-only iterator to the beginning of the underlying data array.
+ *
+ * @return iterator
+ **/
+ const T * begin() const {
+ if (_dynamicBuf != NULL) {
+ return _dynamicBuf;
+ }
+ return _staticBuf;
+ }
+
+ /**
+ * Returns a read-only iterator to the end of the underlying data array.
+ *
+ * @return iterator
+ **/
+ const T * end() const {
+ return begin() + _size;
+ }
+
+ /**
+ * Returns the element at the given position in the underlying data array.
+ *
+ * @return read-only reference to the element
+ * @param idx position into the underlying data
+ **/
+ const T & operator[](uint32_t idx) const {
+ return *(begin() + idx);
+ }
+
+ /**
+ * Returns the number of elements used in the underlying data array.
+ *
+ * @return number of elements used
+ **/
+ uint32_t size() const {
+ return _size;
+ }
+
+ /**
+ * Returns the number of elements allocated in the underlying data array.
+ *
+ * @return number of elements allocated
+ **/
+ uint32_t capacity() const {
+ return _capacity;
+ }
+
+ /**
+ * Returns a read/write pointer to the underlying data array.
+ *
+ * @return read/write pointer.
+ **/
+ T * data() {
+ if (_dynamicBuf != NULL) {
+ return _dynamicBuf;
+ }
+ return _staticBuf;
+ }
+
+ /**
+ * Sets the number of elements used in the underlying data array.
+ *
+ * @param n number of elements used
+ **/
+ void setSize(uint32_t n) {
+ _size = n;
+ }
+
+ /**
+  * Makes sure the underlying data array can hold the given number of
+  * elements (capacity). A new data array is only allocated if
+  * n > capacity(); in that case the old content is dropped and the
+  * size is set to 0. Otherwise nothing is changed.
+  * @param n wanted number of elements
+  **/
+ void allocate(uint32_t n) {
+     if (n > _capacity) {
+         // Allocate before releasing: if new[] throws, members stay valid.
+         T * grown = new T[n];
+         delete [] _dynamicBuf; // deleting NULL is a no-op
+         _dynamicBuf = grown;
+         _capacity = n;
+         _size = 0;
+     }
+ }
+
+ /**
+  * Fill this buffer with the content of the given attribute vector for the given docId.
+  * Re-allocates and re-reads until the reported value count fits, so size() <= capacity().
+  * @param attribute the attribute vector
+  * @param docId the docId
+  **/
+ void fill(const search::attribute::IAttributeVector & attribute,
+           search::attribute::IAttributeVector::DocId docId)
+ {
+     uint32_t count = attribute.get(docId, data(), capacity());
+     while (count > capacity()) {
+         allocate(count);
+         count = attribute.get(docId, data(), capacity());
+     }
+     setSize(count);
+ }
+};
+
+
+typedef AttributeContent<double> FloatContent;
+typedef AttributeContent<const char *> ConstCharContent;
+typedef AttributeContent<IAttributeVector::largeint_t> IntegerContent;
+typedef AttributeContent<IAttributeVector::EnumHandle> EnumContent;
+typedef AttributeContent<IAttributeVector::WeightedInt> WeightedIntegerContent;
+typedef AttributeContent<IAttributeVector::WeightedFloat> WeightedFloatContent;
+typedef AttributeContent<IAttributeVector::WeightedConstChar> WeightedConstCharContent;
+typedef AttributeContent<IAttributeVector::WeightedString> WeightedStringContent;
+typedef AttributeContent<IAttributeVector::WeightedEnum> WeightedEnumContent;
+typedef IAttributeVector::EnumHandle EnumHandle;
+
+
+} // namespace attribute
+} // namespace search
+
diff --git a/searchcommon/src/vespa/searchcommon/attribute/basictype.cpp b/searchcommon/src/vespa/searchcommon/attribute/basictype.cpp
new file mode 100644
index 00000000000..b1a4539ebb8
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/basictype.cpp
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/attribute/basictype.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+namespace search {
+namespace attribute {
+
+const BasicType::TypeInfo BasicType::_typeTable[BasicType::MAX_TYPE] = {
+ { BasicType::NONE, 0, "none" },
+ { BasicType::STRING, 0, "string" },
+ { BasicType::UINT1, sizeof(int8_t), "uint1" },
+ { BasicType::UINT2, sizeof(int8_t), "uint2" },
+ { BasicType::UINT4, sizeof(int8_t), "uint4" },
+ { BasicType::INT8, sizeof(int8_t), "int8" },
+ { BasicType::INT16, sizeof(int16_t), "int16" },
+ { BasicType::INT32, sizeof(int32_t), "int32" },
+ { BasicType::INT64, sizeof(int64_t), "int64" },
+ { BasicType::FLOAT, sizeof(float), "float" },
+ { BasicType::DOUBLE, sizeof(double), "double" },
+ { BasicType::PREDICATE, 0, "predicate" },
+ { BasicType::TENSOR, 0, "tensor" }
+};
+
+// Maps a type name (e.g. "int32") to its enum value; throws IllegalStateException for unknown names.
+BasicType::Type
+BasicType::asType(const vespalib::string &t)
+{
+    for (const TypeInfo & info : _typeTable) {
+        if (t == info._name) {
+            return info._type;
+        }
+    }
+    throw vespalib::IllegalStateException(t +
+                                          " not recognized as "
+                                          "valid attribute data type");
+}
+
+}
+}
diff --git a/searchcommon/src/vespa/searchcommon/attribute/basictype.h b/searchcommon/src/vespa/searchcommon/attribute/basictype.h
new file mode 100644
index 00000000000..26b17c46f60
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/basictype.h
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace attribute {
+
+class BasicType
+{
+ public:
+ enum Type {
+ NONE = 0,
+ STRING = 1,
+ UINT1 = 2,
+ UINT2 = 3,
+ UINT4 = 4,
+ INT8 = 5,
+ INT16 = 6,
+ INT32 = 7,
+ INT64 = 8,
+ FLOAT = 9,
+ DOUBLE = 10,
+ PREDICATE = 11,
+ TENSOR = 12,
+ MAX_TYPE
+ };
+
+ explicit
+ BasicType(int t) : _type(Type(t)) { }
+ explicit
+ BasicType(unsigned int t) : _type(Type(t)) { }
+ BasicType(Type t) : _type(t) { }
+ explicit
+ BasicType(const vespalib::string & t) : _type(asType(t)) { }
+
+ Type type() const { return _type; }
+ const char * asString() const { return asString(_type); }
+ bool isUnsigned() const { return isUnsigned(_type); }
+ size_t fixedSize() const { return fixedSize(_type); }
+ static BasicType fromType(int8_t) { return INT8; }
+ static BasicType fromType(int16_t) { return INT16; }
+ static BasicType fromType(int32_t) { return INT32; }
+ static BasicType fromType(int64_t) { return INT64; }
+ static BasicType fromType(float) { return FLOAT; }
+ static BasicType fromType(double) { return DOUBLE; }
+ bool operator==(const BasicType &b) const { return _type == b._type; }
+ bool operator!=(const BasicType &b) const { return _type != b._type; }
+
+ private:
+ static const char * asString(Type t) { return _typeTable[t]._name; }
+ static bool isUnsigned(Type t) { return _typeTable[t]._name[0] == 'u'; }
+ static size_t fixedSize(Type t) { return _typeTable[t]._fixedSize; }
+ static Type asType(const vespalib::string & t);
+
+ Type _type;
+
+ struct TypeInfo {
+ Type _type;
+ unsigned int _fixedSize;
+ const char * _name;
+ };
+ static const TypeInfo _typeTable[MAX_TYPE];
+};
+
+}
+}
+
diff --git a/searchcommon/src/vespa/searchcommon/attribute/collectiontype.cpp b/searchcommon/src/vespa/searchcommon/attribute/collectiontype.cpp
new file mode 100644
index 00000000000..33a79fd2929
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/collectiontype.cpp
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/attribute/collectiontype.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+namespace search {
+namespace attribute {
+
+const CollectionType::TypeInfo CollectionType::_typeTable[CollectionType::MAX_TYPE] = {
+ { CollectionType::SINGLE, "single" },
+ { CollectionType::ARRAY, "array" },
+ { CollectionType::WSET, "weightedset" }
+};
+
+// Maps a collection name (e.g. "array") to its enum value; throws IllegalStateException for unknown names.
+CollectionType::Type
+CollectionType::asType(const vespalib::string &t)
+{
+    for (const TypeInfo & info : _typeTable) {
+        if (t == info._name) {
+            return info._type;
+        }
+    }
+    throw vespalib::IllegalStateException(t +
+                                          " not recognized as valid attribute "
+                                          "collection type");
+}
+
+}
+}
diff --git a/searchcommon/src/vespa/searchcommon/attribute/collectiontype.h b/searchcommon/src/vespa/searchcommon/attribute/collectiontype.h
new file mode 100644
index 00000000000..045c344dec1
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/collectiontype.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace attribute {
+
+class CollectionType
+{
+ public:
+ enum Type {
+ /**
+ * Single value type with one value stored for each document.
+ **/
+ SINGLE = 0,
+ /**
+ * Array type with zero to n values stored for each document.
+ **/
+ ARRAY = 1,
+ /**
+ * Weighted set type with zero to n unique values stored for each document.
+ * In addition each unique value is associated with a weight.
+ **/
+ WSET = 2,
+ MAX_TYPE
+ };
+
+ CollectionType(Type t = SINGLE, bool remove = false, bool create = false) :
+ _type(t),
+ _removeIfZero(remove),
+ _createIfNonExistant(create)
+ {
+ }
+
+ explicit
+ CollectionType(const vespalib::string & t, bool remove = false, bool create = false) :
+ _type(asType(t)),
+ _removeIfZero(remove),
+ _createIfNonExistant(create)
+ {
+ }
+
+ Type type() const { return _type; }
+ bool isMultiValue() const { return _type != SINGLE; }
+ bool isWeightedSet() const { return _type == WSET; }
+ bool isArray() const { return _type == ARRAY; }
+ bool removeIfZero() const { return _removeIfZero; }
+ bool createIfNonExistant() const { return _createIfNonExistant; }
+ const char * asString() const { return asString(_type); }
+ void removeIfZero(bool newValue) { _removeIfZero = newValue; }
+ void createIfNonExistant(bool newValue) { _createIfNonExistant = newValue; }
+ bool operator!=(const CollectionType &b) const { return !(operator==(b)); }
+ bool operator==(const CollectionType &b) const {
+ return _type == b._type &&
+ _removeIfZero == b._removeIfZero &&
+ _createIfNonExistant == b._createIfNonExistant;
+ }
+
+ private:
+ struct TypeInfo {
+ Type _type;
+ const char * _name;
+ };
+
+ static const char * asString(Type t) { return _typeTable[t]._name; }
+ static Type asType(const vespalib::string &t);
+
+ Type _type;
+ bool _removeIfZero;
+ bool _createIfNonExistant;
+ static const TypeInfo _typeTable[MAX_TYPE];
+};
+
+}
+}
+
diff --git a/searchcommon/src/vespa/searchcommon/attribute/config.cpp b/searchcommon/src/vespa/searchcommon/attribute/config.cpp
new file mode 100644
index 00000000000..e160d7b9222
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/config.cpp
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <limits.h>
+
+namespace search {
+namespace attribute {
+
+Config::Config() :
+ _basicType(BasicType::NONE),
+ _type(CollectionType::SINGLE),
+ _fastSearch(false),
+ _huge(false),
+ _enableBitVectors(false),
+ _enableOnlyBitVector(false),
+ _isFilter(false),
+ _fastAccess(false),
+ _maxInternalBlobSize(defaultMaxInternalBlobSize),
+ _arity(8),
+ _lower_bound(LLONG_MIN),
+ _upper_bound(LLONG_MAX),
+ _dense_posting_list_threshold(0.4),
+ _tensorType(vespalib::tensor::TensorType::invalid())
+{
+}
+
+Config::Config(BasicType bt,
+ CollectionType ct,
+ bool fastSearch_,
+ bool huge_)
+ : _basicType(bt),
+ _type(ct),
+ _fastSearch(fastSearch_),
+ _huge(huge_),
+ _enableBitVectors(false),
+ _enableOnlyBitVector(false),
+ _isFilter(false),
+ _fastAccess(false),
+ _maxInternalBlobSize(defaultMaxInternalBlobSize),
+ _arity(8),
+ _lower_bound(LLONG_MIN),
+ _upper_bound(LLONG_MAX),
+ _dense_posting_list_threshold(0.4),
+ _tensorType(vespalib::tensor::TensorType::invalid())
+{
+}
+
+}
+}
diff --git a/searchcommon/src/vespa/searchcommon/attribute/config.h b/searchcommon/src/vespa/searchcommon/attribute/config.h
new file mode 100644
index 00000000000..b63ce37c93b
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/config.h
@@ -0,0 +1,155 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/attribute/basictype.h>
+#include <vespa/searchcommon/attribute/collectiontype.h>
+#include <vespa/searchcommon/common/growstrategy.h>
+#include <vespa/vespalib/tensor/tensor_type.h>
+
+namespace search {
+namespace attribute {
+
+const size_t defaultMaxInternalBlobSize = 0x400000000ul;
+
+class Config
+{
+public:
+ Config();
+
+ Config(BasicType bt,
+ CollectionType ct = CollectionType::SINGLE,
+ bool fastSearch_ = false,
+ bool huge_ = false);
+
+ BasicType basicType() const { return _basicType; }
+ CollectionType collectionType() const { return _type; }
+ bool fastSearch() const { return _fastSearch; }
+ bool huge() const { return _huge; }
+ size_t getMaxInternalBlobSize() const { return _maxInternalBlobSize; }
+ uint32_t arity() const { return _arity; }
+ int64_t lower_bound() const { return _lower_bound; }
+ int64_t upper_bound() const { return _upper_bound; }
+ double dense_posting_list_threshold() const { return _dense_posting_list_threshold; }
+ vespalib::tensor::TensorType tensorType() const { return _tensorType; }
+
+ /**
+ * Check if attribute posting list can consist of a bitvector in
+ * addition to (or instead of) a btree.
+ */
+ bool
+ getEnableBitVectors(void) const
+ {
+ return _enableBitVectors;
+ }
+
+ /**
+ * Check if attribute posting list can consist of only a bitvector with
+ * no corresponding btree.
+ */
+ bool
+ getEnableOnlyBitVector(void) const
+ {
+ return _enableOnlyBitVector;
+ }
+
+ bool
+ getIsFilter(void) const
+ {
+ return _isFilter;
+ }
+
+ /**
+ * Check if this attribute should be fast accessible at all times.
+ * If so, attribute is kept in memory also for non-searchable documents.
+ */
+ bool fastAccess() const { return _fastAccess; }
+
+ const GrowStrategy & getGrowStrategy() const { return _growStrategy; }
+ void setHuge(bool v) { _huge = v; }
+ void setFastSearch(bool v) { _fastSearch = v; }
+ void setMaxInternalBlobSize(size_t v) { _maxInternalBlobSize = v; }
+ void setArity(uint32_t v) { _arity = v; }
+ void setBounds(int64_t lower, int64_t upper) { _lower_bound = lower;
+ _upper_bound = upper; }
+ void setDensePostingListThreshold(double v) { _dense_posting_list_threshold = v; }
+ void setTensorType(const vespalib::tensor::TensorType &tensorType_in) {
+ _tensorType = tensorType_in;
+ }
+
+ /**
+ * Enable attribute posting list to consist of a bitvector in
+ * addition to (or instead of) a btree.
+ */
+ void
+ setEnableBitVectors(bool enableBitVectors)
+ {
+ _enableBitVectors = enableBitVectors;
+ }
+
+ /**
+ * Enable attribute posting list to consist of only a bitvector with
+ * no corresponding btree. Some information degradation might occur when
+ * document frequency goes down, since recreated btree representation
+ * will then have lost weight information.
+ */
+ void
+ setEnableOnlyBitVector(bool enableOnlyBitVector)
+ {
+ _enableOnlyBitVector = enableOnlyBitVector;
+ }
+
+ /**
+ * Hide weight information when searching in attributes.
+ */
+ void
+ setIsFilter(bool isFilter)
+ {
+ _isFilter = isFilter;
+ }
+
+ void setFastAccess(bool v) { _fastAccess = v; }
+ void setGrowStrategy(const GrowStrategy &gs) { _growStrategy = gs; }
+ bool operator!=(const Config &b) const { return !(operator==(b)); }
+
+ bool
+ operator==(const Config &b) const
+ {
+ return _basicType == b._basicType &&
+ _type == b._type &&
+ _huge == b._huge &&
+ _fastSearch == b._fastSearch &&
+ _enableBitVectors == b._enableBitVectors &&
+ _enableOnlyBitVector == b._enableOnlyBitVector &&
+ _isFilter == b._isFilter &&
+ _fastAccess == b._fastAccess &&
+ _maxInternalBlobSize == b._maxInternalBlobSize &&
+ _growStrategy == b._growStrategy &&
+ _arity == b._arity &&
+ _lower_bound == b._lower_bound &&
+ _upper_bound == b._upper_bound &&
+ _dense_posting_list_threshold == b._dense_posting_list_threshold &&
+ (_basicType.type() != BasicType::Type::TENSOR ||
+ _tensorType == b._tensorType);
+ }
+
+private:
+ BasicType _basicType;
+ CollectionType _type;
+ bool _fastSearch;
+ bool _huge;
+ bool _enableBitVectors;
+ bool _enableOnlyBitVector;
+ bool _isFilter;
+ bool _fastAccess;
+ size_t _maxInternalBlobSize;
+ GrowStrategy _growStrategy;
+ uint32_t _arity;
+ int64_t _lower_bound;
+ int64_t _upper_bound;
+ double _dense_posting_list_threshold;
+ vespalib::tensor::TensorType _tensorType;
+};
+} // namespace attribute
+} // namespace search
+
diff --git a/searchcommon/src/vespa/searchcommon/attribute/iattributecontext.h b/searchcommon/src/vespa/searchcommon/attribute/iattributecontext.h
new file mode 100644
index 00000000000..1cdb86cf274
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/iattributecontext.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributevector.h"
+#include <vector>
+#include <memory>
+
+namespace search {
+namespace attribute {
+
+/**
+ * This is an interface used to access all registered attribute vectors.
+ **/
+class IAttributeContext {
+public:
+ typedef vespalib::string string;
+ /** Convenience typedefs **/
+ typedef std::unique_ptr<IAttributeContext> UP;
+
+ /**
+ * Returns the attribute vector with the given name.
+ *
+ * @param name the name of the attribute vector.
+ * @return const view of the attribute vector or NULL if the attribute vector does not exist.
+ **/
+ virtual const IAttributeVector * getAttribute(const string & name) const = 0;
+
+ /**
+ * Returns the attribute vector with the given name.
+ * Makes sure that the underlying enum values are stable during the use of this attribute.
+ *
+ * @param name the name of the attribute vector
+ * @return const view of the attribute vector or NULL if the attribute vector does not exist.
+ **/
+ virtual const IAttributeVector * getAttributeStableEnum(const string & name) const = 0;
+
+ /**
+ * Fill the given list with all attribute vectors registered.
+ *
+ * @param list the list to fill in attribute vectors.
+ **/
+ virtual void getAttributeList(std::vector<const IAttributeVector *> & list) const = 0;
+
+ /**
+ * Releases all cached attribute guards.
+ **/
+ virtual void releaseEnumGuards() {}
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~IAttributeContext() {}
+};
+
+} // namespace attribute
+} // namespace search
+
diff --git a/searchcommon/src/vespa/searchcommon/attribute/iattributevector.h b/searchcommon/src/vespa/searchcommon/attribute/iattributevector.h
new file mode 100644
index 00000000000..28f7f7df061
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/iattributevector.h
@@ -0,0 +1,352 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <stdint.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchcommon/common/iblobconverter.h>
+#include <vespa/searchcommon/attribute/collectiontype.h>
+#include <vespa/searchcommon/attribute/basictype.h>
+
+namespace search {
+namespace attribute {
+
+/**
+ * This class is used to store a value and a weight.
+ * It is used when getting content from a weighted set attribute vector.
+ *
+ * @param T the type of the value stored in this object
+ **/
+template <typename T>
+class WeightedType
+{
+private:
+ T _value;
+ int32_t _weight;
+
+public:
+ WeightedType() : _value(T()), _weight(1) { }
+ WeightedType(T value_, int32_t weight_ = 1) : _value(value_), _weight(weight_) { }
+ const T & getValue() const { return _value; }
+ const T & value() const { return _value; }
+ void setValue(const T & v) { _value = v; }
+ int32_t getWeight() const { return _weight; }
+ int32_t weight() const { return _weight; }
+ void setWeight(int32_t w) { _weight = w; }
+ bool operator==(const WeightedType & rhs) const {
+ return _value == rhs._value && _weight == rhs._weight;
+ }
+};
+
+/**
+ * This is a read interface used to access the content of an attribute vector.
+ **/
+class IAttributeVector
+{
+public:
+ typedef uint32_t DocId;
+ typedef uint32_t EnumHandle;
+ typedef int64_t largeint_t;
+ typedef WeightedType<double> WeightedFloat;
+ typedef WeightedType<largeint_t> WeightedInt;
+ typedef WeightedType<EnumHandle> WeightedEnum;
+ typedef WeightedType<const char *> WeightedConstChar;
+ typedef WeightedType<vespalib::string> WeightedString;
+
+ /**
+ * Returns the name of this attribute vector.
+ *
+ * @return attribute name
+ **/
+ virtual const vespalib::string & getName() const = 0;
+
+ /**
+ * Returns the number of documents stored in this attribute vector.
+ *
+ * @return number of documents
+ **/
+ virtual uint32_t getNumDocs() const = 0;
+
+ /**
+ * Returns the number of values stored for the given document.
+ *
+ * @return number of values
+ * @param doc document identifier
+ **/
+ virtual uint32_t getValueCount(uint32_t doc) const = 0;
+
+ /**
+ * Returns the maximum number of values stored for any document.
+ *
+ * @return maximum number of values
+ **/
+ virtual uint32_t getMaxValueCount() const = 0;
+
+ /**
+ * Returns the first value stored for the given document as an integer.
+ *
+ * @param doc document identifier
+ * @return the integer value
+ **/
+ virtual largeint_t getInt(DocId doc) const = 0;
+
+ /**
+ * Returns the first value stored for the given document as a floating point number.
+ *
+ * @param doc document identifier
+ * @return the floating point value
+ **/
+ virtual double getFloat(DocId doc) const = 0;
+
+ /**
+ * Returns the first value stored for the given document as a string.
+ * Uses the given buffer to store the actual string if no underlying
+ * string storage is used for this attribute vector.
+ *
+ * @param doc document identifier
+ * @param buffer content buffer to optionally store the string
+ * @param sz the size of the buffer
+ * @return the string value
+ **/
+ virtual const char * getString(DocId doc, char * buffer, size_t sz) const = 0;
+
+ /**
+ * Returns the first value stored for the given document as an enum value.
+ *
+ * @param doc document identifier
+ * @return the enum value
+ **/
+ virtual EnumHandle getEnum(DocId doc) const = 0;
+
+ /**
+ * Copies the values stored for the given document into the given buffer.
+ *
+ * @param docId document identifier
+ * @param buffer content buffer to copy integer values into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, largeint_t * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the values stored for the given document into the given buffer.
+ *
+ * @param docId document identifier
+ * @param buffer content buffer to copy floating point values into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, double * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the values stored for the given document into the given buffer.
+ *
+ * @param docId document identifier
+ * @param buffer content buffer to copy string values into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+// virtual uint32_t get(DocId docId, vespalib::string * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the values stored for the given document into the given buffer.
+ *
+ * @param docId document identifier
+ * @param buffer content buffer to copy const char values into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, const char ** buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the enum values stored for the given document into the given buffer.
+ *
+ * @param docId document identifier
+ * @param buffer content object to copy enum into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, EnumHandle * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the values and weights stored for the given document into the given buffer.
+ * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+ *
+ * @param docId document identifier
+ * @param buffer content object to copy integer values and weights into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, WeightedInt * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the values and weights stored for the given document into the given buffer.
+ * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+ *
+ * @param docId document identifier
+ * @param buffer content object to copy floating point values and weights into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, WeightedFloat * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the values and weights stored for the given document into the given buffer.
+ * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+ *
+ * @param docId document identifier
+ * @param buffer content object to copy string values and weights into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, WeightedString * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the values and weights stored for the given document into the given buffer.
+ * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+ *
+ * @param docId document identifier
+ * @param buffer content object to copy const char values and weights into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, WeightedConstChar * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Copies the enum values and weights stored for the given document into the given buffer.
+ * This method should only be invoked if @ref getCollectionType() returns CollectionType::WSET.
+ *
+ * @param docId document identifier
+ * @param buffer content object to copy enum values and weights into
+ * @param sz the size of the buffer
+ * @return the number of values for this document
+ **/
+ virtual uint32_t get(DocId docId, WeightedEnum * buffer, uint32_t sz) const = 0;
+
+ /**
+ * Finds the enum value for the given string value.
+ * This method will only have effect if @ref getBasicType() returns BasicType::STRING and
+ * @ref hasEnum() returns true.
+ *
+ * @param value the string value to lookup.
+ * @param e the handle in which to store the enum value.
+ * @return true if found.
+ **/
+ virtual bool findEnum(const char * value, EnumHandle & e) const = 0;
+
+ /**
+ * Returns the basic type of this attribute vector.
+ *
+ * @return basic type
+ **/
+ virtual BasicType::Type getBasicType() const = 0;
+
+ /**
+ * Returns the number of bytes a single value in this attribute occupies.
+ **/
+ virtual size_t getFixedWidth() const = 0;
+
+ /**
+ * Returns the collection type of this attribute vector.
+ *
+ * @return collection type
+ **/
+ virtual CollectionType::Type getCollectionType() const = 0;
+
+ /**
+ * Returns whether this is an integer attribute.
+ **/
+ virtual bool isIntegerType() const {
+ BasicType::Type t = getBasicType();
+ return t == BasicType::UINT1 ||
+ t == BasicType::UINT2 ||
+ t == BasicType::UINT4 ||
+ t == BasicType::INT8 ||
+ t == BasicType::INT16 ||
+ t == BasicType::INT32 ||
+ t == BasicType::INT64;
+ }
+
+ /**
+ * Returns whether this is a floating point attribute.
+ **/
+ virtual bool isFloatingPointType() const {
+ BasicType::Type t = getBasicType();
+ return t == BasicType::FLOAT || t == BasicType::DOUBLE;
+ }
+
+ /**
+ * Returns whether this is a string attribute.
+ **/
+ virtual bool isStringType() const {
+ return getBasicType() == BasicType::STRING;
+ }
+
+ /**
+ * Returns whether this is a multi value attribute.
+ **/
+ virtual bool hasMultiValue() const {
+ return getCollectionType() != CollectionType::SINGLE;
+ }
+
+ /**
+ * Returns whether this is a weighted set attribute.
+ **/
+ virtual bool hasWeightedSetType() const {
+ return getCollectionType() == CollectionType::WSET;
+ }
+
+ /**
+ * Returns whether this attribute vector has underlying enum values.
+ *
+ * @return true if it has enum values.
+ **/
+ virtual bool hasEnum() const = 0;
+
+ /**
+ * Will serialize the values for the documentid in ascending order. The serialized form can be used by memcmp and
+ * sortorder will be preserved.
+ * @param doc The document id to serialize for.
+ * @param serTo The buffer to serialize into.
+ * @param available. Number of bytes available in the serialization buffer.
+ * @param bc An optional converter to use.
+ * @return The number of bytes serialized, -1 if not enough space.
+ */
+ long serializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc=NULL) const {
+ return onSerializeForAscendingSort(doc, serTo, available, bc);
+ }
+ /**
+ * Will serialize the values for the documentid in descending order. The serialized form can be used by memcmp and
+ * sortorder will be preserved.
+ * @param doc The document id to serialize for.
+ * @param serTo The buffer to serialize into.
+ * @param available. Number of bytes available in the serialization buffer.
+ * @param bc An optional converter to use.
+ * @return The number of bytes serialized, -1 if not enough space.
+ */
+ long serializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc=NULL) const {
+ return onSerializeForDescendingSort(doc, serTo, available, bc);
+ }
+
+ /**
+ * Virtual destructor to allow safe subclassing.
+ **/
+ virtual ~IAttributeVector() {}
+
+ /**
+ * This method is used to simulate sparseness in the single value attributes.
+ * @param doc The document id to verify if attribute has a undefined value for this document.
+ * @return true if value is undefined.
+ */
+ virtual bool isUndefined(DocId doc) const { (void) doc; return false; }
+
+private:
+ virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const = 0;
+ virtual long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const = 0;
+
+};
+
+} // namespace attribute
+} // namespace search
+
diff --git a/searchcommon/src/vespa/searchcommon/attribute/status.cpp b/searchcommon/src/vespa/searchcommon/attribute/status.cpp
new file mode 100644
index 00000000000..7543e13fdcf
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/status.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/attribute/status.h>
+
+namespace search {
+namespace attribute {
+
+/**
+ * Creates a status object with every counter set to zero.
+ *
+ * The name argument is accepted but neither stored nor used, so the work is
+ * handed straight to the default constructor.
+ */
+Status::Status(const vespalib::string &)
+    : Status()
+{
+}
+
+
+Status::Status() // default constructor: every counter starts at zero
+ : _numDocs (0),
+ _numValues (0),
+ _numUniqueValues (0),
+ _allocated (0),
+ _used (0),
+ _dead (0),
+ _unused (0), // recomputed as allocated - used by updateStatistics()
+ _onHold (0),
+ _onHoldMax (0), // largest onHold value passed to updateStatistics() so far
+ _lastSyncToken (0),
+ _updates (0),
+ _nonIdempotentUpdates (0),
+ _bitVectors(0)
+{
+}
+
+
+/**
+ * Builds the string "<index>.attribute.<attr>".
+ *
+ * @param index text placed in front of ".attribute.".
+ * @param attr  text placed after ".attribute.".
+ * @return the concatenated name.
+ */
+vespalib::string
+Status::createName(const vespalib::stringref &index,
+                   const vespalib::stringref &attr)
+{
+    vespalib::string fullName(index);
+    fullName += ".attribute.";
+    fullName += attr;
+    return fullName;
+}
+
+
+/**
+ * Stores a new snapshot of the value and memory statistics.
+ *
+ * Besides copying the arguments into the matching members, the unused amount
+ * is derived as allocated - used and the on-hold maximum is raised when the
+ * new onHold value exceeds it.
+ */
+void
+Status::updateStatistics(uint64_t numValues,
+                         uint64_t numUniqueValue,
+                         uint64_t allocated,
+                         uint64_t used,
+                         uint64_t dead,
+                         uint64_t onHold)
+{
+    // Value counts.
+    _numValues = numValues;
+    _numUniqueValues = numUniqueValue;
+
+    // Memory figures; the subtraction is unsigned, as in the stored types.
+    _allocated = allocated;
+    _used = used;
+    _dead = dead;
+    _unused = allocated - used;
+
+    // On-hold amount plus its running maximum.
+    _onHold = onHold;
+    if (_onHoldMax < onHold) {
+        _onHoldMax = onHold;
+    }
+}
+
+}
+}
diff --git a/searchcommon/src/vespa/searchcommon/attribute/status.h b/searchcommon/src/vespa/searchcommon/attribute/status.h
new file mode 100644
index 00000000000..09c7cbc6028
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/attribute/status.h
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace attribute {
+
+/**
+ * Counters describing an attribute: document and value counts, memory usage
+ * figures, the last sync token, update counts and the number of bit vectors.
+ */
+class Status
+{
+public:
+    // TODO: name isn't stored anywhere or used for anything
+    Status(const vespalib::string &name);
+    Status();
+
+    /**
+     * Replaces the value and memory statistics in one call. The unused
+     * amount and the on-hold maximum are derived by the implementation.
+     */
+    void updateStatistics(uint64_t numValues,
+                          uint64_t numUniqueValue,
+                          uint64_t allocated,
+                          uint64_t used,
+                          uint64_t dead,
+                          uint64_t onHold);
+
+    // Read access to the counters.
+    uint64_t getNumDocs() const { return _numDocs; }
+    uint64_t getNumValues() const { return _numValues; }
+    uint64_t getNumUniqueValues() const { return _numUniqueValues; }
+    uint64_t getAllocated() const { return _allocated; }
+    uint64_t getUsed() const { return _used; }
+    uint64_t getDead() const { return _dead; }
+    uint64_t getOnHold() const { return _onHold; }
+    uint64_t getOnHoldMax() const { return _onHoldMax; }
+    uint64_t getLastSyncToken() const { return _lastSyncToken; }
+    uint64_t getUpdateCount() const { return _updates; }
+    uint64_t getNonIdempotentUpdateCount() const { return _nonIdempotentUpdates; }
+    uint32_t getBitVectors() const { return _bitVectors; }
+
+    // Direct modification of individual counters.
+    void setNumDocs(uint64_t v) { _numDocs = v; }
+    void incNumDocs() { ++_numDocs; }
+    void setLastSyncToken(uint64_t v) { _lastSyncToken = v; }
+    void incUpdates(uint64_t v=1) { _updates += v; }
+    void incNonIdempotentUpdates(uint64_t v = 1) { _nonIdempotentUpdates += v; }
+    void incBitVectors() { ++_bitVectors; }
+    void decBitVectors() { --_bitVectors; }
+
+    /** Combines an index name and an attribute name into one name. */
+    static vespalib::string createName(const vespalib::stringref &index,
+                                       const vespalib::stringref & attr);
+
+private:
+    uint64_t _numDocs;
+    uint64_t _numValues;
+    uint64_t _numUniqueValues;
+    uint64_t _allocated;
+    uint64_t _used;
+    uint64_t _dead;
+    uint64_t _unused;               // no accessor is declared for this member
+    uint64_t _onHold;
+    uint64_t _onHoldMax;
+    uint64_t _lastSyncToken;
+    uint64_t _updates;
+    uint64_t _nonIdempotentUpdates;
+    uint32_t _bitVectors;
+};
+
+}
+}
+
diff --git a/searchcommon/src/vespa/searchcommon/common/.gitignore b/searchcommon/src/vespa/searchcommon/common/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/searchcommon/src/vespa/searchcommon/common/CMakeLists.txt b/searchcommon/src/vespa/searchcommon/common/CMakeLists.txt
new file mode 100644
index 00000000000..a70a71772f5
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchcommon_searchcommon_common OBJECT
+ SOURCES
+ schema.cpp
+ schemaconfigurer.cpp
+ DEPENDS
+)
diff --git a/searchcommon/src/vespa/searchcommon/common/growstrategy.h b/searchcommon/src/vespa/searchcommon/common/growstrategy.h
new file mode 100644
index 00000000000..07d2ee1f35d
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/growstrategy.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace search {
+
+/**
+ * Value type holding three growth parameters for document storage: an
+ * initial capacity, a grow percentage and a grow delta.
+ */
+class GrowStrategy
+{
+public:
+    /** All parameters are optional; the defaults are 1024, 50 and 0. */
+    GrowStrategy(uint32_t docsInitialCapacity = 1024,
+                 uint32_t docsGrowPercent = 50,
+                 uint32_t docsGrowDelta = 0)
+        : _docsInitialCapacity(docsInitialCapacity),
+          _docsGrowPercent(docsGrowPercent),
+          _docsGrowDelta(docsGrowDelta)
+    {
+    }
+
+    // Accessors.
+    uint32_t getDocsInitialCapacity() const { return _docsInitialCapacity; }
+    uint32_t getDocsGrowPercent() const { return _docsGrowPercent; }
+    uint32_t getDocsGrowDelta() const { return _docsGrowDelta; }
+
+    // Mutators.
+    void setDocsInitialCapacity(uint32_t v) { _docsInitialCapacity = v; }
+    void setDocsGrowPercent(uint32_t v) { _docsGrowPercent = v; }
+    void setDocsGrowDelta(uint32_t v) { _docsGrowDelta = v; }
+
+    /** Two strategies are equal when all three parameters are equal. */
+    bool operator==(const GrowStrategy & rhs) const {
+        if (_docsInitialCapacity != rhs._docsInitialCapacity) {
+            return false;
+        }
+        if (_docsGrowPercent != rhs._docsGrowPercent) {
+            return false;
+        }
+        return _docsGrowDelta == rhs._docsGrowDelta;
+    }
+    bool operator!=(const GrowStrategy & rhs) const {
+        return !(*this == rhs);
+    }
+
+private:
+    uint32_t _docsInitialCapacity;
+    uint32_t _docsGrowPercent;
+    uint32_t _docsGrowDelta;
+};
+
+}
+
diff --git a/searchcommon/src/vespa/searchcommon/common/iblobconverter.h b/searchcommon/src/vespa/searchcommon/common/iblobconverter.h
new file mode 100644
index 00000000000..cb3c3c3d2f3
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/iblobconverter.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/buffer.h>
+#include <vespa/vespalib/util/linkedptr.h>
+
+namespace search {
+namespace common {
+
+/**
+ * Interface for turning one buffer reference into another. Callers use
+ * convert(); subclasses supply the actual conversion in onConvert().
+ */
+class BlobConverter
+{
+public:
+    using SP = std::shared_ptr<BlobConverter>;
+    using LP = vespalib::LinkedPtr<BlobConverter>;
+
+    virtual ~BlobConverter() { }
+
+    /** Converts src by forwarding to onConvert(). */
+    vespalib::ConstBufferRef convert(const vespalib::ConstBufferRef & src) const {
+        return onConvert(src);
+    }
+
+private:
+    /** Conversion hook that subclasses must implement. */
+    virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const = 0;
+};
+
+}
+}
+
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.cpp b/searchcommon/src/vespa/searchcommon/common/schema.cpp
new file mode 100644
index 00000000000..3215a25e55f
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/schema.cpp
@@ -0,0 +1,670 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <fstream>
+#include <vespa/config/common/configparser.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/arraysize.h>
+#include "schema.h"
+LOG_SETUP(".index.schema");
+
+using namespace config;
+using namespace search::index;
+using config::InvalidConfigException;
+
+namespace {
+
+/**
+ * Writes a "<prefix>[<count>]" header line followed by every field, each one
+ * asked to write itself with the prefix "<prefix>[<position>].".
+ */
+template <typename T>
+void
+writeFields(vespalib::asciistream & os,
+            const vespalib::stringref &prefix,
+            const std::vector<T> & fields)
+{
+    os << prefix << "[" << fields.size() << "]\n";
+    size_t position = 0;
+    for (const T &field : fields) {
+        field.write(os, vespalib::make_string("%s[%zu].", prefix.c_str(), position));
+        ++position;
+    }
+}
+
+/**
+ * Writes the field sets as config lines: a "<name>[<count>]" header, then for
+ * each set its name, the number of member fields and one line per member.
+ */
+void
+writeFieldSets(vespalib::asciistream &os,
+               const vespalib::string &name,
+               const std::vector<Schema::FieldSet> &fss)
+{
+    vespalib::string prefix(name);
+    prefix += "[";
+    os << prefix << fss.size() << "]\n";
+    for (size_t i = 0; i < fss.size(); ++i) {
+        const Schema::FieldSet &fieldSet = fss[i];
+        const std::vector<vespalib::string> &members = fieldSet.getFields();
+        os << prefix << i << "].name " << fieldSet.getName() << "\n";
+        os << prefix << i << "].field[" << members.size() << "]\n";
+        // Prefix shared by every member line of this field set.
+        vespalib::asciistream memberPrefix;
+        memberPrefix << prefix << i << "].field[";
+        for (size_t j = 0; j < members.size(); ++j) {
+            os << memberPrefix.str() << j << "].name " << members[j] << "\n";
+        }
+    }
+}
+
+struct FieldName { // holds the "name" value of one entry; used when parsing the "field" array of a field set
+ vespalib::string name;
+ FieldName(const std::vector<vespalib::string> & lines)
+ : name(ConfigParser::parse<vespalib::string>("name", lines)) // reads the value stored under the key "name"
+ {
+ }
+};
+
+/**
+ * Looks up a name in a name-to-id map.
+ *
+ * @return the mapped id, or Schema::UNKNOWN_FIELD_ID when the name is absent.
+ */
+template <typename T>
+uint32_t
+getFieldId(const vespalib::stringref & name, const T &map)
+{
+    typename T::const_iterator pos = map.find(name);
+    if (pos == map.end()) {
+        return Schema::UNKNOWN_FIELD_ID;
+    }
+    return pos->second;
+}
+
+} // namespace
+
+namespace search {
+namespace index {
+
+const uint32_t Schema::UNKNOWN_FIELD_ID(std::numeric_limits<uint32_t>::max());
+
+/**
+ * Maps the textual name of a data type to its enum value.
+ *
+ * @throws InvalidConfigException when the name matches none of the known
+ *         data type names.
+ */
+Schema::DataType Schema::dataTypeFromName(const vespalib::stringref &name) {
+    if (name == "UINT1") {
+        return UINT1;
+    }
+    if (name == "UINT2") {
+        return UINT2;
+    }
+    if (name == "UINT4") {
+        return UINT4;
+    }
+    if (name == "INT8") {
+        return INT8;
+    }
+    if (name == "INT16") {
+        return INT16;
+    }
+    if (name == "INT32") {
+        return INT32;
+    }
+    if (name == "INT64") {
+        return INT64;
+    }
+    if (name == "FLOAT") {
+        return FLOAT;
+    }
+    if (name == "DOUBLE") {
+        return DOUBLE;
+    }
+    if (name == "STRING") {
+        return STRING;
+    }
+    if (name == "RAW") {
+        return RAW;
+    }
+    if (name == "BOOLEANTREE") {
+        return BOOLEANTREE;
+    }
+    if (name == "TENSOR") {
+        return TENSOR;
+    }
+    throw InvalidConfigException("Illegal enum value '" + name + "'");
+}
+
+const char *datatype_str[] = { "UINT1", // table indexed by the numeric DataType value; see getTypeName(DataType)
+ "UINT2",
+ "UINT4",
+ "INT8",
+ "INT16",
+ "INT32",
+ "INT64",
+ "FLOAT",
+ "DOUBLE",
+ "STRING",
+ "RAW",
+ "FEATURE_NOTUSED", // placeholder for value 11, which has no active DataType enumerator
+ "BOOLEANTREE",
+ "TENSOR" };
+
+/**
+ * Returns the textual name of a data type.
+ *
+ * @param type the data type; its numeric value is used as an index into
+ *             datatype_str.
+ * @return the table entry for the value, or "UNKNOWN(<value>)" when the
+ *         value does not index into the table.
+ */
+vespalib::string Schema::getTypeName(DataType type) {
+    // Valid indexes are 0 .. arraysize - 1, so a value equal to the table
+    // size must be rejected as well; '>' would read one past the end.
+    if (type >= vespalib::arraysize(datatype_str)) {
+        vespalib::asciistream ost;
+        ost << "UNKNOWN(" << type << ")";
+        return ost.str();
+    }
+    return datatype_str[type];
+}
+
+/**
+ * Maps the textual name of a collection type to its enum value.
+ *
+ * @throws InvalidConfigException when the name is not "SINGLE", "ARRAY" or
+ *         "WEIGHTEDSET".
+ */
+Schema::CollectionType Schema::collectionTypeFromName(
+        const vespalib::stringref &name) {
+    if (name == "SINGLE") {
+        return SINGLE;
+    }
+    if (name == "ARRAY") {
+        return ARRAY;
+    }
+    if (name == "WEIGHTEDSET") {
+        return WEIGHTEDSET;
+    }
+    throw InvalidConfigException("Illegal enum value '" + name + "'");
+}
+
+const char *collectiontype_str[] = { "SINGLE", // table indexed by the numeric CollectionType value; see getTypeName(CollectionType)
+ "ARRAY",
+ "WEIGHTEDSET" };
+
+/**
+ * Returns the textual name of a collection type.
+ *
+ * @param type the collection type; its numeric value is used as an index
+ *             into collectiontype_str.
+ * @return the table entry for the value, or "UNKNOWN(<value>)" when the
+ *         value does not index into the table.
+ */
+vespalib::string Schema::getTypeName(CollectionType type) {
+    // Valid indexes are 0 .. arraysize - 1, so a value equal to the table
+    // size must be rejected as well; '>' would read one past the end.
+    if (type >= vespalib::arraysize(collectiontype_str)) {
+        vespalib::asciistream ost;
+        ost << "UNKNOWN(" << type << ")";
+        return ost.str();
+    }
+    return collectiontype_str[type];
+}
+
+/**
+ * Creates a field with the given name and data type. The collection type is
+ * SINGLE and the timestamp is zero, exactly as the three-argument
+ * constructor produces when given SINGLE.
+ */
+Schema::Field::Field(const vespalib::stringref &n, DataType dt)
+    : Field(n, dt, SINGLE)
+{
+}
+
+Schema::Field::Field(const vespalib::stringref &n, // field with explicit name, data type and collection type
+ DataType dt, CollectionType ct)
+ : _name(n),
+ _dataType(dt),
+ _collectionType(ct),
+ _timestamp(0) // the timestamp always starts at zero here; setTimestamp() changes it
+{
+}
+
+// XXX: Resource leak if exception is thrown.
+Schema::Field::Field(const std::vector<vespalib::string> & lines) // builds the field from the keys "name", "datatype", "collectiontype" and "timestamp"
+ : _name(ConfigParser::parse<vespalib::string>("name", lines)),
+ _dataType(dataTypeFromName(ConfigParser::parse<vespalib::string>( // dataTypeFromName() throws InvalidConfigException for unknown names
+ "datatype", lines))),
+ _collectionType(
+ collectionTypeFromName(ConfigParser::parse<vespalib::string>( // collectionTypeFromName() throws InvalidConfigException for unknown names
+ "collectiontype", lines))),
+ _timestamp(ConfigParser::parse<int64_t>("timestamp", lines, 0)) // NOTE(review): the trailing 0 is presumably the value used when "timestamp" is absent - confirm against ConfigParser
+{
+}
+
+/**
+ * Writes this field as config lines, each starting with the given prefix:
+ * name, datatype and collectiontype always, timestamp only when it is
+ * non-zero.
+ */
+void
+Schema::Field::write(vespalib::asciistream & os, const vespalib::stringref & prefix) const
+{
+    const vespalib::string dataTypeName = getTypeName(_dataType);
+    const vespalib::string collectionTypeName = getTypeName(_collectionType);
+    os << prefix << "name " << _name << "\n";
+    os << prefix << "datatype " << dataTypeName << "\n";
+    os << prefix << "collectiontype " << collectionTypeName << "\n";
+    if (!_timestamp) {
+        return;
+    }
+    os << prefix << "timestamp " << _timestamp.val() << "\n";
+}
+
+
+/** Fields are equal when name, data type, collection type and timestamp all match. */
+bool
+Schema::Field::operator==(const Field &rhs) const
+{
+    if (!(_name == rhs._name)) {
+        return false;
+    }
+    if (!(_dataType == rhs._dataType)) {
+        return false;
+    }
+    if (!(_collectionType == rhs._collectionType)) {
+        return false;
+    }
+    return _timestamp == rhs._timestamp;
+}
+
+
+/** Fields differ when name, data type, collection type or timestamp differs. */
+bool
+Schema::Field::operator!=(const Field &rhs) const
+{
+    if (_name != rhs._name) {
+        return true;
+    }
+    if (_dataType != rhs._dataType) {
+        return true;
+    }
+    if (_collectionType != rhs._collectionType) {
+        return true;
+    }
+    return _timestamp != rhs._timestamp;
+}
+
+
+/**
+ * Creates an index field with collection type SINGLE. The flags and the
+ * average element length get the same initial values as in the
+ * three-argument constructor, which does the work.
+ */
+Schema::IndexField::IndexField(const vespalib::stringref &name, DataType dt)
+    : IndexField(name, dt, SINGLE)
+{
+}
+
+Schema::IndexField::IndexField(const vespalib::stringref &name, DataType dt, // index field with an explicit collection type
+ CollectionType ct)
+ : Field(name, dt, ct),
+ _prefix(false),
+ _phrases(false),
+ _positions(true), // the only flag that starts out enabled
+ _avgElemLen(512) // initial average element length
+{
+}
+
+Schema::IndexField::IndexField(const std::vector<vespalib::string> &lines) // base Field part plus the index-specific keys below
+ : Field(lines),
+ _prefix(ConfigParser::parse<bool>("prefix", lines)),
+ _phrases(ConfigParser::parse<bool>("phrases", lines)),
+ _positions(ConfigParser::parse<bool>("positions", lines)),
+ _avgElemLen(ConfigParser::parse<int32_t>("averageelementlen", lines)) // parsed as int32_t, stored in a uint32_t member
+{
+}
+
+
+/**
+ * Writes the base field lines followed by the index-specific settings
+ * (prefix, phrases, positions, averageelementlen), each line starting with
+ * the given prefix.
+ */
+void
+Schema::IndexField::write(vespalib::asciistream & os, const vespalib::stringref & prefix) const
+{
+    Field::write(os, prefix);
+    const char *prefixFlag = _prefix ? "true" : "false";
+    const char *phrasesFlag = _phrases ? "true" : "false";
+    const char *positionsFlag = _positions ? "true" : "false";
+    const int32_t avgElemLen = static_cast<int32_t>(_avgElemLen);
+    os << prefix << "prefix " << prefixFlag << "\n";
+    os << prefix << "phrases " << phrasesFlag << "\n";
+    os << prefix << "positions " << positionsFlag << "\n";
+    os << prefix << "averageelementlen " << avgElemLen << "\n";
+}
+
+
+/** Index fields are equal when the base field and all index settings match. */
+bool
+Schema::IndexField::operator==(const IndexField &rhs) const
+{
+    if (!Field::operator==(rhs)) {
+        return false;
+    }
+    if (!(_prefix == rhs._prefix)) {
+        return false;
+    }
+    if (!(_phrases == rhs._phrases)) {
+        return false;
+    }
+    if (!(_positions == rhs._positions)) {
+        return false;
+    }
+    return _avgElemLen == rhs._avgElemLen;
+}
+
+
+/** Index fields differ when the base field or any index setting differs. */
+bool
+Schema::IndexField::operator!=(const IndexField &rhs) const
+{
+    if (Field::operator!=(rhs)) {
+        return true;
+    }
+    if (_prefix != rhs._prefix) {
+        return true;
+    }
+    if (_phrases != rhs._phrases) {
+        return true;
+    }
+    if (_positions != rhs._positions) {
+        return true;
+    }
+    return _avgElemLen != rhs._avgElemLen;
+}
+
+
+/**
+ * Creates a field set from config lines: the set name comes from the key
+ * "name" and the member names from the entries of the "field" array.
+ */
+Schema::FieldSet::FieldSet(const std::vector<vespalib::string> & lines)
+    : _name(ConfigParser::parse<vespalib::string>("name", lines)),
+      _fields()
+{
+    const std::vector<FieldName> entries = ConfigParser::parseArray<FieldName>("field", lines);
+    for (const FieldName &entry : entries) {
+        _fields.push_back(entry.name);
+    }
+}
+
+
+/** Field sets are equal when both the name and the member list match. */
+bool
+Schema::FieldSet::operator==(const FieldSet &rhs) const
+{
+    if (!(_name == rhs._name)) {
+        return false;
+    }
+    return _fields == rhs._fields;
+}
+
+/** Field sets differ when the name or the member list differs. */
+bool
+Schema::FieldSet::operator!=(const FieldSet &rhs) const
+{
+    if (_name != rhs._name) {
+        return true;
+    }
+    return _fields != rhs._fields;
+}
+
+void
+Schema::writeToStream(vespalib::asciistream &os) const
+{
+ writeFields(os, "attributefield", _attributeFields); // the output order is attribute fields, summary fields, field sets, index fields
+ writeFields(os, "summaryfield", _summaryFields);
+ writeFieldSets(os, "fieldset", _fieldSets);
+ writeFields(os, "indexfield", _indexFields);
+}
+
+Schema::Schema() // every field list and name-to-id map is default-constructed
+ : _indexFields(),
+ _attributeFields(),
+ _summaryFields(),
+ _fieldSets(),
+ _indexIds(),
+ _attributeIds(),
+ _summaryIds(),
+ _fieldSetIds()
+{
+}
+
+/**
+ * Replaces the content of this schema with what is parsed from a file.
+ *
+ * The file is read line by line, the four field arrays are parsed from the
+ * lines, and the name-to-id maps are rebuilt from the parsed arrays.
+ *
+ * @param fileName the name of the file.
+ * @return false if the file could not be opened, true otherwise.
+ */
+bool
+Schema::loadFromFile(const vespalib::stringref & fileName)
+{
+    std::ifstream file(fileName.c_str());
+    if (!file) {
+        LOG(warning, "Could not open input file '%s' as part of loadFromFile()", fileName.c_str());
+        return false;
+    }
+    std::vector<vespalib::string> lines;
+    std::string tmpLine;
+    // Test the result of getline() itself. Checking the stream before the
+    // read instead stores one bogus entry after the last successful read:
+    // an empty line, or a repeat of the last line when the file does not
+    // end with a newline.
+    while (std::getline(file, tmpLine)) {
+        lines.push_back(tmpLine);
+    }
+    _indexFields = ConfigParser::parseArray<IndexField>("indexfield", lines);
+    _attributeFields = ConfigParser::parseArray<AttributeField>("attributefield", lines);
+    _summaryFields = ConfigParser::parseArray<SummaryField>("summaryfield", lines);
+    _fieldSets = ConfigParser::parseArray<FieldSet>("fieldset", lines);
+    // Rebuild each name -> position map from its freshly parsed array.
+    _indexIds.clear();
+    for (size_t i(0), m(_indexFields.size()); i < m; i++) {
+        _indexIds[_indexFields[i].getName()] = i;
+    }
+    _attributeIds.clear();
+    for (size_t i(0), m(_attributeFields.size()); i < m; i++) {
+        _attributeIds[_attributeFields[i].getName()] = i;
+    }
+    _summaryIds.clear();
+    for (size_t i(0), m(_summaryFields.size()); i < m; i++) {
+        _summaryIds[_summaryFields[i].getName()] = i;
+    }
+    _fieldSetIds.clear();
+    for (size_t i(0), m(_fieldSets.size()); i < m; i++) {
+        _fieldSetIds[_fieldSets[i].getName()] = i;
+    }
+    return true;
+}
+
+bool
+Schema::saveToFile(const vespalib::stringref & fileName) const
+{
+ vespalib::asciistream os;
+ writeToStream(os); // render the whole schema in memory before touching the file
+ std::ofstream file(fileName.c_str());
+ if (!file) {
+ LOG(warning, "Could not open output file '%s' as part of saveToFile()", fileName.c_str());
+ return false;
+ }
+ file << os.str();
+ file.close(); // close first so that fail() below also reflects errors raised while closing
+ if (file.fail()) {
+ LOG(warning,
+ "Could not write to output file '%s' as part of saveToFile()",
+ fileName.c_str());
+ return false;
+ }
+ FastOS_File s; // second handle on the same file, opened only to call Sync() on it
+ s.OpenReadWrite(fileName.c_str());
+ if (!s.IsOpened()) {
+ LOG(warning,
+ "Could not open schema file '%s' for fsync",
+ fileName.c_str());
+ return false;
+ } else {
+ if (!s.Sync()) {
+ LOG(warning,
+ "Could not fsync schema file '%s'",
+ fileName.c_str());
+ return false; // NOTE(review): s is not explicitly closed on this path; presumably the FastOS_File destructor closes it - confirm
+ }
+ s.Close();
+ }
+ return true;
+}
+
+vespalib::string
+Schema::toString() const
+{
+ vespalib::asciistream os;
+ writeToStream(os); // same text that saveToFile() writes to disk
+ return os.str();
+}
+
+namespace {
+/**
+ * Returns a new index field named "<field name><suffix>" that copies the
+ * data type, collection type, prefix/phrases/positions flags and average
+ * element length of the given field. The timestamp is not copied.
+ */
+Schema::IndexField
+cloneIndexField(const Schema::IndexField &field,
+                const vespalib::string &suffix)
+{
+    Schema::IndexField clone(field.getName() + suffix,
+                             field.getDataType(),
+                             field.getCollectionType());
+    clone.setPrefix(field.hasPrefix());
+    clone.setPhrases(field.hasPhrases());
+    clone.setPositions(field.hasPositions());
+    clone.setAvgElemLen(field.getAvgElemLen());
+    return clone;
+}
+
+/**
+ * Appends a field to a field vector and maps its name to the position it
+ * gets. A name that is already mapped is re-pointed to the new position.
+ *
+ * @return self, so that calls can be chained.
+ */
+template <typename T, typename M>
+Schema &
+addField(const T &field, Schema &self,
+         std::vector<T> &fields, M &name2id_map)
+{
+    const size_t position = fields.size();
+    // Record the name before push_back(), as the original ordering does.
+    name2id_map[field.getName()] = position;
+    fields.push_back(field);
+    return self;
+}
+} // namespace
+
+Schema &
+Schema::addIndexField(const IndexField &field)
+{
+ return addField(field, *this, _indexFields, _indexIds); // appends to _indexFields and maps the name to the new position
+}
+
+/**
+ * Adds the given index field followed by seven clones of it whose names
+ * carry the suffixes .scheme, .host, .port, .path, .query, .fragment and
+ * .hostname, in that order.
+ */
+Schema &
+Schema::addUriIndexFields(const IndexField &field)
+{
+    static const char *const suffixes[] = {
+        ".scheme", ".host", ".port", ".path", ".query", ".fragment", ".hostname"
+    };
+    addIndexField(field);
+    for (const char *suffix : suffixes) {
+        addIndexField(cloneIndexField(field, suffix));
+    }
+    return *this;
+}
+
+Schema &
+Schema::addAttributeField(const AttributeField &field)
+{
+ return addField(field, *this, _attributeFields, _attributeIds); // appends to _attributeFields and maps the name to the new position
+}
+
+Schema &
+Schema::addSummaryField(const SummaryField &field)
+{
+ return addField(field, *this, _summaryFields, _summaryIds); // appends to _summaryFields and maps the name to the new position
+}
+
+Schema &
+Schema::addFieldSet(const FieldSet &fieldSet)
+{
+ return addField(fieldSet, *this, _fieldSets, _fieldSetIds); // appends to _fieldSets and maps the name to the new position
+}
+
+uint32_t
+Schema::getIndexFieldId(const vespalib::stringref & name) const
+{
+ return getFieldId(name, _indexIds); // UNKNOWN_FIELD_ID when the name is not an index field
+}
+
+uint32_t
+Schema::getAttributeFieldId(const vespalib::stringref & name) const
+{
+ return getFieldId(name, _attributeIds); // UNKNOWN_FIELD_ID when the name is not an attribute field
+}
+
+uint32_t
+Schema::getSummaryFieldId(const vespalib::stringref & name) const
+{
+ return getFieldId(name, _summaryIds); // UNKNOWN_FIELD_ID when the name is not a summary field
+}
+
+
+uint32_t
+Schema::getFieldSetId(const vespalib::stringref &name) const
+{
+ return getFieldId(name, _fieldSetIds); // UNKNOWN_FIELD_ID when the name is not a field set
+}
+
+
+void
+Schema::swap(Schema &rhs)
+{
+ _indexFields.swap(rhs._indexFields); // the four field lists are exchanged first ...
+ _attributeFields.swap(rhs._attributeFields);
+ _summaryFields.swap(rhs._summaryFields);
+ _fieldSets.swap(rhs._fieldSets);
+ _indexIds.swap(rhs._indexIds); // ... then the matching name-to-id maps
+ _attributeIds.swap(rhs._attributeIds);
+ _summaryIds.swap(rhs._summaryIds);
+ _fieldSetIds.swap(rhs._fieldSetIds);
+}
+
+
+void
+Schema::clear()
+{
+ _indexFields.clear(); // empties the four field lists ...
+ _attributeFields.clear();
+ _summaryFields.clear();
+ _fieldSets.clear();
+ _indexIds.clear(); // ... and the matching name-to-id maps
+ _attributeIds.clear();
+ _summaryIds.clear();
+ _fieldSetIds.clear();
+}
+
+
+namespace {
+// Helper class allowing the is_matching specialization to access the schema.
+/**
+ * Owns the schema being built by an intersection and provides the matching
+ * rule, so that an is_matching specialization can consult that schema.
+ */
+struct IntersectHelper {
+    Schema::UP schema;
+    IntersectHelper() : schema(new Schema) {}
+
+    /** Generic rule: two entries match when their types match. */
+    template <typename T>
+    bool is_matching(const T &t1, const T &t2) { return t1.matchingTypes(t2); }
+
+    /**
+     * Appends to intersection every entry of set1 whose name is present in
+     * set2_map and which matches the set2 entry of that name, recording the
+     * name and new position in intersection_map.
+     */
+    template <typename T, typename Map>
+    void intersect(const std::vector<T> &set1, const std::vector<T> &set2,
+                   const Map &set2_map,
+                   std::vector<T> &intersection, Map &intersection_map) {
+        for (const T &candidate : set1) {
+            typename Map::const_iterator counterpart = set2_map.find(candidate.getName());
+            if (counterpart == set2_map.end()) {
+                continue;
+            }
+            if (!is_matching(candidate, set2[counterpart->second])) {
+                continue;
+            }
+            intersection_map[candidate.getName()] = intersection.size();
+            intersection.push_back(candidate);
+        }
+    }
+};
+
+/**
+ * Field set rule: two field sets match when their member lists are equal
+ * and every member name is an index field of the schema held by this helper.
+ */
+template <>
+bool IntersectHelper::is_matching(const Schema::FieldSet &f1,
+                                  const Schema::FieldSet &f2) {
+    // getFields() returns a const reference, so bind to it instead of
+    // copying the whole vector of names on every comparison.
+    const std::vector<vespalib::string> &fields = f1.getFields();
+    if (fields != f2.getFields())
+        return false;
+    for (std::vector<vespalib::string>::const_iterator
+             i = fields.begin(), ie = fields.end(); i != ie; ++i) {
+        if (schema->getIndexFieldId(*i) == Schema::UNKNOWN_FIELD_ID) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Appends to v every entry whose timestamp is strictly less than
+ * limit_timestamp, recording each name and new position in name2id_map.
+ */
+template <typename T, typename Map>
+void addOldEntries(const std::vector<T> &entries,
+                   fastos::TimeStamp limit_timestamp,
+                   std::vector<T> &v, Map &name2id_map) {
+    for (const T &entry : entries) {
+        if (!(entry.getTimestamp() < limit_timestamp)) {
+            continue;
+        }
+        name2id_map[entry.getName()] = v.size();
+        v.push_back(entry);
+    }
+}
+
+/**
+ * Appends to v every entry whose name is not yet present in name2id_map,
+ * recording each added name and its new position in the map.
+ */
+template <typename T, typename Map>
+void addEntries(const std::vector<T> &entries, std::vector<T> &v,
+                Map &name2id_map) {
+    for (const T &entry : entries) {
+        if (name2id_map.find(entry.getName()) != name2id_map.end()) {
+            continue;
+        }
+        name2id_map[entry.getName()] = v.size();
+        v.push_back(entry);
+    }
+}
+
+/**
+ * Appends to diff every minuend entry whose name is absent from
+ * subtrahend_map, recording each name and new position in diff_map.
+ */
+template <typename T, typename Map>
+void difference(const std::vector<T> &minuend, const Map &subtrahend_map,
+                std::vector<T> &diff, Map &diff_map) {
+    for (const T &entry : minuend) {
+        if (subtrahend_map.find(entry.getName()) != subtrahend_map.end()) {
+            continue;
+        }
+        diff_map[entry.getName()] = diff.size();
+        diff.push_back(entry);
+    }
+}
+} // namespace
+
+Schema::UP
+Schema::getOldFields(fastos::TimeStamp limit_timestamp)
+{
+ Schema::UP schema(new Schema); // result keeps only fields whose timestamp is below limit_timestamp
+ addOldEntries(_indexFields, limit_timestamp,
+ schema->_indexFields, schema->_indexIds);
+ addOldEntries(_attributeFields, limit_timestamp,
+ schema->_attributeFields, schema->_attributeIds);
+ addOldEntries(_summaryFields, limit_timestamp,
+ schema->_summaryFields, schema->_summaryIds);
+ return schema; // field sets are not carried over to the result
+}
+
+Schema::UP
+Schema::intersect(const Schema &lhs, const Schema &rhs)
+{
+ IntersectHelper h; // owns the result schema; index fields are intersected before field sets because the FieldSet is_matching rule looks names up in h.schema
+ h.intersect(lhs._indexFields, rhs._indexFields, rhs._indexIds,
+ h.schema->_indexFields, h.schema->_indexIds);
+ h.intersect(lhs._attributeFields, rhs._attributeFields, rhs._attributeIds,
+ h.schema->_attributeFields, h.schema->_attributeIds);
+ h.intersect(lhs._summaryFields, rhs._summaryFields, rhs._summaryIds,
+ h.schema->_summaryFields, h.schema->_summaryIds);
+ h.intersect(lhs._fieldSets, rhs._fieldSets, rhs._fieldSetIds,
+ h.schema->_fieldSets, h.schema->_fieldSetIds);
+ return std::move(h.schema); // h.schema is a member of a local, not a local itself, so the explicit move is required
+}
+
+Schema::UP
+Schema::make_union(const Schema &lhs, const Schema &rhs)
+{
+ Schema::UP schema(new Schema(lhs)); // start from a copy of lhs; rhs entries are added only for names the copy lacks
+ addEntries(rhs._indexFields, schema->_indexFields, schema->_indexIds);
+ addEntries(rhs._attributeFields, schema->_attributeFields, schema->_attributeIds);
+ addEntries(rhs._summaryFields, schema->_summaryFields, schema->_summaryIds);
+ addEntries(rhs._fieldSets, schema->_fieldSets, schema->_fieldSetIds);
+ return schema;
+}
+
+Schema::UP
+Schema::set_difference(const Schema &lhs, const Schema &rhs)
+{
+ Schema::UP schema(new Schema); // result keeps lhs entries whose name is absent from rhs; only names are compared
+ difference(lhs._indexFields, rhs._indexIds,
+ schema->_indexFields, schema->_indexIds);
+ difference(lhs._attributeFields, rhs._attributeIds,
+ schema->_attributeFields, schema->_attributeIds);
+ difference(lhs._summaryFields, rhs._summaryIds,
+ schema->_summaryFields, schema->_summaryIds);
+ difference(lhs._fieldSets, rhs._fieldSetIds,
+ schema->_fieldSets, schema->_fieldSetIds);
+ return schema;
+}
+
+/**
+ * Schemas are equal when the index, attribute and summary field lists and
+ * the field set list are all equal. The name-to-id maps are not compared.
+ */
+bool
+Schema::operator==(const Schema &rhs) const
+{
+    if (!(_indexFields == rhs._indexFields)) {
+        return false;
+    }
+    if (!(_attributeFields == rhs._attributeFields)) {
+        return false;
+    }
+    if (!(_summaryFields == rhs._summaryFields)) {
+        return false;
+    }
+    return _fieldSets == rhs._fieldSets;
+}
+
+
+/**
+ * Schemas differ when any of the index, attribute or summary field lists or
+ * the field set list differs. The name-to-id maps are not compared.
+ */
+bool
+Schema::operator!=(const Schema &rhs) const
+{
+    if (_indexFields != rhs._indexFields) {
+        return true;
+    }
+    if (_attributeFields != rhs._attributeFields) {
+        return true;
+    }
+    if (_summaryFields != rhs._summaryFields) {
+        return true;
+    }
+    return _fieldSets != rhs._fieldSets;
+}
+
+
+/** A schema is empty when it has no index, attribute or summary fields and no field sets. */
+bool
+Schema::empty() const
+{
+    if (!_indexFields.empty()) {
+        return false;
+    }
+    if (!_attributeFields.empty()) {
+        return false;
+    }
+    if (!_summaryFields.empty()) {
+        return false;
+    }
+    return _fieldSets.empty();
+}
+
+
+} // namespace search::index
+} // namespace search
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.h b/searchcommon/src/vespa/searchcommon/common/schema.h
new file mode 100644
index 00000000000..5f5b7dd3656
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/schema.h
@@ -0,0 +1,429 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/util/ptrholder.h>
+#include <vector>
+
+namespace search {
+namespace index {
+
+/**
+ * Schema class used to give a high-level description of the content
+ * of an index.
+ **/
+class Schema
+{
+public:
+ typedef std::unique_ptr<Schema> UP;
+ typedef std::shared_ptr<Schema> SP;
+ typedef vespalib::PtrHolder<Schema> PH;
+
+ /**
+ * Basic data type for a field.
+ **/
+ enum DataType { UINT1 = 0,
+ UINT2 = 1,
+ UINT4 = 2,
+ INT8 = 3,
+ INT16 = 4,
+ INT32 = 5,
+ INT64 = 6,
+ FLOAT = 7,
+ DOUBLE = 8,
+ STRING = 9,
+ RAW = 10,
+ //FEATURE = 11,
+ BOOLEANTREE = 12,
+ TENSOR = 13};
+ static DataType dataTypeFromName(const vespalib::stringref &name);
+ static vespalib::string getTypeName(DataType type);
+
+ /**
+ * Collection type for a field.
+ **/
+ enum CollectionType { SINGLE = 0,
+ ARRAY = 1,
+ WEIGHTEDSET = 2 };
+ static CollectionType collectionTypeFromName(const vespalib::stringref &n);
+ static vespalib::string getTypeName(CollectionType type);
+
+ /**
+ * A single field has a name, data type and collection
+ * type. Various aspects (index/attribute/summary) may have
+ * limitations on what types are supported in the back-end.
+ **/
+ class Field
+ {
+ vespalib::string _name;
+ DataType _dataType;
+ CollectionType _collectionType;
+ fastos::TimeStamp _timestamp;
+
+ public:
+ Field(const vespalib::stringref &n, DataType dt);
+ Field(const vespalib::stringref &n, DataType dt, CollectionType ct);
+
+ /**
+ * Create this field based on the given config lines.
+ **/
+ Field(const std::vector<vespalib::string> & lines);
+
+ virtual ~Field() {}
+
+ void setTimestamp(fastos::TimeStamp ts) { _timestamp = ts; }
+
+ virtual void
+ write(vespalib::asciistream & os,
+ const vespalib::stringref & prefix) const;
+
+ const vespalib::string &getName() const { return _name; }
+ DataType getDataType() const { return _dataType; }
+ CollectionType getCollectionType() const { return _collectionType; }
+ fastos::TimeStamp getTimestamp() const { return _timestamp; }
+
+ bool matchingTypes(const Field &rhs) const
+ {
+ return getDataType() == rhs.getDataType() &&
+ getCollectionType() == rhs.getCollectionType();
+ }
+
+ bool operator==(const Field &rhs) const;
+ bool operator!=(const Field &rhs) const;
+ };
+
+ /**
+ * A representation of an index field with extra information on
+ * how the index should be generated.
+ **/
+ class IndexField : public Field
+ {
+ bool _prefix;
+ bool _phrases;
+ bool _positions;
+ uint32_t _avgElemLen;
+
+ public:
+ IndexField(const vespalib::stringref &name, DataType dt);
+ IndexField(const vespalib::stringref &name, DataType dt,
+ CollectionType ct);
+ /**
+ * Create this index field based on the given config lines.
+ **/
+ IndexField(const std::vector<vespalib::string> &lines);
+
+ IndexField &setPrefix(bool value) { _prefix = value; return *this; }
+ IndexField &setPhrases(bool value) { _phrases = value; return *this; }
+ IndexField &setPositions(bool value)
+ { _positions = value; return *this; }
+ IndexField &setAvgElemLen(uint32_t avgElemLen)
+ { _avgElemLen = avgElemLen; return *this; }
+
+ virtual void
+ write(vespalib::asciistream &os,
+ const vespalib::stringref &prefix) const;
+
+ bool hasPrefix() const { return _prefix; }
+ bool hasPhrases() const { return _phrases; }
+ bool hasPositions() const { return _positions; }
+ uint32_t getAvgElemLen() const { return _avgElemLen; }
+
+ bool operator==(const IndexField &rhs) const;
+ bool operator!=(const IndexField &rhs) const;
+ };
+
+ typedef Field AttributeField;
+ typedef Field SummaryField;
+
+ /**
+ * A field collection has a name and a list of index field names,
+ * and is a named physical view over the list of index fields.
+ **/
+ class FieldSet
+ {
+ vespalib::string _name;
+ std::vector<vespalib::string> _fields;
+
+ public:
+ FieldSet(const vespalib::stringref & n) : _name(n), _fields() {}
+
+ /**
+ * Create this field collection based on the given config lines.
+ **/
+ FieldSet(const std::vector<vespalib::string> & lines);
+
+ FieldSet &addField(const vespalib::stringref &fieldName) {
+ _fields.push_back(fieldName);
+ return *this;
+ }
+
+ const vespalib::string &getName() const { return _name; }
+ const std::vector<vespalib::string> &getFields() const
+ { return _fields; }
+
+ bool operator==(const FieldSet &rhs) const;
+ bool operator!=(const FieldSet &rhs) const;
+ };
+
+ static const uint32_t UNKNOWN_FIELD_ID;
+
+private:
+ std::vector<IndexField> _indexFields;
+ std::vector<AttributeField> _attributeFields;
+ std::vector<SummaryField> _summaryFields;
+ std::vector<FieldSet> _fieldSets;
+ typedef vespalib::hash_map<vespalib::string, uint32_t> Name2IdMap;
+ Name2IdMap _indexIds;
+ Name2IdMap _attributeIds;
+ Name2IdMap _summaryIds;
+ Name2IdMap _fieldSetIds;
+
+ void writeToStream(vespalib::asciistream &os) const;
+
+public:
+ /**
+ * Create an initially empty schema
+ **/
+ Schema();
+
+ /**
+ * Load this schema from the file with the given name.
+ *
+ * @param fileName the name of the file.
+ * @return true if the schema could be loaded.
+ **/
+ bool
+ loadFromFile(const vespalib::stringref & fileName);
+
+ /**
+ * Save this schema to the file with the given name.
+ *
+ * @param fileName the name of the file.
+ * @return true if the schema could be saved.
+ **/
+ bool
+ saveToFile(const vespalib::stringref & fileName) const;
+
+ vespalib::string toString() const;
+
+ /**
+ * Add an index field to this schema
+ *
+ * @param field the field to add
+ **/
+ Schema &
+ addIndexField(const IndexField &field);
+
+ // Only used by tests.
+ Schema &
+ addUriIndexFields(const IndexField &field);
+
+ /**
+ * Add an attribute field to this schema
+ *
+ * @param field the field to add
+ **/
+ Schema &
+ addAttributeField(const AttributeField &field);
+
+ /**
+ * Add a summary field to this schema
+ *
+ * @param field the field to add
+ **/
+ Schema &
+ addSummaryField(const SummaryField &field);
+
+ /**
+ * Add a field set to this schema.
+ *
+ * @param collection the field set to add.
+ **/
+ Schema &
+ addFieldSet(const FieldSet &collection);
+
+ /**
+ * Obtain the number of index fields in this schema.
+ *
+ * @return number of fields
+ **/
+ uint32_t getNumIndexFields() const { return _indexFields.size(); }
+
+ /**
+  * Report how many attribute fields this schema currently holds.
+  *
+  * @return number of attribute fields
+  **/
+ uint32_t getNumAttributeFields() const {
+     return static_cast<uint32_t>(_attributeFields.size());
+ }
+
+ /**
+  * Report how many summary fields this schema currently holds.
+  *
+  * @return number of summary fields
+  **/
+ uint32_t getNumSummaryFields() const {
+     return static_cast<uint32_t>(_summaryFields.size());
+ }
+
+ /**
+  * Report how many field sets this schema currently holds.
+  *
+  * @return number of field sets
+  **/
+ uint32_t getNumFieldSets() const {
+     return static_cast<uint32_t>(_fieldSets.size());
+ }
+
+ /**
+  * Look up an index field by its field id.
+  *
+  * The id is used directly as a vector index; no bounds check is made.
+  *
+  * @param fieldId position of the field, in the range
+  *                [0, getNumIndexFields() - 1].
+  * @return the index field stored at that position
+  **/
+ const IndexField &
+ getIndexField(uint32_t fieldId) const
+ {
+     const IndexField &field = _indexFields[fieldId];
+     return field;
+ }
+
+ /**
+  * Expose all index fields, in field id order, as a read-only vector.
+  *
+  * @return const reference to the internal index field vector
+  **/
+ const std::vector<IndexField> &
+ getIndexFields() const
+ {
+     return _indexFields;
+ }
+
+ /**
+ * Get the field id for the index field with the given name.
+ *
+ * @return the field id or UNKNOWN_FIELD_ID if not found.
+ * @param name the name of the field.
+ **/
+ uint32_t getIndexFieldId(const vespalib::stringref & name) const;
+
+ /**
+  * Tell whether an index field with the given name is registered.
+  *
+  * @param name the name of the field.
+  * @return true if the name is present in the index field name map.
+  **/
+ bool
+ isIndexField(const vespalib::stringref & name) const
+ {
+     auto pos = _indexIds.find(name);
+     return pos != _indexIds.end();
+ }
+
+ /**
+  * Tell whether a summary field with the given name is registered.
+  *
+  * @param name the name of the field.
+  * @return true if the name is present in the summary field name map.
+  **/
+ bool
+ isSummaryField(const vespalib::stringref & name) const
+ {
+     auto pos = _summaryIds.find(name);
+     return pos != _summaryIds.end();
+ }
+ /**
+  * Tell whether an attribute field with the given name is registered.
+  *
+  * @param name the name of the field.
+  * @return true if the name is present in the attribute field name map.
+  **/
+ bool
+ isAttributeField(const vespalib::stringref & name) const
+ {
+     auto pos = _attributeIds.find(name);
+     return pos != _attributeIds.end();
+ }
+
+ /**
+  * Look up an attribute field by its field id.
+  *
+  * The id is used directly as a vector index; no bounds check is made.
+  *
+  * @param fieldId position of the field, in the range
+  *                [0, getNumAttributeFields() - 1].
+  * @return the attribute field stored at that position
+  **/
+ const AttributeField &
+ getAttributeField(uint32_t fieldId) const
+ {
+     const AttributeField &field = _attributeFields[fieldId];
+     return field;
+ }
+
+ /**
+  * Expose all attribute fields, in field id order, as a read-only vector.
+  *
+  * @return const reference to the internal attribute field vector
+  **/
+ const std::vector<AttributeField> &
+ getAttributeFields() const
+ {
+     return _attributeFields;
+ }
+
+ /**
+ * Get the field id for the attribute field with the given name.
+ *
+ * @return the field id or UNKNOWN_FIELD_ID if not found.
+ * @param name the name of the field.
+ **/
+ uint32_t getAttributeFieldId(const vespalib::stringref & name) const;
+
+ /**
+  * Look up a summary field by its field id.
+  *
+  * The id is used directly as a vector index; no bounds check is made.
+  *
+  * @param fieldId position of the field, in the range
+  *                [0, getNumSummaryFields() - 1].
+  * @return the summary field stored at that position
+  **/
+ const SummaryField &
+ getSummaryField(uint32_t fieldId) const
+ {
+     const SummaryField &field = _summaryFields[fieldId];
+     return field;
+ }
+
+ /**
+  * Expose all summary fields, in field id order, as a read-only vector.
+  *
+  * @return const reference to the internal summary field vector
+  **/
+ const std::vector<SummaryField> &
+ getSummaryFields() const
+ {
+     return _summaryFields;
+ }
+
+ /**
+ * Get the field id for the summary field with the given name.
+ *
+ * @return the field id or UNKNOWN_FIELD_ID if not found.
+ * @param name the name of the field.
+ **/
+ uint32_t getSummaryFieldId(const vespalib::stringref & name) const;
+
+ /**
+  * Look up a field set by its position.
+  *
+  * The position is used directly as a vector index; no bounds check is made.
+  *
+  * @param idx an index in the range [0, getNumFieldSets() - 1].
+  * @return the field set stored at that position
+  **/
+ const FieldSet &
+ getFieldSet(uint32_t idx) const
+ {
+     const FieldSet &fieldSet = _fieldSets[idx];
+     return fieldSet;
+ }
+
+ /**
+ * Get the field id for the field set with the given name.
+ *
+ * @return the field id or UNKNOWN_FIELD_ID if not found.
+ * @param name the name of the field set.
+ **/
+ uint32_t
+ getFieldSetId(const vespalib::stringref &name) const;
+
+ void swap(Schema &rhs);
+ void clear();
+
+ Schema::UP getOldFields(fastos::TimeStamp limit_timestamp);
+
+ static Schema::UP intersect(const Schema &lhs, const Schema &rhs);
+ static Schema::UP make_union(const Schema &lhs, const Schema &rhs);
+ static Schema::UP set_difference(const Schema &lhs, const Schema &rhs);
+
+ bool operator==(const Schema &rhs) const;
+ bool operator!=(const Schema &rhs) const;
+
+ bool empty() const;
+};
+
+} // namespace search::index
+} // namespace search
diff --git a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp
new file mode 100644
index 00000000000..34071e241d7
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp
@@ -0,0 +1,241 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchcommon/config/subscriptionproxyng.h>
+#include <vespa/searchcommon/common/schemaconfigurer.h>
+
+LOG_SETUP(".index.schemaconfigurer");
+
+using namespace config;
+using namespace vespa::config::search;
+
+namespace search {
+namespace index {
+
+
+ /**
+  * Translate an indexschema config data type into the schema data type.
+  * A value not handled by the switch yields Schema::STRING.
+  **/
+ Schema::DataType
+ SchemaBuilder::convert(const IndexschemaConfig::Indexfield::Datatype &type)
+ {
+     Schema::DataType result = Schema::STRING;
+     switch (type) {
+     case IndexschemaConfig::Indexfield::STRING:
+         result = Schema::STRING;
+         break;
+     case IndexschemaConfig::Indexfield::INT64:
+         result = Schema::INT64;
+         break;
+     case IndexschemaConfig::Indexfield::BOOLEANTREE:
+         result = Schema::BOOLEANTREE;
+         break;
+     }
+     return result;
+ }
+
+
+ /**
+  * Translate an indexschema config collection type into the schema
+  * collection type. A value not handled by the switch yields Schema::SINGLE.
+  **/
+ Schema::CollectionType
+ SchemaBuilder::convert(const IndexschemaConfig::Indexfield::Collectiontype & type)
+ {
+     Schema::CollectionType result = Schema::SINGLE;
+     switch (type) {
+     case IndexschemaConfig::Indexfield::SINGLE:
+         result = Schema::SINGLE;
+         break;
+     case IndexschemaConfig::Indexfield::ARRAY:
+         result = Schema::ARRAY;
+         break;
+     case IndexschemaConfig::Indexfield::WEIGHTEDSET:
+         result = Schema::WEIGHTEDSET;
+         break;
+     }
+     return result;
+ }
+
+
+ /**
+  * Translate an attributes config data type into the schema data type.
+  * PREDICATE maps to Schema::BOOLEANTREE; any value without its own case
+  * yields Schema::STRING.
+  **/
+ Schema::DataType
+ SchemaBuilder::convert(const AttributesConfig::Attribute::Datatype &type)
+ {
+     // TODO: exception?
+     Schema::DataType result = Schema::STRING;
+     switch (type) {
+     case AttributesConfig::Attribute::STRING:
+         result = Schema::STRING;
+         break;
+     case AttributesConfig::Attribute::UINT1:
+         result = Schema::UINT1;
+         break;
+     case AttributesConfig::Attribute::UINT2:
+         result = Schema::UINT2;
+         break;
+     case AttributesConfig::Attribute::UINT4:
+         result = Schema::UINT4;
+         break;
+     case AttributesConfig::Attribute::INT8:
+         result = Schema::INT8;
+         break;
+     case AttributesConfig::Attribute::INT16:
+         result = Schema::INT16;
+         break;
+     case AttributesConfig::Attribute::INT32:
+         result = Schema::INT32;
+         break;
+     case AttributesConfig::Attribute::INT64:
+         result = Schema::INT64;
+         break;
+     case AttributesConfig::Attribute::FLOAT:
+         result = Schema::FLOAT;
+         break;
+     case AttributesConfig::Attribute::DOUBLE:
+         result = Schema::DOUBLE;
+         break;
+     case AttributesConfig::Attribute::PREDICATE:
+         result = Schema::BOOLEANTREE;
+         break;
+     case AttributesConfig::Attribute::TENSOR:
+         result = Schema::TENSOR;
+         break;
+     default:
+         break;
+     }
+     return result;
+ }
+
+
+ /**
+  * Translate an attributes config collection type into the schema
+  * collection type. A value not handled by the switch yields Schema::SINGLE.
+  **/
+ Schema::CollectionType
+ SchemaBuilder::convert(const AttributesConfig::Attribute::Collectiontype &type)
+ {
+     Schema::CollectionType result = Schema::SINGLE;
+     switch (type) {
+     case AttributesConfig::Attribute::SINGLE:
+         result = Schema::SINGLE;
+         break;
+     case AttributesConfig::Attribute::ARRAY:
+         result = Schema::ARRAY;
+         break;
+     case AttributesConfig::Attribute::WEIGHTEDSET:
+         result = Schema::WEIGHTEDSET;
+         break;
+     }
+     return result;
+ }
+
+
+ /**
+  * Translate a summary config type name into the schema data type.
+  *
+  * @param type type name as given in the summary config
+  * @return the matching data type; "data", "longdata" and every name not
+  *         listed below yield Schema::RAW.
+  **/
+ Schema::DataType
+ SchemaBuilder::convertSummaryType(const vespalib::string & type)
+ {
+     if (type == "byte") {
+         return Schema::INT8;
+     }
+     if (type == "short") {
+         return Schema::INT16;
+     }
+     if (type == "integer") {
+         return Schema::INT32;
+     }
+     if (type == "int64") {
+         return Schema::INT64;
+     }
+     if (type == "float") {
+         return Schema::FLOAT;
+     }
+     if (type == "double") {
+         return Schema::DOUBLE;
+     }
+     const bool isStringLike = (type == "string" ||
+                                type == "longstring" ||
+                                type == "xmlstring" ||
+                                type == "featuredata" ||
+                                type == "jsonstring");
+     if (isStringLike) {
+         return Schema::STRING;
+     }
+     // "data" and "longdata" are raw, and raw is also the fallback.
+     return Schema::RAW;
+ }
+
+
+ /**
+  * Add the index fields and field sets described by an indexschema config
+  * to the given schema.
+  *
+  * Fields that are single-valued BOOLEANTREE, or whose index type is RISE,
+  * are not added; a warning is logged for each of them instead.
+  *
+  * @param cfg    indexschema config to read from
+  * @param schema schema that receives the index fields and field sets
+  **/
+ void
+ SchemaBuilder::build(const IndexschemaConfig &cfg, Schema &schema)
+ {
+     const size_t numFields = cfg.indexfield.size();
+     for (size_t fieldIdx = 0; fieldIdx < numFields; ++fieldIdx) {
+         const IndexschemaConfig::Indexfield &src = cfg.indexfield[fieldIdx];
+         const bool singleBooleanTree =
+             (src.datatype == IndexschemaConfig::Indexfield::BOOLEANTREE) &&
+             (src.collectiontype == IndexschemaConfig::Indexfield::SINGLE);
+         const bool riseIndex =
+             (src.indextype == IndexschemaConfig::Indexfield::RISE);
+         if (singleBooleanTree || riseIndex) {
+             LOG(warning, "Your field '%s' is a rise index. Those are no longer supported as of Vespa-5.89.\n"
+                 " Redeploy and follow instructions to mitigate.", src.name.c_str());
+             continue;
+         }
+         Schema::IndexField field(src.name,
+                                  convert(src.datatype),
+                                  convert(src.collectiontype));
+         schema.addIndexField(field.setPrefix(src.prefix).
+                              setPhrases(src.phrases).
+                              setPositions(src.positions).
+                              setAvgElemLen(src.averageelementlen));
+     }
+     const size_t numFieldSets = cfg.fieldset.size();
+     for (size_t setIdx = 0; setIdx < numFieldSets; ++setIdx) {
+         const IndexschemaConfig::Fieldset &src = cfg.fieldset[setIdx];
+         Schema::FieldSet fieldSet(src.name);
+         const size_t numMembers = src.field.size();
+         for (size_t memberIdx = 0; memberIdx < numMembers; ++memberIdx) {
+             fieldSet.addField(src.field[memberIdx].name);
+         }
+         schema.addFieldSet(fieldSet);
+     }
+ }
+
+
+ /**
+  * Add one attribute field to the schema for every attribute in the config.
+  *
+  * @param cfg    attributes config to read from
+  * @param schema schema that receives the attribute fields
+  **/
+ void
+ SchemaBuilder::build(const AttributesConfig &cfg, Schema &schema)
+ {
+     const size_t numAttributes = cfg.attribute.size();
+     for (size_t attrIdx = 0; attrIdx < numAttributes; ++attrIdx) {
+         const AttributesConfig::Attribute &src = cfg.attribute[attrIdx];
+         const Schema::DataType dataType = convert(src.datatype);
+         const Schema::CollectionType collectionType = convert(src.collectiontype);
+         schema.addAttributeField(Schema::Field(src.name, dataType, collectionType));
+     }
+ }
+
+
+ /**
+  * Add the fields of the default summary class to the schema.
+  *
+  * Only the first class whose id equals cfg.defaultsummaryid is used. If no
+  * class matches, nothing is added and a message is logged: at debug level
+  * when no classes are configured at all, otherwise as a warning.
+  *
+  * @param cfg    summary config to read from
+  * @param schema schema that receives the summary fields
+  **/
+ void
+ SchemaBuilder::build(const SummaryConfig &cfg, Schema &schema)
+ {
+     for (size_t i = 0; i < cfg.classes.size(); ++i) {
+         // size_t is printed with %zu; %lu is only right where size_t is
+         // unsigned long.
+         LOG(debug, "class with index %zu has id %d (default has id %d)",
+             i, cfg.classes[i].id, cfg.defaultsummaryid);
+     }
+     for (size_t i = 0; i < cfg.classes.size(); ++i) {
+         // use the default summary class that has all fields
+         if (cfg.classes[i].id == cfg.defaultsummaryid) {
+             for (size_t j = 0; j < cfg.classes[i].fields.size(); ++j) {
+                 const SummaryConfig::Classes::Fields & f =
+                     cfg.classes[i].fields[j];
+                 schema.addSummaryField(Schema::Field(f.name,
+                                                      convertSummaryType(f.type)));
+             }
+             return;
+         }
+     }
+     if (cfg.classes.empty()) {
+         LOG(debug,
+             "No summary class configured that match the default summary id %d",
+             cfg.defaultsummaryid);
+     } else {
+         LOG(warning,
+             "No summary class configured that match the default summary id %d",
+             cfg.defaultsummaryid);
+     }
+ }
+
+
+ /** Apply an indexschema config to the schema being configured. */
+ void SchemaConfigurer::configure(const IndexschemaConfig &cfg)
+ {
+     SchemaBuilder::build(cfg, _schema);
+ }
+
+
+ /** Apply an attributes config to the schema being configured. */
+ void SchemaConfigurer::configure(const AttributesConfig &cfg)
+ {
+     SchemaBuilder::build(cfg, _schema);
+ }
+
+
+ /** Apply a summary config to the schema being configured. */
+ void SchemaConfigurer::configure(const SummaryConfig & cfg)
+ {
+     SchemaBuilder::build(cfg, _schema);
+ }
+
+
+ /**
+  * Subscribe to the indexschema, attributes and summary configs for the
+  * given config id, routing each of them to the matching configure()
+  * overload so that it is applied to the referenced schema.
+  *
+  * The three subscription proxies are locals, so they are destroyed (and
+  * thereby unsubscribed) when the constructor returns.
+  * NOTE(review): this presumably relies on config::LegacySubscriber
+  * delivering the config from within subscribe() - confirm.
+  *
+  * @param schema   schema to populate; only a reference is kept
+  * @param configId config id used for all three subscriptions
+  **/
+ SchemaConfigurer::SchemaConfigurer(Schema &schema,
+                                    const vespalib::string &configId)
+     : _schema(schema)
+ {
+     typedef search::SubscriptionProxyNg<SchemaConfigurer, IndexschemaConfig> IndexSchemaProxy;
+     typedef search::SubscriptionProxyNg<SchemaConfigurer, AttributesConfig> AttributesProxy;
+     typedef search::SubscriptionProxyNg<SchemaConfigurer, SummaryConfig> SummaryProxy;
+
+     IndexSchemaProxy indexSchemaSubscriber(*this, &SchemaConfigurer::configure);
+     AttributesProxy attributesSubscriber(*this, &SchemaConfigurer::configure);
+     SummaryProxy summarySubscriber(*this, &SchemaConfigurer::configure);
+
+     // Subscription order is kept: index schema, attributes, summary.
+     indexSchemaSubscriber.subscribe(configId.c_str());
+     attributesSubscriber.subscribe(configId.c_str());
+     summarySubscriber.subscribe(configId.c_str());
+ }
+
+
+} // namespace search::index
+} // namespace search
diff --git a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.h b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.h
new file mode 100644
index 00000000000..3c63d13ed28
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/config-attributes.h>
+#include <vespa/config-indexschema.h>
+#include <vespa/config-summary.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchcommon/attribute/collectiontype.h>
+#include <vespa/searchcommon/attribute/basictype.h>
+
+namespace search {
+namespace index {
+
+/**
+ * Static helper that fills in a Schema from config objects.
+ *
+ * The public build() overloads each take one config type (indexschema,
+ * attributes or summary) and add the corresponding fields to the schema
+ * passed in. The private convert() overloads and convertSummaryType() map
+ * config-level data/collection types and summary type names onto
+ * Schema::DataType and Schema::CollectionType.
+ **/
+class SchemaBuilder
+{
+ static Schema::DataType
+ convert(const vespa::config::search::IndexschemaConfig::Indexfield::Datatype &type);
+
+ static Schema::CollectionType
+ convert(const vespa::config::search::IndexschemaConfig::Indexfield::Collectiontype &type);
+
+ static Schema::DataType
+ convert(const vespa::config::search::AttributesConfig::Attribute::Datatype &type);
+
+ static Schema::CollectionType
+ convert(const vespa::config::search::AttributesConfig::Attribute::Collectiontype &type);
+
+ static Schema::DataType
+ convertSummaryType(const vespalib::string &type);
+public:
+ /**
+ * Build from indexschema config: adds index fields and field sets.
+ *
+ * @param cfg vespa::config::search::IndexschemaConfig to use
+ * @param schema the schema that the fields and field sets are added to
+ **/
+ static void
+ build(const vespa::config::search::IndexschemaConfig &cfg, Schema &schema);
+ /**
+ * Build from attribute config: adds attribute fields.
+ *
+ * @param cfg vespa::config::search::AttributesConfig to use
+ * @param schema the schema that the attribute fields are added to
+ **/
+ static void
+ build(const vespa::config::search::AttributesConfig &cfg, Schema &schema);
+ /**
+ * Build from summary config: adds the summary fields of the class whose
+ * id equals the configured default summary id.
+ *
+ * @param cfg vespa::config::search::SummaryConfig to use
+ * @param schema the schema that the summary fields are added to
+ **/
+ static void
+ build(const vespa::config::search::SummaryConfig &cfg, Schema &schema);
+};
+
+class SchemaConfigurer
+{
+private:
+ Schema & _schema;
+ void configure(const vespa::config::search::IndexschemaConfig & cfg);
+ void configure(const vespa::config::search::AttributesConfig & cfg);
+ void configure(const vespa::config::search::SummaryConfig & cfg);
+
+public:
+ /**
+ * Load the given schema from config using the given config id.
+ *
+ * The private configure() overloads are the callbacks that receive the
+ * indexschema, attributes and summary configs; each one hands its config
+ * to SchemaBuilder::build() together with the schema.
+ *
+ * @param schema the schema to populate. Only a reference is stored, so
+ * the schema must stay alive for as long as it is used
+ * through this object.
+ * @param configId the config id used to retrieve the relevant config.
+ **/
+ SchemaConfigurer(Schema & schema, const vespalib::string &configId);
+};
+
+} // namespace search::index
+} // namespace search
+
diff --git a/searchcommon/src/vespa/searchcommon/common/undefinedvalues.h b/searchcommon/src/vespa/searchcommon/common/undefinedvalues.h
new file mode 100644
index 00000000000..dc33153dc10
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/common/undefinedvalues.h
@@ -0,0 +1,72 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cmath>
+#include <limits>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace attribute {
+
+ /*
+  * Returns the sentinel that represents "undefined" for type T.
+  *
+  * Primary template, intended for integer types: the sentinel is
+  * std::numeric_limits<T>::min(). For unsigned types that value is 0,
+  * and isUndefined() in this header never reports unsigned values as
+  * undefined.
+  *
+  * The float and double specializations below return a negated quiet NaN.
+  */
+ template <typename T>
+ T getUndefined() {
+ return std::numeric_limits<T>::min();
+ }
+
+ template <>
+ inline float getUndefined<float>() {
+ return -std::numeric_limits<float>::quiet_NaN();
+ }
+
+ template <>
+ inline double getUndefined<double>() {
+ return -std::numeric_limits<double>::quiet_NaN();
+ }
+
+
+ /*
+  * Tells whether a value is the "undefined" sentinel for its type.
+  *
+  * Primary template, intended for signed integers: compares the value
+  * with getUndefined<T>().
+  *
+  * Specializations below:
+  *  - uint8_t, uint16_t, uint32_t, uint64_t: always false, i.e. unsigned
+  *    values are never considered undefined.
+  *  - float, double: true for any NaN (std::isnan), not only the exact
+  *    value returned by getUndefined().
+  *  - vespalib::string: true when the string is empty.
+  */
+ template <typename T>
+ bool isUndefined(const T & value) {
+ return value == getUndefined<T>();
+ }
+
+ template <>
+ inline bool isUndefined<uint8_t>(const uint8_t &) {
+ return false;
+ }
+
+ template <>
+ inline bool isUndefined<uint16_t>(const uint16_t &) {
+ return false;
+ }
+
+ template <>
+ inline bool isUndefined<uint32_t>(const uint32_t &) {
+ return false;
+ }
+
+ template <>
+ inline bool isUndefined<uint64_t>(const uint64_t &) {
+ return false;
+ }
+
+ template <>
+ inline bool isUndefined<float>(const float & value) {
+ return std::isnan(value);
+ }
+
+ template <>
+ inline bool isUndefined<double>(const double & value) {
+ return std::isnan(value);
+ }
+
+ template <>
+ inline bool isUndefined<vespalib::string>(const vespalib::string & value) {
+ return value.empty();
+ }
+
+}
+}
+
diff --git a/searchcommon/src/vespa/searchcommon/config/.gitignore b/searchcommon/src/vespa/searchcommon/config/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/config/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/searchcommon/src/vespa/searchcommon/config/CMakeLists.txt b/searchcommon/src/vespa/searchcommon/config/CMakeLists.txt
new file mode 100644
index 00000000000..1f8034bc136
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/config/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchcommon_config INTERFACE
+ SOURCES
+ INSTALL lib64
+ DEPENDS
+)
diff --git a/searchcommon/src/vespa/searchcommon/config/subscriptionproxyng.h b/searchcommon/src/vespa/searchcommon/config/subscriptionproxyng.h
new file mode 100644
index 00000000000..d2b5570770f
--- /dev/null
+++ b/searchcommon/src/vespa/searchcommon/config/subscriptionproxyng.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/config/helper/legacysubscriber.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+ /**
+  * Adapter that subscribes to config of type CFG through a
+  * config::LegacySubscriber and forwards every delivered config to a
+  * member function of a target object.
+  *
+  * Copying is disabled (copy constructor and assignment are declared
+  * private and left undefined) because the object owns its subscriber.
+  *
+  * @tparam ME  type of the object receiving the config
+  * @tparam CFG config type to subscribe to
+  **/
+ template <typename ME, typename CFG>
+ class SubscriptionProxyNg : public config::IFetcherCallback<CFG>
+ {
+     typedef void (ME::*Method)(const CFG &cfg);
+
+ private:
+     ME &_target;                            // receiver of the config
+     Method _method;                         // member function invoked on _target
+     config::LegacySubscriber *_subscriber;  // owned; null while not subscribed
+     vespalib::string _cfgId;                // current config id, "" while not subscribed
+
+     SubscriptionProxyNg(const SubscriptionProxyNg&);
+     SubscriptionProxyNg &operator=(const SubscriptionProxyNg&);
+
+ public:
+     /**
+      * Create an unsubscribed proxy.
+      *
+      * @param target object that will receive config; only a reference is kept
+      * @param method member function of target to call with each config
+      **/
+     SubscriptionProxyNg(ME &target, Method method)
+         : _target(target),
+           _method(method),
+           _subscriber(nullptr),
+           _cfgId("")
+     {
+     }
+
+     /** Drops any active subscription. */
+     virtual ~SubscriptionProxyNg() {
+         unsubscribe();
+     }
+
+     /** @return the config id last subscribed to, or "" when unsubscribed. */
+     const char *getConfigId() const {
+         return _cfgId.c_str();
+     }
+
+     /**
+      * Subscribe to the given config id.
+      *
+      * If already subscribed to the same id this is a no-op; otherwise any
+      * existing subscription is dropped first. A null or empty id leaves
+      * the proxy unsubscribed.
+      *
+      * @param configId config id to subscribe to, may be null
+      **/
+     void subscribe(const char *configId) {
+         const bool haveId = (configId != nullptr);
+         if (_subscriber != nullptr) {
+             if (haveId && strcmp(configId, _subscriber->id().c_str()) == 0) {
+                 return; // same id; ignore
+             }
+             unsubscribe();
+         }
+         if (!haveId || configId[0] == '\0') {
+             return;
+         }
+         _cfgId = configId;
+         _subscriber = new config::LegacySubscriber();
+         _subscriber->subscribe<CFG>(configId, this);
+     }
+
+     /** Destroy the subscriber (if any) and reset the stored config id. */
+     void unsubscribe() {
+         delete _subscriber;
+         _subscriber = nullptr;
+         _cfgId = "";
+     }
+
+     /** Callback from the subscriber: hand the config on to the target. */
+     virtual void configure(std::unique_ptr<CFG> cfg) {
+         (_target.*_method)(*cfg);
+     }
+ };
+
+} // namespace search
+
diff --git a/searchcommon/testrun/.gitignore b/searchcommon/testrun/.gitignore
new file mode 100644
index 00000000000..8f0724a7dba
--- /dev/null
+++ b/searchcommon/testrun/.gitignore
@@ -0,0 +1,12 @@
+/test-report.html
+/test-report.html.bottom
+/test-report.html.entry
+/test-report.html.summary
+/test-report.html.top
+test.*.*.desc
+test.*.*.file.*
+test.*.*.files.html
+test.*.*.log
+tmp.*
+/test.*.*.result
+/Makefile