diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2016-11-27 22:11:36 +0100 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2016-12-12 02:55:39 +0100 |
commit | 740d87cc9cac3760b6053a921fd6398f248ca5b6 (patch) | |
tree | 62d49bfadb08f11ef838d4a69035cdc5a1708765 /searchcommon/src | |
parent | 84c6488ccd7691d389b710716c8f233a74cc0d6f (diff) |
- Use distributionkey when serializing for sort instead of gid.
- Avoid having to include the world for just the data type.
Diffstat (limited to 'searchcommon/src')
6 files changed, 179 insertions, 137 deletions
diff --git a/searchcommon/src/vespa/searchcommon/common/CMakeLists.txt b/searchcommon/src/vespa/searchcommon/common/CMakeLists.txt index a70a71772f5..fd8e5419ccd 100644 --- a/searchcommon/src/vespa/searchcommon/common/CMakeLists.txt +++ b/searchcommon/src/vespa/searchcommon/common/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(searchcommon_searchcommon_common OBJECT SOURCES + datatype.cpp schema.cpp schemaconfigurer.cpp DEPENDS diff --git a/searchcommon/src/vespa/searchcommon/common/datatype.cpp b/searchcommon/src/vespa/searchcommon/common/datatype.cpp new file mode 100644 index 00000000000..f5aa9e50f3a --- /dev/null +++ b/searchcommon/src/vespa/searchcommon/common/datatype.cpp @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "datatype.h" +#include <vespa/config/common/exceptions.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/util/arraysize.h> + +namespace search { +namespace index { +namespace schema { + +using config::InvalidConfigException; + +DataType +dataTypeFromName(const vespalib::stringref &name) { + if (name == "UINT1") { return UINT1; } + else if (name == "UINT2") { return UINT2; } + else if (name == "UINT4") { return UINT4; } + else if (name == "INT8") { return INT8; } + else if (name == "INT16") { return INT16; } + else if (name == "INT32") { return INT32; } + else if (name == "INT64") { return INT64; } + else if (name == "FLOAT") { return FLOAT; } + else if (name == "DOUBLE") { return DOUBLE; } + else if (name == "STRING") { return STRING; } + else if (name == "RAW") { return RAW; } + else if (name == "BOOLEANTREE") { return BOOLEANTREE; } + else if (name == "TENSOR") { return TENSOR; } + else { + throw InvalidConfigException("Illegal enum value '" + name + "'"); + } +} + +const char *datatype_str[] = { "UINT1", + "UINT2", + "UINT4", + "INT8", + "INT16", + "INT32", + "INT64", + "FLOAT", + "DOUBLE", + "STRING", + "RAW", + "FEATURE_NOTUSED", + "BOOLEANTREE", + "TENSOR" }; + +vespalib::string +getTypeName(DataType type) { + if (type > vespalib::arraysize(datatype_str)) { + vespalib::asciistream ost; + ost << "UNKNOWN(" << type << ")"; + return ost.str(); + } + return datatype_str[type]; +} + +CollectionType +collectionTypeFromName(const vespalib::stringref &name) { + if (name == "SINGLE") { return SINGLE; } + else if (name == "ARRAY") { return ARRAY; } + else if (name == "WEIGHTEDSET") { return WEIGHTEDSET; } + else { + throw InvalidConfigException("Illegal enum value '" + name + "'"); + } +} + +const char *collectiontype_str[] = { "SINGLE", + "ARRAY", + "WEIGHTEDSET" }; + +vespalib::string +getTypeName(CollectionType type) { + if (type > vespalib::arraysize(collectiontype_str)) { + vespalib::asciistream ost; + ost << "UNKNOWN(" << type << ")"; + return ost.str(); + } + return collectiontype_str[type]; +} + + +} +} +} diff --git a/searchcommon/src/vespa/searchcommon/common/datatype.h b/searchcommon/src/vespa/searchcommon/common/datatype.h new file mode 100644 index 00000000000..de442e139db --- /dev/null +++ b/searchcommon/src/vespa/searchcommon/common/datatype.h @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> + +namespace search { +namespace index { +namespace schema { + +/** + * Basic data type for a field. + **/ +enum DataType { UINT1 = 0, + UINT2 = 1, + UINT4 = 2, + INT8 = 3, + INT16 = 4, + INT32 = 5, + INT64 = 6, + FLOAT = 7, + DOUBLE = 8, + STRING = 9, + RAW = 10, + //FEATURE = 11, + BOOLEANTREE = 12, + TENSOR = 13 +}; + +/** + * Collection type for a field. + **/ +enum CollectionType { SINGLE = 0, + ARRAY = 1, + WEIGHTEDSET = 2 +}; + +DataType dataTypeFromName(const vespalib::stringref &name); +vespalib::string getTypeName(DataType type); +CollectionType collectionTypeFromName(const vespalib::stringref &n); +vespalib::string getTypeName(CollectionType type); + +} +} +} diff --git a/searchcommon/src/vespa/searchcommon/common/schema.cpp b/searchcommon/src/vespa/searchcommon/common/schema.cpp index 673d78fbc89..88403f49d61 100644 --- a/searchcommon/src/vespa/searchcommon/common/schema.cpp +++ b/searchcommon/src/vespa/searchcommon/common/schema.cpp @@ -1,12 +1,11 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/fastos/fastos.h> -#include <vespa/log/log.h> +#include "schema.h" #include <fstream> #include <vespa/config/common/configparser.h> #include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/arraysize.h> -#include "schema.h" +#include <vespa/log/log.h> LOG_SETUP(".index.schema"); using namespace config; @@ -69,76 +68,10 @@ namespace index { const uint32_t Schema::UNKNOWN_FIELD_ID(std::numeric_limits<uint32_t>::max()); -Schema::DataType Schema::dataTypeFromName(const vespalib::stringref &name) { - if (name == "UINT1") { return UINT1; } - else if (name == "UINT2") { return UINT2; } - else if (name == "UINT4") { return UINT4; } - else if (name == "INT8") { return INT8; } - else if (name == "INT16") { return INT16; } - else if (name == "INT32") { return INT32; } - else if (name == "INT64") { return INT64; } - else if (name == "FLOAT") { return FLOAT; } - else if (name == "DOUBLE") { return DOUBLE; } - else if (name == "STRING") { return STRING; } - else if (name == "RAW") { return RAW; } - else if (name == "BOOLEANTREE") { return BOOLEANTREE; } - else if (name == "TENSOR") { return TENSOR; } - else { - throw InvalidConfigException("Illegal enum value '" + name + "'"); - } -} - -const char *datatype_str[] = { "UINT1", - "UINT2", - "UINT4", - "INT8", - "INT16", - "INT32", - "INT64", - "FLOAT", - "DOUBLE", - "STRING", - "RAW", - "FEATURE_NOTUSED", - "BOOLEANTREE", - "TENSOR" }; - -vespalib::string Schema::getTypeName(DataType type) { - if (type > vespalib::arraysize(datatype_str)) { - vespalib::asciistream ost; - ost << "UNKNOWN(" << type << ")"; - return ost.str(); - } - return datatype_str[type]; -} - -Schema::CollectionType Schema::collectionTypeFromName( - const vespalib::stringref &name) { - if (name == "SINGLE") { return SINGLE; } - else if (name == "ARRAY") { return ARRAY; } - else if (name == "WEIGHTEDSET") { return WEIGHTEDSET; } - else { - throw InvalidConfigException("Illegal enum value '" + name + "'"); - } -} - -const char *collectiontype_str[] = { "SINGLE", - "ARRAY", - "WEIGHTEDSET" }; - -vespalib::string Schema::getTypeName(CollectionType type) { - if (type > vespalib::arraysize(collectiontype_str)) { - vespalib::asciistream ost; - ost << "UNKNOWN(" << type << ")"; - return ost.str(); - } - return collectiontype_str[type]; -} - Schema::Field::Field(const vespalib::stringref &n, DataType dt) : _name(n), _dataType(dt), - _collectionType(SINGLE), + _collectionType(schema::SINGLE), _timestamp(0) { } @@ -155,10 +88,10 @@ Schema::Field::Field(const vespalib::stringref &n, // XXX: Resource leak if exception is thrown. Schema::Field::Field(const std::vector<vespalib::string> & lines) : _name(ConfigParser::parse<vespalib::string>("name", lines)), - _dataType(dataTypeFromName(ConfigParser::parse<vespalib::string>( + _dataType(schema::dataTypeFromName(ConfigParser::parse<vespalib::string>( "datatype", lines))), _collectionType( - collectionTypeFromName(ConfigParser::parse<vespalib::string>( + schema::collectionTypeFromName(ConfigParser::parse<vespalib::string>( "collectiontype", lines))), _timestamp(ConfigParser::parse<int64_t>("timestamp", lines, 0)) { diff --git a/searchcommon/src/vespa/searchcommon/common/schema.h b/searchcommon/src/vespa/searchcommon/common/schema.h index dcc559f6293..f3ab4e745b4 100644 --- a/searchcommon/src/vespa/searchcommon/common/schema.h +++ b/searchcommon/src/vespa/searchcommon/common/schema.h @@ -3,11 +3,14 @@ #pragma once #include <vespa/vespalib/stllike/string.h> -#include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/util/ptrholder.h> #include <vector> +#include "datatype.h" +namespace vespalib { + class asciistream; +} namespace search { namespace index { @@ -22,34 +25,9 @@ public: typedef std::shared_ptr<Schema> SP; typedef vespalib::PtrHolder<Schema> PH; - /** - * Basic data type for a field. - **/ - enum DataType { UINT1 = 0, - UINT2 = 1, - UINT4 = 2, - INT8 = 3, - INT16 = 4, - INT32 = 5, - INT64 = 6, - FLOAT = 7, - DOUBLE = 8, - STRING = 9, - RAW = 10, - //FEATURE = 11, - BOOLEANTREE = 12, - TENSOR = 13}; - static DataType dataTypeFromName(const vespalib::stringref &name); - static vespalib::string getTypeName(DataType type); + using DataType = schema::DataType; - /** - * Collection type for a field. - **/ - enum CollectionType { SINGLE = 0, - ARRAY = 1, - WEIGHTEDSET = 2 }; - static CollectionType collectionTypeFromName(const vespalib::stringref &n); - static vespalib::string getTypeName(CollectionType type); + using CollectionType = schema::CollectionType; /** * A single field has a name, data type and collection @@ -107,8 +85,7 @@ public: public: IndexField(const vespalib::stringref &name, DataType dt); - IndexField(const vespalib::stringref &name, DataType dt, - CollectionType ct); + IndexField(const vespalib::stringref &name, DataType dt, CollectionType ct); /** * Create this index field based on the given config lines. **/ @@ -176,7 +153,7 @@ private: std::vector<AttributeField> _attributeFields; std::vector<SummaryField> _summaryFields; std::vector<FieldSet> _fieldSets; - typedef vespalib::hash_map<vespalib::string, uint32_t> Name2IdMap; + using Name2IdMap = vespalib::hash_map<vespalib::string, uint32_t>; Name2IdMap _indexIds; Name2IdMap _attributeIds; Name2IdMap _summaryIds; diff --git a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp index 34071e241d7..56acb3d8ec2 100644 --- a/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp +++ b/searchcommon/src/vespa/searchcommon/common/schemaconfigurer.cpp @@ -19,13 +19,13 @@ SchemaBuilder::convert(const IndexschemaConfig::Indexfield::Datatype &type) { switch (type) { case IndexschemaConfig::Indexfield::STRING: - return Schema::STRING; + return schema::STRING; case IndexschemaConfig::Indexfield::INT64: - return Schema::INT64; + return schema::INT64; case IndexschemaConfig::Indexfield::BOOLEANTREE: - return Schema::BOOLEANTREE; + return schema::BOOLEANTREE; } - return Schema::STRING; + return schema::STRING; } @@ -34,13 +34,13 @@ SchemaBuilder::convert(const IndexschemaConfig::Indexfield::Collectiontype & typ { switch (type) { case IndexschemaConfig::Indexfield::SINGLE: - return Schema::SINGLE; + return schema::SINGLE; case IndexschemaConfig::Indexfield::ARRAY: - return Schema::ARRAY; + return schema::ARRAY; case IndexschemaConfig::Indexfield::WEIGHTEDSET: - return Schema::WEIGHTEDSET; + return schema::WEIGHTEDSET; } - return Schema::SINGLE; + return schema::SINGLE; } @@ -49,34 +49,34 @@ SchemaBuilder::convert(const AttributesConfig::Attribute::Datatype &type) { switch (type) { case AttributesConfig::Attribute::STRING: - return Schema::STRING; + return schema::STRING; case AttributesConfig::Attribute::UINT1: - return Schema::UINT1; + return schema::UINT1; case AttributesConfig::Attribute::UINT2: - return Schema::UINT2; + return schema::UINT2; case AttributesConfig::Attribute::UINT4: - return Schema::UINT4; + return schema::UINT4; case AttributesConfig::Attribute::INT8: - return Schema::INT8; + return schema::INT8; case AttributesConfig::Attribute::INT16: - return Schema::INT16; + return schema::INT16; case AttributesConfig::Attribute::INT32: - return Schema::INT32; + return schema::INT32; case AttributesConfig::Attribute::INT64: - return Schema::INT64; + return schema::INT64; case AttributesConfig::Attribute::FLOAT: - return Schema::FLOAT; + return schema::FLOAT; case AttributesConfig::Attribute::DOUBLE: - return Schema::DOUBLE; + return schema::DOUBLE; case AttributesConfig::Attribute::PREDICATE: - return Schema::BOOLEANTREE; + return schema::BOOLEANTREE; case AttributesConfig::Attribute::TENSOR: - return Schema::TENSOR; + return schema::TENSOR; default: break; } // TODO: exception? - return Schema::STRING; + return schema::STRING; } @@ -85,13 +85,13 @@ SchemaBuilder::convert(const AttributesConfig::Attribute::Collectiontype &type) { switch (type) { case AttributesConfig::Attribute::SINGLE: - return Schema::SINGLE; + return schema::SINGLE; case AttributesConfig::Attribute::ARRAY: - return Schema::ARRAY; + return schema::ARRAY; case AttributesConfig::Attribute::WEIGHTEDSET: - return Schema::WEIGHTEDSET; + return schema::WEIGHTEDSET; } - return Schema::SINGLE; + return schema::SINGLE; } @@ -99,30 +99,30 @@ Schema::DataType SchemaBuilder::convertSummaryType(const vespalib::string & type) { if (type == "byte") { - return Schema::INT8; + return schema::INT8; } else if (type == "short") { - return Schema::INT16; + return schema::INT16; } else if (type == "integer") { - return Schema::INT32; + return schema::INT32; } else if (type == "int64") { - return Schema::INT64; + return schema::INT64; } else if (type == "float") { - return Schema::FLOAT; + return schema::FLOAT; } else if (type == "double") { - return Schema::DOUBLE; + return schema::DOUBLE; } else if (type == "string" || type == "longstring" || type == "xmlstring" || type == "featuredata" || type == "jsonstring") { - return Schema::STRING; + return schema::STRING; } else if (type == "data" || type == "longdata") { - return Schema::RAW; + return schema::RAW; } - return Schema::RAW; + return schema::RAW; } |