diff options
author | Geir Storli <geirstorli@yahoo.no> | 2017-03-30 16:52:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-03-30 16:52:39 +0200 |
commit | 70aa236eade721b0d9fabdece9921c7d2069779e (patch) | |
tree | 516db78aa5e8fcb4253f2b40d92a84f8cf57f944 | |
parent | e1abfd25bae0956b549eebe2be03a159565451b3 (diff) | |
parent | 7cb8c63c972cf0798e0abaf580bf8f82f5ec034c (diff) |
Merge pull request #2112 from yahoo/toregge/check-weighted-set-parameters-before-loading-attribute
Check collection type parameters before loading attribute.
4 files changed, 68 insertions, 2 deletions
diff --git a/searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp b/searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp index b11c4f78217..e4647a58735 100644 --- a/searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp +++ b/searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp @@ -27,7 +27,10 @@ namespace { const Config int32_sv(BasicType::Type::INT32); const Config int16_sv(BasicType::Type::INT16); const Config int32_array(BasicType::Type::INT32, CollectionType::Type::ARRAY); +const Config string_wset(BasicType::Type::STRING, CollectionType::Type::WSET); const Config predicate(BasicType::Type::PREDICATE); +const CollectionType wset2(CollectionType::Type::WSET, false, true); +const Config string_wset2(BasicType::Type::STRING, wset2); Config getPredicateWithArity(uint32_t arity) { @@ -119,6 +122,19 @@ TEST("require that mismatching collection type is not loaded") EXPECT_EQUAL(1, av->getNumDocs()); } +TEST("require that mismatching weighted set collection type params is not loaded") +{ + saveAttr("a", string_wset, 10, 2); + saveAttr("b", string_wset2, 10, 2); + Fixture f; + auto av = f.createInitializer("a", string_wset2, 5)->init(); + EXPECT_EQUAL(5, av->getCreateSerialNum()); + EXPECT_EQUAL(1, av->getNumDocs()); + auto av2 = f.createInitializer("b", string_wset, 5)->init(); + EXPECT_EQUAL(5, av2->getCreateSerialNum()); + EXPECT_EQUAL(1, av2->getNumDocs()); +} + TEST("require that predicate attributes can be initialized") { saveAttr("a", predicate, 10, 2); diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp index 0363b191656..1ce71968da0 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp @@ -14,6 +14,7 @@ LOG_SETUP(".proton.attribute.attribute_initializer"); using search::attribute::BasicType; +using search::attribute::CollectionType; using search::attribute::Config; using search::AttributeVector; using search::IndexMetaInfo; @@ -58,6 +59,29 @@ extraType(const AttributeHeader &header) return ""; } +vespalib::string +collectionTypeString(const CollectionType &type, bool detailed) +{ + vespalib::asciistream os; + os << type.asString(); + if (type.type() == CollectionType::Type::WSET && detailed) { + os << "("; + bool first = true; + if (type.createIfNonExistant()) { + os << "add"; + first = false; + } + if (type.removeIfZero()) { + if (!first) { + os << ","; + } + os << "remove"; + } + os << ")"; + } + return os.str(); +} + bool headerTypeOK(const AttributeHeader &header, const Config &cfg) { @@ -65,6 +89,10 @@ headerTypeOK(const AttributeHeader &header, const Config &cfg) (header.getCollectionType().type() != cfg.collectionType().type())) { return false; } + if (header.getCollectionTypeParamsSet() && + (header.getCollectionType() != cfg.collectionType())) { + return false; + } if (cfg.basicType().type() == BasicType::Type::TENSOR) { if (header.getTensorType() != cfg.tensorType()) { return false; @@ -118,13 +146,15 @@ logAttributeWrongType(const AttributeVector::SP &attr, const Config &cfg(attr->getConfig()); vespalib::string extraCfgType = extraType(cfg); vespalib::string extraHeaderType = extraType(header); + vespalib::string cfgCollStr = collectionTypeString(cfg.collectionType(), true); + vespalib::string headerCollStr = collectionTypeString(header.getCollectionType(), header.getCollectionTypeParamsSet()); LOG(info, "Attribute vector '%s' is of wrong type (expected %s/%s/%s, got %s/%s/%s)", attr->getBaseFileName().c_str(), cfg.basicType().asString(), - cfg.collectionType().asString(), + cfgCollStr.c_str(), extraCfgType.c_str(), header.getBasicType().asString(), - header.getCollectionType().asString(), + headerCollStr.c_str(), extraHeaderType.c_str()); } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp index 84f0720b140..f2c73229f4c 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp @@ -11,6 +11,8 @@ namespace { const vespalib::string versionTag = "version"; const vespalib::string dataTypeTag = "datatype"; const vespalib::string collectionTypeTag = "collectiontype"; +const vespalib::string createIfNonExistentTag = "collectiontype.createIfNonExistent"; +const vespalib::string removeIfZeroTag = "collectiontype.removeIfZero"; const vespalib::string createSerialNumTag = "createSerialNum"; const vespalib::string tensorTypeTag = "tensortype"; const vespalib::string predicateArityTag = "predicate.arity"; @@ -25,6 +27,7 @@ AttributeHeader::AttributeHeader() _collectionType(attribute::CollectionType::Type::SINGLE), _tensorType(vespalib::eval::ValueType::error_type()), _enumerated(false), + _collectionTypeParamsSet(false), _predicateParamsSet(false), _predicateParams(), _numDocs(0), @@ -53,6 +56,7 @@ AttributeHeader::AttributeHeader(const vespalib::string &fileName, _collectionType(collectionType), _tensorType(tensorType), _enumerated(enumerated), + _collectionTypeParamsSet(false), _predicateParamsSet(false), _predicateParams(predicateParams), _numDocs(numDocs), @@ -80,6 +84,16 @@ AttributeHeader::internalExtractTags(const vespalib::GenericHeader &header) if (header.hasTag(collectionTypeTag)) { _collectionType = CollectionType(header.getTag(collectionTypeTag).asString()); } + if (_collectionType.type() == attribute::CollectionType::WSET) { + if (header.hasTag(createIfNonExistentTag)) { + assert(header.hasTag(removeIfZeroTag)); + _collectionTypeParamsSet = true; + _collectionType.createIfNonExistant(header.getTag(createIfNonExistentTag).asBool()); + _collectionType.removeIfZero(header.getTag(removeIfZeroTag).asBool()); + } else { + assert(!header.hasTag(removeIfZeroTag)); + } + } if (_basicType.type() == BasicType::Type::TENSOR) { assert(header.hasTag(tensorTypeTag)); _tensorType = vespalib::eval::ValueType::from_spec(header.getTag(tensorTypeTag).asString()); @@ -115,6 +129,10 @@ AttributeHeader::addTags(vespalib::GenericHeader &header) const using Tag = vespalib::GenericHeader::Tag; header.putTag(Tag(dataTypeTag, _basicType.asString())); header.putTag(Tag(collectionTypeTag, _collectionType.asString())); + if (_collectionType.type() == attribute::CollectionType::WSET) { + header.putTag(Tag(createIfNonExistentTag, _collectionType.createIfNonExistant())); + header.putTag(Tag(removeIfZeroTag, _collectionType.removeIfZero())); + } header.putTag(Tag("uniqueValueCount", _uniqueValueCount)); header.putTag(Tag("totalValueCount", _totalValueCount)); header.putTag(Tag("docIdLimit", _numDocs)); diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.h b/searchlib/src/vespa/searchlib/attribute/attribute_header.h index 7e9ef78dafb..e761122d28f 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.h +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.h @@ -24,6 +24,7 @@ private: CollectionType _collectionType; vespalib::eval::ValueType _tensorType; bool _enumerated; + bool _collectionTypeParamsSet; bool _predicateParamsSet; PersistentPredicateParams _predicateParams; uint32_t _numDocs; @@ -65,6 +66,7 @@ public: uint32_t getVersion() const { return _version; } const PersistentPredicateParams &getPredicateParams() const { return _predicateParams; } bool getPredicateParamsSet() const { return _predicateParamsSet; } + bool getCollectionTypeParamsSet() const { return _collectionTypeParamsSet; } static AttributeHeader extractTags(const vespalib::GenericHeader &header); void addTags(vespalib::GenericHeader &header) const; }; |