summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Geir Storli <geirstorli@yahoo.no>, 2017-03-30 16:52:39 +0200
committer: GitHub <noreply@github.com>, 2017-03-30 16:52:39 +0200
commit: 70aa236eade721b0d9fabdece9921c7d2069779e (patch)
tree: 516db78aa5e8fcb4253f2b40d92a84f8cf57f944
parent: e1abfd25bae0956b549eebe2be03a159565451b3 (diff)
parent: 7cb8c63c972cf0798e0abaf580bf8f82f5ec034c (diff)
Merge pull request #2112 from yahoo/toregge/check-weighted-set-parameters-before-loading-attribute
Check collection type parameters before loading attribute.
-rw-r--r-- searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp | 16
-rw-r--r-- searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp | 34
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_header.cpp | 18
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_header.h | 2
4 files changed, 68 insertions, 2 deletions
diff --git a/searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp b/searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp
index b11c4f78217..e4647a58735 100644
--- a/searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp
+++ b/searchcore/src/tests/proton/attribute/attribute_initializer/attribute_initializer_test.cpp
@@ -27,7 +27,10 @@ namespace {
const Config int32_sv(BasicType::Type::INT32);
const Config int16_sv(BasicType::Type::INT16);
const Config int32_array(BasicType::Type::INT32, CollectionType::Type::ARRAY);
+const Config string_wset(BasicType::Type::STRING, CollectionType::Type::WSET);
const Config predicate(BasicType::Type::PREDICATE);
+const CollectionType wset2(CollectionType::Type::WSET, false, true);
+const Config string_wset2(BasicType::Type::STRING, wset2);
Config getPredicateWithArity(uint32_t arity)
{
@@ -119,6 +122,19 @@ TEST("require that mismatching collection type is not loaded")
EXPECT_EQUAL(1, av->getNumDocs());
}
+TEST("require that mismatching weighted set collection type params is not loaded")
+{
+    // Save "a" with the string_wset config and "b" with the string_wset2 config.
+    // NOTE(review): the trailing saveAttr arguments (10, 2) are presumably the
+    // saved serial number and document count - confirm against saveAttr.
+    saveAttr("a", string_wset, 10, 2);
+    saveAttr("b", string_wset2, 10, 2);
+    Fixture fixture;
+
+    // Initialize "a" with the other weighted set config: the resulting vector
+    // must carry the serial number handed to the initializer (5) and hold a
+    // single document.
+    auto attrA = fixture.createInitializer("a", string_wset2, 5)->init();
+    EXPECT_EQUAL(5, attrA->getCreateSerialNum());
+    EXPECT_EQUAL(1, attrA->getNumDocs());
+
+    // Same check in the opposite direction: "b" initialized with string_wset.
+    auto attrB = fixture.createInitializer("b", string_wset, 5)->init();
+    EXPECT_EQUAL(5, attrB->getCreateSerialNum());
+    EXPECT_EQUAL(1, attrB->getNumDocs());
+}
+
TEST("require that predicate attributes can be initialized")
{
saveAttr("a", predicate, 10, 2);
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp
index 0363b191656..1ce71968da0 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp
+++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_initializer.cpp
@@ -14,6 +14,7 @@
LOG_SETUP(".proton.attribute.attribute_initializer");
using search::attribute::BasicType;
+using search::attribute::CollectionType;
using search::attribute::Config;
using search::AttributeVector;
using search::IndexMetaInfo;
@@ -58,6 +59,29 @@ extraType(const AttributeHeader &header)
return "";
}
+// Builds a printable name for a collection type.
+//
+// The result always starts with type.asString(). When 'detailed' is true and
+// the type is a weighted set, a parenthesized suffix is appended that lists
+// "add" if createIfNonExistant() is set and "remove" if removeIfZero() is
+// set, separated by a comma when both are present ("()" when neither is).
+vespalib::string
+collectionTypeString(const CollectionType &type, bool detailed)
+{
+    vespalib::asciistream os;
+    os << type.asString();
+    const bool isWeightedSet = (type.type() == CollectionType::Type::WSET);
+    if (!isWeightedSet || !detailed) {
+        // Nothing more to print: plain name only.
+        return os.str();
+    }
+    const bool add = type.createIfNonExistant();
+    const bool remove = type.removeIfZero();
+    os << "(";
+    if (add) {
+        os << "add";
+    }
+    if (add && remove) {
+        // Separator is only needed when both flags are printed.
+        os << ",";
+    }
+    if (remove) {
+        os << "remove";
+    }
+    os << ")";
+    return os.str();
+}
+
bool
headerTypeOK(const AttributeHeader &header, const Config &cfg)
{
@@ -65,6 +89,10 @@ headerTypeOK(const AttributeHeader &header, const Config &cfg)
(header.getCollectionType().type() != cfg.collectionType().type())) {
return false;
}
+ if (header.getCollectionTypeParamsSet() &&
+ (header.getCollectionType() != cfg.collectionType())) {
+ return false;
+ }
if (cfg.basicType().type() == BasicType::Type::TENSOR) {
if (header.getTensorType() != cfg.tensorType()) {
return false;
@@ -118,13 +146,15 @@ logAttributeWrongType(const AttributeVector::SP &attr,
const Config &cfg(attr->getConfig());
vespalib::string extraCfgType = extraType(cfg);
vespalib::string extraHeaderType = extraType(header);
+ vespalib::string cfgCollStr = collectionTypeString(cfg.collectionType(), true);
+ vespalib::string headerCollStr = collectionTypeString(header.getCollectionType(), header.getCollectionTypeParamsSet());
LOG(info, "Attribute vector '%s' is of wrong type (expected %s/%s/%s, got %s/%s/%s)",
attr->getBaseFileName().c_str(),
cfg.basicType().asString(),
- cfg.collectionType().asString(),
+ cfgCollStr.c_str(),
extraCfgType.c_str(),
header.getBasicType().asString(),
- header.getCollectionType().asString(),
+ headerCollStr.c_str(),
extraHeaderType.c_str());
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
index 84f0720b140..f2c73229f4c 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
@@ -11,6 +11,8 @@ namespace {
const vespalib::string versionTag = "version";
const vespalib::string dataTypeTag = "datatype";
const vespalib::string collectionTypeTag = "collectiontype";
+const vespalib::string createIfNonExistentTag = "collectiontype.createIfNonExistent";
+const vespalib::string removeIfZeroTag = "collectiontype.removeIfZero";
const vespalib::string createSerialNumTag = "createSerialNum";
const vespalib::string tensorTypeTag = "tensortype";
const vespalib::string predicateArityTag = "predicate.arity";
@@ -25,6 +27,7 @@ AttributeHeader::AttributeHeader()
_collectionType(attribute::CollectionType::Type::SINGLE),
_tensorType(vespalib::eval::ValueType::error_type()),
_enumerated(false),
+ _collectionTypeParamsSet(false),
_predicateParamsSet(false),
_predicateParams(),
_numDocs(0),
@@ -53,6 +56,7 @@ AttributeHeader::AttributeHeader(const vespalib::string &fileName,
_collectionType(collectionType),
_tensorType(tensorType),
_enumerated(enumerated),
+ _collectionTypeParamsSet(false),
_predicateParamsSet(false),
_predicateParams(predicateParams),
_numDocs(numDocs),
@@ -80,6 +84,16 @@ AttributeHeader::internalExtractTags(const vespalib::GenericHeader &header)
if (header.hasTag(collectionTypeTag)) {
_collectionType = CollectionType(header.getTag(collectionTypeTag).asString());
}
+ if (_collectionType.type() == attribute::CollectionType::WSET) {
+ if (header.hasTag(createIfNonExistentTag)) {
+ assert(header.hasTag(removeIfZeroTag));
+ _collectionTypeParamsSet = true;
+ _collectionType.createIfNonExistant(header.getTag(createIfNonExistentTag).asBool());
+ _collectionType.removeIfZero(header.getTag(removeIfZeroTag).asBool());
+ } else {
+ assert(!header.hasTag(removeIfZeroTag));
+ }
+ }
if (_basicType.type() == BasicType::Type::TENSOR) {
assert(header.hasTag(tensorTypeTag));
_tensorType = vespalib::eval::ValueType::from_spec(header.getTag(tensorTypeTag).asString());
@@ -115,6 +129,10 @@ AttributeHeader::addTags(vespalib::GenericHeader &header) const
using Tag = vespalib::GenericHeader::Tag;
header.putTag(Tag(dataTypeTag, _basicType.asString()));
header.putTag(Tag(collectionTypeTag, _collectionType.asString()));
+ if (_collectionType.type() == attribute::CollectionType::WSET) {
+ header.putTag(Tag(createIfNonExistentTag, _collectionType.createIfNonExistant()));
+ header.putTag(Tag(removeIfZeroTag, _collectionType.removeIfZero()));
+ }
header.putTag(Tag("uniqueValueCount", _uniqueValueCount));
header.putTag(Tag("totalValueCount", _totalValueCount));
header.putTag(Tag("docIdLimit", _numDocs));
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.h b/searchlib/src/vespa/searchlib/attribute/attribute_header.h
index 7e9ef78dafb..e761122d28f 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.h
@@ -24,6 +24,7 @@ private:
CollectionType _collectionType;
vespalib::eval::ValueType _tensorType;
bool _enumerated;
+ bool _collectionTypeParamsSet;
bool _predicateParamsSet;
PersistentPredicateParams _predicateParams;
uint32_t _numDocs;
@@ -65,6 +66,7 @@ public:
uint32_t getVersion() const { return _version; }
const PersistentPredicateParams &getPredicateParams() const { return _predicateParams; }
bool getPredicateParamsSet() const { return _predicateParamsSet; }
+ bool getCollectionTypeParamsSet() const { return _collectionTypeParamsSet; }
static AttributeHeader extractTags(const vespalib::GenericHeader &header);
void addTags(vespalib::GenericHeader &header) const;
};