diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-08-14 15:21:31 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-14 15:21:31 +0200 |
commit | 5fc7f6e157c4e8b04f3095a01e8e85653fc301f4 (patch) | |
tree | ce0b5c6b45559a545cc72ecf8b20ed78d76c1db3 | |
parent | 64a5c07ce69e62377488f9d2e5f183418e2564b8 (diff) | |
parent | df5a57673b782ab36ef8d24893d607f04514600e (diff) |
Merge pull request #14046 from vespa-engine/balder/use-vector-backed-set
Use a vector backed set for speed.
13 files changed, 123 insertions, 96 deletions
diff --git a/document/src/tests/fieldsettest.cpp b/document/src/tests/fieldsettest.cpp index c94523ea23f..29581ff4549 100644 --- a/document/src/tests/fieldsettest.cpp +++ b/document/src/tests/fieldsettest.cpp @@ -274,14 +274,11 @@ TEST(FieldCollectionTest, testHash ) { TestDocMan testDocMan; const DocumentTypeRepo& repo = testDocMan.getTypeRepo(); const DocumentType & type = *repo.getDocumentType("testdoctype1"); - Field::Set set; - EXPECT_EQ(0ul, FieldCollection(type, set).hash()); - set.insert(&type.getField("headerval")); - EXPECT_EQ(0x548599858c77ef83ul, FieldCollection(type, set).hash()); - set.insert(&type.getField("hstringval")); - EXPECT_EQ(0x4a7ff2406d36a9b0ul, FieldCollection(type, set).hash()); - set.erase(&type.getField("headerval")); - EXPECT_EQ(0x1e0918531b19734ul, FieldCollection(type, set).hash()); + EXPECT_EQ(0ul, FieldCollection(type, Field::Set::Builder().build()).hash()); + EXPECT_EQ(0x548599858c77ef83ul, FieldCollection(type, Field::Set::Builder().add(&type.getField("headerval")).build()).hash()); + EXPECT_EQ(0x4a7ff2406d36a9b0ul, FieldCollection(type, Field::Set::Builder().add(&type.getField("headerval")).add( + &type.getField("hstringval")).build()).hash()); + EXPECT_EQ(0x1e0918531b19734ul, FieldCollection(type, Field::Set::Builder().add(&type.getField("hstringval")).build()).hash()); } } // document diff --git a/document/src/vespa/document/base/field.cpp b/document/src/vespa/document/base/field.cpp index ac195ccf7ea..fb814fc5f17 100644 --- a/document/src/vespa/document/base/field.cpp +++ b/document/src/vespa/document/base/field.cpp @@ -7,10 +7,32 @@ #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/util/bobhash.h> +#include <algorithm> namespace document { -Field::Field() : Field("", 0, *DataType::INT) { } +Field::Set::Set(std::vector<CPtr> fields) + : _fields(std::move(fields)) +{ + std::sort(_fields.begin(), _fields.end(), Field::FieldPtrLess()); + _fields.erase(std::unique(_fields.begin(), _fields.end(), Field::FieldPtrEqual()), _fields.end()); +} + +bool +Field::Set::contains(const Field & field) const { + return std::binary_search(_fields.begin(), _fields.end(), &field, Field::FieldPtrLess()); +} + +bool +Field::Set::contains(const Set & fields) const { + return std::includes(_fields.begin(), _fields.end(), + fields._fields.begin(), fields._fields.end(), + Field::FieldPtrLess()); +} + +Field::Field() + : Field("", 0, *DataType::INT) +{ } Field::Field(vespalib::stringref name, int fieldId, const DataType& dataType) : FieldBase(name), diff --git a/document/src/vespa/document/base/field.h b/document/src/vespa/document/base/field.h index 3c739b2bfd3..7580b2b692f 100644 --- a/document/src/vespa/document/base/field.h +++ b/document/src/vespa/document/base/field.h @@ -28,16 +28,43 @@ class Field final : public vespalib::FieldBase, const DataType *_dataType; int _fieldId; public: - typedef std::shared_ptr<const Field> CSP; - typedef std::shared_ptr<Field> SP; + using CSP = std::shared_ptr<const Field>; + using SP = std::shared_ptr<Field>; + using CPtr = const Field *; - struct FieldPtrComparator { - bool operator()(const Field* f1, const Field* f2) const { + struct FieldPtrLess { + bool operator()(CPtr f1, CPtr f2) const { return (*f1 < *f2); } }; - using Set = std::set<const Field*, FieldPtrComparator>; + struct FieldPtrEqual { + bool operator()(CPtr f1, CPtr f2) const { + return (*f1 == *f2); + } + }; + + class Set { + public: + class Builder { + public: + Builder & reserve(size_t sz) { _vector.reserve(sz); return *this; } + Builder & add(CPtr field) { _vector.push_back(field); return *this; } + Set build() { return Set(std::move(_vector)); } + private: + std::vector<CPtr> _vector; + }; + bool contains(const Field & field) const; + bool contains(const Set & field) const; + size_t size() const { return _fields.size(); } + bool empty() const { return _fields.empty(); } + const CPtr * begin() const { return &_fields[0]; } + const CPtr * end() const { return begin() + _fields.size(); } + static Set emptySet() { return Builder().build(); } + private: + explicit Set(std::vector<CPtr> fields); + std::vector<CPtr> _fields; + }; /** * Creates a completely specified field instance. @@ -47,8 +74,7 @@ public: * @param type The datatype of the field. * @param headerField Whether or not this is a "header" field. */ - Field(vespalib::stringref name, int fieldId, - const DataType &type); + Field(vespalib::stringref name, int fieldId, const DataType &type); Field(); diff --git a/document/src/vespa/document/datatype/structdatatype.cpp b/document/src/vespa/document/datatype/structdatatype.cpp index ed17d22da59..c0c63ddb37f 100644 --- a/document/src/vespa/document/datatype/structdatatype.cpp +++ b/document/src/vespa/document/datatype/structdatatype.cpp @@ -164,11 +164,12 @@ bool StructDataType::hasField(int32_t fieldId) const { Field::Set StructDataType::getFieldSet() const { - Field::Set fields; + Field::Set::Builder builder; + builder.reserve(_idFieldMap.size()); for (const auto & entry : _idFieldMap) { - fields.insert(entry.second.get()); + builder.add(entry.second.get()); } - return fields; + return builder.build(); } namespace { diff --git a/document/src/vespa/document/fieldset/fieldsetrepo.cpp b/document/src/vespa/document/fieldset/fieldsetrepo.cpp index c576624b026..33cbf6185c4 100644 --- a/document/src/vespa/document/fieldset/fieldsetrepo.cpp +++ b/document/src/vespa/document/fieldset/fieldsetrepo.cpp @@ -44,18 +44,18 @@ parseFieldCollection(const DocumentTypeRepo& repo, const DocumentType& type(*typePtr); StringTokenizer tokenizer(fieldNames, ","); - Field::Set fields; + Field::Set::Builder builder; for (const auto & token : tokenizer) { const DocumentType::FieldSet * fs = type.getFieldSet(token); if (fs) { for (const auto & fieldName : fs->getFields()) { - fields.insert(&type.getField(fieldName)); + builder.add(&type.getField(fieldName)); } } else { - fields.insert(&type.getField(token)); + builder.add(&type.getField(token)); } } - return std::make_unique<FieldCollection>(type, std::move(fields)); + return std::make_unique<FieldCollection>(type, builder.build()); } } diff --git a/document/src/vespa/document/fieldset/fieldsets.cpp b/document/src/vespa/document/fieldset/fieldsets.cpp index 586cef0e5f4..317c1743bb5 100644 --- a/document/src/vespa/document/fieldset/fieldsets.cpp +++ b/document/src/vespa/document/fieldset/fieldsets.cpp @@ -4,6 +4,7 @@ #include <vespa/document/fieldvalue/document.h> #include <vespa/document/datatype/documenttype.h> #include <vespa/vespalib/stllike/asciistream.h> +#include <algorithm> #include <xxhash.h> namespace document { @@ -38,25 +39,10 @@ FieldCollection::contains(const FieldSet& fields) const { switch (fields.getType()) { case Type::FIELD: - return _set.find(static_cast<const Field*>(&fields)) != _set.end(); + return _set.contains(static_cast<const Field &>(fields)); case Type::SET: { const auto & coll = static_cast<const FieldCollection&>(fields); - - if (_set.size() < coll._set.size()) { - return false; - } - - auto iter = coll.getFields().begin(); - - while (iter != coll.getFields().end()) { - if (_set.find(*iter) == _set.end()) { - return false; - } - - ++iter; - } - - return true; + return _set.contains(coll.getFields()); } case Type::NONE: case Type::DOCID: diff --git a/document/src/vespa/document/fieldset/fieldsets.h b/document/src/vespa/document/fieldset/fieldsets.h index d37d72d6109..9537a5bdf61 100644 --- a/document/src/vespa/document/fieldset/fieldsets.h +++ b/document/src/vespa/document/fieldset/fieldsets.h @@ -65,8 +65,8 @@ public: uint64_t hash() const { return _hash; } private: - Field::Set _set; - uint64_t _hash; + Field::Set _set; + uint64_t _hash; const DocumentType& _docType; }; diff --git a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp index ed4c8938aa1..147bd9afb84 100644 --- a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp +++ b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp @@ -757,9 +757,8 @@ TEST("require that document selection and timestamp range works together") { } TEST("require that fieldset limits fields returned") { - document::Field::Set fields; - fields.insert(&getDocType().getField("header")); - document::FieldCollection limited(getDocType(), std::move(fields)); + document::FieldCollection limited(getDocType(), + document::Field::Set::Builder().add(&getDocType().getField("header")).build()); DocumentIterator itr(bucket(5), limited, selectAll(), newestV(), -1, false); itr.add(doc_with_fields("id:ns:foo::xxx1", Timestamp(1), bucket(5))); IterateResult res = itr.iterate(largeNum); diff --git a/searchcore/src/tests/proton/server/documentretriever_test.cpp b/searchcore/src/tests/proton/server/documentretriever_test.cpp index c9fece26655..a934254aca3 100644 --- a/searchcore/src/tests/proton/server/documentretriever_test.cpp +++ b/searchcore/src/tests/proton/server/documentretriever_test.cpp @@ -583,7 +583,7 @@ struct Lookup : public IFieldInfo Lookup() : _count(0) {} bool isFieldAttribute(const document::Field & field) const override { _count++; - return (field.getName()[0] == 'a'); + return ((field.getName()[0] % 2) == 1); // a, c, e... are attributes } mutable unsigned _count; }; @@ -592,29 +592,27 @@ TEST("require that fieldset can figure out their attributeness and rember it") { Lookup lookup; FieldSetAttributeDB fsDB(lookup); document::Field attr1("attr1", 1, *document::DataType::LONG); - document::Field attr2("attr2", 2, *document::DataType::LONG); - document::Field not_attr1("not_attr1", 3, *document::DataType::LONG); - document::Field::Set allAttr; - allAttr.insert(&attr1); + document::Field attr2("cttr2", 2, *document::DataType::LONG); + document::Field not_attr1("b_not_attr1", 3, *document::DataType::LONG); + document::Field::Set allAttr = document::Field::Set::Builder().add(&attr1).build(); EXPECT_TRUE(fsDB.areAllFieldsAttributes(13, allAttr)); EXPECT_EQUAL(1u, lookup._count); EXPECT_TRUE(fsDB.areAllFieldsAttributes(13, allAttr)); EXPECT_EQUAL(1u, lookup._count); - allAttr.insert(&attr2); + allAttr = document::Field::Set::Builder().add(&attr1).add(&attr2).build(); EXPECT_TRUE(fsDB.areAllFieldsAttributes(17, allAttr)); EXPECT_EQUAL(3u, lookup._count); EXPECT_TRUE(fsDB.areAllFieldsAttributes(17, allAttr)); EXPECT_EQUAL(3u, lookup._count); - document::Field::Set notAllAttr; - notAllAttr.insert(¬_attr1); + document::Field::Set notAllAttr = document::Field::Set::Builder().add(¬_attr1).build(); EXPECT_FALSE(fsDB.areAllFieldsAttributes(33, notAllAttr)); EXPECT_EQUAL(4u, lookup._count); EXPECT_FALSE(fsDB.areAllFieldsAttributes(33, notAllAttr)); EXPECT_EQUAL(4u, lookup._count); - notAllAttr.insert(&attr1); + notAllAttr = document::Field::Set::Builder().add(&attr1).add(¬_attr1).add(&attr2).build(); EXPECT_FALSE(fsDB.areAllFieldsAttributes(39, notAllAttr)); EXPECT_EQUAL(6u, lookup._count); EXPECT_FALSE(fsDB.areAllFieldsAttributes(39, notAllAttr)); diff --git a/searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp b/searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp index 4484338ddfd..031dd4f35e7 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp @@ -17,6 +17,8 @@ LOG_SETUP(".proton.server.documentretriever"); using document::Document; +using document::Field; +using document::FieldSet; using document::DocumentType; using document::DocumentTypeRepo; using document::PositionDataType; @@ -59,7 +61,7 @@ FieldSetAttributeDB::areAllFieldsAttributes(uint64_t key, const document::Field: auto found = _isFieldSetAttributeOnly.find(key); bool isAttributeOnly = true; if (found == _isFieldSetAttributeOnly.end()) { - for (const document::Field *field : set) { + for (const Field *field : set) { isAttributeOnly = _fieldInfo.isFieldAttribute(*field); if (!isAttributeOnly) break; } @@ -82,15 +84,16 @@ DocumentRetriever _attr_manager(attr_manager), _doc_store(doc_store), _possiblePositionFields(), - _attributeFields(), + _attributeFields(Field::Set::emptySet()), _areAllFieldsAttributes(true), _fieldSetAttributeInfo(*this) { const DocumentType * documentType = repo.getDocumentType(docTypeName.getName()); - document::Field::Set fields = documentType->getFieldSet(); + Field::Set fields = documentType->getFieldSet(); int32_t positionDataTypeId = PositionDataType::getInstance().getId(); LOG(debug, "checking document type '%s' for position fields", docTypeName.getName().c_str()); - for (const document::Field * field : fields) { + Field::Set::Builder attrBuilder; + for (const Field* field : fields) { if ((field->getDataType().getId() == positionDataTypeId) || is_array_of_position_type(field->getDataType())) { LOG(debug, "Field '%s' is a position field", field->getName().data()); const vespalib::string & zcurve_name = PositionDataType::getZCurveFieldName(field->getName()); @@ -109,27 +112,28 @@ DocumentRetriever && ((*attr)->getBasicType() != BasicType::PREDICATE) && ((*attr)->getBasicType() != BasicType::REFERENCE)) { - _attributeFields.insert(field); + attrBuilder.add(field); } else { _areAllFieldsAttributes = false; } } } + _attributeFields = attrBuilder.build(); } bool -DocumentRetriever::needFetchFromDocStore(const document::FieldSet & fieldSet) const { +DocumentRetriever::needFetchFromDocStore(const FieldSet & fieldSet) const { switch (fieldSet.getType()) { - case document::FieldSet::Type::NONE: - case document::FieldSet::Type::DOCID: + case FieldSet::Type::NONE: + case FieldSet::Type::DOCID: return false; - case document::FieldSet::Type::ALL: + case FieldSet::Type::ALL: return ! _areAllFieldsAttributes; - case document::FieldSet::Type::FIELD: { - const auto & field = static_cast<const document::Field &>(fieldSet); + case FieldSet::Type::FIELD: { + const auto & field = static_cast<const Field&>(fieldSet); return ! isFieldAttribute(field); } - case document::FieldSet::Type::SET: { + case FieldSet::Type::SET: { const auto &set = static_cast<const document::FieldCollection &>(fieldSet); return ! _fieldSetAttributeInfo.areAllFieldsAttributes(set.hash(), set.getFields()); } @@ -138,8 +142,8 @@ DocumentRetriever::needFetchFromDocStore(const document::FieldSet & fieldSet) co } bool -DocumentRetriever::isFieldAttribute(const document::Field & field) const { - return _attributeFields.find(&field) != _attributeFields.end(); +DocumentRetriever::isFieldAttribute(const Field& field) const { + return _attributeFields.contains(field); } DocumentRetriever::~DocumentRetriever() = default; @@ -229,34 +233,32 @@ DocumentRetriever::getFullDocument(DocumentIdT lid) const } Document::UP -DocumentRetriever::getPartialDocument(search::DocumentIdT lid, const document::DocumentId & docId, const document::FieldSet & fieldSet) const { +DocumentRetriever::getPartialDocument(search::DocumentIdT lid, const document::DocumentId & docId, const FieldSet & fieldSet) const { Document::UP doc; if (needFetchFromDocStore(fieldSet)) { doc = _doc_store.read(lid, getDocumentTypeRepo()); if (doc) { populate(lid, *doc); } - document::FieldSet::stripFields(*doc, fieldSet); + FieldSet::stripFields(*doc, fieldSet); } else { doc = std::make_unique<Document>(getDocumentType(), docId); switch (fieldSet.getType()) { - case document::FieldSet::Type::ALL: + case FieldSet::Type::ALL: populate(lid, *doc); break; - case document::FieldSet::Type::FIELD: { - const auto & field = static_cast<const document::Field &>(fieldSet); - document::Field::Set attributes; - attributes.insert(&field); - populate(lid, *doc, attributes); + case FieldSet::Type::FIELD: { + const auto & field = static_cast<const Field&>(fieldSet); + populate(lid, *doc, Field::Set::Builder().add(&field).build()); break; } - case document::FieldSet::Type::SET: { + case FieldSet::Type::SET: { const auto &set = static_cast<const document::FieldCollection &>(fieldSet); populate(lid, *doc, set.getFields()); break; } - case document::FieldSet::Type::NONE: - case document::FieldSet::Type::DOCID: + case FieldSet::Type::NONE: + case FieldSet::Type::DOCID: break; } } @@ -276,9 +278,9 @@ DocumentRetriever::populate(DocumentIdT lid, Document & doc) const { } void -DocumentRetriever::populate(DocumentIdT lid, Document & doc, const document::Field::Set & attributeFields) const +DocumentRetriever::populate(DocumentIdT lid, Document & doc, const Field::Set & attributeFields) const { - for (const document::Field * field : attributeFields) { + for (const Field* field : attributeFields) { AttributeGuard::UP attr = _attr_manager.getAttribute(field->getName()); if (lid < (*attr)->getCommittedDocIdLimit()) { DocumentFieldRetriever::populate(lid, doc, *field, **attr); diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp index 4656a5e9edd..d82b5e0f4a5 100644 --- a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp +++ b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp @@ -43,21 +43,18 @@ DataType::Type convert(Schema::DataType type) { } void -insertStructType(document::DocumenttypesConfig::Documenttype & cfg, - const StructDataType & structType) +insertStructType(document::DocumenttypesConfig::Documenttype & cfg, const StructDataType & structType) { typedef document::DocumenttypesConfig DTC; DTC::Documenttype::Datatype::Sstruct cfgStruct; cfgStruct.name = structType.getName(); Field::Set fieldSet = structType.getFieldSet(); - for (Field::Set::const_iterator itr = fieldSet.begin(); - itr != fieldSet.end(); ++itr) - { - DTC::Documenttype::Datatype::Sstruct::Field field; - field.name = (*itr)->getName(); - field.datatype = (*itr)->getDataType().getId(); - field.id = (*itr)->getId(); - cfgStruct.field.push_back(field); + for (const Field * field : fieldSet) { + DTC::Documenttype::Datatype::Sstruct::Field sField; + sField.name = field->getName(); + sField.datatype = field->getDataType().getId(); + sField.id = field->getId(); + cfgStruct.field.push_back(sField); } cfg.datatype.push_back(DTC::Documenttype::Datatype()); cfg.datatype.back().sstruct = cfgStruct; @@ -66,8 +63,7 @@ insertStructType(document::DocumenttypesConfig::Documenttype & cfg, using namespace document::config_builder; -TypeOrId makeCollection(TypeOrId datatype, - Schema::CollectionType collection_type) { +TypeOrId makeCollection(TypeOrId datatype, Schema::CollectionType collection_type) { switch (collection_type) { case schema::CollectionType::ARRAY: return Array(datatype); diff --git a/storage/src/vespa/storage/persistence/fieldvisitor.cpp b/storage/src/vespa/storage/persistence/fieldvisitor.cpp index 0071d7a010f..ace99dec276 100644 --- a/storage/src/vespa/storage/persistence/fieldvisitor.cpp +++ b/storage/src/vespa/storage/persistence/fieldvisitor.cpp @@ -9,7 +9,7 @@ namespace storage { FieldVisitor::~FieldVisitor() = default; void FieldVisitor::visitFieldValueNode(const document::select::FieldValueNode & node) { - _fields.insert(&_docType.getField(node.getRealFieldName())); + _fields.add(&_docType.getField(node.getRealFieldName())); } void FieldVisitor::visitComparison(const document::select::Compare & node) { diff --git a/storage/src/vespa/storage/persistence/fieldvisitor.h b/storage/src/vespa/storage/persistence/fieldvisitor.h index f2b07e193d6..99558fe6e9c 100644 --- a/storage/src/vespa/storage/persistence/fieldvisitor.h +++ b/storage/src/vespa/storage/persistence/fieldvisitor.h @@ -16,7 +16,7 @@ namespace storage { class FieldVisitor : public document::select::Visitor { private: document::DocumentType _docType; - document::Field::Set _fields; + document::Field::Set::Builder _fields; public: explicit FieldVisitor(const document::DocumentType & docType) @@ -26,7 +26,7 @@ public: ~FieldVisitor() override; document::FieldCollection getFieldSet() { - return document::FieldCollection(_docType, std::move(_fields)); + return document::FieldCollection(_docType, _fields.build()); } void visitFieldValueNode(const document::select::FieldValueNode &) override; |