aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-08-13 15:14:29 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2020-08-13 15:38:58 +0000
commit7bf5c4d363334993135f9edf77d9bbed306d03e4 (patch)
treefff4671819427a68568239479b0c86e2630a699a
parenteb7242e20b41625bf5416897ab28ffcf06a68487 (diff)
Use a vector backed set for speed.
-rw-r--r--document/src/tests/fieldsettest.cpp12
-rw-r--r--document/src/vespa/document/base/field.cpp12
-rw-r--r--document/src/vespa/document/base/field.h30
-rw-r--r--document/src/vespa/document/datatype/structdatatype.cpp7
-rw-r--r--document/src/vespa/document/fieldset/fieldsetrepo.cpp8
-rw-r--r--document/src/vespa/document/fieldset/fieldsets.cpp20
-rw-r--r--document/src/vespa/document/fieldset/fieldsets.h4
-rw-r--r--searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp4
-rw-r--r--searchcore/src/tests/proton/server/documentretriever_test.cpp16
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/index/doctypebuilder.cpp20
-rw-r--r--storage/src/vespa/storage/persistence/fieldvisitor.h4
12 files changed, 77 insertions, 72 deletions
diff --git a/document/src/tests/fieldsettest.cpp b/document/src/tests/fieldsettest.cpp
index c94523ea23f..8a01c53c278 100644
--- a/document/src/tests/fieldsettest.cpp
+++ b/document/src/tests/fieldsettest.cpp
@@ -274,14 +274,10 @@ TEST(FieldCollectionTest, testHash ) {
TestDocMan testDocMan;
const DocumentTypeRepo& repo = testDocMan.getTypeRepo();
const DocumentType & type = *repo.getDocumentType("testdoctype1");
- Field::Set set;
- EXPECT_EQ(0ul, FieldCollection(type, set).hash());
- set.insert(&type.getField("headerval"));
- EXPECT_EQ(0x548599858c77ef83ul, FieldCollection(type, set).hash());
- set.insert(&type.getField("hstringval"));
- EXPECT_EQ(0x4a7ff2406d36a9b0ul, FieldCollection(type, set).hash());
- set.erase(&type.getField("headerval"));
- EXPECT_EQ(0x1e0918531b19734ul, FieldCollection(type, set).hash());
+ EXPECT_EQ(0ul, FieldCollection(type, Field::Set::Builder().build()).hash());
+ EXPECT_EQ(0x548599858c77ef83ul, FieldCollection(type, Field::Set::Builder().insert(&type.getField("headerval")).build()).hash());
+ EXPECT_EQ(0x4a7ff2406d36a9b0ul, FieldCollection(type, Field::Set::Builder().insert(&type.getField("headerval")).insert(&type.getField("hstringval")).build()).hash());
+ EXPECT_EQ(0x1e0918531b19734ul, FieldCollection(type, Field::Set::Builder().insert(&type.getField("hstringval")).build()).hash());
}
} // document
diff --git a/document/src/vespa/document/base/field.cpp b/document/src/vespa/document/base/field.cpp
index ac195ccf7ea..7f1a598cd76 100644
--- a/document/src/vespa/document/base/field.cpp
+++ b/document/src/vespa/document/base/field.cpp
@@ -7,9 +7,21 @@
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/bobhash.h>
+#include <algorithm>
namespace document {
+Field::Set::Set(std::vector<CPtr> fields)
+ : _fields(std::move(fields))
+{
+ std::sort(_fields.begin(), _fields.end(), Field::FieldPtrComparator());
+}
+
+bool
+Field::Set::contains(const Field & field) const {
+ return std::binary_search(_fields.begin(), _fields.end(), &field, Field::FieldPtrComparator());
+}
+
Field::Field() : Field("", 0, *DataType::INT) { }
Field::Field(vespalib::stringref name, int fieldId, const DataType& dataType)
diff --git a/document/src/vespa/document/base/field.h b/document/src/vespa/document/base/field.h
index 3c739b2bfd3..cf876903dfa 100644
--- a/document/src/vespa/document/base/field.h
+++ b/document/src/vespa/document/base/field.h
@@ -28,16 +28,35 @@ class Field final : public vespalib::FieldBase,
const DataType *_dataType;
int _fieldId;
public:
- typedef std::shared_ptr<const Field> CSP;
- typedef std::shared_ptr<Field> SP;
+ using CSP = std::shared_ptr<const Field>;
+ using SP = std::shared_ptr<Field>;
+ using CPtr = const Field *;
struct FieldPtrComparator {
- bool operator()(const Field* f1, const Field* f2) const {
+ bool operator()(CPtr f1, CPtr f2) const {
return (*f1 < *f2);
}
};
- using Set = std::set<const Field*, FieldPtrComparator>;
+ class Set {
+ public:
+ class Builder {
+ public:
+ Builder & reserve(size_t sz) { _vector.reserve(sz); return *this; }
+ Builder & insert(CPtr field) { _vector.emplace_back(field); return *this; }
+ Set build() { return Set(std::move(_vector)); }
+ private:
+ std::vector<CPtr> _vector;
+ };
+ bool contains(const Field & field) const;
+ size_t size() const { return _fields.size(); }
+ bool empty() const { return _fields.empty(); }
+ const CPtr * begin() const { return &_fields[0]; }
+ const CPtr * end() const { return begin() + _fields.size(); }
+ private:
+ explicit Set(std::vector<CPtr> fields);
+ std::vector<CPtr> _fields;
+ };
/**
* Creates a completely specified field instance.
@@ -47,8 +66,7 @@ public:
* @param type The datatype of the field.
* @param headerField Whether or not this is a "header" field.
*/
- Field(vespalib::stringref name, int fieldId,
- const DataType &type);
+ Field(vespalib::stringref name, int fieldId, const DataType &type);
Field();
diff --git a/document/src/vespa/document/datatype/structdatatype.cpp b/document/src/vespa/document/datatype/structdatatype.cpp
index ed17d22da59..11af020c465 100644
--- a/document/src/vespa/document/datatype/structdatatype.cpp
+++ b/document/src/vespa/document/datatype/structdatatype.cpp
@@ -164,11 +164,12 @@ bool StructDataType::hasField(int32_t fieldId) const {
Field::Set
StructDataType::getFieldSet() const
{
- Field::Set fields;
+ Field::Set::Builder builder;
+ builder.reserve(_idFieldMap.size());
for (const auto & entry : _idFieldMap) {
- fields.insert(entry.second.get());
+ builder.insert(entry.second.get());
}
- return fields;
+ return builder.build();
}
namespace {
diff --git a/document/src/vespa/document/fieldset/fieldsetrepo.cpp b/document/src/vespa/document/fieldset/fieldsetrepo.cpp
index c576624b026..5cc3e548d82 100644
--- a/document/src/vespa/document/fieldset/fieldsetrepo.cpp
+++ b/document/src/vespa/document/fieldset/fieldsetrepo.cpp
@@ -44,18 +44,18 @@ parseFieldCollection(const DocumentTypeRepo& repo,
const DocumentType& type(*typePtr);
StringTokenizer tokenizer(fieldNames, ",");
- Field::Set fields;
+ Field::Set::Builder builder;
for (const auto & token : tokenizer) {
const DocumentType::FieldSet * fs = type.getFieldSet(token);
if (fs) {
for (const auto & fieldName : fs->getFields()) {
- fields.insert(&type.getField(fieldName));
+ builder.insert(&type.getField(fieldName));
}
} else {
- fields.insert(&type.getField(token));
+ builder.insert(&type.getField(token));
}
}
- return std::make_unique<FieldCollection>(type, std::move(fields));
+ return std::make_unique<FieldCollection>(type, builder.build());
}
}
diff --git a/document/src/vespa/document/fieldset/fieldsets.cpp b/document/src/vespa/document/fieldset/fieldsets.cpp
index 586cef0e5f4..115ad37343d 100644
--- a/document/src/vespa/document/fieldset/fieldsets.cpp
+++ b/document/src/vespa/document/fieldset/fieldsets.cpp
@@ -4,6 +4,7 @@
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/datatype/documenttype.h>
#include <vespa/vespalib/stllike/asciistream.h>
+#include <algorithm>
#include <xxhash.h>
namespace document {
@@ -38,25 +39,10 @@ FieldCollection::contains(const FieldSet& fields) const
{
switch (fields.getType()) {
case Type::FIELD:
- return _set.find(static_cast<const Field*>(&fields)) != _set.end();
+ return _set.contains(static_cast<const Field &>(fields));
case Type::SET: {
const auto & coll = static_cast<const FieldCollection&>(fields);
-
- if (_set.size() < coll._set.size()) {
- return false;
- }
-
- auto iter = coll.getFields().begin();
-
- while (iter != coll.getFields().end()) {
- if (_set.find(*iter) == _set.end()) {
- return false;
- }
-
- ++iter;
- }
-
- return true;
+ return std::includes(_set.begin(), _set.end(), coll.getFields().begin(), coll.getFields().end(), Field::FieldPtrComparator());
}
case Type::NONE:
case Type::DOCID:
diff --git a/document/src/vespa/document/fieldset/fieldsets.h b/document/src/vespa/document/fieldset/fieldsets.h
index d37d72d6109..9537a5bdf61 100644
--- a/document/src/vespa/document/fieldset/fieldsets.h
+++ b/document/src/vespa/document/fieldset/fieldsets.h
@@ -65,8 +65,8 @@ public:
uint64_t hash() const { return _hash; }
private:
- Field::Set _set;
- uint64_t _hash;
+ Field::Set _set;
+ uint64_t _hash;
const DocumentType& _docType;
};
diff --git a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp
index ed4c8938aa1..d941a9f35b7 100644
--- a/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp
+++ b/searchcore/src/tests/proton/document_iterator/document_iterator_test.cpp
@@ -757,9 +757,7 @@ TEST("require that document selection and timestamp range works together") {
}
TEST("require that fieldset limits fields returned") {
- document::Field::Set fields;
- fields.insert(&getDocType().getField("header"));
- document::FieldCollection limited(getDocType(), std::move(fields));
+ document::FieldCollection limited(getDocType(), document::Field::Set::Builder().insert(&getDocType().getField("header")).build());
DocumentIterator itr(bucket(5), limited, selectAll(), newestV(), -1, false);
itr.add(doc_with_fields("id:ns:foo::xxx1", Timestamp(1), bucket(5)));
IterateResult res = itr.iterate(largeNum);
diff --git a/searchcore/src/tests/proton/server/documentretriever_test.cpp b/searchcore/src/tests/proton/server/documentretriever_test.cpp
index c9fece26655..a6f98719da2 100644
--- a/searchcore/src/tests/proton/server/documentretriever_test.cpp
+++ b/searchcore/src/tests/proton/server/documentretriever_test.cpp
@@ -583,7 +583,7 @@ struct Lookup : public IFieldInfo
Lookup() : _count(0) {}
bool isFieldAttribute(const document::Field & field) const override {
_count++;
- return (field.getName()[0] == 'a');
+ return ((field.getName()[0] % 2) == 1); // a, c, e... are attributes
}
mutable unsigned _count;
};
@@ -592,29 +592,27 @@ TEST("require that fieldset can figure out their attributeness and rember it") {
Lookup lookup;
FieldSetAttributeDB fsDB(lookup);
document::Field attr1("attr1", 1, *document::DataType::LONG);
- document::Field attr2("attr2", 2, *document::DataType::LONG);
- document::Field not_attr1("not_attr1", 3, *document::DataType::LONG);
- document::Field::Set allAttr;
- allAttr.insert(&attr1);
+ document::Field attr2("cttr2", 2, *document::DataType::LONG);
+ document::Field not_attr1("b_not_attr1", 3, *document::DataType::LONG);
+ document::Field::Set allAttr = document::Field::Set::Builder().insert(&attr1).build();
EXPECT_TRUE(fsDB.areAllFieldsAttributes(13, allAttr));
EXPECT_EQUAL(1u, lookup._count);
EXPECT_TRUE(fsDB.areAllFieldsAttributes(13, allAttr));
EXPECT_EQUAL(1u, lookup._count);
- allAttr.insert(&attr2);
+ allAttr = document::Field::Set::Builder().insert(&attr1).insert(&attr2).build();
EXPECT_TRUE(fsDB.areAllFieldsAttributes(17, allAttr));
EXPECT_EQUAL(3u, lookup._count);
EXPECT_TRUE(fsDB.areAllFieldsAttributes(17, allAttr));
EXPECT_EQUAL(3u, lookup._count);
- document::Field::Set notAllAttr;
- notAllAttr.insert(&not_attr1);
+ document::Field::Set notAllAttr = document::Field::Set::Builder().insert(&not_attr1).build();
EXPECT_FALSE(fsDB.areAllFieldsAttributes(33, notAllAttr));
EXPECT_EQUAL(4u, lookup._count);
EXPECT_FALSE(fsDB.areAllFieldsAttributes(33, notAllAttr));
EXPECT_EQUAL(4u, lookup._count);
- notAllAttr.insert(&attr1);
+ notAllAttr = document::Field::Set::Builder().insert(&attr1).insert(&not_attr1).insert(&attr2).build();
EXPECT_FALSE(fsDB.areAllFieldsAttributes(39, notAllAttr));
EXPECT_EQUAL(6u, lookup._count);
EXPECT_FALSE(fsDB.areAllFieldsAttributes(39, notAllAttr));
diff --git a/searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp b/searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp
index 4484338ddfd..cd0a0fcc971 100644
--- a/searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/documentretriever.cpp
@@ -82,7 +82,7 @@ DocumentRetriever
_attr_manager(attr_manager),
_doc_store(doc_store),
_possiblePositionFields(),
- _attributeFields(),
+ _attributeFields(document::Field::Set::Builder().build()),
_areAllFieldsAttributes(true),
_fieldSetAttributeInfo(*this)
{
@@ -90,6 +90,7 @@ DocumentRetriever
document::Field::Set fields = documentType->getFieldSet();
int32_t positionDataTypeId = PositionDataType::getInstance().getId();
LOG(debug, "checking document type '%s' for position fields", docTypeName.getName().c_str());
+ document::Field::Set::Builder attrBuilder;
for (const document::Field * field : fields) {
if ((field->getDataType().getId() == positionDataTypeId) || is_array_of_position_type(field->getDataType())) {
LOG(debug, "Field '%s' is a position field", field->getName().data());
@@ -109,12 +110,13 @@ DocumentRetriever
&& ((*attr)->getBasicType() != BasicType::PREDICATE)
&& ((*attr)->getBasicType() != BasicType::REFERENCE))
{
- _attributeFields.insert(field);
+ attrBuilder.insert(field);
} else {
_areAllFieldsAttributes = false;
}
}
}
+ _attributeFields = attrBuilder.build();
}
bool
@@ -139,7 +141,7 @@ DocumentRetriever::needFetchFromDocStore(const document::FieldSet & fieldSet) co
bool
DocumentRetriever::isFieldAttribute(const document::Field & field) const {
- return _attributeFields.find(&field) != _attributeFields.end();
+ return _attributeFields.contains(field);
}
DocumentRetriever::~DocumentRetriever() = default;
@@ -245,9 +247,7 @@ DocumentRetriever::getPartialDocument(search::DocumentIdT lid, const document::D
break;
case document::FieldSet::Type::FIELD: {
const auto & field = static_cast<const document::Field &>(fieldSet);
- document::Field::Set attributes;
- attributes.insert(&field);
- populate(lid, *doc, attributes);
+ populate(lid, *doc, document::Field::Set::Builder().insert(&field).build());
break;
}
case document::FieldSet::Type::SET: {
diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
index 4656a5e9edd..d82b5e0f4a5 100644
--- a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
+++ b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
@@ -43,21 +43,18 @@ DataType::Type convert(Schema::DataType type) {
}
void
-insertStructType(document::DocumenttypesConfig::Documenttype & cfg,
- const StructDataType & structType)
+insertStructType(document::DocumenttypesConfig::Documenttype & cfg, const StructDataType & structType)
{
typedef document::DocumenttypesConfig DTC;
DTC::Documenttype::Datatype::Sstruct cfgStruct;
cfgStruct.name = structType.getName();
Field::Set fieldSet = structType.getFieldSet();
- for (Field::Set::const_iterator itr = fieldSet.begin();
- itr != fieldSet.end(); ++itr)
- {
- DTC::Documenttype::Datatype::Sstruct::Field field;
- field.name = (*itr)->getName();
- field.datatype = (*itr)->getDataType().getId();
- field.id = (*itr)->getId();
- cfgStruct.field.push_back(field);
+ for (const Field * field : fieldSet) {
+ DTC::Documenttype::Datatype::Sstruct::Field sField;
+ sField.name = field->getName();
+ sField.datatype = field->getDataType().getId();
+ sField.id = field->getId();
+ cfgStruct.field.push_back(sField);
}
cfg.datatype.push_back(DTC::Documenttype::Datatype());
cfg.datatype.back().sstruct = cfgStruct;
@@ -66,8 +63,7 @@ insertStructType(document::DocumenttypesConfig::Documenttype & cfg,
using namespace document::config_builder;
-TypeOrId makeCollection(TypeOrId datatype,
- Schema::CollectionType collection_type) {
+TypeOrId makeCollection(TypeOrId datatype, Schema::CollectionType collection_type) {
switch (collection_type) {
case schema::CollectionType::ARRAY:
return Array(datatype);
diff --git a/storage/src/vespa/storage/persistence/fieldvisitor.h b/storage/src/vespa/storage/persistence/fieldvisitor.h
index f2b07e193d6..99558fe6e9c 100644
--- a/storage/src/vespa/storage/persistence/fieldvisitor.h
+++ b/storage/src/vespa/storage/persistence/fieldvisitor.h
@@ -16,7 +16,7 @@ namespace storage {
class FieldVisitor : public document::select::Visitor {
private:
document::DocumentType _docType;
- document::Field::Set _fields;
+ document::Field::Set::Builder _fields;
public:
explicit FieldVisitor(const document::DocumentType & docType)
@@ -26,7 +26,7 @@ public:
~FieldVisitor() override;
document::FieldCollection getFieldSet() {
- return document::FieldCollection(_docType, std::move(_fields));
+ return document::FieldCollection(_docType, _fields.build());
}
void visitFieldValueNode(const document::select::FieldValueNode &) override;