From 0c8bd42e380fa826e1bddbf5da75e85ea0251698 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Fri, 28 Jul 2017 13:42:18 +0200 Subject: Use a presence vector to avoid expensive remove, an to lay the grouds for a faster find. --- document/src/tests/weightedsetfieldvaluetest.cpp | 1 + .../vespa/document/fieldvalue/mapfieldvalue.cpp | 110 +++++++++++++-------- .../src/vespa/document/fieldvalue/mapfieldvalue.h | 51 +++++----- .../serialization/vespadocumentserializer.cpp | 3 +- 4 files changed, 97 insertions(+), 68 deletions(-) (limited to 'document') diff --git a/document/src/tests/weightedsetfieldvaluetest.cpp b/document/src/tests/weightedsetfieldvaluetest.cpp index c14044b1940..cf8857904cc 100644 --- a/document/src/tests/weightedsetfieldvaluetest.cpp +++ b/document/src/tests/weightedsetfieldvaluetest.cpp @@ -116,6 +116,7 @@ void WeightedSetFieldValueTest::testWeightedSet() // By value buffer->setPos(0); deserialize(*buffer, value2); + CPPUNIT_ASSERT_EQUAL(size_t(3), value2.size()); CPPUNIT_ASSERT(value2.remove(IntFieldValue(1))); CPPUNIT_ASSERT(!value2.contains(IntFieldValue(1))); CPPUNIT_ASSERT_EQUAL(size_t(2), value2.size()); diff --git a/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp b/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp index 3a294fa1fb4..af5b5242ef6 100644 --- a/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp +++ b/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp @@ -36,8 +36,10 @@ const MapDataType *verifyMapType(const DataType& type) { MapFieldValue::MapFieldValue(const DataType &mapType) : FieldValue(), _type(verifyMapType(mapType)), + _count(0), _keys(static_cast(createArray(getMapType().getKeyType()).release())), _values(static_cast(createArray(getMapType().getValueType()).release())), + _present(), _altered(true) { } @@ -49,8 +51,10 @@ MapFieldValue::~MapFieldValue() MapFieldValue::MapFieldValue(const MapFieldValue & rhs) : FieldValue(rhs), _type(rhs._type), + _count(rhs._count), _keys(rhs._keys ? rhs._keys->clone() : nullptr), _values(rhs._values ? rhs._values->clone() : nullptr), + _present(rhs._present), _altered(rhs._altered) { } @@ -65,6 +69,16 @@ MapFieldValue::operator = (const MapFieldValue & rhs) return *this; } +void +MapFieldValue::swap(MapFieldValue & rhs) { + std::swap(_count, rhs._count); + std::swap(_keys, rhs._keys); + std::swap(_values, rhs._values); + std::swap(_present, rhs._present); + std::swap(_altered, rhs._altered); + std::swap(_type, rhs._type); +} + void MapFieldValue::verifyKey(const FieldValue & fv) const { const DataType &dt = getMapType().getKeyType(); @@ -95,7 +109,6 @@ MapFieldValue::insertVerify(const FieldValue& key, const FieldValue& value) } } else { push_back(key, value); - _altered = true; result = true; } return result; @@ -104,8 +117,10 @@ MapFieldValue::insertVerify(const FieldValue& key, const FieldValue& value) void MapFieldValue::push_back(const FieldValue& key, const FieldValue& value) { + _count++; _keys->push_back(key); _values->push_back(value); + _present.push_back(true); _altered = true; } @@ -113,8 +128,10 @@ MapFieldValue::push_back(const FieldValue& key, const FieldValue& value) void MapFieldValue::push_back(FieldValue::UP key, FieldValue::UP value) { + _count++; _keys->push_back(*key); _values->push_back(*value); + _present.push_back(true); _altered = true; } @@ -156,6 +173,27 @@ MapFieldValue::contains(const FieldValue& key) const return find(key) != end(); } +void +MapFieldValue::clear() { + _keys->clear(); + _values->clear(); + _present.clear(); + _count = 0; +} +void +MapFieldValue::reserve(size_t sz) { + _keys->reserve(sz); + _values->reserve(sz); + _present.reserve(sz); +} + +void MapFieldValue::resize(size_t sz) { + _keys->resize(sz); + _values->resize(sz); + _present.resize(sz, true); + _count = std::count(_present.begin(), _present.end(), true); +} + bool MapFieldValue::erase(const FieldValue& key) { @@ -163,8 +201,8 @@ MapFieldValue::erase(const FieldValue& key) iterator found(find(key)); bool result(found != end()); if (result) { - _keys->erase(_keys->begin() + found.offset()); - _values->erase(_values->begin() + found.offset()); + _count--; + _present[found.offset()] = false; _altered = true; } return result; @@ -186,8 +224,8 @@ MapFieldValue::compare(const FieldValue& other) const const MapFieldValue& o(dynamic_cast(other)); - if (_keys->size() != o._keys->size()) { - return (_keys->size() - o._keys->size()); + if (size() != o.size()) { + return (size() - o.size()); } const_iterator it1 = begin(); @@ -222,7 +260,7 @@ MapFieldValue::print(std::ostream& out, bool verbose, const std::string& indent) out << " - "; item.second->print(out, verbose, indent + " "); } - if (_keys->size() > 0) out << "\n" << indent; + if (size() > 0) out << "\n" << indent; out << ")"; } @@ -252,10 +290,9 @@ MapFieldValue::hasChanged() const MapFieldValue::const_iterator MapFieldValue::find(const FieldValue& key) const { - size_t sz = _keys->size(); - if ((sz > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) { - for (size_t i(0), m(_keys->size()); i < m; i++) { - if ((*_keys)[i].fastCompare(key) == 0) { + if ((size() > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) { + for (size_t i(0), m(_present.size()); i < m; i++) { + if (_present[i] && ((*_keys)[i].fastCompare(key) == 0)) { return const_iterator(*this, i); } } @@ -266,10 +303,9 @@ MapFieldValue::find(const FieldValue& key) const MapFieldValue::iterator MapFieldValue::find(const FieldValue& key) { - size_t sz = _keys->size(); - if ((sz > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) { - for (size_t i(0), m(_keys->size()); i < m; i++) { - if ((*_keys)[i].fastCompare(key) == 0) { + if ((size() > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) { + for (size_t i(0), m(_present.size()); i < m; i++) { + if (_present[i] && ((*_keys)[i].fastCompare(key) == 0)) { return iterator(*this, i); } } @@ -326,20 +362,20 @@ MapFieldValue::iterateNestedImpl(PathRange nested, } case FieldPathEntry::MAP_ALL_KEYS: LOG(spam, "MAP_ALL_KEYS"); - for (const_iterator it(begin()), mt(end()); it != mt; it++) { + for (const auto & entry : *this) { if (isWSet) { - handler.setWeight(static_cast(*it->second).getValue()); + handler.setWeight(static_cast(*entry.second).getValue()); } - wasModified = checkAndRemove(*it->first, - it->first->iterateNested(nested.next(), handler), + wasModified = checkAndRemove(*entry.first, + entry.first->iterateNested(nested.next(), handler), wasModified, keysToRemove); } break; case FieldPathEntry::MAP_ALL_VALUES: LOG(spam, "MAP_ALL_VALUES"); - for (const_iterator it(begin()), mt(end()); it != mt; it++) { - wasModified = checkAndRemove(*it->second, - it->second->iterateNested(nested.next(), handler), + for (const auto & entry : *this) { + wasModified = checkAndRemove(*entry.second, + entry.second->iterateNested(nested.next(), handler), wasModified, keysToRemove); } break; @@ -357,11 +393,11 @@ MapFieldValue::iterateNestedImpl(PathRange nested, } } else { PathRange next = nested.next(); - for (const_iterator it(begin()), mt(end()); it != mt; it++) { - LOG(spam, "key is '%s'", it->first->toString().c_str()); - handler.getVariables()[fpe.getVariableName()] = IndexValue(*it->first); + for (const auto & entry : *this) { + LOG(spam, "key is '%s'", entry.first->toString().c_str()); + handler.getVariables()[fpe.getVariableName()] = IndexValue(*entry.first); LOG(spam, "vars at this time = %s", handler.getVariables().toString().c_str()); - wasModified = checkAndRemove(*it->first, it->second->iterateNested(next, handler), + wasModified = checkAndRemove(*entry.first, entry.second->iterateNested(next, handler), wasModified, keysToRemove); } handler.getVariables().erase(fpe.getVariableName()); @@ -370,11 +406,11 @@ MapFieldValue::iterateNestedImpl(PathRange nested, } default: LOG(spam, "default"); - for (const_iterator it(begin()), mt(end()); it != mt; it++) { + for (const auto & entry : *this) { if (isWSet) { - handler.setWeight(static_cast(*it->second).getValue()); + handler.setWeight(static_cast(*entry.second).getValue()); } - wasModified = checkAndRemove(*it->first, it->first->iterateNested(nested, handler), + wasModified = checkAndRemove(*entry.first, entry.first->iterateNested(nested, handler), wasModified, keysToRemove); // Don't iterate over values /*wasModified = checkAndRemove(*it->second, @@ -397,13 +433,12 @@ MapFieldValue::iterateNestedImpl(PathRange nested, if (handler.handleComplex(complexFieldValue)) { LOG(spam, "calling handler.handleComplex for all map keys"); - for (const_iterator it(begin()), mt(end()); it != mt; it++) { + for (const auto & entry : *this) { if (isWSet) { - handler.setWeight(static_cast(*it->second).getValue()); + handler.setWeight(static_cast(*entry.second).getValue()); } - wasModified = checkAndRemove(*it->first, - it->first->iterateNested(nested, handler), - wasModified, keysToRemove); + wasModified = checkAndRemove(*entry.first, entry.first->iterateNested(nested, handler), + wasModified, keysToRemove); // XXX: Map value iteration is currently disabled since it changes // existing search behavior /*wasModified = checkAndRemove(*it->second, @@ -413,12 +448,9 @@ MapFieldValue::iterateNestedImpl(PathRange nested, } } handler.setWeight(1); - for (std::vector::iterator - i = keysToRemove.begin(), last = keysToRemove.end(); - i != last; ++i) - { - LOG(spam, "erasing map entry with key %s", (*i)->toString().c_str()); - const_cast(*this).erase(**i); + for (const FieldValue * key: keysToRemove) { + LOG(spam, "erasing map entry with key %s", key->toString().c_str()); + const_cast(*this).erase(*key); } return wasModified ? ModificationStatus::MODIFIED : ModificationStatus::NOT_MODIFIED; } diff --git a/document/src/vespa/document/fieldvalue/mapfieldvalue.h b/document/src/vespa/document/fieldvalue/mapfieldvalue.h index 049e0466eba..3a900cf60d7 100644 --- a/document/src/vespa/document/fieldvalue/mapfieldvalue.h +++ b/document/src/vespa/document/fieldvalue/mapfieldvalue.h @@ -18,23 +18,27 @@ class MapFieldValue : public FieldValue private: typedef vespalib::IArrayT IArray; const MapDataType *_type; - IArray::UP _keys; - IArray::UP _values; - bool _altered; + size_t _count; + IArray::UP _keys; + IArray::UP _values; + std::vector _present; + bool _altered; virtual bool addValue(const FieldValue& fv); virtual bool containsValue(const FieldValue& fv) const { return contains(fv); } virtual bool removeValue(const FieldValue& fv) { return erase(fv); } - bool checkAndRemove(const FieldValue& key, - fieldvalue::ModificationStatus status, - bool wasModified, - std::vector& keysToRemove) const; + bool checkAndRemove(const FieldValue& key, fieldvalue::ModificationStatus status, + bool wasModified, std::vector& keysToRemove) const; fieldvalue::ModificationStatus onIterateNested(PathRange nested, fieldvalue::IteratorHandler & handler) const override; // Utility method to avoid constant explicit casting const MapDataType& getMapType() const { return *_type; } void verifyKey(const FieldValue & key) const __attribute__((noinline)); void verifyValue(const FieldValue & value) const __attribute__((noinline)); + size_t nextPresent(size_t index) const { + for (; index < _present.size() && !_present[index]; ++index); + return index; + } public: typedef std::unique_ptr UP; class iterator { @@ -43,8 +47,7 @@ public: iterator(MapFieldValue & map, size_t index) : _map(&map), _index(index) { } bool operator == (const iterator & rhs) const { return _map == rhs._map && _index == rhs._index; } bool operator != (const iterator & rhs) const { return _map != rhs._map || _index != rhs._index; } - iterator& operator++() { ++_index; return *this; } - iterator operator++(int) { iterator other(*this); ++_index; return other; } + iterator& operator++() { _index = _map->nextPresent(_index+1); return *this; } const pair & operator * () const { setCurr(); return _current; } const pair * operator -> () const { setCurr(); return &_current; } size_t offset() const { return _index; } @@ -63,8 +66,7 @@ public: const_iterator(const MapFieldValue & map, size_t index) : _map(&map), _index(index) { } bool operator == (const const_iterator & rhs) const { return _map == rhs._map && _index == rhs._index; } bool operator != (const const_iterator & rhs) const { return _map != rhs._map || _index != rhs._index; } - const_iterator& operator++() { ++_index; return *this; } - const_iterator operator++(int) { const_iterator other(*this); ++_index; return other; } + const_iterator& operator++() { _index = _map->nextPresent(_index+1); return *this; } const pair & operator * () const { setCurr(); return _current; } const pair * operator -> () const { setCurr(); return &_current; } private: @@ -74,7 +76,7 @@ public: } const MapFieldValue *_map; size_t _index; - mutable pair _current; + mutable pair _current; }; MapFieldValue(const DataType &mapType); @@ -82,12 +84,7 @@ public: MapFieldValue(const MapFieldValue & rhs); MapFieldValue & operator = (const MapFieldValue & rhs); - void swap(MapFieldValue & rhs) { - std::swap(_keys, rhs._keys); - std::swap(_values, rhs._values); - std::swap(_altered, rhs._altered); - std::swap(_type, rhs._type); - } + void swap(MapFieldValue & rhs); void accept(FieldValueVisitor &visitor) override { visitor.visit(*this); } void accept(ConstFieldValueVisitor &visitor) const override { visitor.visit(*this); } @@ -112,11 +109,11 @@ public: bool contains(const FieldValue& key) const; // CollectionFieldValue methods kept for compatability's sake - virtual bool isEmpty() const { return _keys->empty(); } - virtual size_t size() const { return _keys->size(); } - virtual void clear() { _keys->clear(); _values->clear(); } - void reserve(size_t sz) { _keys->reserve(sz); _values->reserve(sz); } - void resize(size_t sz) { _keys->resize(sz); _values->resize(sz); } + virtual bool isEmpty() const { return _count == 0; } + virtual size_t size() const { return _count; } + virtual void clear(); + void reserve(size_t sz); + void resize(size_t sz); fieldvalue::ModificationStatus iterateNestedImpl(PathRange nested, fieldvalue::IteratorHandler & handler, const FieldValue& complexFieldValue) const; @@ -130,11 +127,11 @@ public: const DataType *getDataType() const override { return _type; } void printXml(XmlOutputStream& out) const override; - const_iterator begin() const { return const_iterator(*this, 0); } - iterator begin() { return iterator(*this, 0); } + const_iterator begin() const { return const_iterator(*this, nextPresent(0)); } + iterator begin() { return iterator(*this, nextPresent(0)); } - const_iterator end() const { return const_iterator(*this, size()); } - iterator end() { return iterator(*this, size()); } + const_iterator end() const { return const_iterator(*this, _present.size()); } + iterator end() { return iterator(*this, _present.size()); } const_iterator find(const FieldValue& fv) const; iterator find(const FieldValue& fv); diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp index 9e39c16eec3..2b6fe0833e7 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp @@ -383,8 +383,7 @@ void VespaDocumentSerializer::write(const StructFieldValue &value, } void VespaDocumentSerializer::write(const WeightedSetFieldValue &value) { - const WeightedSetDataType *type = - static_cast(value.getDataType()); + const WeightedSetDataType *type = static_cast(value.getDataType()); _stream << static_cast(type->getNestedType().getId()); _stream << static_cast(value.size()); for (const auto & entry : value) { -- cgit v1.2.3