diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2017-07-30 21:36:44 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2017-07-31 17:42:41 +0200 |
commit | 8f903d85afe80c5463d18d7266ad271935cf6710 (patch) | |
tree | 87397918ae91fc1bacc7831859b03401c95bb14f /document | |
parent | 8b27f3d0594921560b256e2ace092370e8840d95 (diff) |
Use a on demand hash_set to do fast lookup in mapfiledvalue
Diffstat (limited to 'document')
-rw-r--r-- | document/src/vespa/document/fieldvalue/mapfieldvalue.cpp | 99 | ||||
-rw-r--r-- | document/src/vespa/document/fieldvalue/mapfieldvalue.h | 14 |
2 files changed, 101 insertions, 12 deletions
diff --git a/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp b/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp index f4b4c72661c..081d0325a6e 100644 --- a/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp +++ b/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp @@ -6,7 +6,8 @@ #include <vespa/document/base/exceptions.h> #include <vespa/document/datatype/mapdatatype.h> #include <vespa/vespalib/util/xmlstream.h> -#include <ostream> +#include <vespa/vespalib/stllike/hash_set.hpp> +#include <cassert> #include <vespa/log/log.h> LOG_SETUP(".document.fieldvalue.map"); @@ -32,7 +33,37 @@ const MapDataType *verifyMapType(const DataType& type) { } return ptr; } -} // namespace + +struct Hasher { + Hasher(const MapFieldValue::IArray * keys) : _keys(keys) {} + uint32_t operator () (uint32_t index) const { return (*_keys)[index].hash(); } + const MapFieldValue::IArray * _keys; +}; + +struct Extract { + Extract(const MapFieldValue::IArray * keys) : _keys(keys) {} + const FieldValue & operator () (uint32_t index) const { return (*_keys)[index]; } + const MapFieldValue::IArray * _keys; +}; + +struct Equal { + Equal(const MapFieldValue::IArray * keys) : _keys(keys) {} + bool operator () (uint32_t a, uint32_t b) const { return (*_keys)[a].fastCompare((*_keys)[b]) == 0; } + const MapFieldValue::IArray * _keys; +}; + +using HashMapT = vespalib::hash_set<uint32_t, Hasher, Equal>; + +} + +namespace mapfieldvalue { + +class HashMap : public HashMapT { +public: + using HashMapT::HashMapT; +}; + +} MapFieldValue::MapFieldValue(const DataType &mapType) : FieldValue(), @@ -41,6 +72,7 @@ MapFieldValue::MapFieldValue(const DataType &mapType) _keys(static_cast<IArray *>(createArray(getMapType().getKeyType()).release())), _values(static_cast<IArray *>(createArray(getMapType().getValueType()).release())), _present(), + _lookupMap(), _altered(true) { } @@ -56,6 +88,7 @@ MapFieldValue::MapFieldValue(const MapFieldValue & rhs) : _keys(rhs._keys ? rhs._keys->clone() : nullptr), _values(rhs._values ? rhs._values->clone() : nullptr), _present(rhs._present), + _lookupMap(), _altered(rhs._altered) { } @@ -72,12 +105,13 @@ MapFieldValue::operator = (const MapFieldValue & rhs) void MapFieldValue::swap(MapFieldValue & rhs) { + std::swap(_type, rhs._type); std::swap(_count, rhs._count); std::swap(_keys, rhs._keys); std::swap(_values, rhs._values); std::swap(_present, rhs._present); + std::swap(_lookupMap, rhs._lookupMap); std::swap(_altered, rhs._altered); - std::swap(_type, rhs._type); } void MapFieldValue::verifyKey(const FieldValue & fv) const @@ -122,6 +156,10 @@ MapFieldValue::push_back(const FieldValue& key, const FieldValue& value) _keys->push_back(key); _values->push_back(value); _present.push_back(true); + if (_lookupMap) { + _lookupMap->insert(_present.size() - 1); + } + _altered = true; } @@ -133,6 +171,9 @@ MapFieldValue::push_back(FieldValue::UP key, FieldValue::UP value) _keys->push_back(*key); _values->push_back(*value); _present.push_back(true); + if (_lookupMap) { + _lookupMap->insert(_present.size() - 1); + } _altered = true; } @@ -179,6 +220,7 @@ MapFieldValue::clear() { _keys->clear(); _values->clear(); _present.clear(); + _lookupMap.reset(); _count = 0; } void @@ -192,6 +234,7 @@ void MapFieldValue::resize(size_t sz) { _keys->resize(sz); _values->resize(sz); _present.resize(sz, true); + _lookupMap.reset(); _count = std::count(_present.begin(), _present.end(), true); } @@ -204,6 +247,7 @@ MapFieldValue::erase(const FieldValue& key) if (result) { _count--; _present[found.offset()] = false; + _lookupMap->erase(found.offset()); _altered = true; } return result; @@ -297,14 +341,31 @@ MapFieldValue::createValue() const { return getMapType().getValueType().createFieldValue(); } +void +MapFieldValue::ensureLookupMap() const { + if (!_lookupMap) { + _lookupMap = std::move(buildLookupMap()); + } +} + +MapFieldValue::HashMapUP +MapFieldValue::buildLookupMap() const { + HashMapUP hashMap = std::make_unique<mapfieldvalue::HashMap>(size()*2, Hasher(_keys.get()), Equal(_keys.get())); + for (size_t i(0), m(_present.size()); i < m; i++) { + if (_present[i]) { + hashMap->insert(i); + } + } + return hashMap; +} + MapFieldValue::const_iterator MapFieldValue::find(const FieldValue& key) const { if ((size() > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) { - for (size_t i(0), m(_present.size()); i < m; i++) { - if (_present[i] && ((*_keys)[i].fastCompare(key) == 0)) { - return const_iterator(*this, i); - } + ssize_t index = findIndex(key); + if (index >= 0) { + return const_iterator(*this, index); } } return end(); @@ -314,14 +375,30 @@ MapFieldValue::iterator MapFieldValue::find(const FieldValue& key) { if ((size() > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) { - for (size_t i(0), m(_present.size()); i < m; i++) { - if (_present[i] && ((*_keys)[i].fastCompare(key) == 0)) { - return iterator(*this, i); - } + ssize_t index = findIndex(key); + if (index >= 0) { + return iterator(*this, index); } } return end(); } + +ssize_t +MapFieldValue::findIndex(const FieldValue& key) const +{ + if ((size() > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) { + ensureLookupMap(); + Extract extract(_keys.get()); + auto found = _lookupMap->find<FieldValue, Extract, vespalib::hash<FieldValue>, std::equal_to<FieldValue>>(key, extract); + if (found != _lookupMap->end()) { + uint32_t index = *found; + assert(_present[index]); + return index; + } + } + return -1l; +} + bool MapFieldValue::checkAndRemove(const FieldValue& key, ModificationStatus status, bool wasModified, std::vector<const FieldValue*>& keysToRemove) const diff --git a/document/src/vespa/document/fieldvalue/mapfieldvalue.h b/document/src/vespa/document/fieldvalue/mapfieldvalue.h index b6eef78794b..d761a37612e 100644 --- a/document/src/vespa/document/fieldvalue/mapfieldvalue.h +++ b/document/src/vespa/document/fieldvalue/mapfieldvalue.h @@ -9,18 +9,27 @@ #include "fieldvalue.h" #include <vespa/vespalib/util/polymorphicarray.h> +#include <vespa/vespalib/stllike/hash_map.h> + +#define VESPA_DLL_LOCAL __attribute__ ((visibility("hidden"))) namespace document { +namespace mapfieldvalue { + class HashMap; +} class MapFieldValue : public FieldValue { +public: + using IArray=vespalib::IArrayT<FieldValue>; + using HashMapUP = std::unique_ptr<mapfieldvalue::HashMap>; private: - typedef vespalib::IArrayT<FieldValue> IArray; const MapDataType *_type; size_t _count; IArray::UP _keys; IArray::UP _values; std::vector<bool> _present; + mutable HashMapUP _lookupMap; bool _altered; virtual bool addValue(const FieldValue& fv); @@ -38,6 +47,9 @@ private: for (; index < _present.size() && !_present[index]; ++index); return index; } + VESPA_DLL_LOCAL HashMapUP buildLookupMap() const __attribute__((noinline)); + VESPA_DLL_LOCAL void ensureLookupMap() const; + VESPA_DLL_LOCAL ssize_t findIndex(const FieldValue& fv) const; public: typedef std::unique_ptr<MapFieldValue> UP; class iterator { |