summaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2017-07-28 13:42:18 +0200
committerHenning Baldersheim <balder@yahoo-inc.com>2017-07-31 17:40:49 +0200
commit0c8bd42e380fa826e1bddbf5da75e85ea0251698 (patch)
treeccdf2301f626b51a7134e9b56e59c4ecb24a3ebe /document
parentd67c92c0738460432bb92b337f63168ee285fad8 (diff)
Use a presence vector to avoid expensive remove, an to lay the grouds for a faster find.
Diffstat (limited to 'document')
-rw-r--r--document/src/tests/weightedsetfieldvaluetest.cpp1
-rw-r--r--document/src/vespa/document/fieldvalue/mapfieldvalue.cpp110
-rw-r--r--document/src/vespa/document/fieldvalue/mapfieldvalue.h51
-rw-r--r--document/src/vespa/document/serialization/vespadocumentserializer.cpp3
4 files changed, 97 insertions, 68 deletions
diff --git a/document/src/tests/weightedsetfieldvaluetest.cpp b/document/src/tests/weightedsetfieldvaluetest.cpp
index c14044b1940..cf8857904cc 100644
--- a/document/src/tests/weightedsetfieldvaluetest.cpp
+++ b/document/src/tests/weightedsetfieldvaluetest.cpp
@@ -116,6 +116,7 @@ void WeightedSetFieldValueTest::testWeightedSet()
// By value
buffer->setPos(0);
deserialize(*buffer, value2);
+ CPPUNIT_ASSERT_EQUAL(size_t(3), value2.size());
CPPUNIT_ASSERT(value2.remove(IntFieldValue(1)));
CPPUNIT_ASSERT(!value2.contains(IntFieldValue(1)));
CPPUNIT_ASSERT_EQUAL(size_t(2), value2.size());
diff --git a/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp b/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp
index 3a294fa1fb4..af5b5242ef6 100644
--- a/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp
+++ b/document/src/vespa/document/fieldvalue/mapfieldvalue.cpp
@@ -36,8 +36,10 @@ const MapDataType *verifyMapType(const DataType& type) {
MapFieldValue::MapFieldValue(const DataType &mapType)
: FieldValue(),
_type(verifyMapType(mapType)),
+ _count(0),
_keys(static_cast<IArray *>(createArray(getMapType().getKeyType()).release())),
_values(static_cast<IArray *>(createArray(getMapType().getValueType()).release())),
+ _present(),
_altered(true)
{
}
@@ -49,8 +51,10 @@ MapFieldValue::~MapFieldValue()
MapFieldValue::MapFieldValue(const MapFieldValue & rhs) :
FieldValue(rhs),
_type(rhs._type),
+ _count(rhs._count),
_keys(rhs._keys ? rhs._keys->clone() : nullptr),
_values(rhs._values ? rhs._values->clone() : nullptr),
+ _present(rhs._present),
_altered(rhs._altered)
{
}
@@ -65,6 +69,16 @@ MapFieldValue::operator = (const MapFieldValue & rhs)
return *this;
}
+void
+MapFieldValue::swap(MapFieldValue & rhs) {
+ std::swap(_count, rhs._count);
+ std::swap(_keys, rhs._keys);
+ std::swap(_values, rhs._values);
+ std::swap(_present, rhs._present);
+ std::swap(_altered, rhs._altered);
+ std::swap(_type, rhs._type);
+}
+
void MapFieldValue::verifyKey(const FieldValue & fv) const
{
const DataType &dt = getMapType().getKeyType();
@@ -95,7 +109,6 @@ MapFieldValue::insertVerify(const FieldValue& key, const FieldValue& value)
}
} else {
push_back(key, value);
- _altered = true;
result = true;
}
return result;
@@ -104,8 +117,10 @@ MapFieldValue::insertVerify(const FieldValue& key, const FieldValue& value)
void
MapFieldValue::push_back(const FieldValue& key, const FieldValue& value)
{
+ _count++;
_keys->push_back(key);
_values->push_back(value);
+ _present.push_back(true);
_altered = true;
}
@@ -113,8 +128,10 @@ MapFieldValue::push_back(const FieldValue& key, const FieldValue& value)
void
MapFieldValue::push_back(FieldValue::UP key, FieldValue::UP value)
{
+ _count++;
_keys->push_back(*key);
_values->push_back(*value);
+ _present.push_back(true);
_altered = true;
}
@@ -156,6 +173,27 @@ MapFieldValue::contains(const FieldValue& key) const
return find(key) != end();
}
+void
+MapFieldValue::clear() {
+ _keys->clear();
+ _values->clear();
+ _present.clear();
+ _count = 0;
+}
+void
+MapFieldValue::reserve(size_t sz) {
+ _keys->reserve(sz);
+ _values->reserve(sz);
+ _present.reserve(sz);
+}
+
+void MapFieldValue::resize(size_t sz) {
+ _keys->resize(sz);
+ _values->resize(sz);
+ _present.resize(sz, true);
+ _count = std::count(_present.begin(), _present.end(), true);
+}
+
bool
MapFieldValue::erase(const FieldValue& key)
{
@@ -163,8 +201,8 @@ MapFieldValue::erase(const FieldValue& key)
iterator found(find(key));
bool result(found != end());
if (result) {
- _keys->erase(_keys->begin() + found.offset());
- _values->erase(_values->begin() + found.offset());
+ _count--;
+ _present[found.offset()] = false;
_altered = true;
}
return result;
@@ -186,8 +224,8 @@ MapFieldValue::compare(const FieldValue& other) const
const MapFieldValue& o(dynamic_cast<const MapFieldValue&>(other));
- if (_keys->size() != o._keys->size()) {
- return (_keys->size() - o._keys->size());
+ if (size() != o.size()) {
+ return (size() - o.size());
}
const_iterator it1 = begin();
@@ -222,7 +260,7 @@ MapFieldValue::print(std::ostream& out, bool verbose, const std::string& indent)
out << " - ";
item.second->print(out, verbose, indent + " ");
}
- if (_keys->size() > 0) out << "\n" << indent;
+ if (size() > 0) out << "\n" << indent;
out << ")";
}
@@ -252,10 +290,9 @@ MapFieldValue::hasChanged() const
MapFieldValue::const_iterator
MapFieldValue::find(const FieldValue& key) const
{
- size_t sz = _keys->size();
- if ((sz > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) {
- for (size_t i(0), m(_keys->size()); i < m; i++) {
- if ((*_keys)[i].fastCompare(key) == 0) {
+ if ((size() > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) {
+ for (size_t i(0), m(_present.size()); i < m; i++) {
+ if (_present[i] && ((*_keys)[i].fastCompare(key) == 0)) {
return const_iterator(*this, i);
}
}
@@ -266,10 +303,9 @@ MapFieldValue::find(const FieldValue& key) const
MapFieldValue::iterator
MapFieldValue::find(const FieldValue& key)
{
- size_t sz = _keys->size();
- if ((sz > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) {
- for (size_t i(0), m(_keys->size()); i < m; i++) {
- if ((*_keys)[i].fastCompare(key) == 0) {
+ if ((size() > 0) && (key.getClass().id() == (*_keys)[0].getClass().id())) {
+ for (size_t i(0), m(_present.size()); i < m; i++) {
+ if (_present[i] && ((*_keys)[i].fastCompare(key) == 0)) {
return iterator(*this, i);
}
}
@@ -326,20 +362,20 @@ MapFieldValue::iterateNestedImpl(PathRange nested,
}
case FieldPathEntry::MAP_ALL_KEYS:
LOG(spam, "MAP_ALL_KEYS");
- for (const_iterator it(begin()), mt(end()); it != mt; it++) {
+ for (const auto & entry : *this) {
if (isWSet) {
- handler.setWeight(static_cast<const IntFieldValue &>(*it->second).getValue());
+ handler.setWeight(static_cast<const IntFieldValue &>(*entry.second).getValue());
}
- wasModified = checkAndRemove(*it->first,
- it->first->iterateNested(nested.next(), handler),
+ wasModified = checkAndRemove(*entry.first,
+ entry.first->iterateNested(nested.next(), handler),
wasModified, keysToRemove);
}
break;
case FieldPathEntry::MAP_ALL_VALUES:
LOG(spam, "MAP_ALL_VALUES");
- for (const_iterator it(begin()), mt(end()); it != mt; it++) {
- wasModified = checkAndRemove(*it->second,
- it->second->iterateNested(nested.next(), handler),
+ for (const auto & entry : *this) {
+ wasModified = checkAndRemove(*entry.second,
+ entry.second->iterateNested(nested.next(), handler),
wasModified, keysToRemove);
}
break;
@@ -357,11 +393,11 @@ MapFieldValue::iterateNestedImpl(PathRange nested,
}
} else {
PathRange next = nested.next();
- for (const_iterator it(begin()), mt(end()); it != mt; it++) {
- LOG(spam, "key is '%s'", it->first->toString().c_str());
- handler.getVariables()[fpe.getVariableName()] = IndexValue(*it->first);
+ for (const auto & entry : *this) {
+ LOG(spam, "key is '%s'", entry.first->toString().c_str());
+ handler.getVariables()[fpe.getVariableName()] = IndexValue(*entry.first);
LOG(spam, "vars at this time = %s", handler.getVariables().toString().c_str());
- wasModified = checkAndRemove(*it->first, it->second->iterateNested(next, handler),
+ wasModified = checkAndRemove(*entry.first, entry.second->iterateNested(next, handler),
wasModified, keysToRemove);
}
handler.getVariables().erase(fpe.getVariableName());
@@ -370,11 +406,11 @@ MapFieldValue::iterateNestedImpl(PathRange nested,
}
default:
LOG(spam, "default");
- for (const_iterator it(begin()), mt(end()); it != mt; it++) {
+ for (const auto & entry : *this) {
if (isWSet) {
- handler.setWeight(static_cast<const IntFieldValue &>(*it->second).getValue());
+ handler.setWeight(static_cast<const IntFieldValue &>(*entry.second).getValue());
}
- wasModified = checkAndRemove(*it->first, it->first->iterateNested(nested, handler),
+ wasModified = checkAndRemove(*entry.first, entry.first->iterateNested(nested, handler),
wasModified, keysToRemove);
// Don't iterate over values
/*wasModified = checkAndRemove(*it->second,
@@ -397,13 +433,12 @@ MapFieldValue::iterateNestedImpl(PathRange nested,
if (handler.handleComplex(complexFieldValue)) {
LOG(spam, "calling handler.handleComplex for all map keys");
- for (const_iterator it(begin()), mt(end()); it != mt; it++) {
+ for (const auto & entry : *this) {
if (isWSet) {
- handler.setWeight(static_cast<const IntFieldValue &>(*it->second).getValue());
+ handler.setWeight(static_cast<const IntFieldValue &>(*entry.second).getValue());
}
- wasModified = checkAndRemove(*it->first,
- it->first->iterateNested(nested, handler),
- wasModified, keysToRemove);
+ wasModified = checkAndRemove(*entry.first, entry.first->iterateNested(nested, handler),
+ wasModified, keysToRemove);
// XXX: Map value iteration is currently disabled since it changes
// existing search behavior
/*wasModified = checkAndRemove(*it->second,
@@ -413,12 +448,9 @@ MapFieldValue::iterateNestedImpl(PathRange nested,
}
}
handler.setWeight(1);
- for (std::vector<const FieldValue*>::iterator
- i = keysToRemove.begin(), last = keysToRemove.end();
- i != last; ++i)
- {
- LOG(spam, "erasing map entry with key %s", (*i)->toString().c_str());
- const_cast<MapFieldValue&>(*this).erase(**i);
+ for (const FieldValue * key: keysToRemove) {
+ LOG(spam, "erasing map entry with key %s", key->toString().c_str());
+ const_cast<MapFieldValue&>(*this).erase(*key);
}
return wasModified ? ModificationStatus::MODIFIED : ModificationStatus::NOT_MODIFIED;
}
diff --git a/document/src/vespa/document/fieldvalue/mapfieldvalue.h b/document/src/vespa/document/fieldvalue/mapfieldvalue.h
index 049e0466eba..3a900cf60d7 100644
--- a/document/src/vespa/document/fieldvalue/mapfieldvalue.h
+++ b/document/src/vespa/document/fieldvalue/mapfieldvalue.h
@@ -18,23 +18,27 @@ class MapFieldValue : public FieldValue
private:
typedef vespalib::IArrayT<FieldValue> IArray;
const MapDataType *_type;
- IArray::UP _keys;
- IArray::UP _values;
- bool _altered;
+ size_t _count;
+ IArray::UP _keys;
+ IArray::UP _values;
+ std::vector<bool> _present;
+ bool _altered;
virtual bool addValue(const FieldValue& fv);
virtual bool containsValue(const FieldValue& fv) const { return contains(fv); }
virtual bool removeValue(const FieldValue& fv) { return erase(fv); }
- bool checkAndRemove(const FieldValue& key,
- fieldvalue::ModificationStatus status,
- bool wasModified,
- std::vector<const FieldValue*>& keysToRemove) const;
+ bool checkAndRemove(const FieldValue& key, fieldvalue::ModificationStatus status,
+ bool wasModified, std::vector<const FieldValue*>& keysToRemove) const;
fieldvalue::ModificationStatus onIterateNested(PathRange nested, fieldvalue::IteratorHandler & handler) const override;
// Utility method to avoid constant explicit casting
const MapDataType& getMapType() const { return *_type; }
void verifyKey(const FieldValue & key) const __attribute__((noinline));
void verifyValue(const FieldValue & value) const __attribute__((noinline));
+ size_t nextPresent(size_t index) const {
+ for (; index < _present.size() && !_present[index]; ++index);
+ return index;
+ }
public:
typedef std::unique_ptr<MapFieldValue> UP;
class iterator {
@@ -43,8 +47,7 @@ public:
iterator(MapFieldValue & map, size_t index) : _map(&map), _index(index) { }
bool operator == (const iterator & rhs) const { return _map == rhs._map && _index == rhs._index; }
bool operator != (const iterator & rhs) const { return _map != rhs._map || _index != rhs._index; }
- iterator& operator++() { ++_index; return *this; }
- iterator operator++(int) { iterator other(*this); ++_index; return other; }
+ iterator& operator++() { _index = _map->nextPresent(_index+1); return *this; }
const pair & operator * () const { setCurr(); return _current; }
const pair * operator -> () const { setCurr(); return &_current; }
size_t offset() const { return _index; }
@@ -63,8 +66,7 @@ public:
const_iterator(const MapFieldValue & map, size_t index) : _map(&map), _index(index) { }
bool operator == (const const_iterator & rhs) const { return _map == rhs._map && _index == rhs._index; }
bool operator != (const const_iterator & rhs) const { return _map != rhs._map || _index != rhs._index; }
- const_iterator& operator++() { ++_index; return *this; }
- const_iterator operator++(int) { const_iterator other(*this); ++_index; return other; }
+ const_iterator& operator++() { _index = _map->nextPresent(_index+1); return *this; }
const pair & operator * () const { setCurr(); return _current; }
const pair * operator -> () const { setCurr(); return &_current; }
private:
@@ -74,7 +76,7 @@ public:
}
const MapFieldValue *_map;
size_t _index;
- mutable pair _current;
+ mutable pair _current;
};
MapFieldValue(const DataType &mapType);
@@ -82,12 +84,7 @@ public:
MapFieldValue(const MapFieldValue & rhs);
MapFieldValue & operator = (const MapFieldValue & rhs);
- void swap(MapFieldValue & rhs) {
- std::swap(_keys, rhs._keys);
- std::swap(_values, rhs._values);
- std::swap(_altered, rhs._altered);
- std::swap(_type, rhs._type);
- }
+ void swap(MapFieldValue & rhs);
void accept(FieldValueVisitor &visitor) override { visitor.visit(*this); }
void accept(ConstFieldValueVisitor &visitor) const override { visitor.visit(*this); }
@@ -112,11 +109,11 @@ public:
bool contains(const FieldValue& key) const;
// CollectionFieldValue methods kept for compatability's sake
- virtual bool isEmpty() const { return _keys->empty(); }
- virtual size_t size() const { return _keys->size(); }
- virtual void clear() { _keys->clear(); _values->clear(); }
- void reserve(size_t sz) { _keys->reserve(sz); _values->reserve(sz); }
- void resize(size_t sz) { _keys->resize(sz); _values->resize(sz); }
+ virtual bool isEmpty() const { return _count == 0; }
+ virtual size_t size() const { return _count; }
+ virtual void clear();
+ void reserve(size_t sz);
+ void resize(size_t sz);
fieldvalue::ModificationStatus iterateNestedImpl(PathRange nested, fieldvalue::IteratorHandler & handler,
const FieldValue& complexFieldValue) const;
@@ -130,11 +127,11 @@ public:
const DataType *getDataType() const override { return _type; }
void printXml(XmlOutputStream& out) const override;
- const_iterator begin() const { return const_iterator(*this, 0); }
- iterator begin() { return iterator(*this, 0); }
+ const_iterator begin() const { return const_iterator(*this, nextPresent(0)); }
+ iterator begin() { return iterator(*this, nextPresent(0)); }
- const_iterator end() const { return const_iterator(*this, size()); }
- iterator end() { return iterator(*this, size()); }
+ const_iterator end() const { return const_iterator(*this, _present.size()); }
+ iterator end() { return iterator(*this, _present.size()); }
const_iterator find(const FieldValue& fv) const;
iterator find(const FieldValue& fv);
diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp
index 9e39c16eec3..2b6fe0833e7 100644
--- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp
+++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp
@@ -383,8 +383,7 @@ void VespaDocumentSerializer::write(const StructFieldValue &value,
}
void VespaDocumentSerializer::write(const WeightedSetFieldValue &value) {
- const WeightedSetDataType *type =
- static_cast<const WeightedSetDataType *>(value.getDataType());
+ const WeightedSetDataType *type = static_cast<const WeightedSetDataType *>(value.getDataType());
_stream << static_cast<uint32_t>(type->getNestedType().getId());
_stream << static_cast<uint32_t>(value.size());
for (const auto & entry : value) {