diff options
-rw-r--r-- | persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp | 447 |
1 files changed, 217 insertions, 230 deletions
diff --git a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp index 944217796ab..74fef13f141 100644 --- a/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp +++ b/persistence/src/vespa/persistence/dummyimpl/dummypersistence.cpp @@ -27,108 +27,109 @@ using document::FixedBucketSpaces; namespace storage::spi::dummy { - BucketContent::BucketContent() - : _entries(), - _gidMap(), - _info(), - _inUse(false), - _outdatedInfo(true), - _active(false) {} - - BucketContent::~BucketContent() = default; - - uint32_t - BucketContent::computeEntryChecksum(const BucketEntry &e) const { - vespalib::crc_32_type checksummer; - - uint64_t ts(e.entry->getTimestamp()); - checksummer.process_bytes(&e.gid, sizeof(GlobalId)); - checksummer.process_bytes(&ts, sizeof(uint64_t)); - return checksummer.checksum(); - } - - BucketChecksum - BucketContent::updateRollingChecksum(uint32_t entryChecksum) { - uint32_t checksum = _info.getChecksum(); - checksum ^= entryChecksum; - if (checksum == 0) { - checksum = 1; - } - return BucketChecksum(checksum); +BucketContent::BucketContent() + : _entries(), + _gidMap(), + _info(), + _inUse(false), + _outdatedInfo(true), + _active(false) +{} + +BucketContent::~BucketContent() = default; + +uint32_t +BucketContent::computeEntryChecksum(const BucketEntry &e) const { + vespalib::crc_32_type checksummer; + + uint64_t ts(e.entry->getTimestamp()); + checksummer.process_bytes(&e.gid, sizeof(GlobalId)); + checksummer.process_bytes(&ts, sizeof(uint64_t)); + return checksummer.checksum(); +} + +BucketChecksum +BucketContent::updateRollingChecksum(uint32_t entryChecksum) { + uint32_t checksum = _info.getChecksum(); + checksum ^= entryChecksum; + if (checksum == 0) { + checksum = 1; } + return BucketChecksum(checksum); +} - const BucketInfo & - BucketContent::getBucketInfo() const { - if (!_outdatedInfo) { - return _info; - } +const BucketInfo & +BucketContent::getBucketInfo() const { + if (!_outdatedInfo) { + return _info; + } - // Checksum should only depend on the newest entry for each document that - // has not been removed. - uint32_t unique = 0; - uint32_t uniqueSize = 0; - uint32_t totalSize = 0; - uint32_t checksum = 0; + // Checksum should only depend on the newest entry for each document that + // has not been removed. + uint32_t unique = 0; + uint32_t uniqueSize = 0; + uint32_t totalSize = 0; + uint32_t checksum = 0; - for (const BucketEntry &bucketEntry: _entries) { - const DocEntry &entry(*bucketEntry.entry); - const GlobalId &gid(bucketEntry.gid); + for (const BucketEntry &bucketEntry: _entries) { + const DocEntry &entry(*bucketEntry.entry); + const GlobalId &gid(bucketEntry.gid); - GidMapType::const_iterator gidIt(_gidMap.find(gid)); - assert(gidIt != _gidMap.end()); + GidMapType::const_iterator gidIt(_gidMap.find(gid)); + assert(gidIt != _gidMap.end()); - totalSize += entry.getSize(); - if (entry.isRemove()) { - continue; - } - // Only include if we're newest entry for the particular GID - if (gidIt->second.get() != &entry) { - continue; - } - ++unique; - uniqueSize += entry.getSize(); - - checksum ^= computeEntryChecksum(bucketEntry); + totalSize += entry.getSize(); + if (entry.isRemove()) { + continue; } - if (!unique) { - checksum = 0; - } else if (checksum == 0) { - checksum = 1; + // Only include if we're newest entry for the particular GID + if (gidIt->second.get() != &entry) { + continue; } + ++unique; + uniqueSize += entry.getSize(); - _info = BucketInfo(BucketChecksum(checksum), - unique, - uniqueSize, - _entries.size(), - totalSize, - BucketInfo::READY, - _active ? BucketInfo::ACTIVE : BucketInfo::NOT_ACTIVE); - - _outdatedInfo = false; - return _info; + checksum ^= computeEntryChecksum(bucketEntry); + } + if (!unique) { + checksum = 0; + } else if (checksum == 0) { + checksum = 1; } - namespace { + _info = BucketInfo(BucketChecksum(checksum), + unique, + uniqueSize, + _entries.size(), + totalSize, + BucketInfo::READY, + _active ? BucketInfo::ACTIVE : BucketInfo::NOT_ACTIVE); - struct TimestampLess { - bool operator()(const BucketEntry &bucketEntry, Timestamp t) { - return bucketEntry.entry->getTimestamp() < t; - } + _outdatedInfo = false; + return _info; +} - bool operator()(Timestamp t, const BucketEntry &bucketEntry) { - return t < bucketEntry.entry->getTimestamp(); - } - }; +namespace { - } // namespace +struct TimestampLess { + bool operator()(const BucketEntry &bucketEntry, Timestamp t) { + return bucketEntry.entry->getTimestamp() < t; + } - bool - BucketContent::hasTimestamp(Timestamp t) const { - if (!_entries.empty() && _entries.back().entry->getTimestamp() < t) { - return false; - } - return binary_search(_entries.begin(), _entries.end(), t, TimestampLess()); + bool operator()(Timestamp t, const BucketEntry &bucketEntry) { + return t < bucketEntry.entry->getTimestamp(); } +}; + +} // namespace + +bool +BucketContent::hasTimestamp(Timestamp t) const { + if (!_entries.empty() && _entries.back().entry->getTimestamp() < t) { + return false; + } + return binary_search(_entries.begin(), _entries.end(), t, TimestampLess()); +} /** * GID map semantics: @@ -146,168 +147,154 @@ namespace storage::spi::dummy { * document), we can remove the mapping entirely. */ - void - BucketContent::insert(DocEntry::SP e) { - LOG(spam, "insert(%s)", e->toString().c_str()); - const DocumentId *docId(e->getDocumentId()); - assert(docId != 0); - GlobalId gid(docId->getGlobalId()); - GidMapType::iterator gidIt(_gidMap.find(gid)); - - if (!_entries.empty() && - _entries.back().entry->getTimestamp() < e->getTimestamp()) { - _entries.push_back(BucketEntry(e, gid)); - } else { - std::vector<BucketEntry>::iterator it = - lower_bound(_entries.begin(), - _entries.end(), - e->getTimestamp(), - TimestampLess()); - if (it != _entries.end()) { - if (it->entry->getTimestamp() == e->getTimestamp()) { - if (*it->entry.get() == *e) { - LOG(debug, "Ignoring duplicate put entry %s", - e->toString().c_str()); - return; - } else { - LOG(error, "Entry %s was already present." - "Was trying to insert %s.", - it->entry->toString().c_str(), - e->toString().c_str()); - LOG_ABORT("should not reach here"); - } +void +BucketContent::insert(DocEntry::SP e) { + LOG(spam, "insert(%s)", e->toString().c_str()); + const DocumentId *docId(e->getDocumentId()); + assert(docId != 0); + GlobalId gid(docId->getGlobalId()); + GidMapType::iterator gidIt(_gidMap.find(gid)); + + if (!_entries.empty() && + _entries.back().entry->getTimestamp() < e->getTimestamp()) { + _entries.push_back(BucketEntry(e, gid)); + } else { + auto it = lower_bound(_entries.begin(), _entries.end(), e->getTimestamp(), TimestampLess()); + if (it != _entries.end()) { + if (it->entry->getTimestamp() == e->getTimestamp()) { + if (*it->entry.get() == *e) { + LOG(debug, "Ignoring duplicate put entry %s", e->toString().c_str()); + return; + } else { + LOG(error, "Entry %s was already present. Was trying to insert %s.", + it->entry->toString().c_str(), e->toString().c_str()); + LOG_ABORT("should not reach here"); } } - _entries.insert(it, BucketEntry(e, gid)); } + _entries.insert(it, BucketEntry(e, gid)); + } - // GID map points to newest entry for that particular GID - if (gidIt != _gidMap.end()) { - if (gidIt->second->getTimestamp() < e->getTimestamp()) { - // TODO(vekterli): add support for cheap info updates for putting - // newer versions of a document etc. by XORing away old checksum. - gidIt->second = e; + // GID map points to newest entry for that particular GID + if (gidIt != _gidMap.end()) { + if (gidIt->second->getTimestamp() < e->getTimestamp()) { + // TODO(vekterli): add support for cheap info updates for putting + // newer versions of a document etc. by XORing away old checksum. + gidIt->second = e; + } else { + LOG(spam, "Newly inserted entry %s was older than existing entry %s; not updating GID mapping", + e->toString().c_str(), gidIt->second->toString().c_str()); + } + _outdatedInfo = true; + } else { + _gidMap.insert(GidMapType::value_type(gid, e)); + // Since GID didn't exist before, it means we can do a running + // update of the bucket info. Bucket checksum is XOR of all entry + // checksums, which is commutative. + // Only bother to update if we don't have to re-do it all afterwards + // anyway. + // Updating bucketinfo before we update entries since we assume rest + // of function is nothrow. + if (!_outdatedInfo) { + if (!e->isRemove()) { + _info = BucketInfo(updateRollingChecksum( + computeEntryChecksum(BucketEntry(e, gid))), + _info.getDocumentCount() + 1, + _info.getDocumentSize() + e->getSize(), + _info.getEntryCount() + 1, + _info.getUsedSize() + e->getSize(), + _info.getReady(), + _info.getActive()); } else { - LOG(spam, - "Newly inserted entry %s was older than existing entry %s; " - "not updating GID mapping", - e->toString().c_str(), - gidIt->second->toString().c_str()); + _info = BucketInfo(_info.getChecksum(), + _info.getDocumentCount(), + _info.getDocumentSize(), + _info.getEntryCount() + 1, + _info.getUsedSize() + e->getSize(), + _info.getReady(), + _info.getActive()); } - _outdatedInfo = true; - } else { - _gidMap.insert(GidMapType::value_type(gid, e)); - // Since GID didn't exist before, it means we can do a running - // update of the bucket info. Bucket checksum is XOR of all entry - // checksums, which is commutative. - // Only bother to update if we don't have to re-do it all afterwards - // anyway. - // Updating bucketinfo before we update entries since we assume rest - // of function is nothrow. - if (!_outdatedInfo) { - if (!e->isRemove()) { - _info = BucketInfo(updateRollingChecksum( - computeEntryChecksum(BucketEntry(e, gid))), - _info.getDocumentCount() + 1, - _info.getDocumentSize() + e->getSize(), - _info.getEntryCount() + 1, - _info.getUsedSize() + e->getSize(), - _info.getReady(), - _info.getActive()); - } else { - _info = BucketInfo(_info.getChecksum(), - _info.getDocumentCount(), - _info.getDocumentSize(), - _info.getEntryCount() + 1, - _info.getUsedSize() + e->getSize(), - _info.getReady(), - _info.getActive()); - } - LOG(spam, - "After cheap bucketinfo update, state is %s (inserted %s)", - _info.toString().c_str(), - e->toString().c_str()); - } + LOG(spam, "After cheap bucketinfo update, state is %s (inserted %s)", + _info.toString().c_str(), e->toString().c_str()); } - - assert(_outdatedInfo || _info.getEntryCount() == _entries.size()); } - DocEntry::SP - BucketContent::getEntry(const DocumentId &did) const { - GidMapType::const_iterator it(_gidMap.find(did.getGlobalId())); - if (it != _gidMap.end()) { - return it->second; - } - return DocEntry::SP(); + assert(_outdatedInfo || _info.getEntryCount() == _entries.size()); +} + +DocEntry::SP +BucketContent::getEntry(const DocumentId &did) const { + auto it(_gidMap.find(did.getGlobalId())); + if (it != _gidMap.end()) { + return it->second; } + return DocEntry::SP(); +} - DocEntry::SP - BucketContent::getEntry(Timestamp t) const { - std::vector<BucketEntry>::const_iterator iter = - lower_bound(_entries.begin(), _entries.end(), t, TimestampLess()); +DocEntry::SP +BucketContent::getEntry(Timestamp t) const { + auto iter = lower_bound(_entries.begin(), _entries.end(), t, TimestampLess()); - if (iter == _entries.end() || iter->entry->getTimestamp() != t) { - return DocEntry::SP(); - } else { - return iter->entry; - } + if (iter == _entries.end() || iter->entry->getTimestamp() != t) { + return DocEntry::SP(); + } else { + return iter->entry; } +} - void - BucketContent::eraseEntry(Timestamp t) { - std::vector<BucketEntry>::iterator iter = - lower_bound(_entries.begin(), _entries.end(), t, TimestampLess()); - - if (iter != _entries.end() && iter->entry->getTimestamp() == t) { - assert(iter->entry->getDocumentId() != 0); - GidMapType::iterator gidIt( - _gidMap.find(iter->entry->getDocumentId()->getGlobalId())); - assert(gidIt != _gidMap.end()); - _entries.erase(iter); - if (gidIt->second->getTimestamp() == t) { - LOG(debug, "erasing timestamp %" PRIu64 " from GID map", t.getValue()); - // TODO(vekterli): O(1) bucket info update for this case - // FIXME: is this correct? seems like it could cause wrong behavior! - _gidMap.erase(gidIt); - } // else: not erasing newest entry, cannot erase from GID map - _outdatedInfo = true; - } +void +BucketContent::eraseEntry(Timestamp t) { + auto iter = lower_bound(_entries.begin(), _entries.end(), t, TimestampLess()); + + if (iter != _entries.end() && iter->entry->getTimestamp() == t) { + assert(iter->entry->getDocumentId() != 0); + GidMapType::iterator gidIt = _gidMap.find(iter->entry->getDocumentId()->getGlobalId()); + assert(gidIt != _gidMap.end()); + _entries.erase(iter); + if (gidIt->second->getTimestamp() == t) { + LOG(debug, "erasing timestamp %" PRIu64 " from GID map", t.getValue()); + // TODO(vekterli): O(1) bucket info update for this case + // FIXME: is this correct? seems like it could cause wrong behavior! + _gidMap.erase(gidIt); + } // else: not erasing newest entry, cannot erase from GID map + _outdatedInfo = true; } +} - DummyPersistence::DummyPersistence(const std::shared_ptr<const document::DocumentTypeRepo> &repo) - : _initialized(false), - _repo(repo), - _content(), - _nextIterator(1), - _iterators(), - _monitor(), - _clusterState() {} - - DummyPersistence::~DummyPersistence() = default; - - document::select::Node::UP - DummyPersistence::parseDocumentSelection(const string &documentSelection, bool allowLeaf) { - document::select::Node::UP ret; - try { - document::select::Parser parser(*_repo, document::BucketIdFactory()); - ret = parser.parse(documentSelection); - } catch (document::select::ParsingFailedException &e) { - return document::select::Node::UP(); - } - if (ret->isLeafNode() && !allowLeaf) { - return document::select::Node::UP(); - } - return ret; - } +DummyPersistence::DummyPersistence(const std::shared_ptr<const document::DocumentTypeRepo> &repo) + : _initialized(false), + _repo(repo), + _content(), + _nextIterator(1), + _iterators(), + _monitor(), + _clusterState() +{} + +DummyPersistence::~DummyPersistence() = default; + +document::select::Node::UP +DummyPersistence::parseDocumentSelection(const string &documentSelection, bool allowLeaf) { + document::select::Node::UP ret; + try { + document::select::Parser parser(*_repo, document::BucketIdFactory()); + ret = parser.parse(documentSelection); + } catch (document::select::ParsingFailedException &e) { + return document::select::Node::UP(); + } + if (ret->isLeafNode() && !allowLeaf) { + return document::select::Node::UP(); + } + return ret; +} - Result - DummyPersistence::initialize() { - assert(!_initialized); - _initialized = true; - return Result(); - } +Result +DummyPersistence::initialize() { + assert(!_initialized); + _initialized = true; + return Result(); +} #define DUMMYPERSISTENCE_VERIFY_INITIALIZED \ if (!_initialized) { \ |