diff options
author | Tor Egge <Tor.Egge@oath.com> | 2017-12-12 13:22:41 +0000 |
---|---|---|
committer | Tor Egge <Tor.Egge@oath.com> | 2017-12-12 13:22:41 +0000 |
commit | 855cce458dffb6c699555a664f11a8a25fa43100 (patch) | |
tree | e6466ef1204f06a52b2872268a581efb09061416 /searchlib/src/tests/attribute/benchmark | |
parent | 9408c19cd6d340d9d2f09c67699e67afd62fbb12 (diff) |
Eliminate mutated version of Runnable.
Diffstat (limited to 'searchlib/src/tests/attribute/benchmark')
3 files changed, 587 insertions, 5 deletions
diff --git a/searchlib/src/tests/attribute/benchmark/attributebenchmark.cpp b/searchlib/src/tests/attribute/benchmark/attributebenchmark.cpp index e2dc6b2b2ef..5722b7c90ca 100644 --- a/searchlib/src/tests/attribute/benchmark/attributebenchmark.cpp +++ b/searchlib/src/tests/attribute/benchmark/attributebenchmark.cpp @@ -8,11 +8,12 @@ #include <vespa/searchlib/attribute/singlestringattribute.h> #include <vespa/searchlib/attribute/multistringattribute.h> #include <vespa/searchlib/attribute/attrvector.h> +#include <vespa/fastos/thread.h> #include <vespa/fastos/app.h> #include <iostream> #include <fstream> -#include "../attributesearcher.h" -#include "../attributeupdater.h" +#include "attributesearcher.h" +#include "attributeupdater.h" #include <sys/resource.h> #include <vespa/log/log.h> @@ -266,11 +267,11 @@ AttributeBenchmark::benchmarkSearch(const AttributePtr & ptr, const std::vector< for (uint32_t i = 0; i < _config._numSearchers; ++i) { if (_config._rangeSearch) { RangeSpec spec(_config._rangeStart, _config._rangeEnd, _config._rangeDelta); - searchers.push_back(new AttributeRangeSearcher(i, ptr, spec, _config._numQueries)); + searchers.push_back(new AttributeRangeSearcher(ptr, spec, _config._numQueries)); } else if (_config._prefixSearch) { - searchers.push_back(new AttributePrefixSearcher(i, ptr, prefixStrings, _config._numQueries)); + searchers.push_back(new AttributePrefixSearcher(ptr, prefixStrings, _config._numQueries)); } else { - searchers.push_back(new AttributeFindSearcher<T>(i, ptr, values, _config._numQueries)); + searchers.push_back(new AttributeFindSearcher<T>(ptr, values, _config._numQueries)); } _threadPool->NewThread(searchers.back()); } diff --git a/searchlib/src/tests/attribute/benchmark/attributesearcher.h b/searchlib/src/tests/attribute/benchmark/attributesearcher.h new file mode 100644 index 00000000000..f8cd614c48c --- /dev/null +++ b/searchlib/src/tests/attribute/benchmark/attributesearcher.h @@ -0,0 +1,265 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/util/runnable.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/queryeval/hitcollector.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/util/compress.h> +#include <vespa/searchlib/parsequery/parse.h> + +namespace search { + +std::unique_ptr<ResultSet> +performSearch(queryeval::SearchIterator & sb, uint32_t numDocs) +{ + queryeval::HitCollector hc(numDocs, numDocs, 0); + // assume strict toplevel search object located at start + for (sb.seek(1); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + hc.addHit(sb.getDocId(), 0.0); + } + return hc.getResultSet(); +} + +class AttributeSearcherStatus +{ +public: + double _totalSearchTime; + uint64_t _totalHitCount; + uint64_t _numQueries; + uint64_t _numClients; + + AttributeSearcherStatus() : _totalSearchTime(0), _totalHitCount(0), _numQueries(0), _numClients(0) {} + void merge(const AttributeSearcherStatus & status) { + _totalSearchTime += status._totalSearchTime; + _totalHitCount += status._totalHitCount; + _numQueries += status._numQueries; + _numClients += status._numClients; + } + void printXML() const { + std::cout << "<total-search-time>" << _totalSearchTime << "</total-search-time>" << std::endl; // ms + std::cout << "<avg-search-time>" << avgSearchTime() << "</avg-search-time>" << std::endl; // ms + std::cout << "<search-throughput>" << searchThroughout() << "</search-throughput>" << std::endl; // per/sec + std::cout << "<total-hit-count>" << _totalHitCount << "</total-hit-count>" << std::endl; + std::cout << "<avg-hit-count>" << avgHitCount() << "</avg-hit-count>" << std::endl; + } + double avgSearchTime() const { + return _totalSearchTime / _numQueries; + } + double searchThroughout() const { + return _numClients * 1000 * _numQueries / _totalSearchTime; + } + double avgHitCount() const { + return _totalHitCount / static_cast<double>(_numQueries); + } +}; + + +class AttributeSearcher : public Runnable +{ +protected: + typedef AttributeVector::SP AttributePtr; + + const AttributePtr & _attrPtr; + FastOS_Time _timer; + AttributeSearcherStatus _status; + +public: + AttributeSearcher(const AttributePtr & attrPtr) : + Runnable(), _attrPtr(attrPtr), _timer(), _status() + { + _status._numClients = 1; + } + virtual void doRun() override = 0; + AttributeSearcherStatus & getStatus() { return _status; } + void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix = false); +}; + +void +AttributeSearcher::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = strlen(term); + uint32_t termIdx = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + uint32_t queryPacketSize = vespalib::compress::Integer::compressedPositiveLength(termIdx) + + vespalib::compress::Integer::compressedPositiveLength(indexLen) + + vespalib::compress::Integer::compressedPositiveLength(termLen) + + indexLen + termLen; + buffer.resize(queryPacketSize); + char * p = &buffer[0]; + p += vespalib::compress::Integer::compressPositive(termIdx, p); + p += vespalib::compress::Integer::compressPositive(indexLen, p); + memcpy(p, index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, p); + memcpy(p, term, termLen); + p += termLen; + assert(p == (&buffer[0] + buffer.size())); +} + + +template <typename T> +class AttributeFindSearcher : public AttributeSearcher +{ +private: + const std::vector<T> & _values; + std::vector<char> _query; + +public: + AttributeFindSearcher(const AttributePtr & attrPtr, const std::vector<T> & values, + uint32_t numQueries) : + AttributeSearcher(attrPtr), _values(values), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun() override; +}; + +template <typename T> +void +AttributeFindSearcher<T>::doRun() +{ + _timer.SetNow(); + for (uint32_t i = 0; i < _status._numQueries; ++i) { + // build simple term query + vespalib::asciistream ss; + ss << _values[i % _values.size()].getValue(); + this->buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str()); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + attribute::SearchContextParams()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + +class RangeSpec +{ +public: + int64_t _min; + int64_t _max; + int64_t _range; + RangeSpec(int64_t min, int64_t max, int64_t range) : + _min(min), _max(max), _range(range) + { + assert(_min < _max); + assert(_range <= (_max - _min)); + } +}; + +class RangeIterator +{ +private: + RangeSpec _spec; + int64_t _a; + int64_t _b; + +public: + RangeIterator(const RangeSpec & spec) : _spec(spec), _a(spec._min), _b(spec._min + _spec._range) {} + RangeIterator & operator++() { + _a += _spec._range; + _b += _spec._range; + if (_b > _spec._max) { + _a = _spec._min; + _b = _spec._min + _spec._range; + } + return *this; + } + int64_t a() const { return _a; } + int64_t b() const { return _b; } +}; + +class AttributeRangeSearcher : public AttributeSearcher +{ +private: + RangeSpec _spec; + std::vector<char> _query; + +public: + AttributeRangeSearcher(const AttributePtr & attrPtr, const RangeSpec & spec, + uint32_t numQueries) : + AttributeSearcher(attrPtr), _spec(spec), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun() override; +}; + +void +AttributeRangeSearcher::doRun() +{ + _timer.SetNow(); + RangeIterator iter(_spec); + for (uint32_t i = 0; i < _status._numQueries; ++i, ++iter) { + // build simple range term query + vespalib::asciistream ss; + ss << "[" << iter.a() << ";" << iter.b() << "]"; + buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str()); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + attribute::SearchContextParams()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + +class AttributePrefixSearcher : public AttributeSearcher +{ +private: + const std::vector<vespalib::string> & _values; + std::vector<char> _query; + +public: + AttributePrefixSearcher(const AttributePtr & attrPtr, + const std::vector<vespalib::string> & values, uint32_t numQueries) : + AttributeSearcher(attrPtr), _values(values), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun() override; +}; + +void +AttributePrefixSearcher::doRun() +{ + _timer.SetNow(); + for (uint32_t i = 0; i < _status._numQueries; ++i) { + // build simple prefix term query + buildTermQuery(_query, _attrPtr->getName(), _values[i % _values.size()].c_str(), true); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + attribute::SearchContextParams()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + + +} // search + diff --git a/searchlib/src/tests/attribute/benchmark/attributeupdater.h b/searchlib/src/tests/attribute/benchmark/attributeupdater.h new file mode 100644 index 00000000000..13360e58b2d --- /dev/null +++ b/searchlib/src/tests/attribute/benchmark/attributeupdater.h @@ -0,0 +1,316 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/searchlib/util/runnable.h> +#include <vespa/searchlib/attribute/attribute.h> + +#define VALIDATOR_STR(str) #str +#define VALIDATOR_ASSERT(rc) reportAssert(rc, __FILE__, __LINE__, VALIDATOR_STR(rc)) +#define VALIDATOR_ASSERT_EQUAL(a, b) reportAssertEqual(__FILE__, __LINE__, VALIDATOR_STR(a), VALIDATOR_STR(b), a, b) + +namespace search { + +class AttributeValidator +{ +private: + uint32_t _totalCnt; + +public: + AttributeValidator() : _totalCnt(0) {} + uint32_t getTotalCnt() const { return _totalCnt; } + bool reportAssert(bool rc, const vespalib::string & file, uint32_t line, const vespalib::string & str) { + _totalCnt++; + if (!rc) { + std::cout << "Assert " << _totalCnt << " failed: \"" << str << "\" (" + << file << ":" << line << ")" << std::endl; + abort(); + } + return true; + } + template <class A, class B> + bool reportAssertEqual(const vespalib::string & file, uint32_t line, + const vespalib::string & aStr, const vespalib::string & bStr, + const A & a, const B & b) { + _totalCnt++; + if (!(a == b)) { + std::cout << "Assert equal failed: " << std::endl; + std::cout << aStr << ": " << a << std::endl; + std::cout << bStr << ": " << b << std::endl; + std::cout << "(" << file << ":" << line << ")" << std::endl; + abort(); + } + return true; + } +}; + +class AttributeUpdaterStatus +{ +public: + double _totalUpdateTime; + uint64_t _numDocumentUpdates; + uint64_t _numValueUpdates; + + AttributeUpdaterStatus() : + _totalUpdateTime(0), _numDocumentUpdates(0), _numValueUpdates(0) {} + void reset() { + _totalUpdateTime = 0; + _numDocumentUpdates = 0; + _numValueUpdates = 0; + } + void printXML() const { + std::cout << "<total-update-time>" << _totalUpdateTime << "</total-update-time>" << std::endl; + std::cout << "<documents-updated>" << _numDocumentUpdates << "</documents-updated>" << std::endl; + std::cout << "<document-update-throughput>" << documentUpdateThroughput() << "</document-update-throughput>" << std::endl; + std::cout << "<avg-document-update-time>" << avgDocumentUpdateTime() << "</avg-document-update-time>" << std::endl; + std::cout << "<values-updated>" << _numValueUpdates << "</values-updated>" << std::endl; + std::cout << "<value-update-throughput>" << valueUpdateThroughput() << "</value-update-throughput>" << std::endl; + std::cout << "<avg-value-update-time>" << avgValueUpdateTime() << "</avg-value-update-time>" << std::endl; + } + double documentUpdateThroughput() const { + return _numDocumentUpdates * 1000 / _totalUpdateTime; + } + double avgDocumentUpdateTime() const { + return _totalUpdateTime / _numDocumentUpdates; + } + double valueUpdateThroughput() const { + return _numValueUpdates * 1000 / _totalUpdateTime; + } + double avgValueUpdateTime() const { + return _totalUpdateTime / _numValueUpdates; + } +}; + +// AttributeVectorInstance, AttributeVectorType, AttributeVectorBufferType +template <typename Vector, typename T, typename BT> +class AttributeUpdater +{ +protected: + typedef AttributeVector::SP AttributePtr; + typedef std::map<uint32_t, std::vector<T> > AttributeCommit; + + const AttributePtr & _attrPtr; + Vector & _attrVec; + const std::vector<T> & _values; + std::vector<T> _buffer; + std::vector<BT> _getBuffer; + RandomGenerator & _rndGen; + AttributeCommit _expected; + FastOS_Time _timer; + AttributeUpdaterStatus _status; + AttributeValidator _validator; + + // config + bool _validate; + uint32_t _commitFreq; + uint32_t _minValueCount; + uint32_t _maxValueCount; + + uint32_t getRandomCount() { + return _rndGen.rand(_minValueCount, _maxValueCount); + } + uint32_t getRandomDoc() { + return _rndGen.rand(0, _attrPtr->getNumDocs() - 1); + } + const T & getRandomValue() { + return _values[_rndGen.rand(0, _values.size() - 1)]; + } + void updateValues(uint32_t doc); + void commit(); + +public: + AttributeUpdater(const AttributePtr & attrPtr, const std::vector<T> & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + uint32_t minValueCount, uint32_t maxValueCount); + ~AttributeUpdater(); + void resetStatus() { + _status.reset(); + } + const AttributeUpdaterStatus & getStatus() const { + return _status; + } + const AttributeValidator & getValidator() const { + return _validator; + } + void populate(); + void update(uint32_t numUpdates); +}; + +template <typename Vector, typename T, typename BT> +AttributeUpdater<Vector, T, BT>::AttributeUpdater(const AttributePtr & attrPtr, const std::vector<T> & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + uint32_t minValueCount, uint32_t maxValueCount) + :_attrPtr(attrPtr), _attrVec(*(static_cast<Vector *>(attrPtr.get()))), _values(values), _buffer(), + _getBuffer(), _rndGen(rndGen), _expected(), _timer(), _status(), _validator(), _validate(validate), + _commitFreq(commitFreq), _minValueCount(minValueCount), _maxValueCount(maxValueCount) +{} + +template <typename Vector, typename T, typename BT> +AttributeUpdater<Vector, T, BT>::~AttributeUpdater() {} + +template <typename Vector, typename T, typename BT> +class AttributeUpdaterThread : public AttributeUpdater<Vector, T, BT>, public Runnable +{ +private: + typedef AttributeVector::SP AttributePtr; + +public: + AttributeUpdaterThread(const AttributePtr & attrPtr, const std::vector<T> & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + uint32_t minValueCount, uint32_t maxValueCount); + ~AttributeUpdaterThread(); + + virtual void doRun() override; +}; + +template <typename Vector, typename T, typename BT> +AttributeUpdaterThread<Vector, T, BT>::AttributeUpdaterThread(const AttributePtr & attrPtr, const std::vector<T> & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + uint32_t minValueCount, uint32_t maxValueCount) + : AttributeUpdater<Vector, T, BT>(attrPtr, values, rndGen, validate, commitFreq, minValueCount, maxValueCount), + Runnable() +{} +template <typename Vector, typename T, typename BT> +AttributeUpdaterThread<Vector, T, BT>::~AttributeUpdaterThread() { } + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::updateValues(uint32_t doc) +{ + uint32_t valueCount = getRandomCount(); + + if (_validate) { + _buffer.clear(); + if (_attrPtr->hasMultiValue()) { + _attrPtr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + T value = getRandomValue(); + if (_attrPtr->hasWeightedSetType()) { + bool exists = false; + for (typename std::vector<T>::iterator iter = _buffer.begin(); iter != _buffer.end(); ++iter) { + if (iter->getValue() == value.getValue()) { + exists = true; + iter->setWeight(value.getWeight()); + break; + } + } + if (!exists) { + _buffer.push_back(value); + } + } else { + _buffer.push_back(value); + } + _attrVec.append(doc, value.getValue(), value.getWeight()); + } + } else { + _buffer.push_back(getRandomValue()); + _attrVec.update(doc, _buffer.back().getValue()); + } + _expected[doc] = _buffer; + + } else { + if (_attrPtr->hasMultiValue()) { + _attrPtr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + T value = getRandomValue(); + _attrVec.append(doc, value.getValue(), value.getWeight()); + } + } else { + _attrVec.update(doc, getRandomValue().getValue()); + } + } + + _status._numDocumentUpdates++; + _status._numValueUpdates += (_attrPtr->hasMultiValue() ? valueCount: 1); +} + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::commit() +{ + AttributeGuard guard(this->_attrPtr); + if (_validate) { + _attrPtr->commit(); + _getBuffer.resize(_maxValueCount); + for (typename AttributeCommit::iterator iter = _expected.begin(); + iter != _expected.end(); ++iter) + { + uint32_t valueCount = _attrPtr->get(iter->first, &_getBuffer[0], _getBuffer.size()); + _validator.VALIDATOR_ASSERT(_minValueCount <= valueCount && valueCount <= _maxValueCount); + if (valueCount != iter->second.size()) { + std::cout << "validate(" << iter->first << ")" << std::endl; + std::cout << "expected(" << iter->second.size() << ")" << std::endl; + for (size_t i = 0; i < iter->second.size(); ++i) { + std::cout << " [" << iter->second[i].getValue() << ", " << iter->second[i].getWeight() << "]" << std::endl; + } + std::cout << "actual(" << valueCount << ")" << std::endl; + for (size_t i = 0; i < valueCount; ++i) { + std::cout << " [" << _getBuffer[i].getValue() << ", " << _getBuffer[i].getWeight() << "]" << std::endl; + } + } + _validator.VALIDATOR_ASSERT_EQUAL(valueCount, iter->second.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getValue(), iter->second[i].getValue()); + _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getWeight(), iter->second[i].getWeight()); + } + } + _expected.clear(); + } else { + _attrPtr->commit(); + } +} + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::populate() +{ + _timer.SetNow(); + for (uint32_t doc = 0; doc < _attrPtr->getNumDocs(); ++doc) { + updateValues(doc); + if (doc % _commitFreq == (_commitFreq - 1)) { + commit(); + } + } + commit(); + _status._totalUpdateTime += _timer.MilliSecsToNow(); +} + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::update(uint32_t numUpdates) +{ + _timer.SetNow(); + for (uint32_t i = 0; i < numUpdates; ++i) { + uint32_t doc = getRandomDoc(); + updateValues(doc); + if (i % _commitFreq == (_commitFreq - 1)) { + commit(); + } + } + commit(); + _status._totalUpdateTime += _timer.MilliSecsToNow(); +} + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdaterThread<Vector, T, BT>::doRun() +{ + this->_timer.SetNow(); + while(!_done) { + uint32_t doc = this->getRandomDoc(); + this->updateValues(doc); + if (this->_status._numDocumentUpdates % this->_commitFreq == (this->_commitFreq - 1)) { + this->commit(); + } + } + this->commit(); + this->_status._totalUpdateTime += this->_timer.MilliSecsToNow(); +} + + +} // search + |