diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-05-02 12:17:11 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2020-05-02 12:17:11 +0000 |
commit | c4dae70d1379130b4bbd46ce3012736c864a9f92 (patch) | |
tree | 0659dc2a92e054c079b4dedcb2e5166a3dcb8db0 | |
parent | f0ef95a22d5a1c53ce19bb609726fca2cdfbeea6 (diff) |
- Reserve space upfront in the vectors.
- Make some very frequently called methods non-virtual where virtual dispatch is not necessary.
- Use std::move for vector and shared_ptr.
- Some C++11-style modernization.
10 files changed, 84 insertions, 114 deletions
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp index 3ea22716432..1b943c596c6 100644 --- a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp @@ -13,10 +13,7 @@ LOG_SETUP(".features.fieldmatch.computer"); using namespace search::fef; -namespace search { -namespace features { -namespace fieldmatch { - +namespace search::features::fieldmatch { Computer::Computer(const vespalib::string &propertyNamespace, const PhraseSplitter &splitter, const FieldInfo &fieldInfo, const Params ¶ms) : @@ -39,18 +36,20 @@ Computer::Computer(const vespalib::string &propertyNamespace, const PhraseSplitt _cachedHits() { // Store term data for all terms searching in this field + _queryTermFieldMatch.reserve(splitter.getNumTerms()); + _cachedHits.reserve(splitter.getNumTerms()); for (uint32_t i = 0; i < splitter.getNumTerms(); ++i) { QueryTerm qt = QueryTermFactory::create(splitter, i, true, true); _totalTermWeight += qt.termData()->getWeight().percent(); _totalTermSignificance += qt.significance(); _simpleMetrics.addQueryTerm(qt.termData()->getWeight().percent()); const ITermFieldData *field = qt.termData()->lookupField(_fieldId); - if (field != 0) { + if (field != nullptr) { qt.fieldHandle(field->getHandle()); _queryTerms.push_back(qt); _simpleMetrics.addSearchedTerm(qt.termData()->getWeight().percent()); - _queryTermFieldMatch.push_back(NULL); - _cachedHits.push_back(BitVectorData()); + _queryTermFieldMatch.emplace_back(nullptr); + _cachedHits.emplace_back(); } } @@ -67,8 +66,9 @@ Computer::Computer(const vespalib::string &propertyNamespace, const PhraseSplitt _finalMetrics = Metrics(this); // num query terms searching in this field + 1 + _segments.reserve(getNumQueryTerms() + 1); for (uint32_t i = 0; i < (getNumQueryTerms() + 1); ++i) { - _segments.push_back(SegmentData(SegmentStart::SP(new SegmentStart(this, 
_currentMetrics)))); + _segments.emplace_back(std::make_shared<SegmentStart>(this, _currentMetrics)); } } @@ -96,7 +96,7 @@ Computer::reset(uint32_t docId) const ITermData *td = _queryTerms[i].termData(); const TermFieldMatchData *tfmd = _splitter.resolveTermField(_queryTerms[i].fieldHandle()); if (tfmd->getDocId() != docId) { // only term match data if we have a hit - tfmd = NULL; + tfmd = nullptr; } else { FieldPositionsIterator it = tfmd->getIterator(); uint32_t fieldLength = it.getFieldLength(); @@ -144,7 +144,7 @@ Computer::handleError(uint32_t fieldPos, uint32_t docId) const LOG(debug, "Bad field position %u >= fieldLength %u for field '%s' document %u. " "Document was probably refed during query (Ticket 7104969)", fieldPos, _fieldLength, - finfo != NULL ? finfo->name().c_str() : "unknown field", + finfo != nullptr ? finfo->name().c_str() : "unknown field", docId); } } @@ -180,7 +180,7 @@ Computer::findClosestInFieldBySemanticDistance(int i, int previousJ, uint32_t st } const TermFieldMatchData *termFieldMatch = _queryTermFieldMatch[i]; - if (termFieldMatch == NULL) { + if (termFieldMatch == nullptr) { return -1; // not matched } @@ -289,7 +289,7 @@ Computer::exploreSegments() _segments[0].segment->reset(_currentMetrics); _segments[0].valid = true; SegmentStart *segment = _segments[0].segment.get(); - while (segment != NULL) { + while (segment != nullptr) { if (isTracing()) { trace(vespalib::make_string("Looking for segment from %s...", segment->toString().c_str())); @@ -373,7 +373,7 @@ Computer::findAlternativeSegmentFrom(SegmentStart *segment) { } else { semanticDistanceExplored = 0; // we have a match for this term but no position information - if (_queryTermFieldMatch[i] != NULL && !_cachedHits[i].valid) { + if (_queryTermFieldMatch[i] != nullptr && !_cachedHits[i].valid) { _currentMetrics.onMatch(i); } } @@ -441,8 +441,8 @@ Computer::segmentEnd(int i, int j) SegmentStart * Computer::findOpenSegment(uint32_t startI) { for (uint32_t i = startI; i < 
_segments.size(); i++) { - SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : NULL; - if (startPoint == NULL || !startPoint->isOpen()) { + SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : nullptr; + if (startPoint == nullptr || !startPoint->isOpen()) { continue; } if (startPoint->getSemanticDistanceExplored() == 0) { @@ -454,20 +454,20 @@ Computer::findOpenSegment(uint32_t startI) { _alternativeSegmentationsTried++; return startPoint; } - return NULL; + return nullptr; } SegmentStart * Computer::findLastStartPoint() { for (int i = _segments.size(); --i >= 0; ) { - SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : NULL; - if (startPoint != NULL) { + SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : nullptr; + if (startPoint != nullptr) { return startPoint; } } LOG(error, "findLastStartPoint() could not find any segment start. This should never happen!"); - return NULL; + return nullptr; } void @@ -478,7 +478,7 @@ Computer::setOccurrenceCounts(Metrics &metrics) std::set<uint32_t> firstOccs; for (uint32_t i = 0; i < _queryTermFieldMatch.size(); ++i) { const TermFieldMatchData *termFieldMatch = _queryTermFieldMatch[i]; - if (termFieldMatch == NULL) { + if (termFieldMatch == nullptr) { continue; // not for this match } FieldPositionsIterator it = termFieldMatch->getIterator(); @@ -504,12 +504,10 @@ Computer::setOccurrenceCounts(Metrics &metrics) feature_t totalWeightedOccurrences = 0; feature_t totalSignificantOccurrences = 0; - for (std::vector<uint32_t>::iterator it = uniqueTerms.begin(); - it != uniqueTerms.end(); ++it) - { - const QueryTerm &queryTerm = _queryTerms[*it]; + for (uint32_t termIdx : uniqueTerms) { + const QueryTerm &queryTerm = _queryTerms[termIdx]; const ITermData &termData = *queryTerm.termData(); - const TermFieldMatchData &termFieldMatch = *_queryTermFieldMatch[*it]; + const TermFieldMatchData &termFieldMatch = *_queryTermFieldMatch[termIdx]; 
uint32_t termOccurrences = 0; FieldPositionsIterator pos = termFieldMatch.getIterator(); @@ -535,22 +533,16 @@ Computer::setOccurrenceCounts(Metrics &metrics) metrics.setWeightedAbsoluteOccurrence(weightedAbsoluteOccurrence / (totalWeight > 0 ? totalWeight : 1)); feature_t weightedOccurrenceSum = 0; - for (std::vector<feature_t>::iterator it = weightedOccurrences.begin(); - it != weightedOccurrences.end(); ++it) - { - weightedOccurrenceSum += totalWeightedOccurrences > 0.0f ? *it / totalWeightedOccurrences : 0.0f; + for (feature_t feature : weightedOccurrences) { + weightedOccurrenceSum += totalWeightedOccurrences > 0.0f ? feature / totalWeightedOccurrences : 0.0f; } metrics.setWeightedOccurrence(weightedOccurrenceSum); feature_t significantOccurrenceSum = 0; - for (std::vector<feature_t>::iterator it = significantOccurrences.begin(); - it != significantOccurrences.end(); ++it) - { - significantOccurrenceSum += totalSignificantOccurrences > 0.0f ? *it / totalSignificantOccurrences : 0.0f; + for (feature_t feature : significantOccurrences) { + significantOccurrenceSum += totalSignificantOccurrences > 0.0f ? 
feature / totalSignificantOccurrences : 0.0f; } metrics.setSignificantOccurrence(significantOccurrenceSum); } -} // fieldmatch -} // features -} // search +} diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h index 8d1c035e311..c429796cdd2 100644 --- a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h @@ -14,9 +14,7 @@ #include "segmentstart.h" #include "simplemetrics.h" -namespace search { -namespace features { -namespace fieldmatch { +namespace search::features::fieldmatch { /** * <p>Calculates a set of metrics capturing information about the degree of agreement between a query and a field @@ -330,7 +328,7 @@ private: struct SegmentData { SegmentData() : segment(), valid(false) {} - SegmentData(const SegmentStart::SP & ss, bool v = false) : segment(ss), valid(v) {} + SegmentData(SegmentStart::SP ss, bool v = false) : segment(std::move(ss)), valid(v) {} SegmentStart::SP segment; bool valid; }; @@ -364,7 +362,4 @@ private: std::vector<BitVectorData> _cachedHits; }; -} // fieldmatch -} // features -} // search - +} diff --git a/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h index 0cc1c8649d1..bec6ccb2bda 100644 --- a/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h +++ b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h @@ -13,16 +13,13 @@ namespace search::features { */ class FieldMatchExecutor : public fef::FeatureExecutor { private: - fef::PhraseSplitter _splitter; - const fef::FieldInfo & _field; - fieldmatch::Computer _cmp; + fef::PhraseSplitter _splitter; + const fef::FieldInfo & _field; + fieldmatch::Computer _cmp; void handle_bind_match_data(const fef::MatchData &md) override; public: - /** - * Constructs an executor. 
- */ FieldMatchExecutor(const fef::IQueryEnvironment & queryEnv, const fef::FieldInfo & field, const fieldmatch::Params & params); diff --git a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp index 4da819b4dd3..887daa4735d 100644 --- a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp @@ -61,9 +61,7 @@ NativeProximityExecutor::NativeProximityExecutor(const IQueryEnvironment & env, QueryTerm qt = QueryTermFactory::create(env, i); typedef search::fef::ITermFieldRangeAdapter FRA; - for (FRA iter(*qt.termData()); iter.valid(); iter.next()) { - uint32_t fieldId = iter.get().getFieldId(); if (_params.considerField(fieldId)) { // only consider fields with contribution qt.fieldHandle(iter.get().getHandle()); @@ -71,13 +69,13 @@ NativeProximityExecutor::NativeProximityExecutor(const IQueryEnvironment & env, } } } - for (std::map<uint32_t, QueryTermVector>::const_iterator itr = fields.begin(); itr != fields.end(); ++itr) { - if (itr->second.size() >= 2) { - FieldSetup setup(itr->first); - generateTermPairs(env, itr->second, _params.slidingWindow, setup); + for (const auto & entry : fields) { + if (entry.second.size() >= 2) { + FieldSetup setup(entry.first); + generateTermPairs(env, entry.second, _params.slidingWindow, setup); if (!setup.pairs.empty()) { - _setups.push_back(setup); - _totalFieldWeight += params.vector[itr->first].fieldWeight; + _setups.push_back(std::move(setup)); + _totalFieldWeight += params.vector[entry.first].fieldWeight; } } } diff --git a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h index 13f2ea2dbcd..f2acdf9c593 100644 --- a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h +++ b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h @@ -80,8 +80,8 @@ public: class 
NativeProximityBlueprint : public fef::Blueprint { private: NativeProximityParams _params; - vespalib::string _defaultProximityBoost; - vespalib::string _defaultRevProximityBoost; + vespalib::string _defaultProximityBoost; + vespalib::string _defaultRevProximityBoost; public: NativeProximityBlueprint(); diff --git a/searchlib/src/vespa/searchlib/fef/itermfielddata.h b/searchlib/src/vespa/searchlib/fef/itermfielddata.h index 6fb467ce25c..5944f7636b2 100644 --- a/searchlib/src/vespa/searchlib/fef/itermfielddata.h +++ b/searchlib/src/vespa/searchlib/fef/itermfielddata.h @@ -16,27 +16,28 @@ namespace search::fef { **/ class ITermFieldData { -protected: - virtual ~ITermFieldData() {} - public: + ITermFieldData(uint32_t fieldId) + : _fieldId(fieldId), + _matching_doc_count(0), + _total_doc_count(1) + { } /** * Obtain the global field id. * * @return field id **/ - virtual uint32_t getFieldId() const = 0; - + uint32_t getFieldId() const { return _fieldId; } /** * Returns the number of documents matching this term. */ - virtual uint32_t get_matching_doc_count() const = 0; + uint32_t get_matching_doc_count() const { return _matching_doc_count; } /** * Returns the total number of documents in the corpus. */ - virtual uint32_t get_total_doc_count() const = 0; + uint32_t get_total_doc_count() const { return _total_doc_count; } /** * Obtain the document frequency. This is a value between 0 and 1 @@ -49,6 +50,15 @@ public: } /** + * Sets the document frequency. + **/ + ITermFieldData &setDocFreq(uint32_t matching_doc_count, uint32_t total_doc_count) { + _matching_doc_count = matching_doc_count; + _total_doc_count = total_doc_count; + return *this; + } + + /** * Obtain the match handle for this field, * requesting normal match data in the corresponding TermFieldMatchData. 
* @@ -65,6 +75,12 @@ public: * @return match handle (or IllegalHandle) **/ virtual TermFieldHandle getHandle(MatchDataDetails requested_details) const = 0; +protected: + virtual ~ITermFieldData() {} +private: + uint32_t _fieldId; + uint32_t _matching_doc_count; + uint32_t _total_doc_count; }; } diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp index 77a271ba76c..e84f61332e1 100644 --- a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp +++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp @@ -2,8 +2,7 @@ #include "phrasesplitter.h" -namespace search { -namespace fef { +namespace search::fef { void PhraseSplitter::considerTerm(uint32_t termIdx, const ITermData &term, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId) @@ -32,10 +31,9 @@ PhraseSplitter::considerTerm(uint32_t termIdx, const ITermData &term, std::vecto _termIdxMap.push_back(TermIdx(termIdx, false)); } -PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv, - uint32_t fieldId) : +PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv, uint32_t fieldId) : _queryEnv(queryEnv), - _matchData(NULL), + _matchData(nullptr), _terms(), _termMatches(), _termIdxMap(), @@ -47,18 +45,18 @@ PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv, for (uint32_t i = 0; i < queryEnv.getNumTerms(); ++i) { const ITermData *td = queryEnv.getTerm(i); - assert(td != NULL); + assert(td != nullptr); considerTerm(i, *td, phraseTerms, fieldId); numHandles += td->numFields(); } _skipHandles = _maxHandle + 1 + numHandles; - for (uint32_t i = 0; i < _terms.size(); ++i) { + _termMatches.reserve(_terms.size()); + for (auto & term : _terms) { // start at _skipHandles + 0 - _terms[i].field(0).setHandle(_skipHandles + _termMatches.size()); - TermFieldMatchData empty; - empty.setFieldId(fieldId); - _termMatches.push_back(empty); + term.field(0).setHandle(_skipHandles + _termMatches.size()); + _termMatches.emplace_back(); + 
_termMatches.back().setFieldId(fieldId); } for (uint32_t i = 0; i < phraseTerms.size(); ++i) { @@ -76,7 +74,7 @@ PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv, } } -PhraseSplitter::~PhraseSplitter() {} +PhraseSplitter::~PhraseSplitter() = default; void PhraseSplitter::copyTermFieldMatchData(TermFieldMatchData & dst, const TermFieldMatchData & src, uint32_t hitOffset) @@ -96,11 +94,10 @@ PhraseSplitter::update() for (uint32_t i = 0; i < _copyInfo.size(); ++i) { const TermFieldMatchData *src = _matchData->resolveTermField(_copyInfo[i].orig_handle); TermFieldMatchData *dst = resolveSplittedTermField(_copyInfo[i].split_handle); - assert(src != NULL && dst != NULL); + assert(src != nullptr && dst != nullptr); copyTermFieldMatchData(*dst, *src, _copyInfo[i].offsetInPhrase); } } -} // namespace fef -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.h b/searchlib/src/vespa/searchlib/fef/phrasesplitter.h index 4e46c9eaa7c..25944158445 100644 --- a/searchlib/src/vespa/searchlib/fef/phrasesplitter.h +++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.h @@ -8,8 +8,7 @@ #include "termfieldmatchdata.h" #include "fieldinfo.h" -namespace search { -namespace fef { +namespace search::fef { /** * This class is used to split all phrase terms in a query environment @@ -94,7 +93,7 @@ public: const ITermData * getTerm(uint32_t idx) const override { if (idx >= _termIdxMap.size()) { - return NULL; + return nullptr; } const TermIdx & ti = _termIdxMap[idx]; return ti.splitted ? &_terms[ti.idx] : _queryEnv.getTerm(ti.idx); @@ -104,8 +103,8 @@ public: * Inherit doc from MatchData. **/ const TermFieldMatchData * resolveTermField(TermFieldHandle handle) const { - if (_matchData == NULL) { - return NULL; + if (_matchData == nullptr) { + return nullptr; } return handle < _skipHandles ? 
_matchData->resolveTermField(handle) : resolveSplittedTermField(handle); } @@ -118,6 +117,4 @@ public: void bind_match_data(const fef::MatchData &md) { _matchData = &md; } }; -} // namespace fef -} // namespace search - +} diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp index 64906eed22e..f173abc7242 100644 --- a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp +++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp @@ -5,17 +5,13 @@ namespace search::fef { SimpleTermFieldData::SimpleTermFieldData(uint32_t fieldId) - : _fieldId(fieldId), - _matching_doc_count(0), - _total_doc_count(1), + : ITermFieldData(fieldId), _handle(IllegalHandle) { } SimpleTermFieldData::SimpleTermFieldData(const ITermFieldData &rhs) - : _fieldId(rhs.getFieldId()), - _matching_doc_count(rhs.get_matching_doc_count()), - _total_doc_count(rhs.get_total_doc_count()), + : ITermFieldData(rhs), _handle(rhs.getHandle()) { } diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h index d92d3a48f03..84de88a410a 100644 --- a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h +++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h @@ -15,9 +15,6 @@ namespace search::fef { class SimpleTermFieldData : public ITermFieldData { private: - uint32_t _fieldId; - uint32_t _matching_doc_count; - uint32_t _total_doc_count; TermFieldHandle _handle; public: @@ -33,12 +30,6 @@ public: **/ SimpleTermFieldData(uint32_t fieldId); - uint32_t getFieldId() const override final { return _fieldId; } - - uint32_t get_matching_doc_count() const override { return _matching_doc_count; } - - uint32_t get_total_doc_count() const override { return _total_doc_count; } - using ITermFieldData::getHandle; TermFieldHandle getHandle(MatchDataDetails requestedDetails) const override { @@ -47,15 +38,6 @@ public: } /** - * Sets the document frequency. 
- **/ - SimpleTermFieldData &setDocFreq(uint32_t matching_doc_count, uint32_t total_doc_count) { - _matching_doc_count = matching_doc_count; - _total_doc_count = total_doc_count; - return *this; - } - - /** * Sets the match handle for this field. **/ SimpleTermFieldData &setHandle(TermFieldHandle handle) { |