summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2020-05-02 12:17:11 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2020-05-02 12:17:11 +0000
commit: c4dae70d1379130b4bbd46ce3012736c864a9f92 (patch)
tree: 0659dc2a92e054c079b4dedcb2e5166a3dcb8db0
parent: f0ef95a22d5a1c53ce19bb609726fca2cdfbeea6 (diff)
- Reserve space upfront in the vectors.
- Remove the virtual interface from some very frequently called methods where virtual dispatch is not necessary.
- Use std::move for vector and shared_ptr.
- Some C++11-ification.
-rw-r--r-- searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp 64
-rw-r--r-- searchlib/src/vespa/searchlib/features/fieldmatch/computer.h 11
-rw-r--r-- searchlib/src/vespa/searchlib/features/fieldmatchfeature.h 9
-rw-r--r-- searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp 14
-rw-r--r-- searchlib/src/vespa/searchlib/features/nativeproximityfeature.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/fef/itermfielddata.h 30
-rw-r--r-- searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp 27
-rw-r--r-- searchlib/src/vespa/searchlib/fef/phrasesplitter.h 13
-rw-r--r-- searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/fef/simpletermfielddata.h 18
10 files changed, 84 insertions, 114 deletions
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp
index 3ea22716432..1b943c596c6 100644
--- a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp
@@ -13,10 +13,7 @@ LOG_SETUP(".features.fieldmatch.computer");
using namespace search::fef;
-namespace search {
-namespace features {
-namespace fieldmatch {
-
+namespace search::features::fieldmatch {
Computer::Computer(const vespalib::string &propertyNamespace, const PhraseSplitter &splitter,
const FieldInfo &fieldInfo, const Params &params) :
@@ -39,18 +36,20 @@ Computer::Computer(const vespalib::string &propertyNamespace, const PhraseSplitt
_cachedHits()
{
// Store term data for all terms searching in this field
+ _queryTermFieldMatch.reserve(splitter.getNumTerms());
+ _cachedHits.reserve(splitter.getNumTerms());
for (uint32_t i = 0; i < splitter.getNumTerms(); ++i) {
QueryTerm qt = QueryTermFactory::create(splitter, i, true, true);
_totalTermWeight += qt.termData()->getWeight().percent();
_totalTermSignificance += qt.significance();
_simpleMetrics.addQueryTerm(qt.termData()->getWeight().percent());
const ITermFieldData *field = qt.termData()->lookupField(_fieldId);
- if (field != 0) {
+ if (field != nullptr) {
qt.fieldHandle(field->getHandle());
_queryTerms.push_back(qt);
_simpleMetrics.addSearchedTerm(qt.termData()->getWeight().percent());
- _queryTermFieldMatch.push_back(NULL);
- _cachedHits.push_back(BitVectorData());
+ _queryTermFieldMatch.emplace_back(nullptr);
+ _cachedHits.emplace_back();
}
}
@@ -67,8 +66,9 @@ Computer::Computer(const vespalib::string &propertyNamespace, const PhraseSplitt
_finalMetrics = Metrics(this);
// num query terms searching in this field + 1
+ _segments.reserve(getNumQueryTerms() + 1);
for (uint32_t i = 0; i < (getNumQueryTerms() + 1); ++i) {
- _segments.push_back(SegmentData(SegmentStart::SP(new SegmentStart(this, _currentMetrics))));
+ _segments.emplace_back(std::make_shared<SegmentStart>(this, _currentMetrics));
}
}
@@ -96,7 +96,7 @@ Computer::reset(uint32_t docId)
const ITermData *td = _queryTerms[i].termData();
const TermFieldMatchData *tfmd = _splitter.resolveTermField(_queryTerms[i].fieldHandle());
if (tfmd->getDocId() != docId) { // only term match data if we have a hit
- tfmd = NULL;
+ tfmd = nullptr;
} else {
FieldPositionsIterator it = tfmd->getIterator();
uint32_t fieldLength = it.getFieldLength();
@@ -144,7 +144,7 @@ Computer::handleError(uint32_t fieldPos, uint32_t docId) const
LOG(debug, "Bad field position %u >= fieldLength %u for field '%s' document %u. "
"Document was probably refed during query (Ticket 7104969)",
fieldPos, _fieldLength,
- finfo != NULL ? finfo->name().c_str() : "unknown field",
+ finfo != nullptr ? finfo->name().c_str() : "unknown field",
docId);
}
}
@@ -180,7 +180,7 @@ Computer::findClosestInFieldBySemanticDistance(int i, int previousJ, uint32_t st
}
const TermFieldMatchData *termFieldMatch = _queryTermFieldMatch[i];
- if (termFieldMatch == NULL) {
+ if (termFieldMatch == nullptr) {
return -1; // not matched
}
@@ -289,7 +289,7 @@ Computer::exploreSegments()
_segments[0].segment->reset(_currentMetrics);
_segments[0].valid = true;
SegmentStart *segment = _segments[0].segment.get();
- while (segment != NULL) {
+ while (segment != nullptr) {
if (isTracing()) {
trace(vespalib::make_string("Looking for segment from %s...",
segment->toString().c_str()));
@@ -373,7 +373,7 @@ Computer::findAlternativeSegmentFrom(SegmentStart *segment) {
} else {
semanticDistanceExplored = 0;
// we have a match for this term but no position information
- if (_queryTermFieldMatch[i] != NULL && !_cachedHits[i].valid) {
+ if (_queryTermFieldMatch[i] != nullptr && !_cachedHits[i].valid) {
_currentMetrics.onMatch(i);
}
}
@@ -441,8 +441,8 @@ Computer::segmentEnd(int i, int j)
SegmentStart *
Computer::findOpenSegment(uint32_t startI) {
for (uint32_t i = startI; i < _segments.size(); i++) {
- SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : NULL;
- if (startPoint == NULL || !startPoint->isOpen()) {
+ SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : nullptr;
+ if (startPoint == nullptr || !startPoint->isOpen()) {
continue;
}
if (startPoint->getSemanticDistanceExplored() == 0) {
@@ -454,20 +454,20 @@ Computer::findOpenSegment(uint32_t startI) {
_alternativeSegmentationsTried++;
return startPoint;
}
- return NULL;
+ return nullptr;
}
SegmentStart *
Computer::findLastStartPoint()
{
for (int i = _segments.size(); --i >= 0; ) {
- SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : NULL;
- if (startPoint != NULL) {
+ SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : nullptr;
+ if (startPoint != nullptr) {
return startPoint;
}
}
LOG(error, "findLastStartPoint() could not find any segment start. This should never happen!");
- return NULL;
+ return nullptr;
}
void
@@ -478,7 +478,7 @@ Computer::setOccurrenceCounts(Metrics &metrics)
std::set<uint32_t> firstOccs;
for (uint32_t i = 0; i < _queryTermFieldMatch.size(); ++i) {
const TermFieldMatchData *termFieldMatch = _queryTermFieldMatch[i];
- if (termFieldMatch == NULL) {
+ if (termFieldMatch == nullptr) {
continue; // not for this match
}
FieldPositionsIterator it = termFieldMatch->getIterator();
@@ -504,12 +504,10 @@ Computer::setOccurrenceCounts(Metrics &metrics)
feature_t totalWeightedOccurrences = 0;
feature_t totalSignificantOccurrences = 0;
- for (std::vector<uint32_t>::iterator it = uniqueTerms.begin();
- it != uniqueTerms.end(); ++it)
- {
- const QueryTerm &queryTerm = _queryTerms[*it];
+ for (uint32_t termIdx : uniqueTerms) {
+ const QueryTerm &queryTerm = _queryTerms[termIdx];
const ITermData &termData = *queryTerm.termData();
- const TermFieldMatchData &termFieldMatch = *_queryTermFieldMatch[*it];
+ const TermFieldMatchData &termFieldMatch = *_queryTermFieldMatch[termIdx];
uint32_t termOccurrences = 0;
FieldPositionsIterator pos = termFieldMatch.getIterator();
@@ -535,22 +533,16 @@ Computer::setOccurrenceCounts(Metrics &metrics)
metrics.setWeightedAbsoluteOccurrence(weightedAbsoluteOccurrence / (totalWeight > 0 ? totalWeight : 1));
feature_t weightedOccurrenceSum = 0;
- for (std::vector<feature_t>::iterator it = weightedOccurrences.begin();
- it != weightedOccurrences.end(); ++it)
- {
- weightedOccurrenceSum += totalWeightedOccurrences > 0.0f ? *it / totalWeightedOccurrences : 0.0f;
+ for (feature_t feature : weightedOccurrences) {
+ weightedOccurrenceSum += totalWeightedOccurrences > 0.0f ? feature / totalWeightedOccurrences : 0.0f;
}
metrics.setWeightedOccurrence(weightedOccurrenceSum);
feature_t significantOccurrenceSum = 0;
- for (std::vector<feature_t>::iterator it = significantOccurrences.begin();
- it != significantOccurrences.end(); ++it)
- {
- significantOccurrenceSum += totalSignificantOccurrences > 0.0f ? *it / totalSignificantOccurrences : 0.0f;
+ for (feature_t feature : significantOccurrences) {
+ significantOccurrenceSum += totalSignificantOccurrences > 0.0f ? feature / totalSignificantOccurrences : 0.0f;
}
metrics.setSignificantOccurrence(significantOccurrenceSum);
}
-} // fieldmatch
-} // features
-} // search
+}
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h
index 8d1c035e311..c429796cdd2 100644
--- a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h
+++ b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h
@@ -14,9 +14,7 @@
#include "segmentstart.h"
#include "simplemetrics.h"
-namespace search {
-namespace features {
-namespace fieldmatch {
+namespace search::features::fieldmatch {
/**
* <p>Calculates a set of metrics capturing information about the degree of agreement between a query and a field
@@ -330,7 +328,7 @@ private:
struct SegmentData {
SegmentData() : segment(), valid(false) {}
- SegmentData(const SegmentStart::SP & ss, bool v = false) : segment(ss), valid(v) {}
+ SegmentData(SegmentStart::SP ss, bool v = false) : segment(std::move(ss)), valid(v) {}
SegmentStart::SP segment;
bool valid;
};
@@ -364,7 +362,4 @@ private:
std::vector<BitVectorData> _cachedHits;
};
-} // fieldmatch
-} // features
-} // search
-
+}
diff --git a/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h
index 0cc1c8649d1..bec6ccb2bda 100644
--- a/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h
+++ b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h
@@ -13,16 +13,13 @@ namespace search::features {
*/
class FieldMatchExecutor : public fef::FeatureExecutor {
private:
- fef::PhraseSplitter _splitter;
- const fef::FieldInfo & _field;
- fieldmatch::Computer _cmp;
+ fef::PhraseSplitter _splitter;
+ const fef::FieldInfo & _field;
+ fieldmatch::Computer _cmp;
void handle_bind_match_data(const fef::MatchData &md) override;
public:
- /**
- * Constructs an executor.
- */
FieldMatchExecutor(const fef::IQueryEnvironment & queryEnv,
const fef::FieldInfo & field,
const fieldmatch::Params & params);
diff --git a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp
index 4da819b4dd3..887daa4735d 100644
--- a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp
@@ -61,9 +61,7 @@ NativeProximityExecutor::NativeProximityExecutor(const IQueryEnvironment & env,
QueryTerm qt = QueryTermFactory::create(env, i);
typedef search::fef::ITermFieldRangeAdapter FRA;
-
for (FRA iter(*qt.termData()); iter.valid(); iter.next()) {
-
uint32_t fieldId = iter.get().getFieldId();
if (_params.considerField(fieldId)) { // only consider fields with contribution
qt.fieldHandle(iter.get().getHandle());
@@ -71,13 +69,13 @@ NativeProximityExecutor::NativeProximityExecutor(const IQueryEnvironment & env,
}
}
}
- for (std::map<uint32_t, QueryTermVector>::const_iterator itr = fields.begin(); itr != fields.end(); ++itr) {
- if (itr->second.size() >= 2) {
- FieldSetup setup(itr->first);
- generateTermPairs(env, itr->second, _params.slidingWindow, setup);
+ for (const auto & entry : fields) {
+ if (entry.second.size() >= 2) {
+ FieldSetup setup(entry.first);
+ generateTermPairs(env, entry.second, _params.slidingWindow, setup);
if (!setup.pairs.empty()) {
- _setups.push_back(setup);
- _totalFieldWeight += params.vector[itr->first].fieldWeight;
+ _setups.push_back(std::move(setup));
+ _totalFieldWeight += params.vector[entry.first].fieldWeight;
}
}
}
diff --git a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h
index 13f2ea2dbcd..f2acdf9c593 100644
--- a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h
+++ b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h
@@ -80,8 +80,8 @@ public:
class NativeProximityBlueprint : public fef::Blueprint {
private:
NativeProximityParams _params;
- vespalib::string _defaultProximityBoost;
- vespalib::string _defaultRevProximityBoost;
+ vespalib::string _defaultProximityBoost;
+ vespalib::string _defaultRevProximityBoost;
public:
NativeProximityBlueprint();
diff --git a/searchlib/src/vespa/searchlib/fef/itermfielddata.h b/searchlib/src/vespa/searchlib/fef/itermfielddata.h
index 6fb467ce25c..5944f7636b2 100644
--- a/searchlib/src/vespa/searchlib/fef/itermfielddata.h
+++ b/searchlib/src/vespa/searchlib/fef/itermfielddata.h
@@ -16,27 +16,28 @@ namespace search::fef {
**/
class ITermFieldData
{
-protected:
- virtual ~ITermFieldData() {}
-
public:
+ ITermFieldData(uint32_t fieldId)
+ : _fieldId(fieldId),
+ _matching_doc_count(0),
+ _total_doc_count(1)
+ { }
/**
* Obtain the global field id.
*
* @return field id
**/
- virtual uint32_t getFieldId() const = 0;
-
+ uint32_t getFieldId() const { return _fieldId; }
/**
* Returns the number of documents matching this term.
*/
- virtual uint32_t get_matching_doc_count() const = 0;
+ uint32_t get_matching_doc_count() const { return _matching_doc_count; }
/**
* Returns the total number of documents in the corpus.
*/
- virtual uint32_t get_total_doc_count() const = 0;
+ uint32_t get_total_doc_count() const { return _total_doc_count; }
/**
* Obtain the document frequency. This is a value between 0 and 1
@@ -49,6 +50,15 @@ public:
}
/**
+ * Sets the document frequency.
+ **/
+ ITermFieldData &setDocFreq(uint32_t matching_doc_count, uint32_t total_doc_count) {
+ _matching_doc_count = matching_doc_count;
+ _total_doc_count = total_doc_count;
+ return *this;
+ }
+
+ /**
* Obtain the match handle for this field,
* requesting normal match data in the corresponding TermFieldMatchData.
*
@@ -65,6 +75,12 @@ public:
* @return match handle (or IllegalHandle)
**/
virtual TermFieldHandle getHandle(MatchDataDetails requested_details) const = 0;
+protected:
+ virtual ~ITermFieldData() {}
+private:
+ uint32_t _fieldId;
+ uint32_t _matching_doc_count;
+ uint32_t _total_doc_count;
};
}
diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
index 77a271ba76c..e84f61332e1 100644
--- a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
+++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
@@ -2,8 +2,7 @@
#include "phrasesplitter.h"
-namespace search {
-namespace fef {
+namespace search::fef {
void
PhraseSplitter::considerTerm(uint32_t termIdx, const ITermData &term, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId)
@@ -32,10 +31,9 @@ PhraseSplitter::considerTerm(uint32_t termIdx, const ITermData &term, std::vecto
_termIdxMap.push_back(TermIdx(termIdx, false));
}
-PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv,
- uint32_t fieldId) :
+PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv, uint32_t fieldId) :
_queryEnv(queryEnv),
- _matchData(NULL),
+ _matchData(nullptr),
_terms(),
_termMatches(),
_termIdxMap(),
@@ -47,18 +45,18 @@ PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv,
for (uint32_t i = 0; i < queryEnv.getNumTerms(); ++i) {
const ITermData *td = queryEnv.getTerm(i);
- assert(td != NULL);
+ assert(td != nullptr);
considerTerm(i, *td, phraseTerms, fieldId);
numHandles += td->numFields();
}
_skipHandles = _maxHandle + 1 + numHandles;
- for (uint32_t i = 0; i < _terms.size(); ++i) {
+ _termMatches.reserve(_terms.size());
+ for (auto & term : _terms) {
// start at _skipHandles + 0
- _terms[i].field(0).setHandle(_skipHandles + _termMatches.size());
- TermFieldMatchData empty;
- empty.setFieldId(fieldId);
- _termMatches.push_back(empty);
+ term.field(0).setHandle(_skipHandles + _termMatches.size());
+ _termMatches.emplace_back();
+ _termMatches.back().setFieldId(fieldId);
}
for (uint32_t i = 0; i < phraseTerms.size(); ++i) {
@@ -76,7 +74,7 @@ PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv,
}
}
-PhraseSplitter::~PhraseSplitter() {}
+PhraseSplitter::~PhraseSplitter() = default;
void
PhraseSplitter::copyTermFieldMatchData(TermFieldMatchData & dst, const TermFieldMatchData & src, uint32_t hitOffset)
@@ -96,11 +94,10 @@ PhraseSplitter::update()
for (uint32_t i = 0; i < _copyInfo.size(); ++i) {
const TermFieldMatchData *src = _matchData->resolveTermField(_copyInfo[i].orig_handle);
TermFieldMatchData *dst = resolveSplittedTermField(_copyInfo[i].split_handle);
- assert(src != NULL && dst != NULL);
+ assert(src != nullptr && dst != nullptr);
copyTermFieldMatchData(*dst, *src, _copyInfo[i].offsetInPhrase);
}
}
-} // namespace fef
-} // namespace search
+}
diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.h b/searchlib/src/vespa/searchlib/fef/phrasesplitter.h
index 4e46c9eaa7c..25944158445 100644
--- a/searchlib/src/vespa/searchlib/fef/phrasesplitter.h
+++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.h
@@ -8,8 +8,7 @@
#include "termfieldmatchdata.h"
#include "fieldinfo.h"
-namespace search {
-namespace fef {
+namespace search::fef {
/**
* This class is used to split all phrase terms in a query environment
@@ -94,7 +93,7 @@ public:
const ITermData * getTerm(uint32_t idx) const override {
if (idx >= _termIdxMap.size()) {
- return NULL;
+ return nullptr;
}
const TermIdx & ti = _termIdxMap[idx];
return ti.splitted ? &_terms[ti.idx] : _queryEnv.getTerm(ti.idx);
@@ -104,8 +103,8 @@ public:
* Inherit doc from MatchData.
**/
const TermFieldMatchData * resolveTermField(TermFieldHandle handle) const {
- if (_matchData == NULL) {
- return NULL;
+ if (_matchData == nullptr) {
+ return nullptr;
}
return handle < _skipHandles ? _matchData->resolveTermField(handle) : resolveSplittedTermField(handle);
}
@@ -118,6 +117,4 @@ public:
void bind_match_data(const fef::MatchData &md) { _matchData = &md; }
};
-} // namespace fef
-} // namespace search
-
+}
diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp
index 64906eed22e..f173abc7242 100644
--- a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp
+++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp
@@ -5,17 +5,13 @@
namespace search::fef {
SimpleTermFieldData::SimpleTermFieldData(uint32_t fieldId)
- : _fieldId(fieldId),
- _matching_doc_count(0),
- _total_doc_count(1),
+ : ITermFieldData(fieldId),
_handle(IllegalHandle)
{
}
SimpleTermFieldData::SimpleTermFieldData(const ITermFieldData &rhs)
- : _fieldId(rhs.getFieldId()),
- _matching_doc_count(rhs.get_matching_doc_count()),
- _total_doc_count(rhs.get_total_doc_count()),
+ : ITermFieldData(rhs),
_handle(rhs.getHandle())
{
}
diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h
index d92d3a48f03..84de88a410a 100644
--- a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h
+++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h
@@ -15,9 +15,6 @@ namespace search::fef {
class SimpleTermFieldData : public ITermFieldData
{
private:
- uint32_t _fieldId;
- uint32_t _matching_doc_count;
- uint32_t _total_doc_count;
TermFieldHandle _handle;
public:
@@ -33,12 +30,6 @@ public:
**/
SimpleTermFieldData(uint32_t fieldId);
- uint32_t getFieldId() const override final { return _fieldId; }
-
- uint32_t get_matching_doc_count() const override { return _matching_doc_count; }
-
- uint32_t get_total_doc_count() const override { return _total_doc_count; }
-
using ITermFieldData::getHandle;
TermFieldHandle getHandle(MatchDataDetails requestedDetails) const override {
@@ -47,15 +38,6 @@ public:
}
/**
- * Sets the document frequency.
- **/
- SimpleTermFieldData &setDocFreq(uint32_t matching_doc_count, uint32_t total_doc_count) {
- _matching_doc_count = matching_doc_count;
- _total_doc_count = total_doc_count;
- return *this;
- }
-
- /**
* Sets the match handle for this field.
**/
SimpleTermFieldData &setHandle(TermFieldHandle handle) {