aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-04-24 19:17:04 +0200
committer GitHub <noreply@github.com> 2023-04-24 19:17:04 +0200
commit f4a7f7fc45df16eedfd4a297567a2f81437a7238 (patch)
tree 3797445b02d2d3d9b6363a9581890ef1fd8a8a06
parent 8c52052c3150c209db37a9a2747c54ccb7d4e171 (diff)
parent f16db3ccc150f85dd9ad04ee62167ee951f4a017 (diff)
Merge pull request #26836 from vespa-engine/geirst/streaming-field-searcher-refactor (v8.157.14)
Streaming field searcher refactor
-rw-r--r-- streamingvisitors/src/vespa/vsm/common/document.h 2
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp 48
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h 6
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp 3
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp 2
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp 3
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp 2
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.h 2
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp 9
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp 5
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp 4
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp 9
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp 7
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp 7
14 files changed, 49 insertions, 60 deletions
diff --git a/streamingvisitors/src/vespa/vsm/common/document.h b/streamingvisitors/src/vespa/vsm/common/document.h
index de9ab052aa1..365d0e33ed0 100644
--- a/streamingvisitors/src/vespa/vsm/common/document.h
+++ b/streamingvisitors/src/vespa/vsm/common/document.h
@@ -13,7 +13,7 @@ namespace vespalib {
namespace vsm {
/// Type to identify fields in documents.
-using FieldIdT = unsigned int;
+using FieldIdT = uint32_t;
/// A type to represent a list of FieldIds.
using FieldIdTList = std::vector<FieldIdT>;
/// A type to represent all the fields contained in all the indexs.
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
index cbf8903caab..9a89d0bebae 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
@@ -61,13 +61,12 @@ void FieldSearcherBase::prepare(const QueryTermList & qtl)
_qtlFastBuffer.resize(sizeof(*_qtlFast)*(_qtl.size()+1), 0x13);
_qtlFast = reinterpret_cast<v16qi *>(reinterpret_cast<unsigned long>(&_qtlFastBuffer[0]+15) & ~0xf);
_qtlFastSize = 0;
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- const QueryTerm & qt = **it;
- memcpy(&_qtlFast[_qtlFastSize++], qt.getTerm(), std::min(size_t(16), qt.termLen()));
+ for (auto qt : _qtl) {
+ memcpy(&_qtlFast[_qtlFastSize++], qt->getTerm(), std::min(size_t(16), qt->termLen()));
}
}
-FieldSearcher::FieldSearcher(const FieldIdT & fId, bool defaultPrefix) :
+FieldSearcher::FieldSearcher(FieldIdT fId, bool defaultPrefix) :
FieldSearcherBase(),
_field(fId),
_matchType(defaultPrefix ? PREFIX : REGULAR),
@@ -89,16 +88,14 @@ FieldSearcher::~FieldSearcher() = default;
bool FieldSearcher::search(const StorageDocument & doc)
{
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field());
- fInfo.setHitOffset(qt.getHitList().size());
+ for (auto qt : _qtl) {
+ QueryTerm::FieldInfo & fInfo = qt->getFieldInfo(field());
+ fInfo.setHitOffset(qt->getHitList().size());
}
onSearch(doc);
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field());
- fInfo.setHitCount(qt.getHitList().size() - fInfo.getHitOffset());
+ for(auto qt : _qtl) {
+ QueryTerm::FieldInfo & fInfo = qt->getFieldInfo(field());
+ fInfo.setHitCount(qt->getHitList().size() - fInfo.getHitOffset());
fInfo.setFieldLength(_words);
}
_words = 0;
@@ -132,9 +129,8 @@ size_t FieldSearcher::countWords(const FieldRef & f)
void FieldSearcher::prepareFieldId()
{
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- qt.resizeFieldId(field());
+ for(auto qt : _qtl) {
+ qt->resizeFieldId(field());
}
}
@@ -232,26 +228,26 @@ void FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm,
QueryTermList qtl;
query.getLeafs(qtl);
vespalib::string tmp;
- for (FieldIdTSearcherMap::iterator it = begin(), mt = end(); it != mt; it++) {
+ for (auto& searcher : *this) {
QueryTermList onlyInIndex;
- FieldIdT fid = (*it)->field();
- for (QueryTermList::iterator qt = qtl.begin(), mqt = qtl.end(); qt != mqt; qt++) {
- QueryTerm * q = *qt;
- for (DocumentTypeIndexFieldMapT::const_iterator dt(difm.begin()), dmt(difm.end()); dt != dmt; dt++) {
- const IndexFieldMapT & fim = dt->second;
- IndexFieldMapT::const_iterator found = fim.find(FieldSearchSpecMap::stripNonFields(q->index()));
+ FieldIdT fid = searcher->field();
+ for (auto qt : qtl) {
+ for (const auto& doc_type_elem : difm) {
+ const IndexFieldMapT & fim = doc_type_elem.second;
+ auto found = fim.find(FieldSearchSpecMap::stripNonFields(qt->index()));
if (found != fim.end()) {
const FieldIdTList & index = found->second;
- if ((find(index.begin(), index.end(), fid) != index.end()) && (find(onlyInIndex.begin(), onlyInIndex.end(), q) == onlyInIndex.end())) {
- onlyInIndex.push_back(q);
+ if ((find(index.begin(), index.end(), fid) != index.end()) && (find(onlyInIndex.begin(), onlyInIndex.end(), qt) == onlyInIndex.end())) {
+ onlyInIndex.push_back(qt);
}
} else {
- LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", q->index().c_str());
+ LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.",
+ qt->index().c_str());
}
}
}
/// Should perhaps do a unique on onlyInIndex
- (*it)->prepare(onlyInIndex, searcherBuf, field_paths, query_env);
+ searcher->prepare(onlyInIndex, searcherBuf, field_paths, query_env);
if (LOG_WOULD_LOG(spam)) {
char tmpBuf[16];
snprintf(tmpBuf, sizeof(tmpBuf), "%d", fid);
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
index 879902ca514..abc2bc9d870 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
@@ -52,7 +52,7 @@ public:
EXACT
};
- FieldSearcher(const FieldIdT & fId, bool defaultPrefix=false);
+ FieldSearcher(FieldIdT fId, bool defaultPrefix=false);
~FieldSearcher() override;
virtual std::unique_ptr<FieldSearcher> duplicate() const = 0;
bool search(const StorageDocument & doc);
@@ -61,8 +61,8 @@ public:
const vsm::FieldPathMapT& field_paths,
search::fef::IQueryEnvironment& query_env);
- const FieldIdT & field() const { return _field; }
- void field(const FieldIdT & v) { _field = v; prepareFieldId(); }
+ FieldIdT field() const { return _field; }
+ void field(FieldIdT v) { _field = v; prepareFieldId(); }
bool prefix() const { return _matchType == PREFIX; }
bool substring() const { return _matchType == SUBSTRING; }
bool suffix() const { return _matchType == SUFFIX; }
diff --git a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp
index 8585975ca3c..578fc9fe0e5 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp
@@ -36,8 +36,7 @@ void FloatFieldSearcherT<T>::prepare(search::streaming::QueryTermList& qtl,
{
_floatTerm.clear();
FieldSearcher::prepare(qtl, buf, field_paths, query_env);
- for (QueryTermList::const_iterator it=qtl.begin(); it < qtl.end(); it++) {
- const QueryTerm * qt = *it;
+ for (auto qt : qtl) {
size_t sz(qt->termLen());
if (sz) {
double low;
diff --git a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp
index 6b0bbbb368d..43ecba29b33 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp
@@ -35,7 +35,7 @@ void GeoPosFieldSearcher::prepare(search::streaming::QueryTermList& qtl,
{
_geoPosTerm.clear();
FieldSearcher::prepare(qtl, buf, field_paths, query_env);
- for (const QueryTerm * qt : qtl) {
+ for (auto qt : qtl) {
const vespalib::string & str = qt->getTermString();
GeoLocationParser parser;
bool valid = parser.parseNoField(str);
diff --git a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp
index 18b286946f7..0fb71a3c3c6 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp
@@ -26,8 +26,7 @@ void IntFieldSearcher::prepare(search::streaming::QueryTermList& qtl,
{
_intTerm.clear();
FieldSearcher::prepare(qtl, buf, field_paths, query_env);
- for (QueryTermList::const_iterator it=qtl.begin(); it < qtl.end(); it++) {
- const QueryTerm * qt = *it;
+ for (auto qt : qtl) {
size_t sz(qt->termLen());
if (sz) {
int64_t low;
diff --git a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
index 045ec9b04a3..f064760e55d 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
@@ -52,7 +52,7 @@ NearestNeighborFieldSearcher::NodeAndCalc::NodeAndCalc(search::streaming::Neares
{
}
-NearestNeighborFieldSearcher::NearestNeighborFieldSearcher(const FieldIdT& fid,
+NearestNeighborFieldSearcher::NearestNeighborFieldSearcher(FieldIdT fid,
search::attribute::DistanceMetric metric)
: FieldSearcher(fid),
_metric(metric),
diff --git a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.h b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.h
index 83f2c444e5a..ba39b91c677 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.h
@@ -38,7 +38,7 @@ private:
std::vector<NodeAndCalc> _calcs;
public:
- NearestNeighborFieldSearcher(const FieldIdT& fid,
+ NearestNeighborFieldSearcher(FieldIdT fid,
search::attribute::DistanceMetric metric);
~NearestNeighborFieldSearcher();
diff --git a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp
index f15290526d9..6a46e4604be 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp
@@ -34,10 +34,9 @@ bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef)
_words += countWords(fieldRef);
}
} else {
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
- if (fieldRef.size() >= qt.termLen()) {
- _words += matchTerm(fieldRef, qt);
+ for (auto qt : _qtl) {
+ if (fieldRef.size() >= qt->termLen()) {
+ _words += matchTerm(fieldRef, *qt);
} else {
_words += countWords(fieldRef);
}
@@ -49,7 +48,7 @@ bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef)
size_t StrChrFieldSearcher::shortestTerm() const
{
size_t mintsz(_qtl.front()->termLen());
- for(QueryTermList::const_iterator it=_qtl.begin()+1, mt=_qtl.end(); it != mt; it++) {
+ for (auto it=_qtl.begin()+1, mt=_qtl.end(); it != mt; it++) {
const QueryTerm & qt = **it;
mintsz = std::min(mintsz, qt.termLen());
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp
index 977602a691c..a7ad02fa9d9 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp
@@ -17,9 +17,8 @@ size_t
UTF8ExactStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
{
(void) mintsz;
- for (QueryTermList::iterator it = _qtl.begin(), mt = _qtl.end(); it != mt; ++it) {
- QueryTerm & qt = **it;
- matchTermExact(f, qt);
+ for (auto qt : _qtl) {
+ matchTermExact(f, *qt);
}
return 1;
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp
index 9aef99f9fa1..5809738456f 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp
@@ -20,8 +20,8 @@ UTF8FlexibleStringFieldSearcher::matchTerms(const FieldRef & f, const size_t min
{
(void) mintsz;
size_t words = 0;
- for (QueryTermList::iterator it = _qtl.begin(); it != _qtl.end(); ++it) {
- words = matchTerm(f, **it);
+ for (auto qt : _qtl) {
+ words = matchTerm(f, *qt);
}
return words;
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp
index 0d93009655c..e8ac87b836b 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp
@@ -29,15 +29,14 @@ UTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
for( ; n < e; ) {
if (!*n) { _zeroCount++; n++; }
n = tokenize(n, _buf->capacity(), fn, fl);
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
+ for (auto qt : _qtl) {
const cmptype_t * term;
- termsize_t tsz = qt.term(term);
- if ((tsz <= fl) && (prefix() || qt.isPrefix() || (tsz == fl))) {
+ termsize_t tsz = qt->term(term);
+ if ((tsz <= fl) && (prefix() || qt->isPrefix() || (tsz == fl))) {
const cmptype_t *tt=term, *et=term+tsz;
for (const cmptype_t *fnt=fn; (tt < et) && (*tt == *fnt); tt++, fnt++);
if (tt == et) {
- addHit(qt, words);
+ addHit(*qt, words);
}
}
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
index fd327d3a3df..adcf7a937c1 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
@@ -29,15 +29,14 @@ UTF8SubStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz)
const cmptype_t * fre = fe - mintsz;
termcount_t words(0);
for(words = 0; fn <= fre; ) {
- for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) {
- QueryTerm & qt = **it;
+ for (auto qt : _qtl) {
const cmptype_t * term;
- termsize_t tsz = qt.term(term);
+ termsize_t tsz = qt->term(term);
const cmptype_t *tt=term, *et=term+tsz, *fnt=fn;
for (; (tt < et) && (*tt == *fnt); tt++, fnt++);
if (tt == et) {
- addHit(qt, words);
+ addHit(*qt, words);
}
}
if ( ! Fast_UnicodeUtil::IsWordChar(*fn++) ) {
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp
index 9046c0063d5..89388c01354 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp
@@ -41,10 +41,9 @@ UTF8SubstringSnippetModifier::matchTerms(const FieldRef & f, const size_t mintsz
const cmptype_t * drend = dend - mintsz;
termcount_t words = 0;
for(; ditr <= drend; ) {
- for (QueryTermList::iterator itr = _qtl.begin(); itr != _qtl.end(); ++itr) {
- QueryTerm & qt = **itr;
+ for (auto qt : _qtl) {
const cmptype_t * term;
- termsize_t tsz = qt.term(term);
+ termsize_t tsz = qt->term(term);
const cmptype_t * titr = term;
const cmptype_t * tend = term + tsz;
@@ -58,7 +57,7 @@ UTF8SubstringSnippetModifier::matchTerms(const FieldRef & f, const size_t mintsz
// If we have overlapping matches only the first one will be considered.
insertSeparators(mbegin, mend);
}
- addHit(qt, words);
+ addHit(*qt, words);
}
}
if ( ! Fast_UnicodeUtil::IsWordChar(*ditr++) ) {