summaryrefslogtreecommitdiffstats
path: root/searchsummary/src/vespa/juniper/matchobject.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'searchsummary/src/vespa/juniper/matchobject.cpp')
-rw-r--r--searchsummary/src/vespa/juniper/matchobject.cpp117
1 files changed, 46 insertions, 71 deletions
diff --git a/searchsummary/src/vespa/juniper/matchobject.cpp b/searchsummary/src/vespa/juniper/matchobject.cpp
index 7bd0bead5cd..78f508f3ed0 100644
--- a/searchsummary/src/vespa/juniper/matchobject.cpp
+++ b/searchsummary/src/vespa/juniper/matchobject.cpp
@@ -1,8 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "query.h"
#include "matchobject.h"
-#include "juniperdebug.h"
#include "juniper_separators.h"
#include "result.h"
#include "charutil.h"
@@ -16,7 +14,7 @@ using namespace juniper::separators;
class traverser : public IQueryExprVisitor
{
public:
- traverser(MatchObject& mo) : _mo(mo) {}
+ explicit traverser(MatchObject& mo) noexcept : _mo(mo) {}
void VisitQueryNode(QueryNode*) override {
// We must not add this node to nonterminals before all children has been added!
@@ -24,13 +22,11 @@ public:
// until no more candidates...
}
- void RevisitQueryNode(QueryNode* n) override
- {
+ void RevisitQueryNode(QueryNode* n) override {
_mo.add_nonterm(n);
}
- void VisitQueryTerm(QueryTerm* t) override
- {
+ void VisitQueryTerm(QueryTerm* t) override {
if (t->rewriter && t->rewriter->ForDocument())
_mo.add_reduction_term(t, t->rewriter);
else
@@ -44,21 +40,18 @@ private:
class query_expander : public IQueryExprVisitor
{
public:
- query_expander(MatchObject& mo, uint32_t langid)
+ query_expander(MatchObject& mo, uint32_t langid) noexcept
: _caller(), _mo(mo), _langid(langid) {}
- void VisitQueryTerm(QueryTerm* orig) override
- {
- const char* nt = NULL;
+ void VisitQueryTerm(QueryTerm* orig) override {
+ const char* nt = nullptr;
size_t length;
- juniper::RewriteHandle* te = NULL;
+ juniper::RewriteHandle* te = nullptr;
bool reduction = false;
- if (orig->rewriter)
- {
+ if (orig->rewriter) {
// Check if expansions are necessary
- if (orig->rewriter->ForQuery())
- {
+ if (orig->rewriter->ForQuery()) {
te = orig->rewriter->Rewrite(_langid, orig->term());
if (te)
nt = orig->rewriter->NextTerm(te, length);
@@ -69,8 +62,7 @@ public:
// to a separate mapping
reduction = orig->rewriter->ForDocument();
}
- if (nt == NULL)
- {
+ if (nt == nullptr) {
QueryTerm* t = new QueryTerm(orig); // No matches found, just clone term..
if (!reduction)
_mo.add_queryterm(t);
@@ -81,9 +73,8 @@ public:
}
// Start expanding...
std::vector<QueryTerm*> newterms;
- while (nt != NULL)
- {
- QueryTerm* nqt = new QueryTerm(nt, length, -1);
+ while (nt != nullptr) {
+ QueryTerm* nqt = new QueryTerm(vespalib::stringref(nt, length), -1, 100);
// Copy options but do not apply juniper stem match for expanded terms
nqt->_options = orig->_options | X_EXACT;
if (!reduction)
@@ -93,8 +84,7 @@ public:
newterms.push_back(nqt);
nt = orig->rewriter->NextTerm(te, length);
}
- if (newterms.size() == 1)
- {
+ if (newterms.size() == 1) {
update(newterms.front());
return;
}
@@ -102,10 +92,8 @@ public:
QueryNode* qn = new QueryNode(newterms.size(), orig->_weight, orig->_weight);
// preserve options for nodes too, but make the node an OR..
qn->_options = orig->_options | X_OR;
- for (std::vector<QueryTerm*>::iterator it = newterms.begin();
- it != newterms.end(); ++it)
- {
- qn->AddChild(*it);
+ for (QueryTerm * newTerm : newterms) {
+ qn->AddChild(newTerm);
}
update(qn);
_mo.add_nonterm(qn);
@@ -128,12 +116,11 @@ public:
}
QueryExpr* NewQuery() {
- if (_caller.empty()) return NULL;
+ if (_caller.empty()) return nullptr;
return _caller.top();
}
private:
- void update(QueryExpr* e)
- {
+ void update(QueryExpr* e) {
if (!_caller.empty())
_caller.top()->AddChild(e);
}
@@ -161,7 +148,7 @@ MatchObject::MatchObject(QueryExpr* query, bool has_reductions) :
MatchObject::MatchObject(QueryExpr* query, bool has_reductions, uint32_t langid) :
- _query(NULL),
+ _query(nullptr),
_qt(),
_nonterms(),
_match_overlap(false),
@@ -178,8 +165,7 @@ MatchObject::MatchObject(QueryExpr* query, bool has_reductions, uint32_t langid
if (LOG_WOULD_LOG(debug)) {
std::string s;
_query->Dump(s);
- LOG(debug, "juniper::MatchObject(language id %d): modified stack: %s",
- langid, s.c_str());
+ LOG(debug, "juniper::MatchObject(language id %d): modified stack: %s", langid, s.c_str());
}
_max_arity = _query->MaxArity();
}
@@ -199,8 +185,7 @@ bool MatchObject::Match(MatchObject::iterator& mi, Token& token, unsigned& optio
if (!q) return false;
options = 0;
q->total_match_cnt++;
- if (q->ucs4_len == static_cast<size_t>(token.curlen))
- {
+ if (q->ucs4_len == static_cast<size_t>(token.curlen)) {
options |= X_EXACT;
q->exact_match_cnt++;
}
@@ -221,8 +206,7 @@ void MatchObject::add_queryterm(QueryTerm* nt)
_qt.push_back(nt);
nt->idx = _qt.size() - 1;
- _qt_byname.Insert(
- *(reinterpret_cast<const queryterm_hashtable::keytype*>(nt->ucs4_term())), nt);
+ _qt_byname.Insert(*(reinterpret_cast<const queryterm_hashtable::keytype*>(nt->ucs4_term())), nt);
LOG(debug, "MatchObject: adding term '%s'", nt->term());
}
@@ -242,25 +226,24 @@ void MatchObject::add_reduction_term(QueryTerm* nt, juniper::Rewriter* rw)
match_iterator::match_iterator(MatchObject* mo, Result* rhandle) :
- _table(mo->_qt_byname), _el(NULL), _rhandle(rhandle),
- _reductions(mo->HasReductions()), _reduce_matches(NULL), _reduce_matches_it(),
+ _table(mo->_qt_byname), _el(nullptr), _rhandle(rhandle),
+ _reductions(mo->HasReductions()), _reduce_matches(nullptr), _reduce_matches_it(),
_mo(mo), _len(0), _stem_min(rhandle->StemMin()), _stemext(rhandle->StemExt()),
- _term(NULL)
+ _term(nullptr)
{}
-QueryTerm* match_iterator::first()
+QueryTerm*
+match_iterator::first()
{
- for (; _el != NULL; _el = _el->GetNext())
- {
+ for (; _el != nullptr; _el = _el->GetNext()) {
QueryTerm* q = _el->GetItem();
// If exact match is desired by this subexpression,
// only have effect if exact match
- if (q->Exact() && _len > q->len) continue;
+ if (q->Exact() && _len > q->len()) continue;
- if (q->is_wildcard())
- {
+ if (q->is_wildcard()) {
if (fast::util::wildcard_match(_term, q->ucs4_term()) == false) continue;
return q;
}
@@ -268,30 +251,28 @@ QueryTerm* match_iterator::first()
if (_len < q->ucs4_len) continue;
// allow prefix match iff prefix query term or
// rest < _stem_extend and length > stem_min
- if (!q->is_prefix())
- {
+ if (!q->is_prefix()) {
size_t stem_extend = (q->ucs4_len <= _stem_min ? 0 : _stemext);
if (_len > q->ucs4_len + stem_extend) continue;
}
if (juniper::strncmp(_term, q->ucs4_term(), q->ucs4_len) != 0) continue;
return q;
}
- return NULL;
+ return nullptr;
}
QueryTerm* match_iterator::next_reduce_match()
{
- if (!_reduce_matches) return NULL;
- if (_reduce_matches_it != _reduce_matches->end())
- {
+ if (!_reduce_matches) return nullptr;
+ if (_reduce_matches_it != _reduce_matches->end()) {
QueryTerm* t = *_reduce_matches_it;
++_reduce_matches_it;
return t;
}
delete _reduce_matches;
- _reduce_matches = NULL;
- return NULL;
+ _reduce_matches = nullptr;
+ return nullptr;
}
@@ -321,7 +302,7 @@ QueryTerm* match_iterator::first_match(Token& token)
token.curlen = term - token.token;
LOG(debug, "recurse A to match token %u..%u len %d", token.token[0], token.token[token.curlen-1], token.curlen);
qt = this->first_match(token);
- if (qt != NULL) {
+ if (qt != nullptr) {
return qt;
}
token.token = ++term; // skip SPACE
@@ -348,7 +329,7 @@ QueryTerm* match_iterator::first_match(Token& token)
queryterm_hashtable::keytype keyval = termval;
if (LOG_WOULD_LOG(spam)) {
char utf8term[1024];
- Fast_UnicodeUtil::utf8ncopy(utf8term, term, 1024, (term != NULL ? len : 0));
+ Fast_UnicodeUtil::utf8ncopy(utf8term, term, 1024, (term != nullptr ? len : 0));
LOG(spam, "term %s, len %ld, keyval 0x%x termval 0x%x",
utf8term, len, keyval, termval);
}
@@ -356,22 +337,18 @@ QueryTerm* match_iterator::first_match(Token& token)
_len = len;
QueryTerm* rtrn = first();
- if (rtrn == 0)
- {
+ if (rtrn == 0) {
_el = _table.FindRef('*');
- if ((rtrn = first()) == 0)
- {
+ if ((rtrn = first()) == 0) {
_el = _table.FindRef('?');
rtrn = first();
}
}
- if (_reductions)
- {
+ if (_reductions) {
_reduce_matches = _mo->_reduce_matchers.match(_rhandle->_langid,
&_rhandle->_docsum[token.bytepos],
token.bytelen);
- if (_reduce_matches)
- {
+ if (_reduce_matches) {
_reduce_matches_it = _reduce_matches->begin();
// Find the first reduce match only if no other match was found
@@ -388,26 +365,24 @@ QueryTerm* match_iterator::first_match(Token& token)
QueryTerm* match_iterator::current()
{
if (_el) return _el->GetItem();
- if (!_reduce_matches) return NULL;
- if (_reduce_matches_it != _reduce_matches->end())
- {
+ if (!_reduce_matches) return nullptr;
+ if (_reduce_matches_it != _reduce_matches->end()) {
QueryTerm* t = *_reduce_matches_it;
return t;
}
delete _reduce_matches;
- return NULL;
+ return nullptr;
}
QueryTerm* match_iterator::next()
{
- if (_el)
- {
+ if (_el) {
_el = _el->GetNext();
return first();
}
else if (_reduce_matches)
return next_reduce_match();
- return NULL;
+ return nullptr;
}