diff options
Diffstat (limited to 'searchsummary')
-rw-r--r-- | searchsummary/src/tests/juniper/auxTest.cpp | 6 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/matchobject.cpp | 117 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/matchobject.h | 5 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/querynode.cpp | 90 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/querynode.h | 61 | ||||
-rw-r--r-- | searchsummary/src/vespa/juniper/queryvisitor.cpp | 2 |
6 files changed, 116 insertions, 165 deletions
diff --git a/searchsummary/src/tests/juniper/auxTest.cpp b/searchsummary/src/tests/juniper/auxTest.cpp index b5c3bb91d05..439296037d3 100644 --- a/searchsummary/src/tests/juniper/auxTest.cpp +++ b/searchsummary/src/tests/juniper/auxTest.cpp @@ -594,7 +594,7 @@ void AuxTest::TestJuniperStack() // Stack simplification tests QueryExpr* q = new QueryNode(1, 0, 0); QueryExpr* q1 = new QueryNode(1, 0, 0); - QueryExpr* q2 = new QueryTerm("Hepp", 4, 0); + QueryExpr* q2 = new QueryTerm("Hepp", 0, 100); q->AddChild(q1); q1->AddChild(q2); @@ -653,7 +653,7 @@ struct QB { QB(size_t numTerms) : q(new QueryNode(numTerms, 0, 0)) {} QB(QB & rhs) : q(std::move(rhs.q)) { } QB & add(const char * t, bool st = true) { - QueryTerm * qt = new QueryTerm(t, strlen(t), 0); + QueryTerm * qt = new QueryTerm(t, 0, 100); if (st) qt->_options |= X_SPECIALTOKEN; q->AddChild(qt); return *this; @@ -671,7 +671,7 @@ struct Ctx { }; Ctx::Ctx(const std::string & text_, QB & qb_) : text(text_), qb(qb_), str(qb.q.get()), wf(), tp(text), jt(&wf, text.c_str(), text.size(), &tp, &str) { jt.scan(); } -Ctx::~Ctx() { } +Ctx::~Ctx() = default; void AuxTest::TestSpecialTokenRegistry() diff --git a/searchsummary/src/vespa/juniper/matchobject.cpp b/searchsummary/src/vespa/juniper/matchobject.cpp index 7bd0bead5cd..78f508f3ed0 100644 --- a/searchsummary/src/vespa/juniper/matchobject.cpp +++ b/searchsummary/src/vespa/juniper/matchobject.cpp @@ -1,8 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "query.h" #include "matchobject.h" -#include "juniperdebug.h" #include "juniper_separators.h" #include "result.h" #include "charutil.h" @@ -16,7 +14,7 @@ using namespace juniper::separators; class traverser : public IQueryExprVisitor { public: - traverser(MatchObject& mo) : _mo(mo) {} + explicit traverser(MatchObject& mo) noexcept : _mo(mo) {} void VisitQueryNode(QueryNode*) override { // We must not add this node to nonterminals before all children has been added! @@ -24,13 +22,11 @@ public: // until no more candidates... } - void RevisitQueryNode(QueryNode* n) override - { + void RevisitQueryNode(QueryNode* n) override { _mo.add_nonterm(n); } - void VisitQueryTerm(QueryTerm* t) override - { + void VisitQueryTerm(QueryTerm* t) override { if (t->rewriter && t->rewriter->ForDocument()) _mo.add_reduction_term(t, t->rewriter); else @@ -44,21 +40,18 @@ private: class query_expander : public IQueryExprVisitor { public: - query_expander(MatchObject& mo, uint32_t langid) + query_expander(MatchObject& mo, uint32_t langid) noexcept : _caller(), _mo(mo), _langid(langid) {} - void VisitQueryTerm(QueryTerm* orig) override - { - const char* nt = NULL; + void VisitQueryTerm(QueryTerm* orig) override { + const char* nt = nullptr; size_t length; - juniper::RewriteHandle* te = NULL; + juniper::RewriteHandle* te = nullptr; bool reduction = false; - if (orig->rewriter) - { + if (orig->rewriter) { // Check if expansions are necessary - if (orig->rewriter->ForQuery()) - { + if (orig->rewriter->ForQuery()) { te = orig->rewriter->Rewrite(_langid, orig->term()); if (te) nt = orig->rewriter->NextTerm(te, length); @@ -69,8 +62,7 @@ public: // to a separate mapping reduction = orig->rewriter->ForDocument(); } - if (nt == NULL) - { + if (nt == nullptr) { QueryTerm* t = new QueryTerm(orig); // No matches found, just clone term.. if (!reduction) _mo.add_queryterm(t); @@ -81,9 +73,8 @@ public: } // Start expanding... std::vector<QueryTerm*> newterms; - while (nt != NULL) - { - QueryTerm* nqt = new QueryTerm(nt, length, -1); + while (nt != nullptr) { + QueryTerm* nqt = new QueryTerm(vespalib::stringref(nt, length), -1, 100); // Copy options but do not apply juniper stem match for expanded terms nqt->_options = orig->_options | X_EXACT; if (!reduction) @@ -93,8 +84,7 @@ public: newterms.push_back(nqt); nt = orig->rewriter->NextTerm(te, length); } - if (newterms.size() == 1) - { + if (newterms.size() == 1) { update(newterms.front()); return; } @@ -102,10 +92,8 @@ public: QueryNode* qn = new QueryNode(newterms.size(), orig->_weight, orig->_weight); // preserve options for nodes too, but make the node an OR.. qn->_options = orig->_options | X_OR; - for (std::vector<QueryTerm*>::iterator it = newterms.begin(); - it != newterms.end(); ++it) - { - qn->AddChild(*it); + for (QueryTerm * newTerm : newterms) { + qn->AddChild(newTerm); } update(qn); _mo.add_nonterm(qn); @@ -128,12 +116,11 @@ public: } QueryExpr* NewQuery() { - if (_caller.empty()) return NULL; + if (_caller.empty()) return nullptr; return _caller.top(); } private: - void update(QueryExpr* e) - { + void update(QueryExpr* e) { if (!_caller.empty()) _caller.top()->AddChild(e); } @@ -161,7 +148,7 @@ MatchObject::MatchObject(QueryExpr* query, bool has_reductions) : MatchObject::MatchObject(QueryExpr* query, bool has_reductions, uint32_t langid) : - _query(NULL), + _query(nullptr), _qt(), _nonterms(), _match_overlap(false), @@ -178,8 +165,7 @@ MatchObject::MatchObject(QueryExpr* query, bool has_reductions, uint32_t langid if (LOG_WOULD_LOG(debug)) { std::string s; _query->Dump(s); - LOG(debug, "juniper::MatchObject(language id %d): modified stack: %s", - langid, s.c_str()); + LOG(debug, "juniper::MatchObject(language id %d): modified stack: %s", langid, s.c_str()); } _max_arity = _query->MaxArity(); } @@ -199,8 +185,7 @@ bool MatchObject::Match(MatchObject::iterator& mi, Token& token, unsigned& optio if (!q) return false; options = 0; q->total_match_cnt++; - if (q->ucs4_len == static_cast<size_t>(token.curlen)) - { + if (q->ucs4_len == static_cast<size_t>(token.curlen)) { options |= X_EXACT; q->exact_match_cnt++; } @@ -221,8 +206,7 @@ void MatchObject::add_queryterm(QueryTerm* nt) _qt.push_back(nt); nt->idx = _qt.size() - 1; - _qt_byname.Insert( - *(reinterpret_cast<const queryterm_hashtable::keytype*>(nt->ucs4_term())), nt); + _qt_byname.Insert(*(reinterpret_cast<const queryterm_hashtable::keytype*>(nt->ucs4_term())), nt); LOG(debug, "MatchObject: adding term '%s'", nt->term()); } @@ -242,25 +226,24 @@ void MatchObject::add_reduction_term(QueryTerm* nt, juniper::Rewriter* rw) match_iterator::match_iterator(MatchObject* mo, Result* rhandle) : - _table(mo->_qt_byname), _el(NULL), _rhandle(rhandle), - _reductions(mo->HasReductions()), _reduce_matches(NULL), _reduce_matches_it(), + _table(mo->_qt_byname), _el(nullptr), _rhandle(rhandle), + _reductions(mo->HasReductions()), _reduce_matches(nullptr), _reduce_matches_it(), _mo(mo), _len(0), _stem_min(rhandle->StemMin()), _stemext(rhandle->StemExt()), - _term(NULL) + _term(nullptr) {} -QueryTerm* match_iterator::first() +QueryTerm* +match_iterator::first() { - for (; _el != NULL; _el = _el->GetNext()) - { + for (; _el != nullptr; _el = _el->GetNext()) { QueryTerm* q = _el->GetItem(); // If exact match is desired by this subexpression, // only have effect if exact match - if (q->Exact() && _len > q->len) continue; + if (q->Exact() && _len > q->len()) continue; - if (q->is_wildcard()) - { + if (q->is_wildcard()) { if (fast::util::wildcard_match(_term, q->ucs4_term()) == false) continue; return q; } @@ -268,30 +251,28 @@ QueryTerm* match_iterator::first() if (_len < q->ucs4_len) continue; // allow prefix match iff prefix query term or // rest < _stem_extend and length > stem_min - if (!q->is_prefix()) - { + if (!q->is_prefix()) { size_t stem_extend = (q->ucs4_len <= _stem_min ? 0 : _stemext); if (_len > q->ucs4_len + stem_extend) continue; } if (juniper::strncmp(_term, q->ucs4_term(), q->ucs4_len) != 0) continue; return q; } - return NULL; + return nullptr; } QueryTerm* match_iterator::next_reduce_match() { - if (!_reduce_matches) return NULL; - if (_reduce_matches_it != _reduce_matches->end()) - { + if (!_reduce_matches) return nullptr; + if (_reduce_matches_it != _reduce_matches->end()) { QueryTerm* t = *_reduce_matches_it; ++_reduce_matches_it; return t; } delete _reduce_matches; - _reduce_matches = NULL; - return NULL; + _reduce_matches = nullptr; + return nullptr; } @@ -321,7 +302,7 @@ QueryTerm* match_iterator::first_match(Token& token) token.curlen = term - token.token; LOG(debug, "recurse A to match token %u..%u len %d", token.token[0], token.token[token.curlen-1], token.curlen); qt = this->first_match(token); - if (qt != NULL) { + if (qt != nullptr) { return qt; } token.token = ++term; // skip SPACE @@ -348,7 +329,7 @@ QueryTerm* match_iterator::first_match(Token& token) queryterm_hashtable::keytype keyval = termval; if (LOG_WOULD_LOG(spam)) { char utf8term[1024]; - Fast_UnicodeUtil::utf8ncopy(utf8term, term, 1024, (term != NULL ? len : 0)); + Fast_UnicodeUtil::utf8ncopy(utf8term, term, 1024, (term != nullptr ? len : 0)); LOG(spam, "term %s, len %ld, keyval 0x%x termval 0x%x", utf8term, len, keyval, termval); } @@ -356,22 +337,18 @@ QueryTerm* match_iterator::first_match(Token& token) _len = len; QueryTerm* rtrn = first(); - if (rtrn == 0) - { + if (rtrn == 0) { _el = _table.FindRef('*'); - if ((rtrn = first()) == 0) - { + if ((rtrn = first()) == 0) { _el = _table.FindRef('?'); rtrn = first(); } } - if (_reductions) - { + if (_reductions) { _reduce_matches = _mo->_reduce_matchers.match(_rhandle->_langid, &_rhandle->_docsum[token.bytepos], token.bytelen); - if (_reduce_matches) - { + if (_reduce_matches) { _reduce_matches_it = _reduce_matches->begin(); // Find the first reduce match only if no other match was found @@ -388,26 +365,24 @@ QueryTerm* match_iterator::first_match(Token& token) QueryTerm* match_iterator::current() { if (_el) return _el->GetItem(); - if (!_reduce_matches) return NULL; - if (_reduce_matches_it != _reduce_matches->end()) - { + if (!_reduce_matches) return nullptr; + if (_reduce_matches_it != _reduce_matches->end()) { QueryTerm* t = *_reduce_matches_it; return t; } delete _reduce_matches; - return NULL; + return nullptr; } QueryTerm* match_iterator::next() { - if (_el) - { + if (_el) { _el = _el->GetNext(); return first(); } else if (_reduce_matches) return next_reduce_match(); - return NULL; + return nullptr; } diff --git a/searchsummary/src/vespa/juniper/matchobject.h b/searchsummary/src/vespa/juniper/matchobject.h index d31e071d688..8289d6275ed 100644 --- a/searchsummary/src/vespa/juniper/matchobject.h +++ b/searchsummary/src/vespa/juniper/matchobject.h @@ -17,9 +17,8 @@ using Token = ITokenProcessor::Token; // struct QueryTermLengthComparator { - inline bool operator()(QueryTerm* m1, QueryTerm* m2) - { - return m1->len <= m2->len; + bool operator()(const QueryTerm* m1, const QueryTerm* m2) { + return m1->len() <= m2->len(); } }; diff --git a/searchsummary/src/vespa/juniper/querynode.cpp b/searchsummary/src/vespa/juniper/querynode.cpp index a52f8d07c80..b805c12e86f 100644 --- a/searchsummary/src/vespa/juniper/querynode.cpp +++ b/searchsummary/src/vespa/juniper/querynode.cpp @@ -12,65 +12,67 @@ LOG_SETUP(".juniper.querynode"); * in Matcher.h */ -QueryExpr::QueryExpr(int weight, int arity) : - _options(0), _weight(weight), _arity(arity), _parent(nullptr), _childno(0) +QueryExpr::QueryExpr(int weight, int arity) + : _parent(nullptr), + _options(0), + _weight(weight), + _arity(arity), + _childno(0) { } -QueryExpr::QueryExpr(QueryExpr* e) : - _options(e->_options), - _weight(e->_weight), - _arity(e->_arity), - _parent(nullptr), - _childno(0) +QueryExpr::QueryExpr(QueryExpr* e) + : _parent(nullptr), + _options(e->_options), + _weight(e->_weight), + _arity(e->_arity), + _childno(0) { } QueryExpr::~QueryExpr() = default; -QueryTerm::QueryTerm(const char* t, int length, int ix, int wgt) - : QueryExpr(wgt, 0), len(length), +QueryTerm::QueryTerm(vespalib::stringref term, int ix, int wgt) + : QueryExpr(wgt, 0), ucs4_len(0), - total_match_cnt(0), exact_match_cnt(0), - idx(ix), rewriter(nullptr), reduce_matcher(nullptr), _rep(nullptr), - _ucs4_term(nullptr) + total_match_cnt(0), + exact_match_cnt(0), + idx(ix), + rewriter(nullptr), + reduce_matcher(nullptr), + _term(term), + _ucs4_term(new ucs4_t[_term.size()+1]) { - if (len <= 0) - len = strlen(t); - _rep = new char[len+1]; - strncpy(_rep, t, len); _rep[len] = '\0'; - _ucs4_term = new ucs4_t[len+1]; - Fast_UnicodeUtil::ucs4copy(_ucs4_term, _rep); + Fast_UnicodeUtil::ucs4copy(_ucs4_term, _term.c_str()); ucs4_len = Fast_UnicodeUtil::ucs4strlen(_ucs4_term); } QueryTerm::QueryTerm(QueryTerm* t) - : QueryExpr(t), len(t->len), + : QueryExpr(t), ucs4_len(0), total_match_cnt(0), exact_match_cnt(0), - idx(-1), rewriter(nullptr), reduce_matcher(nullptr), _rep(nullptr), - _ucs4_term(nullptr) + idx(-1), rewriter(nullptr), reduce_matcher(nullptr), + _term(t->_term), + _ucs4_term(new ucs4_t[_term.size()+1]) { - _rep = new char[len+1]; - strncpy(_rep, t->term(), len); _rep[len] = '\0'; - _ucs4_term = new ucs4_t[len+1]; - Fast_UnicodeUtil::ucs4copy(_ucs4_term, _rep); + Fast_UnicodeUtil::ucs4copy(_ucs4_term, _term.c_str()); ucs4_len = Fast_UnicodeUtil::ucs4strlen(_ucs4_term); } -QueryTerm::~QueryTerm() -{ - delete[] _rep; +QueryTerm::~QueryTerm() { delete[] _ucs4_term; } -QueryNode::QueryNode(int arity, int threshold, int weight) : - QueryExpr(weight, arity), _threshold(threshold), _limit(0), - _children(nullptr), - _nchild(0), _node_idx(-1) +QueryNode::QueryNode(int arity, int threshold, int weight) + : QueryExpr(weight, arity), + _children(nullptr), + _threshold(threshold), + _limit(0), + _nchild(0), + _node_idx(-1) { assert(arity > 0); _children = new QueryExpr*[arity]; @@ -79,9 +81,9 @@ QueryNode::QueryNode(int arity, int threshold, int weight) : QueryNode::QueryNode(QueryNode* n) : QueryExpr(n), + _children(nullptr), _threshold(n->_threshold), _limit(n->_limit), - _children(nullptr), _nchild(0), _node_idx(n->_node_idx) { @@ -115,12 +117,10 @@ QueryTerm::AddChild(QueryExpr*) QueryNode* -QueryNode::AddChild(QueryExpr* child) -{ - if (!child) +QueryNode::AddChild(QueryExpr* child) { + if (!child) { _arity--; - else - { + } else { child->_parent = this; child->_childno = _nchild; _children[_nchild++] = child; @@ -223,18 +223,6 @@ QueryNode::MaxArity() return max_arity; } - -bool -QueryNode::AcceptsInitially(QueryExpr* n) -{ - assert(n->_parent == this); -// return (!(_options & X_ORDERED)) || n->_childno == 0; - // currently implicitly add all terms even for ordered.. - (void) n; - return true; -} - - /** Modify the given stack by eliminating unnecessary internal nodes * with arity 1 or non-terms with arity 0 */ diff --git a/searchsummary/src/vespa/juniper/querynode.h b/searchsummary/src/vespa/juniper/querynode.h index cf8f7eb119f..b80831c302d 100644 --- a/searchsummary/src/vespa/juniper/querynode.h +++ b/searchsummary/src/vespa/juniper/querynode.h @@ -41,7 +41,7 @@ using querynode_vector = std::vector<QueryNode*>; class IQueryExprVisitor { public: - virtual ~IQueryExprVisitor() {} + virtual ~IQueryExprVisitor() = default; // Visit before visiting subnodes virtual void VisitQueryNode(QueryNode*) = 0; @@ -56,7 +56,9 @@ public: class QueryExpr { public: - explicit QueryExpr(int weight, int arity); + QueryExpr(const QueryExpr &) = delete; + QueryExpr &operator=(const QueryExpr &) = delete; + QueryExpr(int weight, int arity); explicit QueryExpr(QueryExpr* e); /** Add a child to the end of the list of children for this node. @@ -79,20 +81,14 @@ public: virtual int MaxArity() { return 0; } - inline bool HasConstraints() { return _options & X_CONSTR; } - inline bool UsesValid() { return _options & X_CHKVAL; } - inline bool HasLimit() { return _options & X_LIMIT; } - inline bool Exact() { return _options & X_EXACT; } + bool HasLimit() const noexcept { return _options & X_LIMIT; } + bool Exact() const noexcept { return _options & X_EXACT; } - int _options; // Applied options (bitmap) for this node - int _weight; // Weight of this term by parent - if 0: weight is sum of children - int _arity; // Arity of this query subexpression (may get decremented..) - QueryNode* _parent; // Pointer to parent or NULL if this is the root of the query - int _childno; // Position number within parent's children (0 if no parents) - -private: - QueryExpr(QueryExpr &); - QueryExpr &operator=(QueryExpr &); + QueryNode* _parent; // Pointer to parent or NULL if this is the root of the query + int _options; // Applied options (bitmap) for this node + int _weight; // Weight of this term by parent - if 0: weight is sum of children + int _arity; // Arity of this query subexpression (may get decremented..) + int _childno; // Position number within parent's children (0 if no parents) }; @@ -123,16 +119,11 @@ public: void Accept(IQueryExprVisitor& v) override; - // return true if a match for n should lead to creation of a new candidate node - // corresponding to this query tree node: - bool AcceptsInitially(QueryExpr* n); - - int _threshold; // Threshold for this expression node to be considered complete - int _limit; // NEAR/WITHIN limit if X_LIMIT option set - /* Pointer to an array of length _arity of pointers to * subqueries associated with this query */ QueryExpr** _children; + int _threshold; // Threshold for this expression node to be considered complete + int _limit; // NEAR/WITHIN limit if X_LIMIT option set int _nchild; // end pointer (fill level) of _children int _node_idx; // Index (position) of this nonterminal within table of all nonterminals }; @@ -143,9 +134,11 @@ public: class QueryTerm : public QueryExpr { public: - QueryTerm(const char* t, int length, int ix, int weight = 100); - explicit QueryTerm(QueryTerm* const); - ~QueryTerm(); + QueryTerm(const QueryTerm &) = delete; + QueryTerm &operator=(const QueryTerm &) = delete; + QueryTerm(vespalib::stringref, int ix, int weight); + explicit QueryTerm(QueryTerm*); + ~QueryTerm() override; int Limit() override; QueryNode* AddChild(QueryExpr* child) override; void Dump(std::string&) override; @@ -155,13 +148,12 @@ public: bool Complex() override { return false; } void Accept(IQueryExprVisitor& v) override; - inline const char* term() { return _rep; } - inline const ucs4_t* ucs4_term() { return _ucs4_term; } - inline bool is_prefix() { return _options & X_PREFIX; } - inline bool is_wildcard() { return _options & X_WILD; } - inline bool isSpecialToken() { return _options & X_SPECIALTOKEN; } - - size_t len; + const char* term() const noexcept { return _term.c_str(); } + const ucs4_t* ucs4_term() const noexcept { return _ucs4_term; } + bool is_prefix() const noexcept { return _options & X_PREFIX; } + bool is_wildcard() const noexcept { return _options & X_WILD; } + bool isSpecialToken() const noexcept { return _options & X_SPECIALTOKEN; } + size_t len() const noexcept { return _term.size(); } size_t ucs4_len; int total_match_cnt; int exact_match_cnt; @@ -169,11 +161,8 @@ public: juniper::Rewriter* rewriter; juniper::string_matcher* reduce_matcher; private: - char* _rep; + vespalib::string _term; ucs4_t* _ucs4_term; - - QueryTerm(QueryTerm &); - QueryTerm &operator=(QueryTerm &); }; diff --git a/searchsummary/src/vespa/juniper/queryvisitor.cpp b/searchsummary/src/vespa/juniper/queryvisitor.cpp index 6192cb985b9..6a4631bc3ae 100644 --- a/searchsummary/src/vespa/juniper/queryvisitor.cpp +++ b/searchsummary/src/vespa/juniper/queryvisitor.cpp @@ -251,7 +251,7 @@ QueryVisitor::visitKeyword(const QueryItem* item, vespalib::stringref keyword, b ind.c_str(), (!ind.empty() ? ":" : ""), s.c_str()); } - auto * term = new QueryTerm(keyword.data(), keyword.size(), _term_index++, item->get_weight()); + auto * term = new QueryTerm(keyword, _term_index++, item->get_weight()); if (prefix) { bool is_wild = std::any_of(keyword.begin(), keyword.end(), [](char c) {return (c == '*') || (c == '?'); }); term->_options |= (is_wild ? X_WILD : X_PREFIX); |