diff options
author | Henning Baldersheim <balder@oath.com> | 2018-06-05 12:26:35 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@oath.com> | 2018-06-05 15:46:55 +0200 |
commit | a010d42a3d2397a744362a22e673c63fd49e0ae3 (patch) | |
tree | fc7a7c4ba463f00bfb423996204fd92239d0a82c /searchlib | |
parent | 4cd15286b8c0a1015549a69de6bec665d828392a (diff) |
Add SameElementQueryNode
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/query/query.cpp | 220 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/query/query.h | 11 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/query/querynode.cpp | 84 |
3 files changed, 183 insertions, 132 deletions
diff --git a/searchlib/src/vespa/searchlib/query/query.cpp b/searchlib/src/vespa/searchlib/query/query.cpp index 46f50d926f5..1cad8a8e41c 100644 --- a/searchlib/src/vespa/searchlib/query/query.cpp +++ b/searchlib/src/vespa/searchlib/query/query.cpp @@ -28,85 +28,77 @@ const HitList & QueryConnector::evaluateHits(HitList & hl) const void QueryConnector::reset() { - for(iterator it=begin(), mt=end(); it != mt; it++) { - QueryNode & qn = **it; - qn.reset(); + for(const auto & node : *this) { + node->reset(); } } void QueryConnector::getLeafs(QueryTermList & tl) { - for(iterator it=begin(), mt=end(); it != mt; it++) { - QueryNode & qn = **it; - qn.getLeafs(tl); - } + for(const auto & node : *this) { + node->getLeafs(tl); + } } void QueryConnector::getLeafs(ConstQueryTermList & tl) const { - for(const_iterator it=begin(), mt=end(); it != mt; it++) { - const QueryNode & qn = **it; - qn.getLeafs(tl); - } + for(const auto & node : *this) { + node->getLeafs(tl); + } } void QueryConnector::getPhrases(QueryNodeRefList & tl) { - for(iterator it=begin(), mt=end(); it != mt; it++) { - QueryNode & qn = **it; - qn.getPhrases(tl); - } + for(const auto & node : *this) { + node->getPhrases(tl); + } } void QueryConnector::getPhrases(ConstQueryNodeRefList & tl) const { - for(const_iterator it=begin(), mt=end(); it != mt; it++) { - const QueryNode & qn = **it; - qn.getPhrases(tl); - } + for(const auto & node : *this) { + node->getPhrases(tl); + } } size_t QueryConnector::depth() const { - size_t d(0); - for(const_iterator it=begin(), mt=end(); (it!=mt); it++) { - const QueryNode & qn = **it; - size_t t = qn.depth(); - if (t > d) - d = t; - } - return d+1; + size_t d(0); + for(const auto & node : *this) { + size_t t = node->depth(); + if (t > d) + d = t; + } + return d+1; } size_t QueryConnector::width() const { size_t w(0); - for(const_iterator it=begin(), mt=end(); (it!=mt); it++) { - const QueryNode & qn = **it; - w += qn.width(); + for(const auto & node : *this) { + w += node->width(); } return w; } -QueryConnector * +std::unique_ptr<QueryConnector> QueryConnector::create(ParseItem::ItemType type) { switch (type) { - case search::ParseItem::ITEM_AND: return new AndQueryNode(); - case search::ParseItem::ITEM_OR: return new OrQueryNode(); - case search::ParseItem::ITEM_WEAK_AND: return new OrQueryNode(); - case search::ParseItem::ITEM_EQUIV: return new EquivQueryNode(); - case search::ParseItem::ITEM_WEIGHTED_SET: return new EquivQueryNode(); - case search::ParseItem::ITEM_DOT_PRODUCT: return new OrQueryNode(); - case search::ParseItem::ITEM_WAND: return new OrQueryNode(); - case search::ParseItem::ITEM_NOT: return new AndNotQueryNode(); - case search::ParseItem::ITEM_PHRASE: return new PhraseQueryNode(); - case search::ParseItem::ITEM_SAME_ELEMENT: return new AndQueryNode(); // TODO: This needs a same element operation to work for streaming search too. - case search::ParseItem::ITEM_NEAR: return new NearQueryNode(); - case search::ParseItem::ITEM_ONEAR: return new ONearQueryNode(); - default: - return nullptr; + case search::ParseItem::ITEM_AND: return std::make_unique<AndQueryNode>(); + case search::ParseItem::ITEM_OR: return std::make_unique<OrQueryNode>(); + case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>(); + case search::ParseItem::ITEM_EQUIV: return std::make_unique<EquivQueryNode>(); + case search::ParseItem::ITEM_WEIGHTED_SET: return std::make_unique<EquivQueryNode>(); + case search::ParseItem::ITEM_DOT_PRODUCT: return std::make_unique<OrQueryNode>(); + case search::ParseItem::ITEM_WAND: return std::make_unique<OrQueryNode>(); + case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>(); + case search::ParseItem::ITEM_PHRASE: return std::make_unique<PhraseQueryNode>(); + case search::ParseItem::ITEM_SAME_ELEMENT: return std::make_unique<SameElementQueryNode>(); + case search::ParseItem::ITEM_NEAR: return std::make_unique<NearQueryNode>(); + case search::ParseItem::ITEM_ONEAR: return std::make_unique<ONearQueryNode>(); + default: return nullptr; } } @@ -153,13 +145,63 @@ bool EquivQueryNode::evaluate() const return OrQueryNode::evaluate(); } +bool SameElementQueryNode::evaluate() const { + HitList hl; + return evaluateHits(hl).empty(); +} + +const HitList & +SameElementQueryNode::evaluateHits(HitList & hl) const +{ + hl.clear(); + if ( !AndQueryNode::evaluate()) return hl; + + HitList tmpHL; + unsigned int numFields = size(); + unsigned int currMatchCount = 0; + std::vector<unsigned int> indexVector(numFields, 0); + auto curr = static_cast<const QueryTerm *> (&(*(*this)[currMatchCount])); + bool exhausted( curr->evaluateHits(tmpHL).empty()); + for (; !exhausted; ) { + auto next = static_cast<const QueryTerm &>(*(*this)[currMatchCount+1]); + unsigned int & currIndex = indexVector[currMatchCount]; + unsigned int & nextIndex = indexVector[currMatchCount+1]; + + const auto & currHit = curr->evaluateHits(tmpHL)[currIndex]; + uint32_t currElemId = currHit.elemId(); + uint32_t curContext = currHit.context(); + + const HitList & nextHL = next.evaluateHits(tmpHL); + + size_t nextIndexMax = nextHL.size(); + while ((nextIndex < nextIndexMax) && + ((nextHL[nextIndex].context() < curContext) || + ((nextHL[nextIndex].context() == curContext) && (nextHL[nextIndex].elemId() <= currElemId)))) + { + nextIndex++; + } + if ((nextHL[nextIndex].context() < curContext) && (nextHL[nextIndex].elemId() == currElemId)) { + currMatchCount++; + if ((currMatchCount+1) == numFields) { + Hit h = nextHL[indexVector[currMatchCount]]; + hl.emplace_back(0, h.context(), h.elemId(), h.weight()); + currMatchCount = 0; + indexVector[0]++; + } + } else { + currMatchCount = 0; + indexVector[currMatchCount]++; + } + curr = static_cast<const QueryTerm *>(&*(*this)[currMatchCount]); + exhausted = (nextIndex >= nextIndexMax) || (indexVector[currMatchCount] >= curr->evaluateHits(tmpHL).size()); + } + return hl; +} bool PhraseQueryNode::evaluate() const { - bool ok(false); HitList hl; - ok = ! evaluateHits(hl).empty(); - return ok; + return evaluateHits(hl).empty(); } void PhraseQueryNode::getPhrases(QueryNodeRefList & tl) { tl.push_back(this); } @@ -168,56 +210,55 @@ void PhraseQueryNode::getPhrases(ConstQueryNodeRefList & tl) const { tl.push_bac const HitList & PhraseQueryNode::evaluateHits(HitList & hl) const { - hl.clear(); - _fieldInfo.clear(); - bool andResult(AndQueryNode::evaluate()); - if (andResult) { + hl.clear(); + _fieldInfo.clear(); + if ( ! AndQueryNode::evaluate()) return hl; + HitList tmpHL; unsigned int fullPhraseLen = size(); unsigned int currPhraseLen = 0; std::vector<unsigned int> indexVector(fullPhraseLen, 0); - const QueryTerm * curr = static_cast<const QueryTerm *> (&(*(*this)[currPhraseLen])); + auto curr = static_cast<const QueryTerm *> (&(*(*this)[currPhraseLen])); bool exhausted( curr->evaluateHits(tmpHL).empty()); for (; !exhausted; ) { - const QueryTerm & next = static_cast<const QueryTerm &>(*(*this)[currPhraseLen+1]); - unsigned int & currIndex = indexVector[currPhraseLen]; - unsigned int & nextIndex = indexVector[currPhraseLen+1]; + auto next = static_cast<const QueryTerm &>(*(*this)[currPhraseLen+1]); + unsigned int & currIndex = indexVector[currPhraseLen]; + unsigned int & nextIndex = indexVector[currPhraseLen+1]; - size_t firstPosition = curr->evaluateHits(tmpHL)[currIndex].pos(); - uint32_t currElemId = curr->evaluateHits(tmpHL)[currIndex].elemId(); - uint32_t curContext = curr->evaluateHits(tmpHL)[currIndex].context(); + const auto & currHit = curr->evaluateHits(tmpHL)[currIndex]; + size_t firstPosition = currHit.pos(); + uint32_t currElemId = currHit.elemId(); + uint32_t curContext = currHit.context(); - const HitList & nextHL = next.evaluateHits(tmpHL); + const HitList & nextHL = next.evaluateHits(tmpHL); - int diff(0); - size_t nextIndexMax = nextHL.size(); - while ((nextIndex < nextIndexMax) && + int diff(0); + size_t nextIndexMax = nextHL.size(); + while ((nextIndex < nextIndexMax) && ((nextHL[nextIndex].context() < curContext) || ((nextHL[nextIndex].context() == curContext) && (nextHL[nextIndex].elemId() <= currElemId))) && ((diff = nextHL[nextIndex].pos()-firstPosition) < 1)) - { - nextIndex++; - } - if ((diff == 1) && (nextHL[nextIndex].elemId() == currElemId)) { - currPhraseLen++; - bool ok = ((currPhraseLen+1)==fullPhraseLen); - if (ok) { - Hit h = nextHL[indexVector[currPhraseLen]]; - hl.push_back(h); - const QueryTerm::FieldInfo & fi = next.getFieldInfo(h.context()); - updateFieldInfo(h.context(), hl.size() - 1, fi.getFieldLength()); - currPhraseLen = 0; - indexVector[0]++; + { + nextIndex++; + } + if ((diff == 1) && (nextHL[nextIndex].context() == curContext) && (nextHL[nextIndex].elemId() == currElemId)) { + currPhraseLen++; + if ((currPhraseLen+1) == fullPhraseLen) { + Hit h = nextHL[indexVector[currPhraseLen]]; + hl.push_back(h); + const QueryTerm::FieldInfo & fi = next.getFieldInfo(h.context()); + updateFieldInfo(h.context(), hl.size() - 1, fi.getFieldLength()); + currPhraseLen = 0; + indexVector[0]++; + } + } else { + currPhraseLen = 0; + indexVector[currPhraseLen]++; } - } else { - currPhraseLen = 0; - indexVector[currPhraseLen]++; - } - curr = static_cast<const QueryTerm *>(&*(*this)[currPhraseLen]); - exhausted = (nextIndex >= nextIndexMax) || (indexVector[currPhraseLen] >= curr->evaluateHits(tmpHL).size()); + curr = static_cast<const QueryTerm *>(&*(*this)[currPhraseLen]); + exhausted = (nextIndex >= nextIndexMax) || (indexVector[currPhraseLen] >= curr->evaluateHits(tmpHL).size()); } - } - return hl; + return hl; } void @@ -237,9 +278,8 @@ PhraseQueryNode::updateFieldInfo(size_t fid, size_t offset, size_t fieldLength) bool NotQueryNode::evaluate() const { bool ok(false); - for (const_iterator it=begin(), mt=end(); it!=mt; it++) { - const QueryNode & qn = **it; - ok |= ! qn.evaluate(); + for (const auto & node : *this) { + ok |= ! node->evaluate(); } return ok; } @@ -263,9 +303,7 @@ bool ONearQueryNode::evaluate() const return ok; } -Query::Query() : - _root() -{ } +Query::Query() = default; Query::Query(const QueryNodeResultFactory & factory, const QueryPacketT & queryRep) : _root() @@ -283,7 +321,7 @@ bool Query::build(const QueryNodeResultFactory & factory, const QueryPacketT & q { search::SimpleQueryStackDumpIterator stack(queryRep); if (stack.next()) { - _root.reset(QueryNode::Build(NULL, factory, stack, true).release()); + _root = QueryNode::Build(nullptr, factory, stack, true); } return valid(); } diff --git a/searchlib/src/vespa/searchlib/query/query.h b/searchlib/src/vespa/searchlib/query/query.h index 9bb1b29aae5..e96ed9b6bff 100644 --- a/searchlib/src/vespa/searchlib/query/query.h +++ b/searchlib/src/vespa/searchlib/query/query.h @@ -28,7 +28,7 @@ public: virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; void setIndex(const vespalib::string & index) override { _index = index; } const vespalib::string & getIndex() const override { return _index; } - static QueryConnector * create(ParseItem::ItemType type); + static std::unique_ptr<QueryConnector> create(ParseItem::ItemType type); virtual bool isFlattenable(ParseItem::ItemType type) const { (void) type; return false; } private: vespalib::string _opName; @@ -123,6 +123,15 @@ private: #endif }; +class SameElementQueryNode : public AndQueryNode +{ +public: + SameElementQueryNode() : AndQueryNode("SAME_ELEMENT") { } + bool evaluate() const override; + const HitList & evaluateHits(HitList & hl) const override; + bool isFlattenable(ParseItem::ItemType type) const override { return type == ParseItem::ITEM_NOT; } +}; + /** Unary Not operator. Just inverts the nodes result. */ diff --git a/searchlib/src/vespa/searchlib/query/querynode.cpp b/searchlib/src/vespa/searchlib/query/querynode.cpp index 0d0a06de7af..4060033663a 100644 --- a/searchlib/src/vespa/searchlib/query/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/querynode.cpp @@ -9,38 +9,44 @@ namespace search { namespace { vespalib::stringref DEFAULT("default"); + bool isPhraseOrNear(const QueryNode * qn) { + return dynamic_cast<const NearQueryNode *> (qn) || dynamic_cast<const PhraseQueryNode *> (qn); + } } -QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factory, search::SimpleQueryStackDumpIterator & queryRep, bool allowRewrite) +QueryNode::UP +QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factory, + SimpleQueryStackDumpIterator & queryRep, bool allowRewrite) { unsigned int arity = queryRep.getArity(); - search::ParseItem::ItemType type = queryRep.getType(); + ParseItem::ItemType type = queryRep.getType(); UP qn; switch (type) { - case search::ParseItem::ITEM_AND: - case search::ParseItem::ITEM_OR: - case search::ParseItem::ITEM_WEAK_AND: - case search::ParseItem::ITEM_EQUIV: - case search::ParseItem::ITEM_WEIGHTED_SET: - case search::ParseItem::ITEM_DOT_PRODUCT: - case search::ParseItem::ITEM_WAND: - case search::ParseItem::ITEM_NOT: - case search::ParseItem::ITEM_PHRASE: - case search::ParseItem::ITEM_SAME_ELEMENT: - case search::ParseItem::ITEM_NEAR: - case search::ParseItem::ITEM_ONEAR: + case ParseItem::ITEM_AND: + case ParseItem::ITEM_OR: + case ParseItem::ITEM_WEAK_AND: + case ParseItem::ITEM_EQUIV: + case ParseItem::ITEM_WEIGHTED_SET: + case ParseItem::ITEM_DOT_PRODUCT: + case ParseItem::ITEM_WAND: + case ParseItem::ITEM_NOT: + case ParseItem::ITEM_PHRASE: + case ParseItem::ITEM_SAME_ELEMENT: + case ParseItem::ITEM_NEAR: + case ParseItem::ITEM_ONEAR: { - qn.reset(QueryConnector::create(type)); - if (qn.get()) { + qn = QueryConnector::create(type); + if (qn) { QueryConnector * qc = dynamic_cast<QueryConnector *> (qn.get()); NearQueryNode * nqn = dynamic_cast<NearQueryNode *> (qc); if (nqn) { nqn->distance(queryRep.getArg1()); } - if ((type == search::ParseItem::ITEM_WEAK_AND) || - (type == search::ParseItem::ITEM_WEIGHTED_SET) || - (type == search::ParseItem::ITEM_DOT_PRODUCT) || - (type == search::ParseItem::ITEM_WAND)) + if ((type == ParseItem::ITEM_WEAK_AND) || + (type == ParseItem::ITEM_WEIGHTED_SET) || + (type == ParseItem::ITEM_DOT_PRODUCT) || + (type == ParseItem::ITEM_SAME_ELEMENT) || + (type == ParseItem::ITEM_WAND)) { qn->setIndex(queryRep.getIndexName()); } @@ -49,27 +55,26 @@ QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFa if (qc->isFlattenable(queryRep.getType())) { arity += queryRep.getArity(); } else { - UP child(Build(qc, factory, queryRep, - allowRewrite && ((dynamic_cast<NearQueryNode *> (qn.get()) == NULL) && (dynamic_cast<PhraseQueryNode *> (qn.get()) == NULL)))); + UP child = Build(qc, factory, queryRep, allowRewrite && !isPhraseOrNear(qn.get())); qc->push_back(std::move(child)); } } } } break; - case search::ParseItem::ITEM_NUMTERM: - case search::ParseItem::ITEM_TERM: - case search::ParseItem::ITEM_PREFIXTERM: - case search::ParseItem::ITEM_REGEXP: - case search::ParseItem::ITEM_SUBSTRINGTERM: - case search::ParseItem::ITEM_EXACTSTRINGTERM: - case search::ParseItem::ITEM_SUFFIXTERM: - case search::ParseItem::ITEM_PURE_WEIGHTED_STRING: - case search::ParseItem::ITEM_PURE_WEIGHTED_LONG: + case ParseItem::ITEM_NUMTERM: + case ParseItem::ITEM_TERM: + case ParseItem::ITEM_PREFIXTERM: + case ParseItem::ITEM_REGEXP: + case ParseItem::ITEM_SUBSTRINGTERM: + case ParseItem::ITEM_EXACTSTRINGTERM: + case ParseItem::ITEM_SUFFIXTERM: + case ParseItem::ITEM_PURE_WEIGHTED_STRING: + case ParseItem::ITEM_PURE_WEIGHTED_LONG: { vespalib::stringref index = queryRep.getIndexName(); if (index.empty()) { - if ((type == search::ParseItem::ITEM_PURE_WEIGHTED_STRING) || (type == search::ParseItem::ITEM_PURE_WEIGHTED_LONG)) { + if ((type == ParseItem::ITEM_PURE_WEIGHTED_STRING) || (type == ParseItem::ITEM_PURE_WEIGHTED_LONG)) { index = parent->getIndex(); } else { index = DEFAULT; @@ -78,19 +83,19 @@ QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFa vespalib::stringref term = queryRep.getTerm(); QueryTerm::SearchTerm sTerm(QueryTerm::WORD); switch (type) { - case search::ParseItem::ITEM_REGEXP: + case ParseItem::ITEM_REGEXP: sTerm = QueryTerm::REGEXP; break; - case search::ParseItem::ITEM_PREFIXTERM: + case ParseItem::ITEM_PREFIXTERM: sTerm = QueryTerm::PREFIXTERM; break; - case search::ParseItem::ITEM_SUBSTRINGTERM: + case ParseItem::ITEM_SUBSTRINGTERM: sTerm = QueryTerm::SUBSTRINGTERM; break; - case search::ParseItem::ITEM_EXACTSTRINGTERM: + case ParseItem::ITEM_EXACTSTRINGTERM: sTerm = QueryTerm::EXACTSTRINGTERM; break; - case search::ParseItem::ITEM_SUFFIXTERM: + case ParseItem::ITEM_SUFFIXTERM: sTerm = QueryTerm::SUFFIXTERM; break; default: @@ -107,10 +112,9 @@ QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFa qt->setWeight(queryRep.GetWeight()); qt->setUniqueId(queryRep.getUniqueId()); if ( qt->encoding().isBase10Integer() || ! qt->encoding().isFloat() || ! factory.getRewriteFloatTerms() || !allowRewrite || (ssTerm.find('.') == vespalib::string::npos)) { - qn.reset(qt.release()); + qn = std::move(qt); } else { std::unique_ptr<PhraseQueryNode> phrase(new PhraseQueryNode()); - phrase->push_back(UP(new QueryTerm(factory.create(), ssTerm.substr(0, ssTerm.find('.')), ssIndex, QueryTerm::WORD))); phrase->push_back(UP(new QueryTerm(factory.create(), ssTerm.substr(ssTerm.find('.') + 1), ssIndex, QueryTerm::WORD))); std::unique_ptr<EquivQueryNode> orqn(new EquivQueryNode()); @@ -121,7 +125,7 @@ QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFa } } break; - case search::ParseItem::ITEM_RANK: + case ParseItem::ITEM_RANK: { if (arity >= 1) { queryRep.next(); |