summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@oath.com>2018-06-05 12:26:35 +0200
committerHenning Baldersheim <balder@oath.com>2018-06-05 15:46:55 +0200
commita010d42a3d2397a744362a22e673c63fd49e0ae3 (patch)
treefc7a7c4ba463f00bfb423996204fd92239d0a82c /searchlib
parent4cd15286b8c0a1015549a69de6bec665d828392a (diff)
Add SameElementQueryNode
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchlib/query/query.cpp220
-rw-r--r--searchlib/src/vespa/searchlib/query/query.h11
-rw-r--r--searchlib/src/vespa/searchlib/query/querynode.cpp84
3 files changed, 183 insertions, 132 deletions
diff --git a/searchlib/src/vespa/searchlib/query/query.cpp b/searchlib/src/vespa/searchlib/query/query.cpp
index 46f50d926f5..1cad8a8e41c 100644
--- a/searchlib/src/vespa/searchlib/query/query.cpp
+++ b/searchlib/src/vespa/searchlib/query/query.cpp
@@ -28,85 +28,77 @@ const HitList & QueryConnector::evaluateHits(HitList & hl) const
void QueryConnector::reset()
{
- for(iterator it=begin(), mt=end(); it != mt; it++) {
- QueryNode & qn = **it;
- qn.reset();
+ for(const auto & node : *this) {
+ node->reset();
}
}
void QueryConnector::getLeafs(QueryTermList & tl)
{
- for(iterator it=begin(), mt=end(); it != mt; it++) {
- QueryNode & qn = **it;
- qn.getLeafs(tl);
- }
+ for(const auto & node : *this) {
+ node->getLeafs(tl);
+ }
}
void QueryConnector::getLeafs(ConstQueryTermList & tl) const
{
- for(const_iterator it=begin(), mt=end(); it != mt; it++) {
- const QueryNode & qn = **it;
- qn.getLeafs(tl);
- }
+ for(const auto & node : *this) {
+ node->getLeafs(tl);
+ }
}
void QueryConnector::getPhrases(QueryNodeRefList & tl)
{
- for(iterator it=begin(), mt=end(); it != mt; it++) {
- QueryNode & qn = **it;
- qn.getPhrases(tl);
- }
+ for(const auto & node : *this) {
+ node->getPhrases(tl);
+ }
}
void QueryConnector::getPhrases(ConstQueryNodeRefList & tl) const
{
- for(const_iterator it=begin(), mt=end(); it != mt; it++) {
- const QueryNode & qn = **it;
- qn.getPhrases(tl);
- }
+ for(const auto & node : *this) {
+ node->getPhrases(tl);
+ }
}
size_t QueryConnector::depth() const
{
- size_t d(0);
- for(const_iterator it=begin(), mt=end(); (it!=mt); it++) {
- const QueryNode & qn = **it;
- size_t t = qn.depth();
- if (t > d)
- d = t;
- }
- return d+1;
+ size_t d(0);
+ for(const auto & node : *this) {
+ size_t t = node->depth();
+ if (t > d)
+ d = t;
+ }
+ return d+1;
}
size_t QueryConnector::width() const
{
size_t w(0);
- for(const_iterator it=begin(), mt=end(); (it!=mt); it++) {
- const QueryNode & qn = **it;
- w += qn.width();
+ for(const auto & node : *this) {
+ w += node->width();
}
return w;
}
-QueryConnector *
+std::unique_ptr<QueryConnector>
QueryConnector::create(ParseItem::ItemType type)
{
switch (type) {
- case search::ParseItem::ITEM_AND: return new AndQueryNode();
- case search::ParseItem::ITEM_OR: return new OrQueryNode();
- case search::ParseItem::ITEM_WEAK_AND: return new OrQueryNode();
- case search::ParseItem::ITEM_EQUIV: return new EquivQueryNode();
- case search::ParseItem::ITEM_WEIGHTED_SET: return new EquivQueryNode();
- case search::ParseItem::ITEM_DOT_PRODUCT: return new OrQueryNode();
- case search::ParseItem::ITEM_WAND: return new OrQueryNode();
- case search::ParseItem::ITEM_NOT: return new AndNotQueryNode();
- case search::ParseItem::ITEM_PHRASE: return new PhraseQueryNode();
- case search::ParseItem::ITEM_SAME_ELEMENT: return new AndQueryNode(); // TODO: This needs a same element operation to work for streaming search too.
- case search::ParseItem::ITEM_NEAR: return new NearQueryNode();
- case search::ParseItem::ITEM_ONEAR: return new ONearQueryNode();
- default:
- return nullptr;
+ case search::ParseItem::ITEM_AND: return std::make_unique<AndQueryNode>();
+ case search::ParseItem::ITEM_OR: return std::make_unique<OrQueryNode>();
+ case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>();
+ case search::ParseItem::ITEM_EQUIV: return std::make_unique<EquivQueryNode>();
+ case search::ParseItem::ITEM_WEIGHTED_SET: return std::make_unique<EquivQueryNode>();
+ case search::ParseItem::ITEM_DOT_PRODUCT: return std::make_unique<OrQueryNode>();
+ case search::ParseItem::ITEM_WAND: return std::make_unique<OrQueryNode>();
+ case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>();
+ case search::ParseItem::ITEM_PHRASE: return std::make_unique<PhraseQueryNode>();
+ case search::ParseItem::ITEM_SAME_ELEMENT: return std::make_unique<SameElementQueryNode>();
+ case search::ParseItem::ITEM_NEAR: return std::make_unique<NearQueryNode>();
+ case search::ParseItem::ITEM_ONEAR: return std::make_unique<ONearQueryNode>();
+ default: return nullptr;
}
}
@@ -153,13 +145,63 @@ bool EquivQueryNode::evaluate() const
return OrQueryNode::evaluate();
}
+bool SameElementQueryNode::evaluate() const {
+ HitList hl;
+ return evaluateHits(hl).empty();
+}
+
+const HitList &
+SameElementQueryNode::evaluateHits(HitList & hl) const
+{
+ hl.clear();
+ if ( !AndQueryNode::evaluate()) return hl;
+
+ HitList tmpHL;
+ unsigned int numFields = size();
+ unsigned int currMatchCount = 0;
+ std::vector<unsigned int> indexVector(numFields, 0);
+ auto curr = static_cast<const QueryTerm *> (&(*(*this)[currMatchCount]));
+ bool exhausted( curr->evaluateHits(tmpHL).empty());
+ for (; !exhausted; ) {
+ auto next = static_cast<const QueryTerm &>(*(*this)[currMatchCount+1]);
+ unsigned int & currIndex = indexVector[currMatchCount];
+ unsigned int & nextIndex = indexVector[currMatchCount+1];
+
+ const auto & currHit = curr->evaluateHits(tmpHL)[currIndex];
+ uint32_t currElemId = currHit.elemId();
+ uint32_t curContext = currHit.context();
+
+ const HitList & nextHL = next.evaluateHits(tmpHL);
+
+ size_t nextIndexMax = nextHL.size();
+ while ((nextIndex < nextIndexMax) &&
+ ((nextHL[nextIndex].context() < curContext) ||
+ ((nextHL[nextIndex].context() == curContext) && (nextHL[nextIndex].elemId() <= currElemId))))
+ {
+ nextIndex++;
+ }
+ if ((nextHL[nextIndex].context() < curContext) && (nextHL[nextIndex].elemId() == currElemId)) {
+ currMatchCount++;
+ if ((currMatchCount+1) == numFields) {
+ Hit h = nextHL[indexVector[currMatchCount]];
+ hl.emplace_back(0, h.context(), h.elemId(), h.weight());
+ currMatchCount = 0;
+ indexVector[0]++;
+ }
+ } else {
+ currMatchCount = 0;
+ indexVector[currMatchCount]++;
+ }
+ curr = static_cast<const QueryTerm *>(&*(*this)[currMatchCount]);
+ exhausted = (nextIndex >= nextIndexMax) || (indexVector[currMatchCount] >= curr->evaluateHits(tmpHL).size());
+ }
+ return hl;
+}
bool PhraseQueryNode::evaluate() const
{
- bool ok(false);
HitList hl;
- ok = ! evaluateHits(hl).empty();
- return ok;
+ return evaluateHits(hl).empty();
}
void PhraseQueryNode::getPhrases(QueryNodeRefList & tl) { tl.push_back(this); }
@@ -168,56 +210,55 @@ void PhraseQueryNode::getPhrases(ConstQueryNodeRefList & tl) const { tl.push_bac
const HitList &
PhraseQueryNode::evaluateHits(HitList & hl) const
{
- hl.clear();
- _fieldInfo.clear();
- bool andResult(AndQueryNode::evaluate());
- if (andResult) {
+ hl.clear();
+ _fieldInfo.clear();
+ if ( ! AndQueryNode::evaluate()) return hl;
+
HitList tmpHL;
unsigned int fullPhraseLen = size();
unsigned int currPhraseLen = 0;
std::vector<unsigned int> indexVector(fullPhraseLen, 0);
- const QueryTerm * curr = static_cast<const QueryTerm *> (&(*(*this)[currPhraseLen]));
+ auto curr = static_cast<const QueryTerm *> (&(*(*this)[currPhraseLen]));
bool exhausted( curr->evaluateHits(tmpHL).empty());
for (; !exhausted; ) {
- const QueryTerm & next = static_cast<const QueryTerm &>(*(*this)[currPhraseLen+1]);
- unsigned int & currIndex = indexVector[currPhraseLen];
- unsigned int & nextIndex = indexVector[currPhraseLen+1];
+ auto next = static_cast<const QueryTerm &>(*(*this)[currPhraseLen+1]);
+ unsigned int & currIndex = indexVector[currPhraseLen];
+ unsigned int & nextIndex = indexVector[currPhraseLen+1];
- size_t firstPosition = curr->evaluateHits(tmpHL)[currIndex].pos();
- uint32_t currElemId = curr->evaluateHits(tmpHL)[currIndex].elemId();
- uint32_t curContext = curr->evaluateHits(tmpHL)[currIndex].context();
+ const auto & currHit = curr->evaluateHits(tmpHL)[currIndex];
+ size_t firstPosition = currHit.pos();
+ uint32_t currElemId = currHit.elemId();
+ uint32_t curContext = currHit.context();
- const HitList & nextHL = next.evaluateHits(tmpHL);
+ const HitList & nextHL = next.evaluateHits(tmpHL);
- int diff(0);
- size_t nextIndexMax = nextHL.size();
- while ((nextIndex < nextIndexMax) &&
+ int diff(0);
+ size_t nextIndexMax = nextHL.size();
+ while ((nextIndex < nextIndexMax) &&
((nextHL[nextIndex].context() < curContext) ||
((nextHL[nextIndex].context() == curContext) && (nextHL[nextIndex].elemId() <= currElemId))) &&
((diff = nextHL[nextIndex].pos()-firstPosition) < 1))
- {
- nextIndex++;
- }
- if ((diff == 1) && (nextHL[nextIndex].elemId() == currElemId)) {
- currPhraseLen++;
- bool ok = ((currPhraseLen+1)==fullPhraseLen);
- if (ok) {
- Hit h = nextHL[indexVector[currPhraseLen]];
- hl.push_back(h);
- const QueryTerm::FieldInfo & fi = next.getFieldInfo(h.context());
- updateFieldInfo(h.context(), hl.size() - 1, fi.getFieldLength());
- currPhraseLen = 0;
- indexVector[0]++;
+ {
+ nextIndex++;
+ }
+ if ((diff == 1) && (nextHL[nextIndex].context() == curContext) && (nextHL[nextIndex].elemId() == currElemId)) {
+ currPhraseLen++;
+ if ((currPhraseLen+1) == fullPhraseLen) {
+ Hit h = nextHL[indexVector[currPhraseLen]];
+ hl.push_back(h);
+ const QueryTerm::FieldInfo & fi = next.getFieldInfo(h.context());
+ updateFieldInfo(h.context(), hl.size() - 1, fi.getFieldLength());
+ currPhraseLen = 0;
+ indexVector[0]++;
+ }
+ } else {
+ currPhraseLen = 0;
+ indexVector[currPhraseLen]++;
}
- } else {
- currPhraseLen = 0;
- indexVector[currPhraseLen]++;
- }
- curr = static_cast<const QueryTerm *>(&*(*this)[currPhraseLen]);
- exhausted = (nextIndex >= nextIndexMax) || (indexVector[currPhraseLen] >= curr->evaluateHits(tmpHL).size());
+ curr = static_cast<const QueryTerm *>(&*(*this)[currPhraseLen]);
+ exhausted = (nextIndex >= nextIndexMax) || (indexVector[currPhraseLen] >= curr->evaluateHits(tmpHL).size());
}
- }
- return hl;
+ return hl;
}
void
@@ -237,9 +278,8 @@ PhraseQueryNode::updateFieldInfo(size_t fid, size_t offset, size_t fieldLength)
bool NotQueryNode::evaluate() const
{
bool ok(false);
- for (const_iterator it=begin(), mt=end(); it!=mt; it++) {
- const QueryNode & qn = **it;
- ok |= ! qn.evaluate();
+ for (const auto & node : *this) {
+ ok |= ! node->evaluate();
}
return ok;
}
@@ -263,9 +303,7 @@ bool ONearQueryNode::evaluate() const
return ok;
}
-Query::Query() :
- _root()
-{ }
+Query::Query() = default;
Query::Query(const QueryNodeResultFactory & factory, const QueryPacketT & queryRep) :
_root()
@@ -283,7 +321,7 @@ bool Query::build(const QueryNodeResultFactory & factory, const QueryPacketT & q
{
search::SimpleQueryStackDumpIterator stack(queryRep);
if (stack.next()) {
- _root.reset(QueryNode::Build(NULL, factory, stack, true).release());
+ _root = QueryNode::Build(nullptr, factory, stack, true);
}
return valid();
}
diff --git a/searchlib/src/vespa/searchlib/query/query.h b/searchlib/src/vespa/searchlib/query/query.h
index 9bb1b29aae5..e96ed9b6bff 100644
--- a/searchlib/src/vespa/searchlib/query/query.h
+++ b/searchlib/src/vespa/searchlib/query/query.h
@@ -28,7 +28,7 @@ public:
virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
void setIndex(const vespalib::string & index) override { _index = index; }
const vespalib::string & getIndex() const override { return _index; }
- static QueryConnector * create(ParseItem::ItemType type);
+ static std::unique_ptr<QueryConnector> create(ParseItem::ItemType type);
virtual bool isFlattenable(ParseItem::ItemType type) const { (void) type; return false; }
private:
vespalib::string _opName;
@@ -123,6 +123,15 @@ private:
#endif
};
+class SameElementQueryNode : public AndQueryNode
+{
+public:
+ SameElementQueryNode() : AndQueryNode("SAME_ELEMENT") { }
+ bool evaluate() const override;
+ const HitList & evaluateHits(HitList & hl) const override;
+ bool isFlattenable(ParseItem::ItemType type) const override { return type == ParseItem::ITEM_NOT; }
+};
+
/**
Unary Not operator. Just inverts the nodes result.
*/
diff --git a/searchlib/src/vespa/searchlib/query/querynode.cpp b/searchlib/src/vespa/searchlib/query/querynode.cpp
index 0d0a06de7af..4060033663a 100644
--- a/searchlib/src/vespa/searchlib/query/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/querynode.cpp
@@ -9,38 +9,44 @@ namespace search {
namespace {
vespalib::stringref DEFAULT("default");
+ bool isPhraseOrNear(const QueryNode * qn) {
+ return dynamic_cast<const NearQueryNode *> (qn) || dynamic_cast<const PhraseQueryNode *> (qn);
+ }
}
-QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factory, search::SimpleQueryStackDumpIterator & queryRep, bool allowRewrite)
+QueryNode::UP
+QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factory,
+ SimpleQueryStackDumpIterator & queryRep, bool allowRewrite)
{
unsigned int arity = queryRep.getArity();
- search::ParseItem::ItemType type = queryRep.getType();
+ ParseItem::ItemType type = queryRep.getType();
UP qn;
switch (type) {
- case search::ParseItem::ITEM_AND:
- case search::ParseItem::ITEM_OR:
- case search::ParseItem::ITEM_WEAK_AND:
- case search::ParseItem::ITEM_EQUIV:
- case search::ParseItem::ITEM_WEIGHTED_SET:
- case search::ParseItem::ITEM_DOT_PRODUCT:
- case search::ParseItem::ITEM_WAND:
- case search::ParseItem::ITEM_NOT:
- case search::ParseItem::ITEM_PHRASE:
- case search::ParseItem::ITEM_SAME_ELEMENT:
- case search::ParseItem::ITEM_NEAR:
- case search::ParseItem::ITEM_ONEAR:
+ case ParseItem::ITEM_AND:
+ case ParseItem::ITEM_OR:
+ case ParseItem::ITEM_WEAK_AND:
+ case ParseItem::ITEM_EQUIV:
+ case ParseItem::ITEM_WEIGHTED_SET:
+ case ParseItem::ITEM_DOT_PRODUCT:
+ case ParseItem::ITEM_WAND:
+ case ParseItem::ITEM_NOT:
+ case ParseItem::ITEM_PHRASE:
+ case ParseItem::ITEM_SAME_ELEMENT:
+ case ParseItem::ITEM_NEAR:
+ case ParseItem::ITEM_ONEAR:
{
- qn.reset(QueryConnector::create(type));
- if (qn.get()) {
+ qn = QueryConnector::create(type);
+ if (qn) {
QueryConnector * qc = dynamic_cast<QueryConnector *> (qn.get());
NearQueryNode * nqn = dynamic_cast<NearQueryNode *> (qc);
if (nqn) {
nqn->distance(queryRep.getArg1());
}
- if ((type == search::ParseItem::ITEM_WEAK_AND) ||
- (type == search::ParseItem::ITEM_WEIGHTED_SET) ||
- (type == search::ParseItem::ITEM_DOT_PRODUCT) ||
- (type == search::ParseItem::ITEM_WAND))
+ if ((type == ParseItem::ITEM_WEAK_AND) ||
+ (type == ParseItem::ITEM_WEIGHTED_SET) ||
+ (type == ParseItem::ITEM_DOT_PRODUCT) ||
+ (type == ParseItem::ITEM_SAME_ELEMENT) ||
+ (type == ParseItem::ITEM_WAND))
{
qn->setIndex(queryRep.getIndexName());
}
@@ -49,27 +55,26 @@ QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFa
if (qc->isFlattenable(queryRep.getType())) {
arity += queryRep.getArity();
} else {
- UP child(Build(qc, factory, queryRep,
- allowRewrite && ((dynamic_cast<NearQueryNode *> (qn.get()) == NULL) && (dynamic_cast<PhraseQueryNode *> (qn.get()) == NULL))));
+ UP child = Build(qc, factory, queryRep, allowRewrite && !isPhraseOrNear(qn.get()));
qc->push_back(std::move(child));
}
}
}
}
break;
- case search::ParseItem::ITEM_NUMTERM:
- case search::ParseItem::ITEM_TERM:
- case search::ParseItem::ITEM_PREFIXTERM:
- case search::ParseItem::ITEM_REGEXP:
- case search::ParseItem::ITEM_SUBSTRINGTERM:
- case search::ParseItem::ITEM_EXACTSTRINGTERM:
- case search::ParseItem::ITEM_SUFFIXTERM:
- case search::ParseItem::ITEM_PURE_WEIGHTED_STRING:
- case search::ParseItem::ITEM_PURE_WEIGHTED_LONG:
+ case ParseItem::ITEM_NUMTERM:
+ case ParseItem::ITEM_TERM:
+ case ParseItem::ITEM_PREFIXTERM:
+ case ParseItem::ITEM_REGEXP:
+ case ParseItem::ITEM_SUBSTRINGTERM:
+ case ParseItem::ITEM_EXACTSTRINGTERM:
+ case ParseItem::ITEM_SUFFIXTERM:
+ case ParseItem::ITEM_PURE_WEIGHTED_STRING:
+ case ParseItem::ITEM_PURE_WEIGHTED_LONG:
{
vespalib::stringref index = queryRep.getIndexName();
if (index.empty()) {
- if ((type == search::ParseItem::ITEM_PURE_WEIGHTED_STRING) || (type == search::ParseItem::ITEM_PURE_WEIGHTED_LONG)) {
+ if ((type == ParseItem::ITEM_PURE_WEIGHTED_STRING) || (type == ParseItem::ITEM_PURE_WEIGHTED_LONG)) {
index = parent->getIndex();
} else {
index = DEFAULT;
@@ -78,19 +83,19 @@ QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFa
vespalib::stringref term = queryRep.getTerm();
QueryTerm::SearchTerm sTerm(QueryTerm::WORD);
switch (type) {
- case search::ParseItem::ITEM_REGEXP:
+ case ParseItem::ITEM_REGEXP:
sTerm = QueryTerm::REGEXP;
break;
- case search::ParseItem::ITEM_PREFIXTERM:
+ case ParseItem::ITEM_PREFIXTERM:
sTerm = QueryTerm::PREFIXTERM;
break;
- case search::ParseItem::ITEM_SUBSTRINGTERM:
+ case ParseItem::ITEM_SUBSTRINGTERM:
sTerm = QueryTerm::SUBSTRINGTERM;
break;
- case search::ParseItem::ITEM_EXACTSTRINGTERM:
+ case ParseItem::ITEM_EXACTSTRINGTERM:
sTerm = QueryTerm::EXACTSTRINGTERM;
break;
- case search::ParseItem::ITEM_SUFFIXTERM:
+ case ParseItem::ITEM_SUFFIXTERM:
sTerm = QueryTerm::SUFFIXTERM;
break;
default:
@@ -107,10 +112,9 @@ QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFa
qt->setWeight(queryRep.GetWeight());
qt->setUniqueId(queryRep.getUniqueId());
if ( qt->encoding().isBase10Integer() || ! qt->encoding().isFloat() || ! factory.getRewriteFloatTerms() || !allowRewrite || (ssTerm.find('.') == vespalib::string::npos)) {
- qn.reset(qt.release());
+ qn = std::move(qt);
} else {
std::unique_ptr<PhraseQueryNode> phrase(new PhraseQueryNode());
-
phrase->push_back(UP(new QueryTerm(factory.create(), ssTerm.substr(0, ssTerm.find('.')), ssIndex, QueryTerm::WORD)));
phrase->push_back(UP(new QueryTerm(factory.create(), ssTerm.substr(ssTerm.find('.') + 1), ssIndex, QueryTerm::WORD)));
std::unique_ptr<EquivQueryNode> orqn(new EquivQueryNode());
@@ -121,7 +125,7 @@ QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultFa
}
}
break;
- case search::ParseItem::ITEM_RANK:
+ case ParseItem::ITEM_RANK:
{
if (arity >= 1) {
queryRep.next();