diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-07-09 19:17:49 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-09 19:17:49 +0200 |
commit | 36f1f82001f4a1b973dfbd6bb28cf57315219420 (patch) | |
tree | 39153ca2799910f6f61f9b7e9939b1ad181fac99 /searchsummary | |
parent | 005500dc628d65c2a1ca8182cfa6e8981a8d9f90 (diff) | |
parent | 68d95a43f7c3664c3a26dc84b03e2d42a161f5dc (diff) |
Merge pull request #13848 from vespa-engine/arnej/cleanup-parse-item-and-simple-query-stack
Arnej/cleanup parse item and simple query stack
Diffstat (limited to 'searchsummary')
8 files changed, 419 insertions, 31 deletions
diff --git a/searchsummary/src/tests/docsummary/positionsdfw_test.cpp b/searchsummary/src/tests/docsummary/positionsdfw_test.cpp index f54b4c19dc3..6fd0c39f06f 100644 --- a/searchsummary/src/tests/docsummary/positionsdfw_test.cpp +++ b/searchsummary/src/tests/docsummary/positionsdfw_test.cpp @@ -16,7 +16,6 @@ #include <vespa/log/log.h> LOG_SETUP("positionsdfw_test"); -using search::RawBuf; using search::IAttributeManager; using search::MatchingElements; using search::SingleInt64ExtAttribute; diff --git a/searchsummary/src/tests/extractkeywords/CMakeLists.txt b/searchsummary/src/tests/extractkeywords/CMakeLists.txt index edcede16bc6..5eae390a5ec 100644 --- a/searchsummary/src/tests/extractkeywords/CMakeLists.txt +++ b/searchsummary/src/tests/extractkeywords/CMakeLists.txt @@ -2,6 +2,8 @@ vespa_add_executable(searchsummary_extractkeywordstest_app TEST SOURCES extractkeywordstest.cpp + simplequerystack.cpp + simplequerystackitem.cpp DEPENDS searchsummary ) diff --git a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp index 4abbe7d2613..87317234a27 100644 --- a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp +++ b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp @@ -2,7 +2,7 @@ #include "extractkeywordstest.h" #include <vespa/searchsummary/docsummary/keywordextractor.h> -#include <vespa/searchlib/parsequery/simplequerystack.h> +#include "simplequerystack.h" #include <vespa/vespalib/util/time.h> #define NUMTESTS 5 @@ -165,7 +165,7 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 0: { // Simple term query - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); @@ -178,11 +178,15 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 1: { + // check that skipping these works also: + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_LOCATION_TERM, "no")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_NEAREST_NEIGHBOR, "no")); // multi term query - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 3)); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_OR, 3)); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 3)); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); @@ -196,10 +200,10 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 2: { // phrase term query - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3, "index")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 3, "index")); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); @@ -213,16 +217,16 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 3: { // multiple phrase and term query - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "xyzzy")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "xyz")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 2, "index")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3, "index")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "baz")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "zog")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 3)); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "xyzzy")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "xyz")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 2, "index")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 3, "index")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "baz")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "zog")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 3)); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); @@ -236,11 +240,11 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 4: { // phrase term query with wrong argument items - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 2)); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 2, "index")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 2)); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 2, "index")); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); diff --git a/searchsummary/src/tests/extractkeywords/simplequerystack.cpp b/searchsummary/src/tests/extractkeywords/simplequerystack.cpp new file mode 100644 index 00000000000..8c85417c79a --- /dev/null +++ b/searchsummary/src/tests/extractkeywords/simplequerystack.cpp @@ -0,0 +1,36 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "simplequerystack.h" +#include <vespa/vespalib/util/compress.h> + +#include <vespa/log/log.h> +LOG_SETUP(".search.simplequerystack"); + +namespace search { + +SimpleQueryStack::SimpleQueryStack() + : _stack(nullptr) +{ +} + +SimpleQueryStack::~SimpleQueryStack() +{ + delete _stack; +} + +void +SimpleQueryStack::Push(SimpleQueryStackItem *item) +{ + item->_next = _stack; + _stack = item; +} + +void +SimpleQueryStack::AppendBuffer(RawBuf *buf) const +{ + for (SimpleQueryStackItem *item = _stack; item != nullptr; item = item->_next) { + item->AppendBuffer(buf); + } +} + +} // namespace search diff --git a/searchsummary/src/tests/extractkeywords/simplequerystack.h b/searchsummary/src/tests/extractkeywords/simplequerystack.h new file mode 100644 index 00000000000..7347c66c925 --- /dev/null +++ b/searchsummary/src/tests/extractkeywords/simplequerystack.h @@ -0,0 +1,49 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "simplequerystackitem.h" +#include <vespa/searchlib/util/rawbuf.h> +#include <vespa/vespalib/stllike/string.h> + +namespace search { + +/** + * A stack of SimpleQueryStackItems. + * + * A simple stack consisting of a list of SimpleQueryStackItems. + * It is able to generate a binary encoding of itself + * to a RawBuf. + */ +class SimpleQueryStack +{ +private: + /** The top of the stack. */ + SimpleQueryStackItem *_stack; + +public: + SimpleQueryStack(const SimpleQueryStack &) = delete; + SimpleQueryStack& operator=(const SimpleQueryStack &) = delete; + /** + * Constructor for SimpleQueryStack. + */ + SimpleQueryStack(); + /** + * Destructor for SimpleQueryStack. + */ + ~SimpleQueryStack(); + /** + * Push an item on the stack. + * @param item The SimpleQueryStackItem to push. + */ + void Push(SimpleQueryStackItem *item); + + /** + * Encode the contents of the stack in a binary buffer. + * @param buf Pointer to a buffer containing the encoded contents. + */ + void AppendBuffer(RawBuf *buf) const; +}; + +} // namespace search + diff --git a/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp b/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp new file mode 100644 index 00000000000..5a4b6d76b8f --- /dev/null +++ b/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp @@ -0,0 +1,193 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "simplequerystackitem.h" +#include <vespa/vespalib/objects/nbo.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <cassert> + +namespace search { + +SimpleQueryStackItem::SimpleQueryStackItem() + : _next(NULL), + _arg1(0), + _arg2(0), + _arg3(0), + _type(ITEM_UNDEF), + _arity(0), + _indexName(), + _term() +{} + +namespace { + +void assert_term_type(ParseItem::ItemType type) { + assert(type == ParseItem::ITEM_TERM || + type == ParseItem::ITEM_NUMTERM || + type == ParseItem::ITEM_NEAREST_NEIGHBOR || + type == ParseItem::ITEM_LOCATION_TERM || + type == ParseItem::ITEM_PREFIXTERM || + type == ParseItem::ITEM_SUBSTRINGTERM || + type == ParseItem::ITEM_SUFFIXTERM || + type == ParseItem::ITEM_PURE_WEIGHTED_STRING || + type == ParseItem::ITEM_PURE_WEIGHTED_LONG || + type == ParseItem::ITEM_EXACTSTRINGTERM || + type == ParseItem::ITEM_PREDICATE_QUERY); + (void) type; +} + +void assert_arity_type(ParseItem::ItemType type) { + // types with arity, but without an index name: + assert(type == ParseItem::ITEM_OR || + type == ParseItem::ITEM_WEAK_AND || + type == ParseItem::ITEM_EQUIV || + type == ParseItem::ITEM_AND || + type == ParseItem::ITEM_NOT || + type == ParseItem::ITEM_RANK || + type == ParseItem::ITEM_ANY || + type == ParseItem::ITEM_NEAR || + type == ParseItem::ITEM_ONEAR); + (void) type; +} + +void assert_arity_and_index_type(ParseItem::ItemType type) { + // types with arity and an index name: + assert(type == ParseItem::ITEM_PHRASE || + type == ParseItem::ITEM_SAME_ELEMENT || + type == ParseItem::ITEM_WEIGHTED_SET || + type == ParseItem::ITEM_DOT_PRODUCT || + type == ParseItem::ITEM_WAND || + type == ParseItem::ITEM_WORD_ALTERNATIVES); + (void) type; +} + +int64_t term_as_n64(vespalib::stringref term) { + int64_t tmp; + vespalib::asciistream generatedTerm(term); + generatedTerm >> tmp; + return vespalib::nbo::n2h(tmp); +} + +} // namespace <unnamed> + + +SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, int arity) : SimpleQueryStackItem() +{ + assert_arity_type(type); + SetType(type); + _arity = arity; +} + +SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, int arity, const char *idx) : SimpleQueryStackItem() +{ + assert_arity_and_index_type(type); + SetType(type); + _arity = arity; + SetIndex(idx); +} + +SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, const char *term) : SimpleQueryStackItem() +{ + assert_term_type(type); + SetType(type); + SetTerm(term); +} + +SimpleQueryStackItem::~SimpleQueryStackItem() +{ + delete _next; +} + +void +SimpleQueryStackItem::AppendBuffer(RawBuf *buf) const +{ + // Calculate lengths + uint32_t indexLen = _indexName.size(); + uint32_t termLen = _term.size(); + double nboVal = 0.0; + + // Put the values into the buffer. + buf->append(_type); + switch (Type()) { + case ITEM_OR: + case ITEM_EQUIV: + case ITEM_AND: + case ITEM_NOT: + case ITEM_RANK: + case ITEM_ANY: + buf->appendCompressedPositiveNumber(_arity); + break; + case ITEM_NEAR: + case ITEM_ONEAR: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(_arg1); + break; + case ITEM_SAME_ELEMENT: + case ITEM_WEIGHTED_SET: + case ITEM_DOT_PRODUCT: + case ITEM_PHRASE: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + break; + case ITEM_WORD_ALTERNATIVES: + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + buf->appendCompressedPositiveNumber(_arity); + break; + case ITEM_WEAK_AND: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(_arg1); + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + break; + case ITEM_WAND: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + buf->appendCompressedPositiveNumber(_arg1); // targetNumHits + nboVal = vespalib::nbo::n2h(_arg2); + buf->append(&nboVal, sizeof(nboVal)); // scoreThreshold + nboVal = vespalib::nbo::n2h(_arg3); + buf->append(&nboVal, sizeof(nboVal)); // thresholdBoostFactor + break; + case ITEM_TERM: + case ITEM_NUMTERM: + case ITEM_LOCATION_TERM: + case ITEM_PREFIXTERM: + case ITEM_SUBSTRINGTERM: + case ITEM_EXACTSTRINGTERM: + case ITEM_SUFFIXTERM: + case ITEM_REGEXP: + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + buf->appendCompressedPositiveNumber(termLen); + buf->append(_term.c_str(), termLen); + break; + case ITEM_PURE_WEIGHTED_STRING: + buf->appendCompressedPositiveNumber(termLen); + buf->append(_term.c_str(), termLen); + break; + case ITEM_PURE_WEIGHTED_LONG: + { + int64_t tmp = term_as_n64(_term); + buf->append(&tmp, sizeof(int64_t)); + } + break; + case ITEM_NEAREST_NEIGHBOR: + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + buf->appendCompressedPositiveNumber(termLen); + buf->append(_term.c_str(), termLen); + buf->appendCompressedPositiveNumber(_arg1); // targetNumHits + buf->appendCompressedPositiveNumber(_arg2); // allow_approximate + buf->appendCompressedPositiveNumber(_arg3); // explore_additional_hits + break; + case ITEM_PREDICATE_QUERY: // not handled at all here + case ITEM_MAX: + case ITEM_UNDEF: + abort(); + break; + } +} + +} diff --git a/searchsummary/src/tests/extractkeywords/simplequerystackitem.h b/searchsummary/src/tests/extractkeywords/simplequerystackitem.h new file mode 100644 index 00000000000..15d8f7e5938 --- /dev/null +++ b/searchsummary/src/tests/extractkeywords/simplequerystackitem.h @@ -0,0 +1,107 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/util/rawbuf.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/parsequery/parse.h> + +namespace search { + +/** + * An item on the simple query stack. + * + * An object of this class represents a single item + * on the simple query stack. It has a type, which corresponds + * to the different query stack execution operations. It also + * provides an arity, and the string values indexName and term, to + * accomodate the different needs of the operations. + */ +class SimpleQueryStackItem : public ParseItem +{ +private: + SimpleQueryStackItem(const SimpleQueryStackItem &) = delete; + SimpleQueryStackItem& operator=(const SimpleQueryStackItem &) = delete; + SimpleQueryStackItem(); +public: + /** Pointer to next item in a linked list. */ + SimpleQueryStackItem *_next; + +private: + uint32_t _arg1; + double _arg2; + double _arg3; + ItemType _type; + +public: + ItemType Type() const { return _type; } + + /** The number of operands for the operation. */ + uint32_t _arity; + /** The name of the specified index, or empty if no index. */ + vespalib::string _indexName; + /** The specified search term. */ + vespalib::string _term; + +/** + * Overloaded constructor for SimpleQueryStackItem. Used primarily for + * the operators, or phrase without indexName. + * + * @param type The type of the SimpleQueryStackItem. + * @param arity The arity of the operation indicated by the SimpleQueryStackItem. + */ + SimpleQueryStackItem(ItemType type, int arity); + +/** + * Overloaded constructor for SimpleQueryStackItem. Used for PHRASEs. + * + * @param type The type of the SimpleQueryStackItem. + * @param arity The arity of the operation indicated by the SimpleQueryStackItem. + * @param idx The name of the index of the SimpleQueryStackItem. + */ + SimpleQueryStackItem(ItemType type, int arity, const char *index); + +/** + * Overloaded constructor for SimpleQueryStackItem. Used for TERMs without index. + * + * @param type The type of the SimpleQueryStackItem. + * @param term The actual term string of the SimpleQueryStackItem. + */ + SimpleQueryStackItem(ItemType type, const char *term); + +/** + * Destructor for SimpleQueryStackItem. + */ + ~SimpleQueryStackItem(); + +/** + * Set the value of the _term field. + * @param term The string to set the _term field to. + */ + void SetTerm(const char *term) { _term = term; } + +/** + * Set the value of the _indexName field. + * @param idx The string to set the _indexName field to. + */ + void SetIndex(const char *index) { _indexName = index; } + + /** + * Set the type of the operator. Use this with caution, + * as this changes the semantics of the item. + * + * @param type The new type. + */ + void SetType(ItemType type) { + _type = type; + } + + /** + * Encode the item in a binary buffer. + * @param buf Pointer to a buffer containing the encoded contents. + */ + void AppendBuffer(RawBuf *buf) const; +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp index 4872a183358..37239fe9da6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp @@ -187,7 +187,7 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const v->VisitAND(&item, 2); } while (rc && iterator.next()) { - bool isSpecialToken = search::ParseItem::getFlag(iterator.getFlags(), search::ParseItem::IFLAG_SPECIALTOKEN); + bool isSpecialToken = iterator.hasSpecialTokenFlag(); switch (iterator.getType()) { case search::ParseItem::ITEM_OR: case search::ParseItem::ITEM_WEAK_AND: @@ -241,10 +241,6 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const if (!v->VisitPHRASE(&item, iterator.getArity())) rc = SkipItem(&iterator); break; - case search::ParseItem::ITEM_PAREN: - if (!v->VisitOther(&item, iterator.getArity())) - rc = SkipItem(&iterator); - break; case search::ParseItem::ITEM_PREFIXTERM: case search::ParseItem::ITEM_SUBSTRINGTERM: { @@ -273,6 +269,8 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const case search::ParseItem::ITEM_REGEXP: case search::ParseItem::ITEM_PREDICATE_QUERY: case search::ParseItem::ITEM_SAME_ELEMENT: + case search::ParseItem::ITEM_NEAREST_NEIGHBOR: + case search::ParseItem::ITEM_LOCATION_TERM: if (!v->VisitOther(&item, iterator.getArity())) { rc = SkipItem(&iterator); } |