diff options
18 files changed, 429 insertions, 334 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java index 475a80f7ae0..bd368864e9a 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -42,7 +42,7 @@ public abstract class Item implements Cloneable { WORD(4), INT(5), PHRASE(6), - PAREN(7), + PAREN(7), // TODO not used - remove on Vespa 8 PREFIX(8), SUBSTRING(9), NEAR(11), @@ -60,7 +60,8 @@ public abstract class Item implements Cloneable { PREDICATE_QUERY(23), REGEXP(24), WORD_ALTERNATIVES(25), - NEAREST_NEIGHBOR(26); + NEAREST_NEIGHBOR(26), + LOCATION_TERM(27); public final int code; diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp index 8560cb0e091..d093bc4242e 100644 --- a/searchlib/src/tests/query/querybuilder_test.cpp +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -2,7 +2,6 @@ // Unit tests for querybuilder. 
#include <vespa/searchlib/parsequery/parse.h> -#include <vespa/searchlib/parsequery/simplequerystack.h> #include <vespa/searchlib/query/tree/customtypevisitor.h> #include <vespa/searchlib/query/tree/point.h> #include <vespa/searchlib/query/tree/querybuilder.h> diff --git a/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt b/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt index 0f73c102374..481faeecfb6 100644 --- a/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt @@ -2,7 +2,6 @@ vespa_add_library(searchlib_parsequery OBJECT SOURCES parse.cpp - simplequerystack.cpp stackdumpiterator.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.cpp b/searchlib/src/vespa/searchlib/parsequery/parse.cpp index c8fcce037ae..bf2f4b530ca 100644 --- a/searchlib/src/vespa/searchlib/parsequery/parse.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/parse.cpp @@ -1,168 +1,3 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "parse.h" -#include <vespa/vespalib/objects/nbo.h> -#include <cassert> - -namespace search { - -#define PARSEITEM_DEFAULT_CONSTRUCTOR_LIST \ - _next(NULL), \ - _sibling(NULL), \ - _weight(100), \ - _uniqueId(0), \ - _arg1(0), \ - _arg2(0), \ - _arg3(0), \ - _type(ITEM_UNDEF), \ - _flags(0), \ - _arity(0), \ - _indexName(), \ - _term() - - -ParseItem::ParseItem(ItemType type, int arity) - : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST -{ - assert(type==ITEM_OR || type==ITEM_WEAK_AND || type==ITEM_EQUIV || type==ITEM_AND || type==ITEM_NOT - || type==ITEM_RANK || type==ITEM_ANY || type==ITEM_NEAR || type==ITEM_ONEAR); - SetType(type); - _arity = arity; -} - -ParseItem::ParseItem(ItemType type, int arity, const char *idx) - : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST -{ - assert(type==ITEM_PHRASE || type==ITEM_SAME_ELEMENT || type==ITEM_WEIGHTED_SET - || type==ITEM_DOT_PRODUCT || type==ITEM_WAND || type==ITEM_WORD_ALTERNATIVES); - SetType(type); - _arity = arity; - SetIndex(idx); -} - -namespace { - -void assert_type(ParseItem::ItemType type) -{ - assert(type == ParseItem::ITEM_TERM || - type == ParseItem::ITEM_NUMTERM || - type == ParseItem::ITEM_PREFIXTERM || - type == ParseItem::ITEM_SUBSTRINGTERM || - type == ParseItem::ITEM_SUFFIXTERM || - type == ParseItem::ITEM_PURE_WEIGHTED_STRING || - type == ParseItem::ITEM_PURE_WEIGHTED_LONG || - type == ParseItem::ITEM_EXACTSTRINGTERM || - type == ParseItem::ITEM_PREDICATE_QUERY); - (void) type; -} - -} - -ParseItem::ParseItem(ItemType type, const char *term) - : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST -{ - assert_type(type); - SetType(type); - SetTerm(term); -} - -ParseItem::~ParseItem() -{ - delete _next; - delete _sibling; -} - -void -ParseItem::AppendBuffer(RawBuf *buf) const -{ - // Calculate the length of the buffer. - uint32_t indexLen = _indexName.size(); - uint32_t termLen = _term.size(); - - // Put the values into the buffer. 
- buf->append(_type); - if (Feature_Weight()) { // this item has weight - buf->appendCompressedNumber(_weight.percent()); - } - if (feature_UniqueId()) { - buf->appendCompressedPositiveNumber(_uniqueId); - } - if (feature_Flags()) { - buf->append(_flags); - } - switch (Type()) { - case ITEM_OR: - case ITEM_EQUIV: - case ITEM_AND: - case ITEM_NOT: - case ITEM_RANK: - case ITEM_ANY: - buf->appendCompressedPositiveNumber(_arity); - break; - case ITEM_NEAR: - case ITEM_ONEAR: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(_arg1); - break; - case ITEM_SAME_ELEMENT: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - break; - case ITEM_WORD_ALTERNATIVES: - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - buf->appendCompressedPositiveNumber(_arity); - break; - case ITEM_WEAK_AND: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(_arg1); - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - break; - case ITEM_WEIGHTED_SET: - case ITEM_DOT_PRODUCT: - case ITEM_WAND: - case ITEM_PHRASE: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - if (Type() == ITEM_WAND) { - buf->appendCompressedPositiveNumber(_arg1); // targetNumHits - double nboVal = vespalib::nbo::n2h(_arg2); - buf->append(&nboVal, sizeof(nboVal)); // scoreThreshold - nboVal = vespalib::nbo::n2h(_arg3); - buf->append(&nboVal, sizeof(nboVal)); // thresholdBoostFactor - } - break; - case ITEM_TERM: - case ITEM_NUMTERM: - case ITEM_PREFIXTERM: - case ITEM_SUBSTRINGTERM: - case ITEM_EXACTSTRINGTERM: - case ITEM_SUFFIXTERM: - case ITEM_REGEXP: - 
buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - buf->appendCompressedPositiveNumber(termLen); - if (termLen != 0) { - buf->append(_term.c_str(), termLen); - } - break; - case ITEM_UNDEF: - default: - break; - } -} - -} diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h index 83352b571c8..b4dd9826b84 100644 --- a/searchlib/src/vespa/searchlib/parsequery/parse.h +++ b/searchlib/src/vespa/searchlib/parsequery/parse.h @@ -9,7 +9,7 @@ namespace search { /** - * An item on the simple query stack. + * Items on a simple query stack. * * An object of this class represents a single item * on the simple query stack. It has a type, which corresponds @@ -22,15 +22,7 @@ namespace search { */ class ParseItem { -private: - ParseItem(const ParseItem &); - ParseItem& operator=(const ParseItem &); public: - /** Pointer to next item in a linked list. */ - ParseItem *_next; - /** Pointer to first item in a sublist. */ - ParseItem *_sibling; - /** The type of the item is from this set of values. It is important that these defines match those in prelude/source/com/yahoo/prelude/query/Item.java */ enum ItemType { @@ -41,7 +33,7 @@ public: ITEM_TERM = 4, ITEM_NUMTERM = 5, ITEM_PHRASE = 6, - ITEM_PAREN = 7, + /* removed: ITEM_PAREN = 7, */ ITEM_PREFIXTERM = 8, ITEM_SUBSTRINGTERM = 9, ITEM_ANY = 10, @@ -61,7 +53,8 @@ public: ITEM_REGEXP = 24, ITEM_WORD_ALTERNATIVES = 25, ITEM_NEAREST_NEIGHBOR = 26, - ITEM_MAX = 27, // Indicates how long tables must be. + ITEM_LOCATION_TERM = 27, + ITEM_MAX = 28, // Indicates how long tables must be. 
ITEM_UNDEF = 31, }; @@ -88,21 +81,10 @@ public: IFLAG_NOPOSITIONDATA = 0x00000004, // we should not use position data when ranking this term }; -private: - query::Weight _weight; - uint32_t _uniqueId; - uint32_t _arg1; - double _arg2; - double _arg3; - uint8_t _type; - uint8_t _flags; - -public: /** Extra information on each item (creator id) coded in bits 12-19 of _type */ static inline ItemCreator GetCreator(uint8_t type) { return static_cast<ItemCreator>((type >> 3) & 0x01); } /** The old item type now uses only the lower 12 bits in a backward compatible way) */ static inline ItemType GetType(uint8_t type) { return static_cast<ItemType>(type & 0x1F); } - inline ItemType Type() const { return GetType(_type); } static inline bool GetFeature(uint8_t type, uint8_t feature) { return ((type & feature) != 0); } @@ -115,95 +97,6 @@ public: static inline bool getFeature_Flags(uint8_t type) { return GetFeature(type, IF_FLAGS); } - - inline bool Feature(uint8_t feature) const - { return GetFeature(_type, feature); } - - inline bool Feature_Weight() const - { return GetFeature_Weight(_type); } - - inline bool feature_UniqueId() const - { return getFeature_UniqueId(_type); } - - inline bool feature_Flags() const - { return getFeature_Flags(_type); } - - static inline bool getFlag(uint8_t flags, uint8_t flag) - { return ((flags & flag) != 0); } - - /** The number of operands for the operation. */ - uint32_t _arity; - /** The name of the specified index, or NULL if no index. */ - vespalib::string _indexName; - /** The specified search term. */ - vespalib::string _term; - -/** - * Overloaded constructor for ParseItem. Used primarily for - * the operators, or pharse without indexName. - * - * @param type The type of the ParseItem. - * @param arity The arity of the operation indicated by the ParseItem. - */ - ParseItem(ItemType type, int arity); - -/** - * Overloaded constructor for ParseItem. Used for PHRASEs. - * - * @param type The type of the ParseItem. 
- * @param arity The arity of the operation indicated by the ParseItem. - * @param idx The name of the index of the ParseItem. - */ - ParseItem(ItemType type, int arity, const char *index); - -/** - * Overloaded constructor for ParseItem. Used for TERMs without index. - * - * @param type The type of the ParseItem. - * @param term The actual term string of the ParseItem. - */ - ParseItem(ItemType type, const char *term); - -/** - * Destructor for ParseItem. - */ - ~ParseItem(); - -/** - * Set the value of the _term field. - * @param term The string to set the _term field to. - */ - void SetTerm(const char *term) { _term = term; } - -/** - * Set the value of the _indexName field. - * @param idx The string to set the _indexName field to. - */ - void SetIndex(const char *index) { _indexName = index; } - - /** - * Set the type of the operator. Use this with caution, - * as this changes the semantics of the item. - * - * @param type The new type. - */ - void SetType(ItemType type) { - _type = (_type & ~0x1F) | type; - } - - /** - * Get the unique id for this item. - * - * @return unique id for this item - **/ - uint32_t getUniqueId() const { return _uniqueId; } - - /** - * Encode the item in a binary buffer. - * @param buf Pointer to a buffer containing the encoded contents. 
- */ - void AppendBuffer(RawBuf *buf) const; }; -} - +} // namespace search diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp index c42cf8fc370..17cbd6dce1b 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp @@ -207,6 +207,7 @@ SimpleQueryStackDumpIterator::next() } break; case ParseItem::ITEM_NUMTERM: + case ParseItem::ITEM_LOCATION_TERM: case ParseItem::ITEM_TERM: case ParseItem::ITEM_PREFIXTERM: case ParseItem::ITEM_SUBSTRINGTERM: diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h index 73c97bb5fb3..d60765f3fe1 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h @@ -110,12 +110,10 @@ public: **/ uint32_t getUniqueId() const { return _currUniqueId; } - /** - * Get the flags of the current item. - * - * @return flags of current item - **/ - uint32_t getFlags() const { return _currFlags; } + // Get the flags of the current item. 
+ bool hasNoRankFlag() const { return (_currFlags & ParseItem::IFLAG_NORANK) != 0; } + bool hasSpecialTokenFlag() const { return (_currFlags & ParseItem::IFLAG_SPECIALTOKEN) != 0; } + bool hasNoPositionDataFlag() const { return (_currFlags & ParseItem::IFLAG_NOPOSITIONDATA) != 0; } uint32_t getArity() const { return _currArity; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 3db6c8e68c8..f1599e820ef 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -64,6 +64,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor } break; case ParseItem::ITEM_NUMTERM: + case ParseItem::ITEM_LOCATION_TERM: case ParseItem::ITEM_TERM: case ParseItem::ITEM_PREFIXTERM: case ParseItem::ITEM_REGEXP: diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp index aafeaa46a22..f33520d8b0e 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp @@ -228,7 +228,7 @@ class QueryNodeConverter : public QueryVisitor { } void visit(LocationTerm &node) override { - createTerm(node, ParseItem::ITEM_NUMTERM); + createTerm(node, ParseItem::ITEM_LOCATION_TERM); } void visit(PrefixTerm &node) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h index 65d6abeeaad..898db9785f6 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h @@ -6,7 +6,6 @@ #include "querybuilder.h" #include "term.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> -#include <vespa/searchlib/parsequery/simplequerystack.h> #include 
<vespa/vespalib/objects/hexdump.h> namespace search::query { @@ -28,10 +27,10 @@ public: while (!builder.hasError() && queryStack.next()) { Term *t = createQueryTerm(queryStack, builder, pureTermView); if (!builder.hasError() && t) { - if (queryStack.getFlags() & ParseItem::IFLAG_NORANK) { + if (queryStack.hasNoRankFlag()) { t->setRanked(false); } - if (queryStack.getFlags() & ParseItem::IFLAG_NOPOSITIONDATA) { + if (queryStack.hasNoPositionDataFlag()) { t->setPositionData(false); } } @@ -142,11 +141,15 @@ private: t = &builder.addStringTerm(term, view, id, weight); } else if (type == ParseItem::ITEM_SUFFIXTERM) { t = &builder.addSuffixTerm(term, view, id, weight); + } else if (type == ParseItem::ITEM_LOCATION_TERM) { + Location loc(term); + t = &builder.addLocationTerm(loc, view, id, weight); } else if (type == ParseItem::ITEM_NUMTERM) { if (term[0] == '[' || term[0] == '<' || term[0] == '>') { Range range(term); t = &builder.addRangeTerm(range, view, id, weight); } else if (term[0] == '(') { + // TODO: handled above, should remove this block Location loc(term); t = &builder.addLocationTerm(loc, view, id, weight); } else { diff --git a/searchlib/src/vespa/searchlib/util/rawbuf.cpp b/searchlib/src/vespa/searchlib/util/rawbuf.cpp index cf019014fd1..c4fb3dd72cc 100644 --- a/searchlib/src/vespa/searchlib/util/rawbuf.cpp +++ b/searchlib/src/vespa/searchlib/util/rawbuf.cpp @@ -79,9 +79,11 @@ RawBuf::expandBuf(size_t needlen) void RawBuf::append(const void *data, size_t len) { - ensureSize(len); - memcpy(_bufFillPos, data, len); - _bufFillPos += len; + if (__builtin_expect(len != 0, true)) { + ensureSize(len); + memcpy(_bufFillPos, data, len); + _bufFillPos += len; + } } void diff --git a/searchsummary/src/tests/extractkeywords/CMakeLists.txt b/searchsummary/src/tests/extractkeywords/CMakeLists.txt index edcede16bc6..5eae390a5ec 100644 --- a/searchsummary/src/tests/extractkeywords/CMakeLists.txt +++ b/searchsummary/src/tests/extractkeywords/CMakeLists.txt @@ -2,6 +2,8 @@ 
vespa_add_executable(searchsummary_extractkeywordstest_app TEST SOURCES extractkeywordstest.cpp + simplequerystack.cpp + simplequerystackitem.cpp DEPENDS searchsummary ) diff --git a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp index 4abbe7d2613..017ac0b075d 100644 --- a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp +++ b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp @@ -2,7 +2,7 @@ #include "extractkeywordstest.h" #include <vespa/searchsummary/docsummary/keywordextractor.h> -#include <vespa/searchlib/parsequery/simplequerystack.h> +#include "simplequerystack.h" #include <vespa/vespalib/util/time.h> #define NUMTESTS 5 @@ -165,7 +165,7 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 0: { // Simple term query - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); @@ -178,11 +178,14 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 1: { + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_LOCATION_TERM, "no")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_NEAREST_NEIGHBOR, "no")); // multi term query - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 3)); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); + 
stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_OR, 3)); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 3)); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); @@ -196,10 +199,10 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 2: { // phrase term query - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3, "index")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 3, "index")); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); @@ -213,16 +216,16 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 3: { // multiple phrase and term query - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "xyzzy")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "xyz")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 2, "index")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3, "index")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "baz")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "zog")); - stack.Push(new 
search::ParseItem(search::ParseItem::ITEM_AND, 3)); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "xyzzy")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "xyz")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 2, "index")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 3, "index")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "baz")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "zog")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 3)); stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); @@ -236,11 +239,11 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 4: { // phrase term query with wrong argument items - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 2)); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar")); - stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 2, "index")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foo")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_AND, 2)); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "bar")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_PHRASE, 2, "index")); 
stack.AppendBuffer(&buf); keywords = _extractor->ExtractKeywords(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); diff --git a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp b/searchsummary/src/tests/extractkeywords/simplequerystack.cpp index 0908132aa87..40948612858 100644 --- a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp +++ b/searchsummary/src/tests/extractkeywords/simplequerystack.cpp @@ -24,7 +24,7 @@ SimpleQueryStack::~SimpleQueryStack() } void -SimpleQueryStack::Push(ParseItem *item) +SimpleQueryStack::Push(SimpleQueryStackItem *item) { item->_next = _stack; _stack = item; @@ -35,7 +35,7 @@ SimpleQueryStack::Push(ParseItem *item) void SimpleQueryStack::AppendBuffer(RawBuf *buf) const { - for (ParseItem *item = _stack; item != nullptr; item = item->_next) { + for (SimpleQueryStackItem *item = _stack; item != nullptr; item = item->_next) { item->AppendBuffer(buf); } } diff --git a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h b/searchsummary/src/tests/extractkeywords/simplequerystack.h index 3fff9103b2b..97dd6418e48 100644 --- a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h +++ b/searchsummary/src/tests/extractkeywords/simplequerystack.h @@ -2,18 +2,18 @@ #pragma once -#include <vespa/searchlib/parsequery/parse.h> +#include "simplequerystackitem.h" #include <vespa/searchlib/util/rawbuf.h> #include <vespa/vespalib/stllike/string.h> namespace search { /** - * A stack of ParseItems. + * A stack of SimpleQueryStackItems. * - * A simple stack consisting of a list of ParseItems. + * A simple stack consisting of a list of SimpleQueryStackItems. * It is able to generate a binary encoding of itself - * to a search::RawBuf. + * to a RawBuf. */ class SimpleQueryStack { @@ -25,7 +25,7 @@ private: * Warning: FastQT_ProximityEmul currently assumes this is the head * of a singly linked list (linked with _next). 
*/ - search::ParseItem *_stack; + SimpleQueryStackItem *_stack; public: SimpleQueryStack(const SimpleQueryStack &) = delete; @@ -40,16 +40,16 @@ public: ~SimpleQueryStack(); /** * Push an item on the stack. - * @param item The search::ParseItem to push. + * @param item The SimpleQueryStackItem to push. */ - void Push(search::ParseItem *item); + void Push(SimpleQueryStackItem *item); /** * Encode the contents of the stack in a binary buffer. * @param buf Pointer to a buffer containing the encoded contents. */ - void AppendBuffer(search::RawBuf *buf) const; + void AppendBuffer(RawBuf *buf) const; /** * Return the number of items on the stack. diff --git a/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp b/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp new file mode 100644 index 00000000000..f717cfced64 --- /dev/null +++ b/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp @@ -0,0 +1,207 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "simplequerystackitem.h" +#include <vespa/vespalib/objects/nbo.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <cassert> + +namespace search { + +SimpleQueryStackItem::SimpleQueryStackItem() + : _next(NULL), + _sibling(NULL), + _weight(100), + _uniqueId(0), + _arg1(0), + _arg2(0), + _arg3(0), + _type(ITEM_UNDEF), + _flags(0), + _arity(0), + _indexName(), + _term() +{} + +namespace { + +void assert_term_type(ParseItem::ItemType type) { + assert(type == ParseItem::ITEM_TERM || + type == ParseItem::ITEM_NUMTERM || + type == ParseItem::ITEM_NEAREST_NEIGHBOR || + type == ParseItem::ITEM_LOCATION_TERM || + type == ParseItem::ITEM_PREFIXTERM || + type == ParseItem::ITEM_SUBSTRINGTERM || + type == ParseItem::ITEM_SUFFIXTERM || + type == ParseItem::ITEM_PURE_WEIGHTED_STRING || + type == ParseItem::ITEM_PURE_WEIGHTED_LONG || + type == ParseItem::ITEM_EXACTSTRINGTERM || + type == ParseItem::ITEM_PREDICATE_QUERY); + (void) type; +} + +void assert_arity_type(ParseItem::ItemType type) { + // types with arity, but without an index name: + assert(type == ParseItem::ITEM_OR || + type == ParseItem::ITEM_WEAK_AND || + type == ParseItem::ITEM_EQUIV || + type == ParseItem::ITEM_AND || + type == ParseItem::ITEM_NOT || + type == ParseItem::ITEM_RANK || + type == ParseItem::ITEM_ANY || + type == ParseItem::ITEM_NEAR || + type == ParseItem::ITEM_ONEAR); + (void) type; +} + +void assert_arity_and_index_type(ParseItem::ItemType type) { + // types with arity and an index name: + assert(type == ParseItem::ITEM_PHRASE || + type == ParseItem::ITEM_SAME_ELEMENT || + type == ParseItem::ITEM_WEIGHTED_SET || + type == ParseItem::ITEM_DOT_PRODUCT || + type == ParseItem::ITEM_WAND || + type == ParseItem::ITEM_WORD_ALTERNATIVES); + (void) type; +} + +int64_t term_as_n64(vespalib::stringref term) { + int64_t tmp; + vespalib::asciistream generatedTerm(term); + generatedTerm >> tmp; + return vespalib::nbo::n2h(tmp); +} + +} // namespace <unnamed> + + 
+SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, int arity) : SimpleQueryStackItem() +{ + assert_arity_type(type); + SetType(type); + _arity = arity; +} + +SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, int arity, const char *idx) : SimpleQueryStackItem() +{ + assert_arity_and_index_type(type); + SetType(type); + _arity = arity; + SetIndex(idx); +} + +SimpleQueryStackItem::SimpleQueryStackItem(ItemType type, const char *term) : SimpleQueryStackItem() +{ + assert_term_type(type); + SetType(type); + SetTerm(term); +} + +SimpleQueryStackItem::~SimpleQueryStackItem() +{ + delete _next; + delete _sibling; +} + +void +SimpleQueryStackItem::AppendBuffer(RawBuf *buf) const +{ + // Calculate lengths + uint32_t indexLen = _indexName.size(); + uint32_t termLen = _term.size(); + double nboVal = 0.0; + + // Put the values into the buffer. + buf->append(_type); + if (Feature_Weight()) { // this item has weight + buf->appendCompressedNumber(_weight.percent()); + } + if (feature_UniqueId()) { + buf->appendCompressedPositiveNumber(_uniqueId); + } + if (feature_Flags()) { + buf->append(_flags); + } + switch (Type()) { + case ITEM_OR: + case ITEM_EQUIV: + case ITEM_AND: + case ITEM_NOT: + case ITEM_RANK: + case ITEM_ANY: + buf->appendCompressedPositiveNumber(_arity); + break; + case ITEM_NEAR: + case ITEM_ONEAR: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(_arg1); + break; + case ITEM_SAME_ELEMENT: + case ITEM_WEIGHTED_SET: + case ITEM_DOT_PRODUCT: + case ITEM_PHRASE: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + break; + case ITEM_WORD_ALTERNATIVES: + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + buf->appendCompressedPositiveNumber(_arity); + break; + case ITEM_WEAK_AND: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(_arg1); + 
buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + break; + case ITEM_WAND: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + buf->appendCompressedPositiveNumber(_arg1); // targetNumHits + nboVal = vespalib::nbo::n2h(_arg2); + buf->append(&nboVal, sizeof(nboVal)); // scoreThreshold + nboVal = vespalib::nbo::n2h(_arg3); + buf->append(&nboVal, sizeof(nboVal)); // thresholdBoostFactor + break; + case ITEM_TERM: + case ITEM_NUMTERM: + case ITEM_LOCATION_TERM: + case ITEM_PREFIXTERM: + case ITEM_SUBSTRINGTERM: + case ITEM_EXACTSTRINGTERM: + case ITEM_SUFFIXTERM: + case ITEM_REGEXP: + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + buf->appendCompressedPositiveNumber(termLen); + buf->append(_term.c_str(), termLen); + break; + case ITEM_PURE_WEIGHTED_STRING: + buf->appendCompressedPositiveNumber(termLen); + buf->append(_term.c_str(), termLen); + break; + case ITEM_PURE_WEIGHTED_LONG: + { + int64_t tmp = term_as_n64(_term); + buf->append(&tmp, sizeof(int64_t)); + } + break; + case ITEM_NEAREST_NEIGHBOR: + buf->appendCompressedPositiveNumber(indexLen); + buf->append(_indexName.c_str(), indexLen); + buf->appendCompressedPositiveNumber(termLen); + buf->append(_term.c_str(), termLen); + buf->appendCompressedPositiveNumber(_arg1); // targetNumHits + buf->appendCompressedPositiveNumber(_arg2); // allow_approximate + buf->appendCompressedPositiveNumber(_arg3); // explore_additional_hits + break; + case ITEM_PREDICATE_QUERY: // not handled at all here + case ITEM_MAX: + case ITEM_UNDEF: + abort(); + break; + } +} + +} diff --git a/searchsummary/src/tests/extractkeywords/simplequerystackitem.h b/searchsummary/src/tests/extractkeywords/simplequerystackitem.h new file mode 100644 index 00000000000..05250154d18 --- /dev/null +++ b/searchsummary/src/tests/extractkeywords/simplequerystackitem.h @@ -0,0 
+1,153 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/util/rawbuf.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/parsequery/parse.h> + +namespace search { + +/** + * An item on the simple query stack. + * + * An object of this class represents a single item + * on the simple query stack. It has a type, which corresponds + * to the different query stack execution operations. It also + * provides an arity, and the string values indexName and term, to + * accommodate the different needs of the operations. + * It also includes a mechanism for making singly linked lists + * with sub-lists. This is used during the parsing, and also + * when constructing the simple query stack. + */ +class SimpleQueryStackItem : public ParseItem +{ +private: + SimpleQueryStackItem(const SimpleQueryStackItem &) = delete; + SimpleQueryStackItem& operator=(const SimpleQueryStackItem &) = delete; + SimpleQueryStackItem(); +public: + /** Pointer to next item in a linked list. */ + SimpleQueryStackItem *_next; + /** Pointer to first item in a sublist.
*/ + SimpleQueryStackItem *_sibling; + +private: + query::Weight _weight; + uint32_t _uniqueId; + uint32_t _arg1; + double _arg2; + double _arg3; + uint8_t _type; + uint8_t _flags; + +public: + /** Extra information on each item (creator id) coded in bits 12-19 of _type */ + static inline ItemCreator GetCreator(uint8_t type) { return static_cast<ItemCreator>((type >> 3) & 0x01); } + /** The old item type now uses only the lower 12 bits in a backward compatible way) */ + static inline ItemType GetType(uint8_t type) { return static_cast<ItemType>(type & 0x1F); } + inline ItemType Type() const { return GetType(_type); } + + static inline bool GetFeature(uint8_t type, uint8_t feature) + { return ((type & feature) != 0); } + + static inline bool GetFeature_Weight(uint8_t type) + { return GetFeature(type, IF_WEIGHT); } + + static inline bool getFeature_UniqueId(uint8_t type) + { return GetFeature(type, IF_UNIQUEID); } + + static inline bool getFeature_Flags(uint8_t type) + { return GetFeature(type, IF_FLAGS); } + + inline bool Feature(uint8_t feature) const + { return GetFeature(_type, feature); } + + inline bool Feature_Weight() const + { return GetFeature_Weight(_type); } + + inline bool feature_UniqueId() const + { return getFeature_UniqueId(_type); } + + inline bool feature_Flags() const + { return getFeature_Flags(_type); } + + static inline bool getFlag(uint8_t flags, uint8_t flag) + { return ((flags & flag) != 0); } + + /** The number of operands for the operation. */ + uint32_t _arity; + /** The name of the specified index, or NULL if no index. */ + vespalib::string _indexName; + /** The specified search term. */ + vespalib::string _term; + +/** + * Overloaded constructor for SimpleQueryStackItem. Used primarily for + * the operators, or phrase without indexName. + * + * @param type The type of the SimpleQueryStackItem. + * @param arity The arity of the operation indicated by the SimpleQueryStackItem. 
+ */ + SimpleQueryStackItem(ItemType type, int arity); + +/** + * Overloaded constructor for SimpleQueryStackItem. Used for PHRASEs. + * + * @param type The type of the SimpleQueryStackItem. + * @param arity The arity of the operation indicated by the SimpleQueryStackItem. + * @param idx The name of the index of the SimpleQueryStackItem. + */ + SimpleQueryStackItem(ItemType type, int arity, const char *index); + +/** + * Overloaded constructor for SimpleQueryStackItem. Used for TERMs without index. + * + * @param type The type of the SimpleQueryStackItem. + * @param term The actual term string of the SimpleQueryStackItem. + */ + SimpleQueryStackItem(ItemType type, const char *term); + +/** + * Destructor for SimpleQueryStackItem. + */ + ~SimpleQueryStackItem(); + +/** + * Set the value of the _term field. + * @param term The string to set the _term field to. + */ + void SetTerm(const char *term) { _term = term; } + +/** + * Set the value of the _indexName field. + * @param idx The string to set the _indexName field to. + */ + void SetIndex(const char *index) { _indexName = index; } + + /** + * Set the type of the operator. Use this with caution, + * as this changes the semantics of the item. + * + * @param type The new type. + */ + void SetType(ItemType type) { + _type = (_type & ~0x1F) | type; + } + + /** + * Get the unique id for this item. + * + * @return unique id for this item + **/ + uint32_t getUniqueId() const { return _uniqueId; } + + /** + * Encode the item in a binary buffer. + * @param buf Pointer to a buffer containing the encoded contents. 
+ */ + void AppendBuffer(RawBuf *buf) const; +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp index 4872a183358..37239fe9da6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp @@ -187,7 +187,7 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const v->VisitAND(&item, 2); } while (rc && iterator.next()) { - bool isSpecialToken = search::ParseItem::getFlag(iterator.getFlags(), search::ParseItem::IFLAG_SPECIALTOKEN); + bool isSpecialToken = iterator.hasSpecialTokenFlag(); switch (iterator.getType()) { case search::ParseItem::ITEM_OR: case search::ParseItem::ITEM_WEAK_AND: @@ -241,10 +241,6 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const if (!v->VisitPHRASE(&item, iterator.getArity())) rc = SkipItem(&iterator); break; - case search::ParseItem::ITEM_PAREN: - if (!v->VisitOther(&item, iterator.getArity())) - rc = SkipItem(&iterator); - break; case search::ParseItem::ITEM_PREFIXTERM: case search::ParseItem::ITEM_SUBSTRINGTERM: { @@ -273,6 +269,8 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const case search::ParseItem::ITEM_REGEXP: case search::ParseItem::ITEM_PREDICATE_QUERY: case search::ParseItem::ITEM_SAME_ELEMENT: + case search::ParseItem::ITEM_NEAREST_NEIGHBOR: + case search::ParseItem::ITEM_LOCATION_TERM: if (!v->VisitOther(&item, iterator.getArity())) { rc = SkipItem(&iterator); } |