diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-07-07 08:51:21 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-07-09 09:55:22 +0000 |
commit | 88099bec32481b83b41f3966c2a88ebf4034f1fe (patch) | |
tree | bd703eae788918a6430dd5973af70ce84e40f7a4 /searchlib | |
parent | 1d12f4b4b92771dfe7e245b88ce89adc911c509e (diff) |
clean up various issues with ParseItem class
* SimpleQueryStack only used for one unit test, move it there
* Actual instances of ParseItem also only used for same unit test.
Split out the object representation into a separate
SimpleQueryStackItem class in the unit test directory.
* give location ITEM_LOCATION_TERM instead of overloading NUMTERM
* ParseItem::ITEM_PAREN never used for anything, remove it
* add comment for removal of PAREN enum in prelude/query/Item.java
* refactor flag handling with one method per flag
Diffstat (limited to 'searchlib')
12 files changed, 23 insertions, 411 deletions
diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp index 8560cb0e091..d093bc4242e 100644 --- a/searchlib/src/tests/query/querybuilder_test.cpp +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -2,7 +2,6 @@ // Unit tests for querybuilder. #include <vespa/searchlib/parsequery/parse.h> -#include <vespa/searchlib/parsequery/simplequerystack.h> #include <vespa/searchlib/query/tree/customtypevisitor.h> #include <vespa/searchlib/query/tree/point.h> #include <vespa/searchlib/query/tree/querybuilder.h> diff --git a/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt b/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt index 0f73c102374..481faeecfb6 100644 --- a/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt @@ -2,7 +2,6 @@ vespa_add_library(searchlib_parsequery OBJECT SOURCES parse.cpp - simplequerystack.cpp stackdumpiterator.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.cpp b/searchlib/src/vespa/searchlib/parsequery/parse.cpp index c8fcce037ae..bf2f4b530ca 100644 --- a/searchlib/src/vespa/searchlib/parsequery/parse.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/parse.cpp @@ -1,168 +1,3 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "parse.h" -#include <vespa/vespalib/objects/nbo.h> -#include <cassert> - -namespace search { - -#define PARSEITEM_DEFAULT_CONSTRUCTOR_LIST \ - _next(NULL), \ - _sibling(NULL), \ - _weight(100), \ - _uniqueId(0), \ - _arg1(0), \ - _arg2(0), \ - _arg3(0), \ - _type(ITEM_UNDEF), \ - _flags(0), \ - _arity(0), \ - _indexName(), \ - _term() - - -ParseItem::ParseItem(ItemType type, int arity) - : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST -{ - assert(type==ITEM_OR || type==ITEM_WEAK_AND || type==ITEM_EQUIV || type==ITEM_AND || type==ITEM_NOT - || type==ITEM_RANK || type==ITEM_ANY || type==ITEM_NEAR || type==ITEM_ONEAR); - SetType(type); - _arity = arity; -} - -ParseItem::ParseItem(ItemType type, int arity, const char *idx) - : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST -{ - assert(type==ITEM_PHRASE || type==ITEM_SAME_ELEMENT || type==ITEM_WEIGHTED_SET - || type==ITEM_DOT_PRODUCT || type==ITEM_WAND || type==ITEM_WORD_ALTERNATIVES); - SetType(type); - _arity = arity; - SetIndex(idx); -} - -namespace { - -void assert_type(ParseItem::ItemType type) -{ - assert(type == ParseItem::ITEM_TERM || - type == ParseItem::ITEM_NUMTERM || - type == ParseItem::ITEM_PREFIXTERM || - type == ParseItem::ITEM_SUBSTRINGTERM || - type == ParseItem::ITEM_SUFFIXTERM || - type == ParseItem::ITEM_PURE_WEIGHTED_STRING || - type == ParseItem::ITEM_PURE_WEIGHTED_LONG || - type == ParseItem::ITEM_EXACTSTRINGTERM || - type == ParseItem::ITEM_PREDICATE_QUERY); - (void) type; -} - -} - -ParseItem::ParseItem(ItemType type, const char *term) - : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST -{ - assert_type(type); - SetType(type); - SetTerm(term); -} - -ParseItem::~ParseItem() -{ - delete _next; - delete _sibling; -} - -void -ParseItem::AppendBuffer(RawBuf *buf) const -{ - // Calculate the length of the buffer. - uint32_t indexLen = _indexName.size(); - uint32_t termLen = _term.size(); - - // Put the values into the buffer. - buf->append(_type); - if (Feature_Weight()) { // this item has weight - buf->appendCompressedNumber(_weight.percent()); - } - if (feature_UniqueId()) { - buf->appendCompressedPositiveNumber(_uniqueId); - } - if (feature_Flags()) { - buf->append(_flags); - } - switch (Type()) { - case ITEM_OR: - case ITEM_EQUIV: - case ITEM_AND: - case ITEM_NOT: - case ITEM_RANK: - case ITEM_ANY: - buf->appendCompressedPositiveNumber(_arity); - break; - case ITEM_NEAR: - case ITEM_ONEAR: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(_arg1); - break; - case ITEM_SAME_ELEMENT: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - break; - case ITEM_WORD_ALTERNATIVES: - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - buf->appendCompressedPositiveNumber(_arity); - break; - case ITEM_WEAK_AND: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(_arg1); - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - break; - case ITEM_WEIGHTED_SET: - case ITEM_DOT_PRODUCT: - case ITEM_WAND: - case ITEM_PHRASE: - buf->appendCompressedPositiveNumber(_arity); - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - if (Type() == ITEM_WAND) { - buf->appendCompressedPositiveNumber(_arg1); // targetNumHits - double nboVal = vespalib::nbo::n2h(_arg2); - buf->append(&nboVal, sizeof(nboVal)); // scoreThreshold - nboVal = vespalib::nbo::n2h(_arg3); - buf->append(&nboVal, sizeof(nboVal)); // thresholdBoostFactor - } - break; - case ITEM_TERM: - case ITEM_NUMTERM: - case ITEM_PREFIXTERM: - case ITEM_SUBSTRINGTERM: - case ITEM_EXACTSTRINGTERM: - case ITEM_SUFFIXTERM: - case ITEM_REGEXP: - buf->appendCompressedPositiveNumber(indexLen); - if (indexLen != 0) { - buf->append(_indexName.c_str(), indexLen); - } - buf->appendCompressedPositiveNumber(termLen); - if (termLen != 0) { - buf->append(_term.c_str(), termLen); - } - break; - case ITEM_UNDEF: - default: - break; - } -} - -} diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h index 83352b571c8..b4dd9826b84 100644 --- a/searchlib/src/vespa/searchlib/parsequery/parse.h +++ b/searchlib/src/vespa/searchlib/parsequery/parse.h @@ -9,7 +9,7 @@ namespace search { /** - * An item on the simple query stack. + * Items on a simple query stack. * * An object of this class represents a single item * on the simple query stack. It has a type, which corresponds @@ -22,15 +22,7 @@ namespace search { */ class ParseItem { -private: - ParseItem(const ParseItem &); - ParseItem& operator=(const ParseItem &); public: - /** Pointer to next item in a linked list. */ - ParseItem *_next; - /** Pointer to first item in a sublist. */ - ParseItem *_sibling; - /** The type of the item is from this set of values. It is important that these defines match those in prelude/source/com/yahoo/prelude/query/Item.java */ enum ItemType { @@ -41,7 +33,7 @@ public: ITEM_TERM = 4, ITEM_NUMTERM = 5, ITEM_PHRASE = 6, - ITEM_PAREN = 7, + /* removed: ITEM_PAREN = 7, */ ITEM_PREFIXTERM = 8, ITEM_SUBSTRINGTERM = 9, ITEM_ANY = 10, @@ -61,7 +53,8 @@ public: ITEM_REGEXP = 24, ITEM_WORD_ALTERNATIVES = 25, ITEM_NEAREST_NEIGHBOR = 26, - ITEM_MAX = 27, // Indicates how long tables must be. + ITEM_LOCATION_TERM = 27, + ITEM_MAX = 28, // Indicates how long tables must be. ITEM_UNDEF = 31, }; @@ -88,21 +81,10 @@ public: IFLAG_NOPOSITIONDATA = 0x00000004, // we should not use position data when ranking this term }; -private: - query::Weight _weight; - uint32_t _uniqueId; - uint32_t _arg1; - double _arg2; - double _arg3; - uint8_t _type; - uint8_t _flags; - -public: /** Extra information on each item (creator id) coded in bits 12-19 of _type */ static inline ItemCreator GetCreator(uint8_t type) { return static_cast<ItemCreator>((type >> 3) & 0x01); } /** The old item type now uses only the lower 12 bits in a backward compatible way) */ static inline ItemType GetType(uint8_t type) { return static_cast<ItemType>(type & 0x1F); } - inline ItemType Type() const { return GetType(_type); } static inline bool GetFeature(uint8_t type, uint8_t feature) { return ((type & feature) != 0); } @@ -115,95 +97,6 @@ public: static inline bool getFeature_Flags(uint8_t type) { return GetFeature(type, IF_FLAGS); } - - inline bool Feature(uint8_t feature) const - { return GetFeature(_type, feature); } - - inline bool Feature_Weight() const - { return GetFeature_Weight(_type); } - - inline bool feature_UniqueId() const - { return getFeature_UniqueId(_type); } - - inline bool feature_Flags() const - { return getFeature_Flags(_type); } - - static inline bool getFlag(uint8_t flags, uint8_t flag) - { return ((flags & flag) != 0); } - - /** The number of operands for the operation. */ - uint32_t _arity; - /** The name of the specified index, or NULL if no index. */ - vespalib::string _indexName; - /** The specified search term. */ - vespalib::string _term; - -/** - * Overloaded constructor for ParseItem. Used primarily for - * the operators, or pharse without indexName. - * - * @param type The type of the ParseItem. - * @param arity The arity of the operation indicated by the ParseItem. - */ - ParseItem(ItemType type, int arity); - -/** - * Overloaded constructor for ParseItem. Used for PHRASEs. - * - * @param type The type of the ParseItem. - * @param arity The arity of the operation indicated by the ParseItem. - * @param idx The name of the index of the ParseItem. - */ - ParseItem(ItemType type, int arity, const char *index); - -/** - * Overloaded constructor for ParseItem. Used for TERMs without index. - * - * @param type The type of the ParseItem. - * @param term The actual term string of the ParseItem. - */ - ParseItem(ItemType type, const char *term); - -/** - * Destructor for ParseItem. - */ - ~ParseItem(); - -/** - * Set the value of the _term field. - * @param term The string to set the _term field to. - */ - void SetTerm(const char *term) { _term = term; } - -/** - * Set the value of the _indexName field. - * @param idx The string to set the _indexName field to. - */ - void SetIndex(const char *index) { _indexName = index; } - - /** - * Set the type of the operator. Use this with caution, - * as this changes the semantics of the item. - * - * @param type The new type. - */ - void SetType(ItemType type) { - _type = (_type & ~0x1F) | type; - } - - /** - * Get the unique id for this item. - * - * @return unique id for this item - **/ - uint32_t getUniqueId() const { return _uniqueId; } - - /** - * Encode the item in a binary buffer. - * @param buf Pointer to a buffer containing the encoded contents. - */ - void AppendBuffer(RawBuf *buf) const; }; -} - +} // namespace search diff --git a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp deleted file mode 100644 index 0908132aa87..00000000000 --- a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "simplequerystack.h" -#include <vespa/vespalib/util/compress.h> -#include <vespa/vespalib/objects/nbo.h> -#include <vespa/vespalib/util/stringfmt.h> - -#include <vespa/log/log.h> -LOG_SETUP(".search.simplequerystack"); - -using vespalib::make_string; - -namespace search { - -SimpleQueryStack::SimpleQueryStack() - : _numItems(0), - _stack(nullptr) -{ -} - -SimpleQueryStack::~SimpleQueryStack() -{ - delete _stack; -} - -void -SimpleQueryStack::Push(ParseItem *item) -{ - item->_next = _stack; - _stack = item; - - _numItems++; -} - -void -SimpleQueryStack::AppendBuffer(RawBuf *buf) const -{ - for (ParseItem *item = _stack; item != nullptr; item = item->_next) { - item->AppendBuffer(buf); - } -} - - -uint32_t -SimpleQueryStack::GetSize() -{ - return _numItems; -} - -} // namespace search diff --git a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h deleted file mode 100644 index 3fff9103b2b..00000000000 --- a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/searchlib/parsequery/parse.h> -#include <vespa/searchlib/util/rawbuf.h> -#include <vespa/vespalib/stllike/string.h> - -namespace search { - -/** - * A stack of ParseItems. - * - * A simple stack consisting of a list of ParseItems. - * It is able to generate a binary encoding of itself - * to a search::RawBuf. - */ -class SimpleQueryStack -{ -private: - /** The number of items on the stack. */ - uint32_t _numItems; - - /** The top of the stack. - * Warning: FastQT_ProximityEmul currently assumes this is the head - * of a singly linked list (linked with _next). - */ - search::ParseItem *_stack; - -public: - SimpleQueryStack(const SimpleQueryStack &) = delete; - SimpleQueryStack& operator=(const SimpleQueryStack &) = delete; - /** - * Constructor for SimpleQueryStack. - */ - SimpleQueryStack(); - /** - * Destructor for SimpleQueryStack. - */ - ~SimpleQueryStack(); - /** - * Push an item on the stack. - * @param item The search::ParseItem to push. - */ - void Push(search::ParseItem *item); - - - /** - * Encode the contents of the stack in a binary buffer. - * @param buf Pointer to a buffer containing the encoded contents. - */ - void AppendBuffer(search::RawBuf *buf) const; - - /** - * Return the number of items on the stack. - * @return The number of items on the stack. - */ - uint32_t GetSize(); - /** - * Set the number of items on the stack. - * This can be used by QTs that change the stack - * under the hood. Use with care! - * @param numItems The number of items on the stack. - */ - void SetSize(uint32_t numItems) { _numItems = numItems; } -}; - -} // namespace search - diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp index c42cf8fc370..17cbd6dce1b 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp @@ -207,6 +207,7 @@ SimpleQueryStackDumpIterator::next() } break; case ParseItem::ITEM_NUMTERM: + case ParseItem::ITEM_LOCATION_TERM: case ParseItem::ITEM_TERM: case ParseItem::ITEM_PREFIXTERM: case ParseItem::ITEM_SUBSTRINGTERM: diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h index 73c97bb5fb3..d60765f3fe1 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h @@ -110,12 +110,10 @@ public: **/ uint32_t getUniqueId() const { return _currUniqueId; } - /** - * Get the flags of the current item. - * - * @return flags of current item - **/ - uint32_t getFlags() const { return _currFlags; } + // Get the flags of the current item. + bool hasNoRankFlag() const { return (_currFlags & ParseItem::IFLAG_NORANK) != 0; } + bool hasSpecialTokenFlag() const { return (_currFlags & ParseItem::IFLAG_SPECIALTOKEN) != 0; } + bool hasNoPositionDataFlag() const { return (_currFlags & ParseItem::IFLAG_NOPOSITIONDATA) != 0; } uint32_t getArity() const { return _currArity; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 3db6c8e68c8..f1599e820ef 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -64,6 +64,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor } break; case ParseItem::ITEM_NUMTERM: + case ParseItem::ITEM_LOCATION_TERM: case ParseItem::ITEM_TERM: case ParseItem::ITEM_PREFIXTERM: case ParseItem::ITEM_REGEXP: diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp index aafeaa46a22..f33520d8b0e 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp @@ -228,7 +228,7 @@ class QueryNodeConverter : public QueryVisitor { } void visit(LocationTerm &node) override { - createTerm(node, ParseItem::ITEM_NUMTERM); + createTerm(node, ParseItem::ITEM_LOCATION_TERM); } void visit(PrefixTerm &node) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h index 65d6abeeaad..898db9785f6 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h @@ -6,7 +6,6 @@ #include "querybuilder.h" #include "term.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> -#include <vespa/searchlib/parsequery/simplequerystack.h> #include <vespa/vespalib/objects/hexdump.h> namespace search::query { @@ -28,10 +27,10 @@ public: while (!builder.hasError() && queryStack.next()) { Term *t = createQueryTerm(queryStack, builder, pureTermView); if (!builder.hasError() && t) { - if (queryStack.getFlags() & ParseItem::IFLAG_NORANK) { + if (queryStack.hasNoRankFlag()) { t->setRanked(false); } - if (queryStack.getFlags() & ParseItem::IFLAG_NOPOSITIONDATA) { + if (queryStack.hasNoPositionDataFlag()) { t->setPositionData(false); } } @@ -142,11 +141,15 @@ private: t = &builder.addStringTerm(term, view, id, weight); } else if (type == ParseItem::ITEM_SUFFIXTERM) { t = &builder.addSuffixTerm(term, view, id, weight); + } else if (type == ParseItem::ITEM_LOCATION_TERM) { + Location loc(term); + t = &builder.addLocationTerm(loc, view, id, weight); } else if (type == ParseItem::ITEM_NUMTERM) { if (term[0] == '[' || term[0] == '<' || term[0] == '>') { Range range(term); t = &builder.addRangeTerm(range, view, id, weight); } else if (term[0] == '(') { + // TODO: handled above, should remove this block Location loc(term); t = &builder.addLocationTerm(loc, view, id, weight); } else { diff --git a/searchlib/src/vespa/searchlib/util/rawbuf.cpp b/searchlib/src/vespa/searchlib/util/rawbuf.cpp index cf019014fd1..c4fb3dd72cc 100644 --- a/searchlib/src/vespa/searchlib/util/rawbuf.cpp +++ b/searchlib/src/vespa/searchlib/util/rawbuf.cpp @@ -79,9 +79,11 @@ RawBuf::expandBuf(size_t needlen) void RawBuf::append(const void *data, size_t len) { - ensureSize(len); - memcpy(_bufFillPos, data, len); - _bufFillPos += len; + if (__builtin_expect(len != 0, true)) { + ensureSize(len); + memcpy(_bufFillPos, data, len); + _bufFillPos += len; + } } void |