diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-29 11:03:26 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-29 11:03:26 +0000 |
commit | c9f8d4aec5ec0cec0bb93e01716f9a707b6ca8af (patch) | |
tree | 54613b5a04f7de816a8eac3c3e11f610bffe8a84 | |
parent | f9c63106a9800731b272aaf9c1be32513b303c87 (diff) |
- Simplify code by catching exception in one place.
- Unify error handling to prevent reading past the buffer.
- track positions by using to integers insteda of 2 pointers.
4 files changed, 107 insertions, 143 deletions
diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp index c170f07f441..055d245e420 100644 --- a/searchlib/src/tests/query/querybuilder_test.cpp +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -593,7 +593,7 @@ TEST("require that empty intermediate node can be added") { } TEST("control size of SimpleQueryStackDumpIterator") { - EXPECT_EQUAL(152u, sizeof(SimpleQueryStackDumpIterator)); + EXPECT_EQUAL(144u, sizeof(SimpleQueryStackDumpIterator)); } TEST("test query parsing error") { diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp index 622fee6d5c8..d8be4a45af4 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp @@ -13,8 +13,8 @@ namespace search { SimpleQueryStackDumpIterator::SimpleQueryStackDumpIterator(vespalib::stringref buf) : _buf(buf.begin()), _bufEnd(buf.end()), - _currPos(_buf), - _currEnd(_buf), + _currPos(0), + _currEnd(0), _currType(ParseItem::ITEM_UNDEF), _currFlags(0), _currWeight(100), @@ -34,34 +34,20 @@ SimpleQueryStackDumpIterator::SimpleQueryStackDumpIterator(vespalib::stringref b SimpleQueryStackDumpIterator::~SimpleQueryStackDumpIterator() = default; -vespalib::string -SimpleQueryStackDumpIterator::readString(const char *&p) -{ - if (p >= _bufEnd) throw false; - uint64_t tmp; - p += vespalib::compress::Integer::decompressPositive(tmp, p); - vespalib::string s(p, tmp); - p += s.size(); - return s; -} - vespalib::stringref SimpleQueryStackDumpIterator::read_stringref(const char *&p) { - if (p >= _bufEnd) throw false; uint64_t len; p += vespalib::compress::Integer::decompressPositive(len, p); - if (p > _bufEnd) throw false; + if ((p + len) > _bufEnd) throw false; vespalib::stringref result(p, len); p += len; - if (p > _bufEnd) throw false; return result; } uint64_t SimpleQueryStackDumpIterator::readUint64(const char *&p) { - if (p + sizeof(uint64_t) > _bufEnd) throw false; uint64_t l = vespalib::nbo::n2h(*(const uint64_t *)(const void *)p); p += sizeof(uint64_t); return l; @@ -70,17 +56,14 @@ SimpleQueryStackDumpIterator::readUint64(const char *&p) double SimpleQueryStackDumpIterator::read_double(const char *&p) { - if (p >= _bufEnd) throw false; double result = vespalib::nbo::n2h(*reinterpret_cast<const double *>(p)); p += sizeof(double); - if (p > _bufEnd) throw false; return result; } uint64_t SimpleQueryStackDumpIterator::readCompressedPositiveInt(const char *&p) { - if (p >= _bufEnd) throw false; uint64_t tmp; p += vespalib::compress::Integer::decompressPositive(tmp, p); if (p > _bufEnd) throw false; @@ -88,9 +71,16 @@ SimpleQueryStackDumpIterator::readCompressedPositiveInt(const char *&p) } bool -SimpleQueryStackDumpIterator::next() -{ - if (_currEnd >= _bufEnd) +SimpleQueryStackDumpIterator::next() { + try { + return readNext(); + } catch (...) { + return false; + } +} + +bool SimpleQueryStackDumpIterator::readNext() { + if ((_buf + _currEnd) >= _bufEnd) // End of buffer, so no more items available return false; @@ -98,7 +88,7 @@ SimpleQueryStackDumpIterator::next() _currPos = _currEnd; // Find an item at the current position - const char *p = _currPos; + const char *p = _buf + _currPos; uint8_t typefield = *p++; _currType = ParseItem::GetType(typefield); @@ -112,11 +102,7 @@ SimpleQueryStackDumpIterator::next() _currWeight.setPercent(100); } if (ParseItem::getFeature_UniqueId(typefield)) { - try { - _currUniqueId = readCompressedPositiveInt(p); - } catch (...) { - return false; - } + _currUniqueId = readCompressedPositiveInt(p); } else { _currUniqueId = 0; } @@ -134,54 +120,34 @@ SimpleQueryStackDumpIterator::next() case ParseItem::ITEM_NOT: case ParseItem::ITEM_RANK: case ParseItem::ITEM_ANY: - try { - _currArity = readCompressedPositiveInt(p); - _curr_index_name = vespalib::stringref(); - _curr_term = vespalib::stringref(); - } catch (...) { - return false; - } + _currArity = readCompressedPositiveInt(p); + _curr_index_name = vespalib::stringref(); + _curr_term = vespalib::stringref(); break; case ParseItem::ITEM_NEAR: case ParseItem::ITEM_ONEAR: - try { - _currArity = readCompressedPositiveInt(p); - _extraIntArg1 = readCompressedPositiveInt(p); - _curr_index_name = vespalib::stringref(); - _curr_term = vespalib::stringref(); - } catch (...) { - return false; - } + _currArity = readCompressedPositiveInt(p); + _extraIntArg1 = readCompressedPositiveInt(p); + _curr_index_name = vespalib::stringref(); + _curr_term = vespalib::stringref(); break; case ParseItem::ITEM_WEAK_AND: - try { - _currArity = readCompressedPositiveInt(p); - _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits - _curr_index_name = read_stringref(p); - _curr_term = vespalib::stringref(); - } catch (...) { - return false; - } + _currArity = readCompressedPositiveInt(p); + _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits + _curr_index_name = read_stringref(p); + _curr_term = vespalib::stringref(); break; case ParseItem::ITEM_SAME_ELEMENT: - try { - _currArity = readCompressedPositiveInt(p); - _curr_index_name = read_stringref(p); - _curr_term = vespalib::stringref(); - } catch (...) { - return false; - } + _currArity = readCompressedPositiveInt(p); + _curr_index_name = read_stringref(p); + _curr_term = vespalib::stringref(); break; case ParseItem::ITEM_PURE_WEIGHTED_STRING: - try { - _curr_term = read_stringref(p); - _currArity = 0; - } catch (...) { - return false; - } + _curr_term = read_stringref(p); + _currArity = 0; break; case ParseItem::ITEM_PURE_WEIGHTED_LONG: { @@ -190,18 +156,13 @@ SimpleQueryStackDumpIterator::next() auto res = std::to_chars(_scratch, _scratch + sizeof(_scratch), value, 10); _curr_term = vespalib::stringref(_scratch, res.ptr - _scratch); p += sizeof(int64_t); - if (p > _bufEnd) return false; _currArity = 0; } break; case ParseItem::ITEM_WORD_ALTERNATIVES: - try { - _curr_index_name = read_stringref(p); - _currArity = readCompressedPositiveInt(p); - _curr_term = vespalib::stringref(); - } catch (...) { - return false; - } + _curr_index_name = read_stringref(p); + _currArity = readCompressedPositiveInt(p); + _curr_term = vespalib::stringref(); break; case ParseItem::ITEM_NUMTERM: case ParseItem::ITEM_GEO_LOCATION_TERM: @@ -211,83 +172,84 @@ SimpleQueryStackDumpIterator::next() case ParseItem::ITEM_EXACTSTRINGTERM: case ParseItem::ITEM_SUFFIXTERM: case ParseItem::ITEM_REGEXP: - try { - _curr_index_name = read_stringref(p); - _curr_term = read_stringref(p); - _currArity = 0; - } catch (...) { - return false; - } + _curr_index_name = read_stringref(p); + _curr_term = read_stringref(p); + _currArity = 0; break; case ParseItem::ITEM_PREDICATE_QUERY: - try { - _curr_index_name = read_stringref(p); - _predicate_query_term = std::make_unique<PredicateQueryTerm>(); - - size_t count = readCompressedPositiveInt(p); - for (size_t i = 0; i < count; ++i) { - vespalib::string key = readString(p); - vespalib::string value = readString(p); - uint64_t sub_queries = readUint64(p); - _predicate_query_term->addFeature(key, value, sub_queries); - } - count = readCompressedPositiveInt(p); - for (size_t i = 0; i < count; ++i) { - vespalib::string key = readString(p); - uint64_t value = readUint64(p); - uint64_t sub_queries = readUint64(p); - _predicate_query_term->addRangeFeature(key, value, sub_queries); - } - if (p > _bufEnd) return false; - } catch (...) { - return false; - } + if ( ! readPredicate(p)) return false; break; case ParseItem::ITEM_WEIGHTED_SET: case ParseItem::ITEM_DOT_PRODUCT: case ParseItem::ITEM_WAND: case ParseItem::ITEM_PHRASE: - try { - _currArity = readCompressedPositiveInt(p); - _curr_index_name = read_stringref(p); - if (_currType == ParseItem::ITEM_WAND) { - _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits - _extraDoubleArg4 = read_double(p); // scoreThreshold - _extraDoubleArg5 = read_double(p); // thresholdBoostFactor - } - _curr_term = vespalib::stringref(); - } catch (...) { - return false; - } + if (!readComplexTerm(p)) return false; break; - case ParseItem::ITEM_NEAREST_NEIGHBOR: - try { - _curr_index_name = read_stringref(p); - _curr_term = read_stringref(p); // query_tensor_name - _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits - _extraIntArg2 = readCompressedPositiveInt(p); // allow_approximate - _extraIntArg3 = readCompressedPositiveInt(p); // explore_additional_hits - // XXX: remove later when QRS doesn't send this extra flag - _extraIntArg2 &= ~0x40; - // QRS always sends this now: - _extraDoubleArg4 = read_double(p); // distance threshold - _currArity = 0; - } catch (...) { - return false; - } + if ( ! readNN(p)) return false; break; - default: // Unknown item, so report that no more are available return false; } - _currEnd = p; + _currEnd = p - _buf; // We should not have passed the buffer - assert(_currEnd <= _bufEnd); + return (p <= _bufEnd); +} + +bool +SimpleQueryStackDumpIterator::readPredicate(const char *&p) { + _curr_index_name = read_stringref(p); + _predicate_query_term = std::make_unique<PredicateQueryTerm>(); + size_t count = readCompressedPositiveInt(p); + for (size_t i = 0; i < count; ++i) { + vespalib::stringref key = read_stringref(p); + vespalib::stringref value = read_stringref(p); + if (p + sizeof(uint64_t) > _bufEnd) return false; + uint64_t sub_queries = readUint64(p); + _predicate_query_term->addFeature(key, value, sub_queries); + } + count = readCompressedPositiveInt(p); + for (size_t i = 0; i < count; ++i) { + vespalib::stringref key = read_stringref(p); + if (p + 2*sizeof(uint64_t) > _bufEnd) return false; + uint64_t value = readUint64(p); + uint64_t sub_queries = readUint64(p); + _predicate_query_term->addRangeFeature(key, value, sub_queries); + } + return true; +} + +bool +SimpleQueryStackDumpIterator::readNN(const char *& p) { + _curr_index_name = read_stringref(p); + _curr_term = read_stringref(p); // query_tensor_name + _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits + _extraIntArg2 = readCompressedPositiveInt(p); // allow_approximate + _extraIntArg3 = readCompressedPositiveInt(p); // explore_additional_hits + // XXX: remove later when QRS doesn't send this extra flag + _extraIntArg2 &= ~0x40; + // QRS always sends this now: + if ((p + sizeof(double))> _bufEnd) return false; + _extraDoubleArg4 = read_double(p); // distance threshold + _currArity = 0; + return true; +} + +bool +SimpleQueryStackDumpIterator::readComplexTerm(const char *& p) { + _currArity = readCompressedPositiveInt(p); + _curr_index_name = read_stringref(p); + if (_currType == ParseItem::ITEM_WAND) { + _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits + if ((p + 2*sizeof(double))> _bufEnd) return false; + _extraDoubleArg4 = read_double(p); // scoreThreshold + _extraDoubleArg5 = read_double(p); // thresholdBoostFactor + } + _curr_term = vespalib::stringref(); return true; } diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h index 99b1b33f78c..9d076e81e37 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h @@ -18,11 +18,10 @@ private: const char *_buf; /** Pointer to just past the input buffer */ const char *_bufEnd; - /** Pointer to the position of the current item in the buffer */ - const char *_currPos; + uint32_t _currPos; /** Pointer to after the current item */ - const char *_currEnd; + uint32_t _currEnd; /** The type of the current item */ ParseItem::ItemType _currType; /** flags of the current item **/ @@ -48,11 +47,14 @@ private: /** The predicate query specification */ query::PredicateQueryTerm::UP _predicate_query_term; - VESPA_DLL_LOCAL vespalib::string readString(const char *&p); VESPA_DLL_LOCAL vespalib::stringref read_stringref(const char *&p); VESPA_DLL_LOCAL uint64_t readUint64(const char *&p); VESPA_DLL_LOCAL double read_double(const char *&p); VESPA_DLL_LOCAL uint64_t readCompressedPositiveInt(const char *&p); + VESPA_DLL_LOCAL bool readPredicate(const char *&p); + VESPA_DLL_LOCAL bool readNN(const char *&p); + VESPA_DLL_LOCAL bool readComplexTerm(const char *& p); + VESPA_DLL_LOCAL bool readNext(); public: /** * Make an iterator on a buffer. To get the first item, next must be called. @@ -63,7 +65,7 @@ public: ~SimpleQueryStackDumpIterator(); vespalib::stringref getStack() const { return vespalib::stringref(_buf, _bufEnd - _buf); } - size_t getPosition() const { return _currPos - _buf; } + size_t getPosition() const { return _currPos; } /** * Moves to the next item in the buffer. diff --git a/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h b/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h index 8602eb1ac57..6c78ce51085 100644 --- a/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h +++ b/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h @@ -21,12 +21,12 @@ class PredicateQueryTerm { uint64_t _sub_query_bitmap; public: - Entry(const vespalib::string &key, const ValueType &value, + Entry(vespalib::stringref key, const ValueType &value, uint64_t sub_query_bitmap = ALL_SUB_QUERIES) noexcept : _key(key), _value(value), _sub_query_bitmap(sub_query_bitmap) {} - vespalib::string getKey() const { return _key; } - ValueType getValue() const { return _value; } + const vespalib::string & getKey() const { return _key; } + const ValueType & getValue() const { return _value; } uint64_t getSubQueryBitmap() const { return _sub_query_bitmap; } bool operator==(const Entry<ValueType> &other) const { return _key == other._key @@ -43,12 +43,12 @@ public: PredicateQueryTerm() noexcept : _features(), _range_features() {} - void addFeature(const vespalib::string &key, const vespalib::string &value, + void addFeature(vespalib::stringref key, vespalib::stringref value, uint64_t sub_query_bitmask = ALL_SUB_QUERIES) { _features.emplace_back(key, value, sub_query_bitmask); } - void addRangeFeature(const vespalib::string &key, uint64_t value, + void addRangeFeature(vespalib::stringref key, uint64_t value, uint64_t sub_query_bitmask = ALL_SUB_QUERIES) { _range_features.emplace_back(key, value, sub_query_bitmask); } |