summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-03-29 11:03:26 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2021-03-29 11:03:26 +0000
commitc9f8d4aec5ec0cec0bb93e01716f9a707b6ca8af (patch)
tree54613b5a04f7de816a8eac3c3e11f610bffe8a84
parentf9c63106a9800731b272aaf9c1be32513b303c87 (diff)
- Simplify code by catching exception in one place.
- Unify error handling to prevent reading past the buffer. - track positions by using to integers insteda of 2 pointers.
-rw-r--r--searchlib/src/tests/query/querybuilder_test.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp226
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h12
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h10
4 files changed, 107 insertions, 143 deletions
diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp
index c170f07f441..055d245e420 100644
--- a/searchlib/src/tests/query/querybuilder_test.cpp
+++ b/searchlib/src/tests/query/querybuilder_test.cpp
@@ -593,7 +593,7 @@ TEST("require that empty intermediate node can be added") {
}
TEST("control size of SimpleQueryStackDumpIterator") {
- EXPECT_EQUAL(152u, sizeof(SimpleQueryStackDumpIterator));
+ EXPECT_EQUAL(144u, sizeof(SimpleQueryStackDumpIterator));
}
TEST("test query parsing error") {
diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
index 622fee6d5c8..d8be4a45af4 100644
--- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
+++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
@@ -13,8 +13,8 @@ namespace search {
SimpleQueryStackDumpIterator::SimpleQueryStackDumpIterator(vespalib::stringref buf)
: _buf(buf.begin()),
_bufEnd(buf.end()),
- _currPos(_buf),
- _currEnd(_buf),
+ _currPos(0),
+ _currEnd(0),
_currType(ParseItem::ITEM_UNDEF),
_currFlags(0),
_currWeight(100),
@@ -34,34 +34,20 @@ SimpleQueryStackDumpIterator::SimpleQueryStackDumpIterator(vespalib::stringref b
SimpleQueryStackDumpIterator::~SimpleQueryStackDumpIterator() = default;
-vespalib::string
-SimpleQueryStackDumpIterator::readString(const char *&p)
-{
- if (p >= _bufEnd) throw false;
- uint64_t tmp;
- p += vespalib::compress::Integer::decompressPositive(tmp, p);
- vespalib::string s(p, tmp);
- p += s.size();
- return s;
-}
-
vespalib::stringref
SimpleQueryStackDumpIterator::read_stringref(const char *&p)
{
- if (p >= _bufEnd) throw false;
uint64_t len;
p += vespalib::compress::Integer::decompressPositive(len, p);
- if (p > _bufEnd) throw false;
+ if ((p + len) > _bufEnd) throw false;
vespalib::stringref result(p, len);
p += len;
- if (p > _bufEnd) throw false;
return result;
}
uint64_t
SimpleQueryStackDumpIterator::readUint64(const char *&p)
{
- if (p + sizeof(uint64_t) > _bufEnd) throw false;
uint64_t l = vespalib::nbo::n2h(*(const uint64_t *)(const void *)p);
p += sizeof(uint64_t);
return l;
@@ -70,17 +56,14 @@ SimpleQueryStackDumpIterator::readUint64(const char *&p)
double
SimpleQueryStackDumpIterator::read_double(const char *&p)
{
- if (p >= _bufEnd) throw false;
double result = vespalib::nbo::n2h(*reinterpret_cast<const double *>(p));
p += sizeof(double);
- if (p > _bufEnd) throw false;
return result;
}
uint64_t
SimpleQueryStackDumpIterator::readCompressedPositiveInt(const char *&p)
{
- if (p >= _bufEnd) throw false;
uint64_t tmp;
p += vespalib::compress::Integer::decompressPositive(tmp, p);
if (p > _bufEnd) throw false;
@@ -88,9 +71,16 @@ SimpleQueryStackDumpIterator::readCompressedPositiveInt(const char *&p)
}
bool
-SimpleQueryStackDumpIterator::next()
-{
- if (_currEnd >= _bufEnd)
+SimpleQueryStackDumpIterator::next() {
+ try {
+ return readNext();
+ } catch (...) {
+ return false;
+ }
+}
+
+bool SimpleQueryStackDumpIterator::readNext() {
+ if ((_buf + _currEnd) >= _bufEnd)
// End of buffer, so no more items available
return false;
@@ -98,7 +88,7 @@ SimpleQueryStackDumpIterator::next()
_currPos = _currEnd;
// Find an item at the current position
- const char *p = _currPos;
+ const char *p = _buf + _currPos;
uint8_t typefield = *p++;
_currType = ParseItem::GetType(typefield);
@@ -112,11 +102,7 @@ SimpleQueryStackDumpIterator::next()
_currWeight.setPercent(100);
}
if (ParseItem::getFeature_UniqueId(typefield)) {
- try {
- _currUniqueId = readCompressedPositiveInt(p);
- } catch (...) {
- return false;
- }
+ _currUniqueId = readCompressedPositiveInt(p);
} else {
_currUniqueId = 0;
}
@@ -134,54 +120,34 @@ SimpleQueryStackDumpIterator::next()
case ParseItem::ITEM_NOT:
case ParseItem::ITEM_RANK:
case ParseItem::ITEM_ANY:
- try {
- _currArity = readCompressedPositiveInt(p);
- _curr_index_name = vespalib::stringref();
- _curr_term = vespalib::stringref();
- } catch (...) {
- return false;
- }
+ _currArity = readCompressedPositiveInt(p);
+ _curr_index_name = vespalib::stringref();
+ _curr_term = vespalib::stringref();
break;
case ParseItem::ITEM_NEAR:
case ParseItem::ITEM_ONEAR:
- try {
- _currArity = readCompressedPositiveInt(p);
- _extraIntArg1 = readCompressedPositiveInt(p);
- _curr_index_name = vespalib::stringref();
- _curr_term = vespalib::stringref();
- } catch (...) {
- return false;
- }
+ _currArity = readCompressedPositiveInt(p);
+ _extraIntArg1 = readCompressedPositiveInt(p);
+ _curr_index_name = vespalib::stringref();
+ _curr_term = vespalib::stringref();
break;
case ParseItem::ITEM_WEAK_AND:
- try {
- _currArity = readCompressedPositiveInt(p);
- _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits
- _curr_index_name = read_stringref(p);
- _curr_term = vespalib::stringref();
- } catch (...) {
- return false;
- }
+ _currArity = readCompressedPositiveInt(p);
+ _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits
+ _curr_index_name = read_stringref(p);
+ _curr_term = vespalib::stringref();
break;
case ParseItem::ITEM_SAME_ELEMENT:
- try {
- _currArity = readCompressedPositiveInt(p);
- _curr_index_name = read_stringref(p);
- _curr_term = vespalib::stringref();
- } catch (...) {
- return false;
- }
+ _currArity = readCompressedPositiveInt(p);
+ _curr_index_name = read_stringref(p);
+ _curr_term = vespalib::stringref();
break;
case ParseItem::ITEM_PURE_WEIGHTED_STRING:
- try {
- _curr_term = read_stringref(p);
- _currArity = 0;
- } catch (...) {
- return false;
- }
+ _curr_term = read_stringref(p);
+ _currArity = 0;
break;
case ParseItem::ITEM_PURE_WEIGHTED_LONG:
{
@@ -190,18 +156,13 @@ SimpleQueryStackDumpIterator::next()
auto res = std::to_chars(_scratch, _scratch + sizeof(_scratch), value, 10);
_curr_term = vespalib::stringref(_scratch, res.ptr - _scratch);
p += sizeof(int64_t);
- if (p > _bufEnd) return false;
_currArity = 0;
}
break;
case ParseItem::ITEM_WORD_ALTERNATIVES:
- try {
- _curr_index_name = read_stringref(p);
- _currArity = readCompressedPositiveInt(p);
- _curr_term = vespalib::stringref();
- } catch (...) {
- return false;
- }
+ _curr_index_name = read_stringref(p);
+ _currArity = readCompressedPositiveInt(p);
+ _curr_term = vespalib::stringref();
break;
case ParseItem::ITEM_NUMTERM:
case ParseItem::ITEM_GEO_LOCATION_TERM:
@@ -211,83 +172,84 @@ SimpleQueryStackDumpIterator::next()
case ParseItem::ITEM_EXACTSTRINGTERM:
case ParseItem::ITEM_SUFFIXTERM:
case ParseItem::ITEM_REGEXP:
- try {
- _curr_index_name = read_stringref(p);
- _curr_term = read_stringref(p);
- _currArity = 0;
- } catch (...) {
- return false;
- }
+ _curr_index_name = read_stringref(p);
+ _curr_term = read_stringref(p);
+ _currArity = 0;
break;
case ParseItem::ITEM_PREDICATE_QUERY:
- try {
- _curr_index_name = read_stringref(p);
- _predicate_query_term = std::make_unique<PredicateQueryTerm>();
-
- size_t count = readCompressedPositiveInt(p);
- for (size_t i = 0; i < count; ++i) {
- vespalib::string key = readString(p);
- vespalib::string value = readString(p);
- uint64_t sub_queries = readUint64(p);
- _predicate_query_term->addFeature(key, value, sub_queries);
- }
- count = readCompressedPositiveInt(p);
- for (size_t i = 0; i < count; ++i) {
- vespalib::string key = readString(p);
- uint64_t value = readUint64(p);
- uint64_t sub_queries = readUint64(p);
- _predicate_query_term->addRangeFeature(key, value, sub_queries);
- }
- if (p > _bufEnd) return false;
- } catch (...) {
- return false;
- }
+ if ( ! readPredicate(p)) return false;
break;
case ParseItem::ITEM_WEIGHTED_SET:
case ParseItem::ITEM_DOT_PRODUCT:
case ParseItem::ITEM_WAND:
case ParseItem::ITEM_PHRASE:
- try {
- _currArity = readCompressedPositiveInt(p);
- _curr_index_name = read_stringref(p);
- if (_currType == ParseItem::ITEM_WAND) {
- _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits
- _extraDoubleArg4 = read_double(p); // scoreThreshold
- _extraDoubleArg5 = read_double(p); // thresholdBoostFactor
- }
- _curr_term = vespalib::stringref();
- } catch (...) {
- return false;
- }
+ if (!readComplexTerm(p)) return false;
break;
-
case ParseItem::ITEM_NEAREST_NEIGHBOR:
- try {
- _curr_index_name = read_stringref(p);
- _curr_term = read_stringref(p); // query_tensor_name
- _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits
- _extraIntArg2 = readCompressedPositiveInt(p); // allow_approximate
- _extraIntArg3 = readCompressedPositiveInt(p); // explore_additional_hits
- // XXX: remove later when QRS doesn't send this extra flag
- _extraIntArg2 &= ~0x40;
- // QRS always sends this now:
- _extraDoubleArg4 = read_double(p); // distance threshold
- _currArity = 0;
- } catch (...) {
- return false;
- }
+ if ( ! readNN(p)) return false;
break;
-
default:
// Unknown item, so report that no more are available
return false;
}
- _currEnd = p;
+ _currEnd = p - _buf;
// We should not have passed the buffer
- assert(_currEnd <= _bufEnd);
+ return (p <= _bufEnd);
+}
+
+bool
+SimpleQueryStackDumpIterator::readPredicate(const char *&p) {
+ _curr_index_name = read_stringref(p);
+ _predicate_query_term = std::make_unique<PredicateQueryTerm>();
+ size_t count = readCompressedPositiveInt(p);
+ for (size_t i = 0; i < count; ++i) {
+ vespalib::stringref key = read_stringref(p);
+ vespalib::stringref value = read_stringref(p);
+ if (p + sizeof(uint64_t) > _bufEnd) return false;
+ uint64_t sub_queries = readUint64(p);
+ _predicate_query_term->addFeature(key, value, sub_queries);
+ }
+ count = readCompressedPositiveInt(p);
+ for (size_t i = 0; i < count; ++i) {
+ vespalib::stringref key = read_stringref(p);
+ if (p + 2*sizeof(uint64_t) > _bufEnd) return false;
+ uint64_t value = readUint64(p);
+ uint64_t sub_queries = readUint64(p);
+ _predicate_query_term->addRangeFeature(key, value, sub_queries);
+ }
+ return true;
+}
+
+bool
+SimpleQueryStackDumpIterator::readNN(const char *& p) {
+ _curr_index_name = read_stringref(p);
+ _curr_term = read_stringref(p); // query_tensor_name
+ _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits
+ _extraIntArg2 = readCompressedPositiveInt(p); // allow_approximate
+ _extraIntArg3 = readCompressedPositiveInt(p); // explore_additional_hits
+ // XXX: remove later when QRS doesn't send this extra flag
+ _extraIntArg2 &= ~0x40;
+ // QRS always sends this now:
+ if ((p + sizeof(double))> _bufEnd) return false;
+ _extraDoubleArg4 = read_double(p); // distance threshold
+ _currArity = 0;
+ return true;
+}
+
+bool
+SimpleQueryStackDumpIterator::readComplexTerm(const char *& p) {
+ _currArity = readCompressedPositiveInt(p);
+ _curr_index_name = read_stringref(p);
+ if (_currType == ParseItem::ITEM_WAND) {
+ _extraIntArg1 = readCompressedPositiveInt(p); // targetNumHits
+ if ((p + 2*sizeof(double))> _bufEnd) return false;
+ _extraDoubleArg4 = read_double(p); // scoreThreshold
+ _extraDoubleArg5 = read_double(p); // thresholdBoostFactor
+ }
+ _curr_term = vespalib::stringref();
return true;
}
diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h
index 99b1b33f78c..9d076e81e37 100644
--- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h
+++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h
@@ -18,11 +18,10 @@ private:
const char *_buf;
/** Pointer to just past the input buffer */
const char *_bufEnd;
-
/** Pointer to the position of the current item in the buffer */
- const char *_currPos;
+ uint32_t _currPos;
/** Pointer to after the current item */
- const char *_currEnd;
+ uint32_t _currEnd;
/** The type of the current item */
ParseItem::ItemType _currType;
/** flags of the current item **/
@@ -48,11 +47,14 @@ private:
/** The predicate query specification */
query::PredicateQueryTerm::UP _predicate_query_term;
- VESPA_DLL_LOCAL vespalib::string readString(const char *&p);
VESPA_DLL_LOCAL vespalib::stringref read_stringref(const char *&p);
VESPA_DLL_LOCAL uint64_t readUint64(const char *&p);
VESPA_DLL_LOCAL double read_double(const char *&p);
VESPA_DLL_LOCAL uint64_t readCompressedPositiveInt(const char *&p);
+ VESPA_DLL_LOCAL bool readPredicate(const char *&p);
+ VESPA_DLL_LOCAL bool readNN(const char *&p);
+ VESPA_DLL_LOCAL bool readComplexTerm(const char *& p);
+ VESPA_DLL_LOCAL bool readNext();
public:
/**
* Make an iterator on a buffer. To get the first item, next must be called.
@@ -63,7 +65,7 @@ public:
~SimpleQueryStackDumpIterator();
vespalib::stringref getStack() const { return vespalib::stringref(_buf, _bufEnd - _buf); }
- size_t getPosition() const { return _currPos - _buf; }
+ size_t getPosition() const { return _currPos; }
/**
* Moves to the next item in the buffer.
diff --git a/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h b/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h
index 8602eb1ac57..6c78ce51085 100644
--- a/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h
+++ b/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h
@@ -21,12 +21,12 @@ class PredicateQueryTerm {
uint64_t _sub_query_bitmap;
public:
- Entry(const vespalib::string &key, const ValueType &value,
+ Entry(vespalib::stringref key, const ValueType &value,
uint64_t sub_query_bitmap = ALL_SUB_QUERIES) noexcept
: _key(key), _value(value), _sub_query_bitmap(sub_query_bitmap) {}
- vespalib::string getKey() const { return _key; }
- ValueType getValue() const { return _value; }
+ const vespalib::string & getKey() const { return _key; }
+ const ValueType & getValue() const { return _value; }
uint64_t getSubQueryBitmap() const { return _sub_query_bitmap; }
bool operator==(const Entry<ValueType> &other) const {
return _key == other._key
@@ -43,12 +43,12 @@ public:
PredicateQueryTerm() noexcept : _features(), _range_features() {}
- void addFeature(const vespalib::string &key, const vespalib::string &value,
+ void addFeature(vespalib::stringref key, vespalib::stringref value,
uint64_t sub_query_bitmask = ALL_SUB_QUERIES) {
_features.emplace_back(key, value, sub_query_bitmask);
}
- void addRangeFeature(const vespalib::string &key, uint64_t value,
+ void addRangeFeature(vespalib::stringref key, uint64_t value,
uint64_t sub_query_bitmask = ALL_SUB_QUERIES) {
_range_features.emplace_back(key, value, sub_query_bitmask);
}