diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2021-01-06 10:33:07 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2021-01-06 10:33:07 +0100 |
commit | fe5956dcceb67c882fe6e1ffdd05bc783e529b36 (patch) | |
tree | f77d3134b4415025fd2ab29e051d66089871f169 /searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp | |
parent | 130ac9c430496b5e31098818bff19b869d6c9f44 (diff) |
Detect NUL byte in word when inverting field. Truncate the word and
drop it if the reduced length is zero.
Diffstat (limited to 'searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp')
-rw-r--r-- | searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp | 49 |
1 files changed, 49 insertions, 0 deletions
```diff
diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
index d3a286b3c1b..e7116eceb2d 100644
--- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
@@ -7,6 +7,7 @@
 #include <vespa/searchlib/memoryindex/field_inverter.h>
 #include <vespa/searchlib/memoryindex/word_store.h>
 #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h>
+#include <vespa/vespalib/objects/nbostream.h>
 #include <vespa/vespalib/gtest/gtest.h>
 
 namespace search {
@@ -103,6 +104,31 @@ makeDoc17(DocBuilder &b)
     return b.endDocument();
 }
 
+vespalib::string corruptWord = "corruptWord";
+
+Document::UP
+makeCorruptDocument(DocBuilder &b, size_t wordOffset)
+{
+    b.startDocument("id:ns:searchdocument::18");
+    b.startIndexField("f0").addStr("before").addStr(corruptWord).addStr("after").addStr("z").endField();
+    auto doc = b.endDocument();
+    vespalib::nbostream stream;
+    doc->serialize(stream);
+    std::vector<char> raw;
+    raw.resize(stream.size());
+    stream.read(&raw[0], stream.size());
+    assert(wordOffset < corruptWord.size());
+    for (size_t i = 0; i + corruptWord.size() <= raw.size(); ++i) {
+        if (memcmp(&raw[i], corruptWord.c_str(), corruptWord.size()) == 0) {
+            raw[i + wordOffset] = '\0';
+            break;
+        }
+    }
+    vespalib::nbostream badstream;
+    badstream.write(&raw[0], raw.size());
+    return std::make_unique<Document>(*b.getDocumentTypeRepo(), badstream);
+}
+
 }
 
 struct FieldInverterTest : public ::testing::Test {
@@ -362,6 +388,29 @@ TEST_F(FieldInverterTest, require_that_average_field_length_is_calculated)
     assert_calculator(1, 2.0, 1);
 }
 
+TEST_F(FieldInverterTest, require_that_word_with_NUL_byte_is_truncated)
+{
+    invertDocument(1, *makeCorruptDocument(_b, 7));
+    pushDocuments();
+    EXPECT_EQ("f=0,"
+              "w=after,a=1,"
+              "w=before,a=1,"
+              "w=corrupt,a=1,"
+              "w=z,a=1",
+              _inserter.toStr());
+}
+
+TEST_F(FieldInverterTest, require_that_word_with_NUL_byte_is_dropped_when_truncated_to_zero_length)
+{
+    invertDocument(1, *makeCorruptDocument(_b, 0));
+    pushDocuments();
+    EXPECT_EQ("f=0,"
+              "w=after,a=1,"
+              "w=before,a=1,"
+              "w=z,a=1",
+              _inserter.toStr());
+}
+
 }
 }
 
```