summary refs log tree commit diff stats
path: root/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@broadpark.no> 2021-01-06 10:33:07 +0100
committer Tor Egge <Tor.Egge@broadpark.no> 2021-01-06 10:33:07 +0100
commit fe5956dcceb67c882fe6e1ffdd05bc783e529b36 (patch)
tree f77d3134b4415025fd2ab29e051d66089871f169 /searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
parent 130ac9c430496b5e31098818bff19b869d6c9f44 (diff)
Detect NUL byte in word when inverting field. Truncate the word and
drop it if the reduced length is zero.
Diffstat (limited to 'searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp')
-rw-r--r-- searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp 49
1 files changed, 49 insertions, 0 deletions
diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
index d3a286b3c1b..e7116eceb2d 100644
--- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp
@@ -7,6 +7,7 @@
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/memoryindex/word_store.h>
#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h>
+#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/vespalib/gtest/gtest.h>
namespace search {
@@ -103,6 +104,31 @@ makeDoc17(DocBuilder &b)
return b.endDocument();
}
+vespalib::string corruptWord = "corruptWord"; // marker word that makeCorruptDocument looks for in the serialized bytes
+// Builds a document, serializes it, overwrites one byte of corruptWord in the raw bytes with NUL, and constructs a Document from the patched bytes.
+Document::UP
+makeCorruptDocument(DocBuilder &b, size_t wordOffset) // wordOffset: index within corruptWord of the byte to overwrite
+{
+ b.startDocument("id:ns:searchdocument::18");
+ b.startIndexField("f0").addStr("before").addStr(corruptWord).addStr("after").addStr("z").endField();
+ auto doc = b.endDocument();
+ vespalib::nbostream stream;
+ doc->serialize(stream);
+ std::vector<char> raw; // mutable copy of the serialized bytes
+ raw.resize(stream.size());
+ stream.read(&raw[0], stream.size());
+ assert(wordOffset < corruptWord.size()); // the overwritten byte must lie inside the word
+ for (size_t i = 0; i + corruptWord.size() <= raw.size(); ++i) { // scan for the first byte-wise occurrence of the word
+ if (memcmp(&raw[i], corruptWord.c_str(), corruptWord.size()) == 0) {
+ raw[i + wordOffset] = '\0';
+ break; // only the first match is patched; with no match the bytes stay unchanged
+ }
+ }
+ vespalib::nbostream badstream;
+ badstream.write(&raw[0], raw.size());
+ return std::make_unique<Document>(*b.getDocumentTypeRepo(), badstream); // NOTE(review): presumably deserializes the patched bytes -- confirm against the Document constructor
+}
+
}
struct FieldInverterTest : public ::testing::Test {
@@ -362,6 +388,29 @@ TEST_F(FieldInverterTest, require_that_average_field_length_is_calculated)
assert_calculator(1, 2.0, 1);
}
+TEST_F(FieldInverterTest, require_that_word_with_NUL_byte_is_truncated) // a NUL in the middle of a word should leave only the part before it
+{
+ invertDocument(1, *makeCorruptDocument(_b, 7)); // offset 7 is the 'W' of "corruptWord"; first argument is presumably the document id -- confirm in the fixture
+ pushDocuments();
+ EXPECT_EQ("f=0,"
+ "w=after,a=1,"
+ "w=before,a=1,"
+ "w=corrupt,a=1," // the 7 characters that precede the injected NUL
+ "w=z,a=1",
+ _inserter.toStr());
+}
+
+TEST_F(FieldInverterTest, require_that_word_with_NUL_byte_is_dropped_when_truncated_to_zero_length) // a NUL as the first byte leaves nothing of the word
+{
+ invertDocument(1, *makeCorruptDocument(_b, 0)); // offset 0 overwrites the first byte of "corruptWord"
+ pushDocuments();
+ EXPECT_EQ("f=0,"
+ "w=after,a=1,"
+ "w=before,a=1," // no entry derived from corruptWord is expected between "before" and "z"
+ "w=z,a=1",
+ _inserter.toStr());
+}
+
}
}