summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-01-03 16:19:07 +0100
committer GitHub <noreply@github.com> 2022-01-03 16:19:07 +0100
commit 8bb156519b991469a229dbf5b45dd5b073837e2b (patch)
tree b43f9adaf72d78c0c5b329ab150c24a924641ee0
parent df05bdab7c18ac7950be99d17fc6b9ab7f74f6a2 (diff)
parent d1bd7d73e74a5f1b1f49fedcf2a50737739b6fbe (diff)
Merge pull request #20637 from vespa-engine/toregge/avoid-illegal-char-sequence-in-debug-log
Avoid illegal character sequence in debug log.
-rw-r--r-- juniper/src/vespa/juniper/sumdesc.cpp 27
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp 2
2 files changed, 20 insertions, 9 deletions
diff --git a/juniper/src/vespa/juniper/sumdesc.cpp b/juniper/src/vespa/juniper/sumdesc.cpp
index 1a27ffaefb0..969616423e8 100644
--- a/juniper/src/vespa/juniper/sumdesc.cpp
+++ b/juniper/src/vespa/juniper/sumdesc.cpp
@@ -18,6 +18,17 @@ LOG_SETUP(".juniper.sumdesc");
namespace {
// Character written to the log in place of any byte that is not printable
// 7-bit ASCII.
static constexpr char replacement_char = '.';

/**
 * Maps a single byte to something that is safe to emit with a "%c"
 * conversion in a log message.
 *
 * @param c  the byte to be logged.
 * @return   c itself when it is printable 7-bit ASCII (0x20..0x7e),
 *           otherwise replacement_char.
 *
 * Bytes >= 0x80 are replaced because a lone byte taken out of a multi-byte
 * sequence is not a legal character sequence on its own. Control characters
 * below space, and DEL (0x7f), are replaced because they are not printable.
 */
char printable_char(char c)
{
    // Compare as unsigned so the result does not depend on whether plain
    // char is signed on the target platform.
    const auto uc = static_cast<unsigned char>(c);
    const bool printable = (uc >= 0x20) && (uc < 0x7f);
    return printable ? c : replacement_char;
}

+
bool wordchar(const unsigned char* s)
{
unsigned char c = *s;
@@ -98,7 +109,7 @@ int complete_word(unsigned char* start, ssize_t length,
// the read:
for (;;) {
LOG(spam, "[%s%d%s%c]", (whitespace_elim ? "^" : ""),
- moved, (increment > 0 ? "+" : "-"), *ptr);
+ moved, (increment > 0 ? "+" : "-"), printable_char(*ptr));
int cur_move = Fast_UnicodeUtil::UTF8move(start, length,
ptr, increment);
@@ -114,11 +125,11 @@ int complete_word(unsigned char* start, ssize_t length,
// Give up if we found a split of a word
if (cur_move <= 0) // == 0 to avoid UTF8move bug in fastlib 1.3.3..
{
- LOG(spam, "complete_word: Failing at char %c/0x%x", *ptr, *ptr);
+ LOG(spam, "complete_word: Failing at char %c/0x%x", printable_char(*ptr), *ptr);
break;
}
if (chartest(ptr)) {
- LOG(spam, "complete_word: Breaking at char %c/0x%x (%d)", *ptr,
+ LOG(spam, "complete_word: Breaking at char %c/0x%x (%d)", printable_char(*ptr),
*ptr, cur_move);
// count this character (it is the first blank/wordchar)
// only if we are going forward and it is a word character
@@ -459,12 +470,12 @@ int SummaryDesc::complete_extended_token(unsigned char* start, ssize_t length,
// Handle default case ("ordinary" space)
if (!word_connector(preptr)) {
- LOG(spam, "Not a word connector case (%c)", *preptr);
+ LOG(spam, "Not a word connector case (%c)", printable_char(*preptr));
return moved;
}
char wconn = *preptr;
(void) wconn;
- LOG(spam, "Found word connector case candidate (%c)", wconn);
+ LOG(spam, "Found word connector case candidate (%c)", printable_char(wconn));
// Read the character before/after the connector character:
int addlen = Fast_UnicodeUtil::UTF8move(start, length,
@@ -498,7 +509,7 @@ int SummaryDesc::complete_extended_token(unsigned char* start, ssize_t length,
ptr = preptr;
LOG(spam, "Found proper word connector case (%c,%c) yet moved %d",
- wconn, *preptr, moved);
+ printable_char(wconn), printable_char(*preptr), moved);
}
}
@@ -590,7 +601,7 @@ std::string SummaryDesc::get_summary(const char* buffer, size_t bytes,
} else if (!d._highlight) {
LOG(spam, "Not completing word at "
"char %c/0x%x, prev_end %" PRId64 ", pos %" PRId64,
- *ptr, *ptr, static_cast<int64_t>(prev_end), static_cast<int64_t>(pos));
+ printable_char(*ptr), *ptr, static_cast<int64_t>(prev_end), static_cast<int64_t>(pos));
}
/* Point to "current" endpos to check for split word/ending
@@ -616,7 +627,7 @@ std::string SummaryDesc::get_summary(const char* buffer, size_t bytes,
} else if (!d._highlight) {
LOG(spam, "Not completing word at "
"char %c/0x%x, next_pos %" PRId64,
- *ptr, *ptr, static_cast<int64_t>(next_pos));
+ printable_char(*ptr), *ptr, static_cast<int64_t>(next_pos));
}
JD_INVAR(JD_DESC, len >= 0, len = 0,
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp
index 79957217267..f9301557c0c 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp
@@ -392,7 +392,7 @@ DynamicTeaserDFW::makeDynamicTeaser(uint32_t docid, vespalib::stringref input, G
std::ostringstream hexDump;
hexDump << vespalib::HexDump(input.data(), input.length());
LOG(spam, "makeDynamicTeaser: docid=%d, input='%s', hexdump:\n%s",
- docid, input.data(), hexDump.str().c_str());
+ docid, std::string(input.data(), input.length()).c_str(), hexDump.str().c_str());
}
auto langid = static_cast<uint32_t>(-1);