diff options
author | Tor Egge <Tor.Egge@online.no> | 2021-12-16 16:54:52 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2021-12-16 16:54:52 +0100 |
commit | 2e36199793b60b141f84890d0d92b74ede6b1a13 (patch) | |
tree | 0d751eac89bde442401ac58a494b8d695512917d /juniper | |
parent | 4331c9d2e31e69d99e3b8e0d2f8a1e4de6c62e39 (diff) |
Factor out common code to helper functions.
Adjust comment.
Diffstat (limited to 'juniper')
-rw-r--r-- | juniper/src/vespa/juniper/sumdesc.cpp | 48 |
1 file changed, 21 insertions, 27 deletions
```diff
diff --git a/juniper/src/vespa/juniper/sumdesc.cpp b/juniper/src/vespa/juniper/sumdesc.cpp
index 82b64d40971..331a46fdbfa 100644
--- a/juniper/src/vespa/juniper/sumdesc.cpp
+++ b/juniper/src/vespa/juniper/sumdesc.cpp
@@ -33,28 +33,26 @@ bool wordchar(const unsigned char* s)
     }
 }
 
-bool wordchar_or_il_ann_anchor(const unsigned char* s)
+bool wordchar_or_il_ann_char(const unsigned char* s, ucs4_t annotation_char)
 {
     unsigned char c = *s;
     if (c & 0x80) {
         ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
         return Fast_UnicodeUtil::IsWordChar(u) ||
-            u == il_ann_anchor;
+            u == annotation_char;
     } else {
         return isalnum(c);
     }
 }
 
+bool wordchar_or_il_ann_anchor(const unsigned char* s)
+{
+    return wordchar_or_il_ann_char(s, il_ann_anchor);
+}
+
 bool wordchar_or_il_ann_terminator(const unsigned char* s)
 {
-    unsigned char c = *s;
-    if (c & 0x80) {
-        ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
-        return Fast_UnicodeUtil::IsWordChar(u) ||
-            u == il_ann_terminator;
-    } else {
-        return isalnum(c);
-    }
+    return wordchar_or_il_ann_char(s, il_ann_terminator);
 }
 
 bool nonwordchar(const unsigned char* s)
@@ -69,39 +67,33 @@ bool nonwordchar(const unsigned char* s)
 }
 
 bool
-il_ann_anchor_char(const unsigned char* s)
+il_ann_char(const unsigned char* s, ucs4_t annotation_char)
 {
     unsigned char c = *s;
     if (c & 0x80) {
         ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
-        return u == il_ann_anchor;
+        return u == annotation_char;
     } else {
         return false;
     }
 }
 
 bool
+il_ann_anchor_char(const unsigned char* s)
+{
+    return il_ann_char(s, il_ann_anchor);
+}
+
+bool
 il_ann_separator_char(const unsigned char* s)
 {
-    unsigned char c = *s;
-    if (c & 0x80) {
-        ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
-        return u == il_ann_separator;
-    } else {
-        return false;
-    }
+    return il_ann_char(s, il_ann_separator);
 }
 
 bool
 il_ann_terminator_char(const unsigned char* s)
 {
-    unsigned char c = *s;
-    if (c & 0x80) {
-        ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
-        return u == il_ann_terminator;
-    } else {
-        return false;
-    }
+    return il_ann_char(s, il_ann_terminator);
 }
 
 /* Move backwards/forwards from ptr (no longer than to start) in an
@@ -127,7 +119,9 @@ int complete_word(unsigned char* start, ssize_t length,
     }
 
     // Figure out if a word needs completion or if we are just going
-    // to eliminate whitespace
+    // to eliminate whitespace. Consider sequence from interlinear
+    // annotation anchor to interlinear annotation terminator to be a
+    // word.
     if (!wordchar(ptr)) {
         if (increment > 0 && il_ann_anchor_char(ptr)) {
             chartest = il_ann_terminator_char;
```