summary | refs | log | tree | commit | diff | stats
path: root/juniper
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2021-12-16 16:54:52 +0100
committer: Tor Egge <Tor.Egge@online.no> 2021-12-16 16:54:52 +0100
commit: 2e36199793b60b141f84890d0d92b74ede6b1a13 (patch)
tree: 0d751eac89bde442401ac58a494b8d695512917d /juniper
parent: 4331c9d2e31e69d99e3b8e0d2f8a1e4de6c62e39 (diff)
Factor out common code to helper functions.
Adjust comment.
Diffstat (limited to 'juniper')
-rw-r--r-- juniper/src/vespa/juniper/sumdesc.cpp 48
1 file changed, 21 insertions, 27 deletions
diff --git a/juniper/src/vespa/juniper/sumdesc.cpp b/juniper/src/vespa/juniper/sumdesc.cpp
index 82b64d40971..331a46fdbfa 100644
--- a/juniper/src/vespa/juniper/sumdesc.cpp
+++ b/juniper/src/vespa/juniper/sumdesc.cpp
@@ -33,28 +33,26 @@ bool wordchar(const unsigned char* s)
}
}
-bool wordchar_or_il_ann_anchor(const unsigned char* s)
+bool wordchar_or_il_ann_char(const unsigned char* s, ucs4_t annotation_char)
{
unsigned char c = *s;
if (c & 0x80) {
ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
return Fast_UnicodeUtil::IsWordChar(u) ||
- u == il_ann_anchor;
+ u == annotation_char;
} else {
return isalnum(c);
}
}
+bool wordchar_or_il_ann_anchor(const unsigned char* s)
+{
+ return wordchar_or_il_ann_char(s, il_ann_anchor);
+}
+
bool wordchar_or_il_ann_terminator(const unsigned char* s)
{
- unsigned char c = *s;
- if (c & 0x80) {
- ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
- return Fast_UnicodeUtil::IsWordChar(u) ||
- u == il_ann_terminator;
- } else {
- return isalnum(c);
- }
+ return wordchar_or_il_ann_char(s, il_ann_terminator);
}
bool nonwordchar(const unsigned char* s)
@@ -69,39 +67,33 @@ bool nonwordchar(const unsigned char* s)
}
bool
-il_ann_anchor_char(const unsigned char* s)
+il_ann_char(const unsigned char* s, ucs4_t annotation_char)
{
unsigned char c = *s;
if (c & 0x80) {
ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
- return u == il_ann_anchor;
+ return u == annotation_char;
} else {
return false;
}
}
bool
+il_ann_anchor_char(const unsigned char* s)
+{
+ return il_ann_char(s, il_ann_anchor);
+}
+
+bool
il_ann_separator_char(const unsigned char* s)
{
- unsigned char c = *s;
- if (c & 0x80) {
- ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
- return u == il_ann_separator;
- } else {
- return false;
- }
+ return il_ann_char(s, il_ann_separator);
}
bool
il_ann_terminator_char(const unsigned char* s)
{
- unsigned char c = *s;
- if (c & 0x80) {
- ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(s);
- return u == il_ann_terminator;
- } else {
- return false;
- }
+ return il_ann_char(s, il_ann_terminator);
}
/* Move backwards/forwards from ptr (no longer than to start) in an
@@ -127,7 +119,9 @@ int complete_word(unsigned char* start, ssize_t length,
}
// Figure out if a word needs completion or if we are just going
- // to eliminate whitespace
+ // to eliminate whitespace. Consider sequence from interlinear
+ // annotation anchor to interlinear annotation terminator to be a
+ // word.
if (!wordchar(ptr)) {
if (increment > 0 && il_ann_anchor_char(ptr)) {
chartest = il_ann_terminator_char;