aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@vespa.ai> 2024-01-22 10:35:11 +0000
committer: Tor Brede Vekterli <vekterli@vespa.ai> 2024-01-22 10:35:11 +0000
commit: 07d9299b95711c0694d95d423dd9d8b184ef3684 (patch)
tree: 7f4c177de9c57cb8123ccffc653117370754e022 /vespalib
parent: 7d737bcdc09d4aefd3cad56383871e6530a84561 (diff)
Add `noexcept` to Unicode util functions
Also remove seemingly pointless vtable in class.
Diffstat (limited to 'vespalib')
-rw-r--r-- vespalib/src/vespa/fastlib/text/unicodeutil.cpp 21
-rw-r--r-- vespalib/src/vespa/fastlib/text/unicodeutil.h 33
2 files changed, 26 insertions, 28 deletions
diff --git a/vespalib/src/vespa/fastlib/text/unicodeutil.cpp b/vespalib/src/vespa/fastlib/text/unicodeutil.cpp
index e29b91d6522..bd4ff5d93a9 100644
--- a/vespalib/src/vespa/fastlib/text/unicodeutil.cpp
+++ b/vespalib/src/vespa/fastlib/text/unicodeutil.cpp
@@ -11,15 +11,15 @@ namespace {
class Initialize
{
public:
- Initialize() { Fast_UnicodeUtil::InitTables(); }
+ Initialize() noexcept { Fast_UnicodeUtil::InitTables(); }
};
-Initialize _G_Initializer;
+Initialize _g_initializer;
}
void
-Fast_UnicodeUtil::InitTables()
+Fast_UnicodeUtil::InitTables() noexcept
{
/**
* Hack for Katakana accent marks (torgeir)
@@ -29,8 +29,7 @@ Fast_UnicodeUtil::InitTables()
}
char *
-Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src,
- int maxdst, int maxsrc)
+Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc) noexcept
{
char * p = dst;
char * edst = dst + maxdst;
@@ -83,7 +82,7 @@ Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src,
int
-Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2)
+Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2) noexcept
{
ucs4_t i1;
ucs4_t i2;
@@ -101,7 +100,7 @@ Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2)
}
size_t
-Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str)
+Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str) noexcept
{
const ucs4_t *p = str;
while (*p++ != 0) {
@@ -111,7 +110,7 @@ Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str)
}
ucs4_t *
-Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src)
+Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src) noexcept
{
ucs4_t i;
ucs4_t *p;
@@ -127,7 +126,7 @@ Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src)
}
ucs4_t
-Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src)
+Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src) noexcept
{
ucs4_t retval;
@@ -222,7 +221,7 @@ Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src)
}
ucs4_t
-Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src)
+Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src) noexcept
{
return (*src >= 0x80)
? GetUTF8CharNonAscii(src)
@@ -246,7 +245,7 @@ Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src)
#define UTF8_STARTCHAR(c) (!((c) & 0x80) || ((c) & 0x40))
int Fast_UnicodeUtil::UTF8move(unsigned const char* start, size_t length,
- unsigned const char*& pos, off_t offset)
+ unsigned const char*& pos, off_t offset) noexcept
{
int increment = offset > 0 ? 1 : -1;
unsigned const char* p = pos;
diff --git a/vespalib/src/vespa/fastlib/text/unicodeutil.h b/vespalib/src/vespa/fastlib/text/unicodeutil.h
index 87c09826948..740cc9381b7 100644
--- a/vespalib/src/vespa/fastlib/text/unicodeutil.h
+++ b/vespalib/src/vespa/fastlib/text/unicodeutil.h
@@ -16,7 +16,7 @@ using ucs4_t = uint32_t;
* Used to examine properties of unicode characters, and
* provide fast conversion methods between often used encodings.
*/
-class Fast_UnicodeUtil {
+class Fast_UnicodeUtil final {
private:
/**
* Is true when the tables have been initialized. Is set by
@@ -46,9 +46,8 @@ private:
};
public:
- virtual ~Fast_UnicodeUtil() { }
/** Initialize the ISO 8859-1 static tables. */
- static void InitTables();
+ static void InitTables() noexcept;
/** Indicates an invalid UTF-8 character sequence. */
enum { _BadUTF8Char = 0xfffffffeu };
@@ -64,7 +63,7 @@ public:
* one or more of the properties alphabetic, ideographic,
* combining char, decimal digit char, private use, extender.
*/
- static bool IsWordChar(ucs4_t testchar) {
+ static bool IsWordChar(ucs4_t testchar) noexcept {
return (testchar < 65536 &&
(_compCharProps[testchar >> 8][testchar & 255] &
_wordcharProp) != 0);
@@ -80,8 +79,8 @@ public:
* @return The next UCS4 character, or _BadUTF8Char if the
* next character is invalid.
*/
- static ucs4_t GetUTF8Char(const unsigned char *& src);
- static ucs4_t GetUTF8Char(const char *& src) {
+ static ucs4_t GetUTF8Char(const unsigned char *& src) noexcept;
+ static ucs4_t GetUTF8Char(const char *& src) noexcept {
const unsigned char *temp = reinterpret_cast<const unsigned char *>(src);
ucs4_t res = GetUTF8Char(temp);
src = reinterpret_cast<const char *>(temp);
@@ -94,7 +93,7 @@ public:
* @param i The UCS4 character.
* @return Pointer to the next position in dst after the putted byte(s).
*/
- static char *utf8cput(char *dst, ucs4_t i) {
+ static char *utf8cput(char *dst, ucs4_t i) noexcept {
if (i < 128)
*dst++ = i;
else if (i < 0x800) {
@@ -132,14 +131,14 @@ public:
* @param src The UTF-8 source buffer.
* @return A pointer to the destination string.
*/
- static ucs4_t *ucs4copy(ucs4_t *dst, const char *src);
+ static ucs4_t *ucs4copy(ucs4_t *dst, const char *src) noexcept;
/**
* Get the length of the UTF-8 representation of an UCS4 character.
* @param i The UCS4 character.
* @return The number of bytes required for the UTF-8 representation.
*/
- static size_t utf8clen(ucs4_t i) {
+ static size_t utf8clen(ucs4_t i) noexcept {
if (i < 128)
return 1;
else if (i < 0x800)
@@ -159,7 +158,7 @@ public:
* @param testchar The character to lowercase.
* @return The lowercase of the input, if defined. Else the input character.
*/
- static ucs4_t ToLower(ucs4_t testchar)
+ static ucs4_t ToLower(ucs4_t testchar) noexcept
{
ucs4_t ret;
if (testchar < 65536) {
@@ -182,14 +181,14 @@ public:
* @return Number of bytes moved, or -1 if out of range
*/
static int UTF8move(unsigned const char* start, size_t length,
- unsigned const char*& pos, off_t offset);
+ unsigned const char*& pos, off_t offset) noexcept;
/**
* Find the number of characters in an UCS4 string.
* @param str The UCS4 string.
* @return The number of characters.
*/
- static size_t ucs4strlen(const ucs4_t *str);
+ static size_t ucs4strlen(const ucs4_t *str) noexcept;
/**
* Convert UCS4 to UTF-8, bounded by max lengths.
@@ -199,7 +198,7 @@ public:
* @param maxsrc The maximum number of characters to convert from src.
* @return A pointer to the destination.
*/
- static char *utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc);
+ static char *utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc) noexcept;
/**
@@ -210,7 +209,7 @@ public:
* if s1 is, respectively, less than, matching, or greater than s2.
* NB Only used in local test
*/
- static int utf8cmp(const char *s1, const ucs4_t *s2);
+ static int utf8cmp(const char *s1, const ucs4_t *s2) noexcept;
/**
* Test for terminal punctuation.
@@ -218,7 +217,7 @@ public:
* @return true if testchar is a terminal punctuation character,
* i.e. if it has the terminal punctuation char property.
*/
- static bool IsTerminalPunctuationChar(ucs4_t testchar) {
+ static bool IsTerminalPunctuationChar(ucs4_t testchar) noexcept {
return (testchar < 65536 &&
(_compCharProps[testchar >> 8][testchar & 255] &
_terminalPunctuationCharProp) != 0);
@@ -233,10 +232,10 @@ public:
* @return The next UCS4 character, or _BadUTF8Char if the
* next character is invalid.
*/
- static ucs4_t GetUTF8CharNonAscii(unsigned const char *&src);
+ static ucs4_t GetUTF8CharNonAscii(unsigned const char *&src) noexcept;
// this is really an alias of the above function
- static ucs4_t GetUTF8CharNonAscii(const char *&src) {
+ static ucs4_t GetUTF8CharNonAscii(const char *&src) noexcept {
unsigned const char *temp = reinterpret_cast<unsigned const char *>(src);
ucs4_t res = GetUTF8CharNonAscii(temp);
src = reinterpret_cast<const char *>(temp);