diff options
author | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-01-22 10:35:11 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-01-22 10:35:11 +0000 |
commit | 07d9299b95711c0694d95d423dd9d8b184ef3684 (patch) | |
tree | 7f4c177de9c57cb8123ccffc653117370754e022 /vespalib | |
parent | 7d737bcdc09d4aefd3cad56383871e6530a84561 (diff) |
Add `noexcept` to Unicode util functions
Also remove the seemingly pointless vtable from the `Fast_UnicodeUtil` class by dropping its empty virtual destructor, and mark the class `final`.
Diffstat (limited to 'vespalib')
-rw-r--r-- | vespalib/src/vespa/fastlib/text/unicodeutil.cpp | 21 | ||||
-rw-r--r-- | vespalib/src/vespa/fastlib/text/unicodeutil.h | 33 |
2 files changed, 26 insertions, 28 deletions
diff --git a/vespalib/src/vespa/fastlib/text/unicodeutil.cpp b/vespalib/src/vespa/fastlib/text/unicodeutil.cpp index e29b91d6522..bd4ff5d93a9 100644 --- a/vespalib/src/vespa/fastlib/text/unicodeutil.cpp +++ b/vespalib/src/vespa/fastlib/text/unicodeutil.cpp @@ -11,15 +11,15 @@ namespace { class Initialize { public: - Initialize() { Fast_UnicodeUtil::InitTables(); } + Initialize() noexcept { Fast_UnicodeUtil::InitTables(); } }; -Initialize _G_Initializer; +Initialize _g_initializer; } void -Fast_UnicodeUtil::InitTables() +Fast_UnicodeUtil::InitTables() noexcept { /** * Hack for Katakana accent marks (torgeir) @@ -29,8 +29,7 @@ Fast_UnicodeUtil::InitTables() } char * -Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src, - int maxdst, int maxsrc) +Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc) noexcept { char * p = dst; char * edst = dst + maxdst; @@ -83,7 +82,7 @@ Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src, int -Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2) +Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2) noexcept { ucs4_t i1; ucs4_t i2; @@ -101,7 +100,7 @@ Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2) } size_t -Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str) +Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str) noexcept { const ucs4_t *p = str; while (*p++ != 0) { @@ -111,7 +110,7 @@ Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str) } ucs4_t * -Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src) +Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src) noexcept { ucs4_t i; ucs4_t *p; @@ -127,7 +126,7 @@ Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src) } ucs4_t -Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src) +Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src) noexcept { ucs4_t retval; @@ -222,7 +221,7 @@ Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src) } ucs4_t -Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src) 
+Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src) noexcept { return (*src >= 0x80) ? GetUTF8CharNonAscii(src) @@ -246,7 +245,7 @@ Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src) #define UTF8_STARTCHAR(c) (!((c) & 0x80) || ((c) & 0x40)) int Fast_UnicodeUtil::UTF8move(unsigned const char* start, size_t length, - unsigned const char*& pos, off_t offset) + unsigned const char*& pos, off_t offset) noexcept { int increment = offset > 0 ? 1 : -1; unsigned const char* p = pos; diff --git a/vespalib/src/vespa/fastlib/text/unicodeutil.h b/vespalib/src/vespa/fastlib/text/unicodeutil.h index 87c09826948..740cc9381b7 100644 --- a/vespalib/src/vespa/fastlib/text/unicodeutil.h +++ b/vespalib/src/vespa/fastlib/text/unicodeutil.h @@ -16,7 +16,7 @@ using ucs4_t = uint32_t; * Used to examine properties of unicode characters, and * provide fast conversion methods between often used encodings. */ -class Fast_UnicodeUtil { +class Fast_UnicodeUtil final { private: /** * Is true when the tables have been initialized. Is set by @@ -46,9 +46,8 @@ private: }; public: - virtual ~Fast_UnicodeUtil() { } /** Initialize the ISO 8859-1 static tables. */ - static void InitTables(); + static void InitTables() noexcept; /** Indicates an invalid UTF-8 character sequence. */ enum { _BadUTF8Char = 0xfffffffeu }; @@ -64,7 +63,7 @@ public: * one or more of the properties alphabetic, ideographic, * combining char, decimal digit char, private use, extender. */ - static bool IsWordChar(ucs4_t testchar) { + static bool IsWordChar(ucs4_t testchar) noexcept { return (testchar < 65536 && (_compCharProps[testchar >> 8][testchar & 255] & _wordcharProp) != 0); @@ -80,8 +79,8 @@ public: * @return The next UCS4 character, or _BadUTF8Char if the * next character is invalid. 
*/ - static ucs4_t GetUTF8Char(const unsigned char *& src); - static ucs4_t GetUTF8Char(const char *& src) { + static ucs4_t GetUTF8Char(const unsigned char *& src) noexcept; + static ucs4_t GetUTF8Char(const char *& src) noexcept { const unsigned char *temp = reinterpret_cast<const unsigned char *>(src); ucs4_t res = GetUTF8Char(temp); src = reinterpret_cast<const char *>(temp); @@ -94,7 +93,7 @@ public: * @param i The UCS4 character. * @return Pointer to the next position in dst after the putted byte(s). */ - static char *utf8cput(char *dst, ucs4_t i) { + static char *utf8cput(char *dst, ucs4_t i) noexcept { if (i < 128) *dst++ = i; else if (i < 0x800) { @@ -132,14 +131,14 @@ public: * @param src The UTF-8 source buffer. * @return A pointer to the destination string. */ - static ucs4_t *ucs4copy(ucs4_t *dst, const char *src); + static ucs4_t *ucs4copy(ucs4_t *dst, const char *src) noexcept; /** * Get the length of the UTF-8 representation of an UCS4 character. * @param i The UCS4 character. * @return The number of bytes required for the UTF-8 representation. */ - static size_t utf8clen(ucs4_t i) { + static size_t utf8clen(ucs4_t i) noexcept { if (i < 128) return 1; else if (i < 0x800) @@ -159,7 +158,7 @@ public: * @param testchar The character to lowercase. * @return The lowercase of the input, if defined. Else the input character. */ - static ucs4_t ToLower(ucs4_t testchar) + static ucs4_t ToLower(ucs4_t testchar) noexcept { ucs4_t ret; if (testchar < 65536) { @@ -182,14 +181,14 @@ public: * @return Number of bytes moved, or -1 if out of range */ static int UTF8move(unsigned const char* start, size_t length, - unsigned const char*& pos, off_t offset); + unsigned const char*& pos, off_t offset) noexcept; /** * Find the number of characters in an UCS4 string. * @param str The UCS4 string. * @return The number of characters. 
*/ - static size_t ucs4strlen(const ucs4_t *str); + static size_t ucs4strlen(const ucs4_t *str) noexcept; /** * Convert UCS4 to UTF-8, bounded by max lengths. @@ -199,7 +198,7 @@ public: * @param maxsrc The maximum number of characters to convert from src. * @return A pointer to the destination. */ - static char *utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc); + static char *utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc) noexcept; /** @@ -210,7 +209,7 @@ public: * if s1 is, respectively, less than, matching, or greater than s2. * NB Only used in local test */ - static int utf8cmp(const char *s1, const ucs4_t *s2); + static int utf8cmp(const char *s1, const ucs4_t *s2) noexcept; /** * Test for terminal punctuation. @@ -218,7 +217,7 @@ public: * @return true if testchar is a terminal punctuation character, * i.e. if it has the terminal punctuation char property. */ - static bool IsTerminalPunctuationChar(ucs4_t testchar) { + static bool IsTerminalPunctuationChar(ucs4_t testchar) noexcept { return (testchar < 65536 && (_compCharProps[testchar >> 8][testchar & 255] & _terminalPunctuationCharProp) != 0); @@ -233,10 +232,10 @@ public: * @return The next UCS4 character, or _BadUTF8Char if the * next character is invalid. */ - static ucs4_t GetUTF8CharNonAscii(unsigned const char *&src); + static ucs4_t GetUTF8CharNonAscii(unsigned const char *&src) noexcept; // this is really an alias of the above function - static ucs4_t GetUTF8CharNonAscii(const char *&src) { + static ucs4_t GetUTF8CharNonAscii(const char *&src) noexcept { unsigned const char *temp = reinterpret_cast<unsigned const char *>(src); ucs4_t res = GetUTF8CharNonAscii(temp); src = reinterpret_cast<const char *>(temp); |