Add `noexcept` to Unicode util functions

Also remove the seemingly pointless virtual destructor (and with it the vtable) from `Fast_UnicodeUtil`, and mark the class `final`.
author: Tor Brede Vekterli <vekterli@vespa.ai> 2024-01-22 10:35:11 +0000
committer: Tor Brede Vekterli <vekterli@vespa.ai> 2024-01-22 10:35:11 +0000
commit: 07d9299b95711c0694d95d423dd9d8b184ef3684 (patch)
tree: 7f4c177de9c57cb8123ccffc653117370754e022 /vespalib
parent: 7d737bcdc09d4aefd3cad56383871e6530a84561 (diff)
2 files changed, 26 insertions, 28 deletions
diff --git a/vespalib/src/vespa/fastlib/text/unicodeutil.cpp b/vespalib/src/vespa/fastlib/text/unicodeutil.cpp
index e29b91d6522..bd4ff5d93a9 100644
--- a/vespalib/src/vespa/fastlib/text/unicodeutil.cpp
+++ b/vespalib/src/vespa/fastlib/text/unicodeutil.cpp
@@ -11,15 +11,15 @@ namespace {
 class Initialize
 {
 public:
-    Initialize() { Fast_UnicodeUtil::InitTables(); }
+    Initialize() noexcept { Fast_UnicodeUtil::InitTables(); }
 };
 
-Initialize _G_Initializer;
+Initialize _g_initializer;
 
 }
 
 void
-Fast_UnicodeUtil::InitTables()
+Fast_UnicodeUtil::InitTables() noexcept
 {
     /**
      * Hack for Katakana accent marks (torgeir)
@@ -29,8 +29,7 @@ Fast_UnicodeUtil::InitTables()
 }
 
 char *
-Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src,
-                            int maxdst, int maxsrc)
+Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc) noexcept
 {
     char * p = dst;
     char * edst = dst + maxdst;
@@ -83,7 +82,7 @@ Fast_UnicodeUtil::utf8ncopy(char *dst, const ucs4_t *src,
 
 
 int
-Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2)
+Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2) noexcept
 {
     ucs4_t i1;
     ucs4_t i2;
@@ -101,7 +100,7 @@ Fast_UnicodeUtil::utf8cmp(const char *s1, const ucs4_t *s2)
 }
 
 size_t
-Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str)
+Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str) noexcept
 {
     const ucs4_t *p = str;
     while (*p++ != 0) {
@@ -111,7 +110,7 @@ Fast_UnicodeUtil::ucs4strlen(const ucs4_t *str)
 }
 
 ucs4_t *
-Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src)
+Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src) noexcept
 {
     ucs4_t i;
     ucs4_t *p;
@@ -127,7 +126,7 @@ Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src)
 }
 
 ucs4_t
-Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src)
+Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src) noexcept
 {
     ucs4_t retval;
 
@@ -222,7 +221,7 @@ Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src)
 }
 
 ucs4_t
-Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src)
+Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src) noexcept
 {
     return (*src >= 0x80)
         ? GetUTF8CharNonAscii(src)
@@ -246,7 +245,7 @@ Fast_UnicodeUtil::GetUTF8Char(unsigned const char *&src)
 #define UTF8_STARTCHAR(c)  (!((c) & 0x80) || ((c) & 0x40))
 
 int Fast_UnicodeUtil::UTF8move(unsigned const char* start, size_t length,
-                               unsigned const char*& pos, off_t offset)
+                               unsigned const char*& pos, off_t offset) noexcept
 {
     int increment = offset > 0 ? 1 : -1;
     unsigned const char* p = pos;
diff --git a/vespalib/src/vespa/fastlib/text/unicodeutil.h b/vespalib/src/vespa/fastlib/text/unicodeutil.h
index 87c09826948..740cc9381b7 100644
--- a/vespalib/src/vespa/fastlib/text/unicodeutil.h
+++ b/vespalib/src/vespa/fastlib/text/unicodeutil.h
@@ -16,7 +16,7 @@ using ucs4_t = uint32_t;
  * Used to examine properties of unicode characters, and
  * provide fast conversion methods between often used encodings.
  */
-class Fast_UnicodeUtil {
+class Fast_UnicodeUtil final {
 private:
     /**
      * Is true when the tables have been initialized. Is set by
@@ -46,9 +46,8 @@ private:
     };
 
 public:
-    virtual ~Fast_UnicodeUtil() { }
     /** Initialize the ISO 8859-1 static tables. */
-    static void InitTables();
+    static void InitTables() noexcept;
 
     /** Indicates an invalid UTF-8 character sequence. */
     enum { _BadUTF8Char = 0xfffffffeu };
@@ -64,7 +63,7 @@ public:
      * one or more of the properties alphabetic, ideographic,
      * combining char, decimal digit char, private use, extender.
      */
-    static bool IsWordChar(ucs4_t testchar) {
+    static bool IsWordChar(ucs4_t testchar) noexcept {
         return (testchar < 65536 &&
                 (_compCharProps[testchar >> 8][testchar & 255] &
                  _wordcharProp) != 0);
@@ -80,8 +79,8 @@ public:
      * @return The next UCS4 character, or _BadUTF8Char if the
      *         next character is invalid.
      */
-    static ucs4_t GetUTF8Char(const unsigned char *& src);
-    static ucs4_t GetUTF8Char(const char *& src) {
+    static ucs4_t GetUTF8Char(const unsigned char *& src) noexcept;
+    static ucs4_t GetUTF8Char(const char *& src) noexcept {
         const unsigned char *temp = reinterpret_cast<const unsigned char *>(src);
         ucs4_t res = GetUTF8Char(temp);
         src = reinterpret_cast<const char *>(temp);
@@ -94,7 +93,7 @@ public:
      * @param i The UCS4 character.
      * @return Pointer to the next position in dst after the putted byte(s).
      */
-    static char *utf8cput(char *dst, ucs4_t i) {
+    static char *utf8cput(char *dst, ucs4_t i) noexcept {
         if (i < 128)
             *dst++ = i;
         else if (i < 0x800) {
@@ -132,14 +131,14 @@ public:
      * @param src The UTF-8 source buffer.
      * @return A pointer to the destination string.
      */
-    static ucs4_t *ucs4copy(ucs4_t *dst, const char *src);
+    static ucs4_t *ucs4copy(ucs4_t *dst, const char *src) noexcept;
 
     /**
      * Get the length of the UTF-8 representation of an UCS4 character.
      * @param i The UCS4 character.
      * @return The number of bytes required for the UTF-8 representation.
      */
-    static size_t utf8clen(ucs4_t i) {
+    static size_t utf8clen(ucs4_t i) noexcept {
         if (i < 128)
             return 1;
         else if (i < 0x800)
@@ -159,7 +158,7 @@ public:
      * @param testchar The character to lowercase.
      * @return The lowercase of the input, if defined. Else the input character.
      */
-    static ucs4_t ToLower(ucs4_t testchar)
+    static ucs4_t ToLower(ucs4_t testchar) noexcept
     {
         ucs4_t ret;
         if (testchar < 65536) {
@@ -182,14 +181,14 @@ public:
      * @return Number of bytes moved, or -1 if out of range
      */
     static int UTF8move(unsigned const char* start, size_t length,
-                        unsigned const char*& pos, off_t offset);
+                        unsigned const char*& pos, off_t offset) noexcept;
 
     /**
      * Find the number of characters in an UCS4 string.
      * @param str The UCS4 string.
      * @return The number of characters.
      */
-    static size_t ucs4strlen(const ucs4_t *str);
+    static size_t ucs4strlen(const ucs4_t *str) noexcept;
 
     /**
      * Convert UCS4 to UTF-8, bounded by max lengths.
@@ -199,7 +198,7 @@ public:
      * @param maxsrc The maximum number of characters to convert from src.
      * @return A pointer to the destination.
      */
-    static char *utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc);
+    static char *utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc) noexcept;
 
 
     /**
@@ -210,7 +209,7 @@ public:
      *        if s1 is, respectively, less than, matching, or greater than s2.
      * NB Only used in local test
      */
-    static int utf8cmp(const char *s1, const ucs4_t *s2);
+    static int utf8cmp(const char *s1, const ucs4_t *s2) noexcept;
 
     /**
      * Test for terminal punctuation.
@@ -218,7 +217,7 @@ public:
      * @return true if testchar is a terminal punctuation character,
      *    i.e. if it has the terminal punctuation char property.
      */
-    static bool IsTerminalPunctuationChar(ucs4_t testchar) {
+    static bool IsTerminalPunctuationChar(ucs4_t testchar) noexcept {
         return (testchar < 65536 &&
                 (_compCharProps[testchar >> 8][testchar & 255] &
                  _terminalPunctuationCharProp) != 0);
@@ -233,10 +232,10 @@ public:
      * @return The next UCS4 character, or _BadUTF8Char if the
      *         next character is invalid.
      */
-    static ucs4_t GetUTF8CharNonAscii(unsigned const char *&src);
+    static ucs4_t GetUTF8CharNonAscii(unsigned const char *&src) noexcept;
 
     // this is really an alias of the above function
-    static ucs4_t GetUTF8CharNonAscii(const char *&src) {
+    static ucs4_t GetUTF8CharNonAscii(const char *&src) noexcept {
         unsigned const char *temp = reinterpret_cast<unsigned const char *>(src);
         ucs4_t res = GetUTF8CharNonAscii(temp);
         src = reinterpret_cast<const char *>(temp);
author	Tor Brede Vekterli <vekterli@vespa.ai>	2024-01-22 10:35:11 +0000
committer	Tor Brede Vekterli <vekterli@vespa.ai>	2024-01-22 10:35:11 +0000
commit	07d9299b95711c0694d95d423dd9d8b184ef3684 (patch)
tree	7f4c177de9c57cb8123ccffc653117370754e022 /vespalib
parent	7d737bcdc09d4aefd3cad56383871e6530a84561 (diff)