From 091ab18c517b68617d049dd102bbf3fb3d46c022 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 6 Apr 2022 14:29:10 +0200 Subject: Skip unnecessary surrogate check if ! isBmpCodePoint, and move high surrogate check inside isTextCharacter. --- vespajlib/src/main/java/com/yahoo/text/Text.java | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'vespajlib/src/main/java/com/yahoo/text/Text.java') diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java index 7ce24f40bb2..8a97ddc7cbc 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Text.java +++ b/vespajlib/src/main/java/com/yahoo/text/Text.java @@ -50,9 +50,10 @@ public final class Text { return (codepoint < 0x80) ? allowedAsciiChars[codepoint] - : isTextCharAboveUsAscii(codepoint); + : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveUsAscii(codepoint); } private static boolean isTextCharAboveUsAscii(int codepoint) { + if (codepoint <= Character.MAX_HIGH_SURROGATE) return false; if (codepoint < 0xFDD0) return true; if (codepoint <= 0xFDDF) return false; if (codepoint < 0x1FFFE) return true; @@ -121,20 +122,14 @@ public final class Text { int codePoint = string.codePointAt(i); if (codePoint < 0x80) { if ( ! allowedAsciiChars[codePoint]) return false; - i++; } else if (codePoint < Character.MIN_SURROGATE) { - i++; } else { if ( ! isTextCharAboveUsAscii(codePoint)) return false; - if ( ! Character.isSupplementaryCodePoint(codePoint)) { - if (Character.isHighSurrogate((char)codePoint)) return false; + if ( ! Character.isBmpCodePoint(codePoint)) { i++; - } else { - if (Character.isHighSurrogate(Character.highSurrogate(codePoint)) - && ! Character.isLowSurrogate(Character.lowSurrogate(codePoint))) return false; - i += 2; } } + i++; } return true; } -- cgit v1.2.3