diff options
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Lowercase.java | 91 | ||||
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java | 34 |
2 files changed, 28 insertions, 97 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Lowercase.java b/vespajlib/src/main/java/com/yahoo/text/Lowercase.java index a04ba1cfe13..6304f7f0a39 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Lowercase.java +++ b/vespajlib/src/main/java/com/yahoo/text/Lowercase.java @@ -12,64 +12,6 @@ import java.util.Locale; */ public final class Lowercase { - private static final char[] lowercase = new char[123]; - - static { - lowercase[0x41] = 'a'; - lowercase[0x42] = 'b'; - lowercase[0x43] = 'c'; - lowercase[0x44] = 'd'; - lowercase[0x45] = 'e'; - lowercase[0x46] = 'f'; - lowercase[0x47] = 'g'; - lowercase[0x48] = 'h'; - lowercase[0x49] = 'i'; - lowercase[0x4A] = 'j'; - lowercase[0x4B] = 'k'; - lowercase[0x4C] = 'l'; - lowercase[0x4D] = 'm'; - lowercase[0x4E] = 'n'; - lowercase[0x4F] = 'o'; - lowercase[0x50] = 'p'; - lowercase[0x51] = 'q'; - lowercase[0x52] = 'r'; - lowercase[0x53] = 's'; - lowercase[0x54] = 't'; - lowercase[0x55] = 'u'; - lowercase[0x56] = 'v'; - lowercase[0x57] = 'w'; - lowercase[0x58] = 'x'; - lowercase[0x59] = 'y'; - lowercase[0x5A] = 'z'; - - lowercase[0x61] = 'a'; - lowercase[0x62] = 'b'; - lowercase[0x63] = 'c'; - lowercase[0x64] = 'd'; - lowercase[0x65] = 'e'; - lowercase[0x66] = 'f'; - lowercase[0x67] = 'g'; - lowercase[0x68] = 'h'; - lowercase[0x69] = 'i'; - lowercase[0x6A] = 'j'; - lowercase[0x6B] = 'k'; - lowercase[0x6C] = 'l'; - lowercase[0x6D] = 'm'; - lowercase[0x6E] = 'n'; - lowercase[0x6F] = 'o'; - lowercase[0x70] = 'p'; - lowercase[0x71] = 'q'; - lowercase[0x72] = 'r'; - lowercase[0x73] = 's'; - lowercase[0x74] = 't'; - lowercase[0x75] = 'u'; - lowercase[0x76] = 'v'; - lowercase[0x77] = 'w'; - lowercase[0x78] = 'x'; - lowercase[0x79] = 'y'; - lowercase[0x7A] = 'z'; - } - /** * Return a lowercased version of the given string. Since this is language * independent, this is more of a case normalization operation than @@ -80,40 +22,11 @@ public final class Lowercase { * @return a string containing only lowercase character */ public static String toLowerCase(String in) { - // def is picked from http://docs.oracle.com/javase/6/docs/api/java/lang/String.html#toLowerCase%28%29 - String lower = toLowerCasePrintableAsciiOnly(in); - return (lower == null) ? in.toLowerCase(Locale.ENGLISH) : lower; + return in.toLowerCase(Locale.ENGLISH); + } public static String toUpperCase(String in) { - // def is picked from http://docs.oracle.com/javase/6/docs/api/java/lang/String.html#toLowerCase%28%29 return in.toUpperCase(Locale.ENGLISH); } - private static String toLowerCasePrintableAsciiOnly(String in) { - boolean anyUpper = false; - for (int i = 0; i < in.length(); i++) { - char c = in.charAt(i); - if (c < 0x41) { //lower than A-Z - return null; - } - if (c > 0x5A && c < 0x61) { //between A-Z and a-z - return null; - } - if (c > 0x7A) { //higher than a-z - return null; - } - if (c != lowercase[c]) { - anyUpper = true; - } - } - if (!anyUpper) { - return in; - } - StringBuilder builder = new StringBuilder(in.length()); - for (int i = 0; i < in.length(); i++) { - builder.append((char) (in.charAt(i) | ((char) 0x20))); - } - return builder.toString(); - } - } diff --git a/vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java b/vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java index 8a3e6ed134d..a1379594ba0 100644 --- a/vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java @@ -7,6 +7,7 @@ import org.junit.Test; import java.util.Locale; import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; /** @@ -49,12 +50,28 @@ public class LowercaseTestCase { } @Test + public void test7bitAscii() { + for(char c = 0; c < 128; c++) { + char [] carray = {c}; + String s = new String(carray); + assertEquals(Lowercase.toLowerCase(s), s.toLowerCase(Locale.ENGLISH)); + assertEquals(Lowercase.toUpperCase(s), s.toUpperCase(Locale.ENGLISH)); + } + } + + @Test @Ignore public void performance() { + for (int i=0; i < 2; i++) { + benchmark(i); + } + } + + private void benchmark(int i) { Lowercase.toLowerCase("warmup"); - String lowercaseInput = "abcdefghijklmnopqrstuvwxyz"; - String uppercaseInput = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - String mixedcaseInput = "AbCDEfGHIJklmnoPQRStuvwXyz"; + String lowercaseInput = "abcdefghijklmnopqrstuvwxyz" + i; + String uppercaseInput = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + i; + String mixedcaseInput = "AbCDEfGHIJklmnoPQRStuvwXyz" + i; System.err.println("Lowercase input: "); testPerformance(lowercaseInput); @@ -67,12 +84,14 @@ public class LowercaseTestCase { } private void testPerformance(String input) { - final int NUM = 10000000; + final int NUM = 100000000; long elapsedTimeOwnImpl; + long ownCount = 0; + long javaCount = 0; { long startTimeOwnImpl = System.currentTimeMillis(); for (int i = 0; i < NUM; i++) { - Lowercase.toLowerCase(input); + ownCount += Lowercase.toLowerCase(input).length(); } elapsedTimeOwnImpl = System.currentTimeMillis() - startTimeOwnImpl; System.err.println("Own implementation: " + elapsedTimeOwnImpl); @@ -82,7 +101,7 @@ public class LowercaseTestCase { { long startTimeJava = System.currentTimeMillis(); for (int i = 0; i < NUM; i++) { - input.toLowerCase(Locale.ENGLISH); + javaCount += input.toLowerCase(Locale.ENGLISH).length(); } elapsedTimeJava = System.currentTimeMillis() - startTimeJava; System.err.println("Java's implementation: " + elapsedTimeJava); @@ -90,7 +109,6 @@ public class LowercaseTestCase { long diff = elapsedTimeJava - elapsedTimeOwnImpl; double diffPercentage = (((double) diff) / ((double) elapsedTimeJava)) * 100.0; - System.err.println("Own implementation is " + diffPercentage + " % faster."); - + System.err.println("Own implementation is " + diffPercentage + " % faster. owncount=" + ownCount + " javaCount=" + javaCount); } } |