summaryrefslogtreecommitdiffstats
path: root/vespajlib
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2019-06-14 12:04:04 +0200
committer: Henning Baldersheim <balder@yahoo-inc.com> 2019-06-14 12:04:04 +0200
commit: 98a61cc161ebab349913f63dd89d23462954f028 (patch)
tree: 408dddaae42a3147fe704cb3a562f8dd74904cc6 /vespajlib
parent: 5f2c2e986c5ed94aebb9480754907e7263ed8b20 (diff)
Java's lowercasing speed has caught up with and surpassed our own homegrown optimization.
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/Lowercase.java 91
-rw-r--r-- vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java 34
2 files changed, 28 insertions, 97 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Lowercase.java b/vespajlib/src/main/java/com/yahoo/text/Lowercase.java
index a04ba1cfe13..6304f7f0a39 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Lowercase.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Lowercase.java
@@ -12,64 +12,6 @@ import java.util.Locale;
*/
public final class Lowercase {
- private static final char[] lowercase = new char[123];
-
- static {
- lowercase[0x41] = 'a';
- lowercase[0x42] = 'b';
- lowercase[0x43] = 'c';
- lowercase[0x44] = 'd';
- lowercase[0x45] = 'e';
- lowercase[0x46] = 'f';
- lowercase[0x47] = 'g';
- lowercase[0x48] = 'h';
- lowercase[0x49] = 'i';
- lowercase[0x4A] = 'j';
- lowercase[0x4B] = 'k';
- lowercase[0x4C] = 'l';
- lowercase[0x4D] = 'm';
- lowercase[0x4E] = 'n';
- lowercase[0x4F] = 'o';
- lowercase[0x50] = 'p';
- lowercase[0x51] = 'q';
- lowercase[0x52] = 'r';
- lowercase[0x53] = 's';
- lowercase[0x54] = 't';
- lowercase[0x55] = 'u';
- lowercase[0x56] = 'v';
- lowercase[0x57] = 'w';
- lowercase[0x58] = 'x';
- lowercase[0x59] = 'y';
- lowercase[0x5A] = 'z';
-
- lowercase[0x61] = 'a';
- lowercase[0x62] = 'b';
- lowercase[0x63] = 'c';
- lowercase[0x64] = 'd';
- lowercase[0x65] = 'e';
- lowercase[0x66] = 'f';
- lowercase[0x67] = 'g';
- lowercase[0x68] = 'h';
- lowercase[0x69] = 'i';
- lowercase[0x6A] = 'j';
- lowercase[0x6B] = 'k';
- lowercase[0x6C] = 'l';
- lowercase[0x6D] = 'm';
- lowercase[0x6E] = 'n';
- lowercase[0x6F] = 'o';
- lowercase[0x70] = 'p';
- lowercase[0x71] = 'q';
- lowercase[0x72] = 'r';
- lowercase[0x73] = 's';
- lowercase[0x74] = 't';
- lowercase[0x75] = 'u';
- lowercase[0x76] = 'v';
- lowercase[0x77] = 'w';
- lowercase[0x78] = 'x';
- lowercase[0x79] = 'y';
- lowercase[0x7A] = 'z';
- }
-
/**
* Return a lowercased version of the given string. Since this is language
* independent, this is more of a case normalization operation than
@@ -80,40 +22,11 @@ public final class Lowercase {
* @return a string containing only lowercase character
*/
public static String toLowerCase(String in) {
- // def is picked from http://docs.oracle.com/javase/6/docs/api/java/lang/String.html#toLowerCase%28%29
- String lower = toLowerCasePrintableAsciiOnly(in);
- return (lower == null) ? in.toLowerCase(Locale.ENGLISH) : lower;
+ return in.toLowerCase(Locale.ENGLISH);
+
}
public static String toUpperCase(String in) {
- // def is picked from http://docs.oracle.com/javase/6/docs/api/java/lang/String.html#toLowerCase%28%29
return in.toUpperCase(Locale.ENGLISH);
}
- private static String toLowerCasePrintableAsciiOnly(String in) {
- boolean anyUpper = false;
- for (int i = 0; i < in.length(); i++) {
- char c = in.charAt(i);
- if (c < 0x41) { //lower than A-Z
- return null;
- }
- if (c > 0x5A && c < 0x61) { //between A-Z and a-z
- return null;
- }
- if (c > 0x7A) { //higher than a-z
- return null;
- }
- if (c != lowercase[c]) {
- anyUpper = true;
- }
- }
- if (!anyUpper) {
- return in;
- }
- StringBuilder builder = new StringBuilder(in.length());
- for (int i = 0; i < in.length(); i++) {
- builder.append((char) (in.charAt(i) | ((char) 0x20)));
- }
- return builder.toString();
- }
-
}
diff --git a/vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java b/vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java
index 8a3e6ed134d..a1379594ba0 100644
--- a/vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/LowercaseTestCase.java
@@ -7,6 +7,7 @@ import org.junit.Test;
import java.util.Locale;
import static org.hamcrest.CoreMatchers.equalTo;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
/**
@@ -49,12 +50,28 @@ public class LowercaseTestCase {
}
@Test
+ public void test7bitAscii() {
+ for(char c = 0; c < 128; c++) {
+ char [] carray = {c};
+ String s = new String(carray);
+ assertEquals(Lowercase.toLowerCase(s), s.toLowerCase(Locale.ENGLISH));
+ assertEquals(Lowercase.toUpperCase(s), s.toUpperCase(Locale.ENGLISH));
+ }
+ }
+
+ @Test
@Ignore
public void performance() {
+ for (int i=0; i < 2; i++) {
+ benchmark(i);
+ }
+ }
+
+ private void benchmark(int i) {
Lowercase.toLowerCase("warmup");
- String lowercaseInput = "abcdefghijklmnopqrstuvwxyz";
- String uppercaseInput = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
- String mixedcaseInput = "AbCDEfGHIJklmnoPQRStuvwXyz";
+ String lowercaseInput = "abcdefghijklmnopqrstuvwxyz" + i;
+ String uppercaseInput = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + i;
+ String mixedcaseInput = "AbCDEfGHIJklmnoPQRStuvwXyz" + i;
System.err.println("Lowercase input: ");
testPerformance(lowercaseInput);
@@ -67,12 +84,14 @@ public class LowercaseTestCase {
}
private void testPerformance(String input) {
- final int NUM = 10000000;
+ final int NUM = 100000000;
long elapsedTimeOwnImpl;
+ long ownCount = 0;
+ long javaCount = 0;
{
long startTimeOwnImpl = System.currentTimeMillis();
for (int i = 0; i < NUM; i++) {
- Lowercase.toLowerCase(input);
+ ownCount += Lowercase.toLowerCase(input).length();
}
elapsedTimeOwnImpl = System.currentTimeMillis() - startTimeOwnImpl;
System.err.println("Own implementation: " + elapsedTimeOwnImpl);
@@ -82,7 +101,7 @@ public class LowercaseTestCase {
{
long startTimeJava = System.currentTimeMillis();
for (int i = 0; i < NUM; i++) {
- input.toLowerCase(Locale.ENGLISH);
+ javaCount += input.toLowerCase(Locale.ENGLISH).length();
}
elapsedTimeJava = System.currentTimeMillis() - startTimeJava;
System.err.println("Java's implementation: " + elapsedTimeJava);
@@ -90,7 +109,6 @@ public class LowercaseTestCase {
long diff = elapsedTimeJava - elapsedTimeOwnImpl;
double diffPercentage = (((double) diff) / ((double) elapsedTimeJava)) * 100.0;
- System.err.println("Own implementation is " + diffPercentage + " % faster.");
-
+ System.err.println("Own implementation is " + diffPercentage + " % faster. owncount=" + ownCount + " javaCount=" + javaCount);
}
}