diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /vespajlib/src/main/java/com/yahoo/geo |
Publish
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/geo')
4 files changed, 637 insertions, 0 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/geo/BoundingBoxParser.java b/vespajlib/src/main/java/com/yahoo/geo/BoundingBoxParser.java new file mode 100644 index 00000000000..001386cd4b0 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/geo/BoundingBoxParser.java @@ -0,0 +1,147 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.geo; + +import com.yahoo.text.DoubleParser; + + +/** + * Class for parsing a bounding box in text format: + * "n=37.44899,s=37.3323,e=-121.98241,w=-122.06566" + * + * <pre> + * Input from: + * http://gws.maps.yahoo.com/findlocation?q=sunnyvale,ca&amp;flags=X + * which gives this format: + * <boundingbox> + * <north>37.44899</north><south>37.3323</south><east>-121.98241</east><west>-122.06566</west> + * </boundingbox> + * it's also easy to use the geoplanet bounding box + * <boundingBox> + * <southWest> + * <latitude>40.183868</latitude> + * <longitude>-74.819519</longitude> + * </southWest> + * <northEast> + * <latitude>40.248291</latitude> + * <longitude>-74.728798</longitude> + * </northEast> + * </boundingBox> + * can be input as: + * s=40.183868,w=-74.819519,n=40.248291,e=-74.728798 + * </pre> + * + * @author Arne J + */ +public class BoundingBoxParser { + + // return variables + public double n = 0.0; + public double s = 0.0; + public double e = 0.0; + public double w = 0.0; + + /** + * parse the given string as a bounding box and return a parser object with parsed coordinates in member variables + * @throws IllegalArgumentException if the input is malformed in any way + **/ + public BoundingBoxParser(String bb) { + this.parseString = bb; + this.len = bb.length(); + parse(); + } + + private final String parseString; + private final int len; + private int pos = 0; + + private char getNextChar() throws IllegalArgumentException { + if (pos == len) { + pos++; + return 0; + } else if (pos > len) { + throw new IllegalArgumentException("position after end of string"); + } else { + return parseString.charAt(pos++); + } + } + + private boolean isCompassDirection(char ch) { + return (ch == 'N' || ch == 'S' || ch == 'E' || ch == 'W' || + ch == 'n' || ch == 's' || ch == 'e' || ch == 'w'); + } + + private int lastNumStartPos = 0; + + private char nsew = 0; + private boolean doneN = false; + private boolean doneS = false; + private boolean doneE = false; + private boolean doneW = false; + + private void parse() { + do { + char ch = getNextChar(); + if (isCompassDirection(ch) && nsew == 0) { + if (ch == 'n' || ch =='N') { + nsew = 'n'; + } else if (ch == 's' || ch == 'S') { + nsew = 's'; + } else if (ch == 'e' || ch == 'E') { + nsew = 'e'; + } else if (ch == 'w' || ch == 'W') { + nsew = 'w'; + } + lastNumStartPos = 0; + } + if (ch == '=' || ch == ':') { + if (nsew != 0) { + lastNumStartPos = pos; + } + } + if (ch == ',' || ch == 0 || ch == ' ') { + if (nsew != 0 && lastNumStartPos > 0) { + String sub = parseString.substring(lastNumStartPos, pos-1); + try { + double v = DoubleParser.parse(sub); + if (nsew == 'n') { + if (doneN) { + throw new IllegalArgumentException("multiple limits for 'n' boundary"); + } + n = v; + doneN = true; + } else if (nsew == 's') { + if (doneS) { + throw new IllegalArgumentException("multiple limits for 's' boundary"); + } + s = v; + doneS = true; + } else if (nsew == 'e') { + if (doneE) { + throw new IllegalArgumentException("multiple limits for 'e' boundary"); + } + e = v; + doneE = true; + } else if (nsew == 'w') { + if (doneW) { + throw new IllegalArgumentException("multiple limits for 'w' boundary"); + } + w = v; + doneW = true; + } + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Could not parse "+nsew+" limit '"+sub+"' as a number"); + } + nsew = 0; + } + } + } while (pos <= len); + + if (doneN && doneS && doneE && doneW) { + return; + } else { + throw new IllegalArgumentException("Missing bounding box limits, n="+doneN+" s="+doneS+" e="+doneE+" w="+doneW); + } + } + +} + diff --git a/vespajlib/src/main/java/com/yahoo/geo/DegreesParser.java b/vespajlib/src/main/java/com/yahoo/geo/DegreesParser.java new file mode 100644 index 00000000000..40398a2b1a0 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/geo/DegreesParser.java @@ -0,0 +1,284 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.geo; + +/** + * utility for parsing geographical coordinates + * + * @author arnej27959 + **/ +public class DegreesParser { + /** + * the parsed latitude (degrees north if positive) + **/ + public double latitude = 0; + /** + * the parsed longitude (degrees east if positive) + **/ + public double longitude = 0; + + private boolean isDigit(char ch) { + return (ch >= '0' && ch <= '9'); + } + private boolean isCompassDirection(char ch) { + return (ch == 'N' || ch == 'S' || ch == 'E' || ch == 'W'); + } + + private String parseString = null; + private int len = 0; + private int pos = 0; + + private char getNextChar() throws IllegalArgumentException { + if (pos == len) { + pos++; + return 0; + } else if (pos > len) { + throw new IllegalArgumentException("position after end of string"); + } else { + return parseString.charAt(pos++); + } + } + + /** + * Parse the given string. + * + * The string must contain both a latitude and a longitude, + * separated by a semicolon, in any order. A latitude must + * contain "N" or "S" and a number signifying degrees north or + * south. A longitude must contain "E" or "W" and a number + * signifying degrees east or west. No signs or spaces are + * allowed. + * <br> + * Fractional degrees are recommended as the main input format, + * but degrees plus fractional minutes may be used for testing. + * You can use the degree sign (U+00B0 as seen in unicode at + * http://www.unicode.org/charts/PDF/U0080.pdf) to separate + * degrees from minutes, put the direction (NSEW) between as a + * separator, or use a small letter 'o' as a replacement for the + * degrees sign. + * <br> + * Some valid input formats: <br> + * "N37.416383;W122.024683" → Sunnyvale <br> + * "37N24.983;122W01.481" → same <br> + * "N37\u00B024.983;W122\u00B001.481" → same <br> + * "N63.418417;E10.433033" → Trondheim <br> + * "N63o25.105;E10o25.982" → same <br> + * "E10o25.982;N63o25.105" → same <br> + * "N63.418417;E10.433033" → same <br> + * "63N25.105;10E25.982" → same <br> + * @param latandlong Latitude and longitude separated by semicolon. + * + **/ + public DegreesParser(String latandlong) throws IllegalArgumentException { + this.parseString = latandlong; + this.len = parseString.length(); + + char ch = getNextChar(); + + boolean latSet = false; + boolean longSet = false; + + double degrees = 0.0; + double minutes = 0.0; + double seconds = 0.0; + boolean degSet = false; + boolean minSet = false; + boolean secSet = false; + boolean dirSet = false; + boolean foundDot = false; + boolean foundDigits = false; + + boolean findingLatitude = false; + boolean findingLongitude = false; + + double sign = 0.0; + + int lastpos = -1; + + do { + boolean valid = false; + if (pos == lastpos) { + throw new RuntimeException("internal logic error at '"+parseString+"' pos:"+pos); + } else { + lastpos = pos; + } + + // first, see if we can find some number + double accum = 0.0; + + if (isDigit(ch) || ch == '.') { + valid = true; + if (foundDigits) { + throw new IllegalArgumentException("found digits after not consuming previous digits"); + } + double divider = 1.0; + foundDot = false; + while (isDigit(ch)) { + foundDigits = true; + accum *= 10; + accum += (ch - '0'); + ch = getNextChar(); + } + if (ch == '.') { + foundDot = true; + ch = getNextChar(); + while (isDigit(ch)) { + foundDigits = true; + accum *= 10; + accum += (ch - '0'); + divider *= 10; + ch = getNextChar(); + } + } + if (!foundDigits) { + throw new IllegalArgumentException("just a . is not a valid number"); + } + accum /= divider; + } + + // next, did we find a separator after the number? + // degree sign is a separator after degrees, before minutes + if (ch == '\u00B0' || ch == 'o') { + valid = true; + if (degSet) { + throw new IllegalArgumentException("degrees sign only valid just after degrees"); + } + if (!foundDigits) { + throw new IllegalArgumentException("must have number before degrees sign"); + } + if (foundDot) { + throw new IllegalArgumentException("cannot have fractional degrees before degrees sign"); + } + ch = getNextChar(); + } + // apostrophe is a separator after minutes, before seconds + if (ch == '\'') { + if (minSet || !degSet || !foundDigits) { + throw new IllegalArgumentException("minutes sign only valid just after minutes"); + } + if (foundDot) { + throw new IllegalArgumentException("cannot have fractional minutes before minutes sign"); + } + ch = getNextChar(); + } + + // if we found some number, assign it into the next unset variable + if (foundDigits) { + valid = true; + if (degSet) { + if (minSet) { + if (secSet) { + throw new IllegalArgumentException("extra number after full field"); + } else { + seconds = accum; + secSet = true; + } + } else { + minutes = accum; + minSet = true; + if (foundDot) { + secSet = true; + } + } + } else { + degrees = accum; + degSet = true; + if (foundDot) { + minSet = true; + secSet = true; + } + } + foundDot = false; + foundDigits = false; + } + + // there needs to be a direction (NSEW) somewhere, too + if (isCompassDirection(ch)) { + valid = true; + if (dirSet) { + throw new IllegalArgumentException("already set direction once, cannot add direction: "+ch); + } + dirSet = true; + if (ch == 'S' || ch == 'W') { + sign = -1; + } else { + sign = 1; + } + if (ch == 'E' || ch == 'W') { + findingLongitude = true; + } else { + findingLatitude = true; + } + ch = getNextChar(); + } + + // lastly, did we find the end-of-string or a separator between lat and long? + if (ch == 0 || ch == ';' || ch == ' ') { + valid = true; + + if (!dirSet) { + throw new IllegalArgumentException("end of field without any compass direction seen"); + } + if (!degSet) { + throw new IllegalArgumentException("end of field without any number seen"); + } + degrees += minutes / 60.0; + degrees += seconds / 3600.0; + degrees *= sign; + + if (findingLatitude) { + if (latSet) { + throw new IllegalArgumentException("found latitude (N or S) twice"); + } + if (degrees < -90.0 || degrees > 90.0) { + throw new IllegalArgumentException("out of range [-90,+90]: "+degrees); + } + latitude = degrees; + latSet = true; + } else if (findingLongitude) { + if (longSet) { + throw new IllegalArgumentException("found longitude (E or W) twice"); + } + if (degrees < -180.0 || degrees > 180.0) { + throw new IllegalArgumentException("out of range [-180,+180]: "+degrees); + } + longitude = degrees; + longSet = true; + } else { + throw new IllegalArgumentException("no direction found"); + } + // reset + degrees = 0.0; + minutes = 0.0; + seconds = 0.0; + degSet = false; + minSet = false; + secSet = false; + dirSet = false; + foundDot = false; + foundDigits = false; + findingLatitude = false; + findingLongitude = false; + sign = 0.0; + + if (ch == 0) { + break; + } else { + ch = getNextChar(); + } + } + + if (!valid) { + throw new IllegalArgumentException("invalid character: "+ch); + } + + } while (ch != 0); + + if (!latSet) { + throw new IllegalArgumentException("missing latitude"); + } + if (!longSet) { + throw new IllegalArgumentException("missing longitude"); + } + // everything parsed OK + } +} diff --git a/vespajlib/src/main/java/com/yahoo/geo/ZCurve.java b/vespajlib/src/main/java/com/yahoo/geo/ZCurve.java new file mode 100644 index 00000000000..3e24316363e --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/geo/ZCurve.java @@ -0,0 +1,201 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.geo; + +/** + * Contains utility methods for a Z-curve (Morton-order) encoder and + * decoder. + * + * @author gjoranv + */ +public class ZCurve { + /** + * Encode two 32 bit integers by bit-interleaving them into one 64 bit + * integer value. The x-direction owns the least significant bit (bit + * 0). Both x and y can have negative values. + * + * <p> + * This is a time-efficient implementation. In the first step, the input + * value is split in two blocks, one containing the most significant bits, and + * the other containing the least significant bits. The most significant block + * is then shifted left for as many bits it contains. For each following step + * every block from the previous step is split in the same manner, with a + * least and most significant block, and the most significant blocks are shifted + * left for as many bits they contain (half the number from the previous step). + * This continues until each block has only one bit. + * + * <p> + * This algorithm works by placing the LSB of all blocks in the correct position + * after the bit-shifting is done in each step. This algorithm is quite similar + * to computing the Hamming Weight (or population count) of a bit + * string, see http://en.wikipedia.org/wiki/Hamming_weight. + * + * <p> + * Efficiency considerations: The encoding operations in this method + * should require 42 cpu operations, of which many can be executed + * in parallell. Practical experiments show that one call takes ~15 ns + * on a 64-bit Intel Xeon processor @2.33GHz, or 35 cycles. This gives + * an efficiency gain of just ~17% due to the CPUs ability to process + * parallell instructions, compared to ~50% for the slow method. + * But still it is 5 times faster. + * + * @param x x value + * @param y y value + * @return The bit-interleaved long containing x and y. + */ + public static long encode(int x, int y) { + long xl = (long)x; + long yl = (long)y; + + long rx = ((xl & 0x00000000ffff0000L) << 16) | (xl & 0x000000000000ffffL); + long ry = ((yl & 0x00000000ffff0000L) << 16) | (yl & 0x000000000000ffffL); + + rx = ((rx & 0xff00ff00ff00ff00L) << 8) | (rx & 0x00ff00ff00ff00ffL); + ry = ((ry & 0xff00ff00ff00ff00L) << 8) | (ry & 0x00ff00ff00ff00ffL); + + rx = ((rx & 0xf0f0f0f0f0f0f0f0L) << 4) | (rx & 0x0f0f0f0f0f0f0f0fL); + ry = ((ry & 0xf0f0f0f0f0f0f0f0L) << 4) | (ry & 0x0f0f0f0f0f0f0f0fL); + + rx = ((rx & 0xccccccccccccccccL) << 2) | (rx & 0x3333333333333333L); + ry = ((ry & 0xccccccccccccccccL) << 2) | (ry & 0x3333333333333333L); + + rx = ((rx & 0xaaaaaaaaaaaaaaaaL) << 1) | (rx & 0x5555555555555555L); + ry = ((ry & 0xaaaaaaaaaaaaaaaaL) << 1) | (ry & 0x5555555555555555L); + + return (rx | (ry << 1)); + } + + + /** + * Decode a z-value into the original two integers. Returns an + * array of two Integers, x and y in indices 0 and 1 respectively. + * + * @param z The bit-interleaved long containing x and y. + * @return Array of two Integers, x and y. + */ + public static int[] decode(long z) { + int[] xy = new int[2]; + + long xl = z & 0x5555555555555555L; + long yl = z & 0xaaaaaaaaaaaaaaaaL; + + xl = ((xl & 0xccccccccccccccccL) >> 1) | (xl & 0x3333333333333333L); + yl = ((yl & 0xccccccccccccccccL) >> 1) | (yl & 0x3333333333333333L); + + xl = ((xl & 0xf0f0f0f0f0f0f0f0L) >> 2) | (xl & 0x0f0f0f0f0f0f0f0fL); + yl = ((yl & 0xf0f0f0f0f0f0f0f0L) >> 2) | (yl & 0x0f0f0f0f0f0f0f0fL); + + xl = ((xl & 0xff00ff00ff00ff00L) >> 4) | (xl & 0x00ff00ff00ff00ffL); + yl = ((yl & 0xff00ff00ff00ff00L) >> 4) | (yl & 0x00ff00ff00ff00ffL); + + xl = ((xl & 0xffff0000ffff0000L) >> 8) | (xl & 0x0000ffff0000ffffL); + yl = ((yl & 0xffff0000ffff0000L) >> 8) | (yl & 0x0000ffff0000ffffL); + + xl = ((xl & 0xffffffff00000000L) >> 16) | (xl & 0x00000000ffffffffL); + yl = ((yl & 0xffffffff00000000L) >> 16) | (yl & 0x00000000ffffffffL); + + xy[0] = (int)xl; + xy[1] = (int)(yl >> 1); + return xy; + } + + + + /** + * Encode two integers by bit-interleaving them into one Long + * value. The x-direction owns the least significant bit (bit + * 0). Both x and y can have negative values. + * <br> + * Efficiency considerations: If Java compiles and runs this code + * as efficiently as would be the case with a good c-compiler, it + * should require 5 cpu operations per bit with optimal usage of + * the CPUs registers on a 64 bit processor(2 bit-shifts, 1 OR, 1 + * AND, and 1 conditional jump for the for-loop). This would + * correspond to 320+ cycles with no parallell execution. + * Practical experiments show that one call takes ~75 ns on a + * 64-bit Intel Xeon processor @2.33GHz, or 175 cycles. This gives + * an efficiency gain of ~50% due to the CPUs ability to perform + * several instructions in one clock-cycle. Here, it is probably the + * bit-shifts that can be done independently of the AND an OR + * operations, which must be done in sequence. + * + * @param x x value + * @param y y value + * @return The bit-interleaved long containing x and y. + */ + public static long encode_slow(int x, int y) { + long z = 0L; + long xl = (long)x; + long yl = (long)y; + + long mask = 1L; + for (int i=0; i<32; i++) { + long bit = (xl << i) & mask; + z |= bit; + //System.out.println("xs "+ i + ": " + toFullBinaryString(xl << i)); + //System.out.println("m "+ i + ": " + toFullBinaryString(mask)); + //System.out.println("bit "+ i + ": " + toFullBinaryString(bit)); + //System.out.println("z "+ i + ": " + toFullBinaryString(z)); + mask = mask << 2; + } + + mask = 2L; + for (int i=1; i<=32; i++) { + long bit = (yl << i) & mask; + z |= bit; + mask = mask << 2; + } + return z; + } + + /** + * Decode a z-value into the original two integers. Returns an + * array of two Integers, x and y in indices 0 and 1 respectively. + * + * @param z The bit-interleaved long containing x and y. + * @return Array of two Integers, x and y. + */ + public static int[] decode_slow(long z) { + int[] xy = new int[2]; + long xl = 0L; + long yl = 0L; + + long mask = 1L; + for (int i=0; i<32; i++) { + long bit = (z >> i) & mask; + xl |= bit; + //System.out.println("bits : m lm lm lm lm lm lm lm l"); + //System.out.println("zs "+ i + ": " + toFullBinaryString(z >> i)); + //System.out.println("m "+ i + ": " + toFullBinaryString(mask)); + //System.out.println("bit "+ i + ": " + toFullBinaryString(bit)); + //System.out.println("xl "+ i + ": " + toFullBinaryString(xl)); + mask = mask << 1; + } + + mask = 1L; + for (int i=1; i<=32; i++) { + long bit = (z >> i) & mask; + yl |= bit; + mask = mask << 1; + } + xy[0] = (int)xl; + xy[1] = (int)yl; + return xy; + } + + /** + * Debugging utility that returns a long value as binary string + * including the leading zeroes. + */ + public static String toFullBinaryString(long l) { + StringBuilder s = new StringBuilder(64); + for (int i=0; i<Long.numberOfLeadingZeros(l); i++) { + s.append('0'); + } + if (l == 0) { + s.deleteCharAt(0); + } + s.append(Long.toBinaryString(l)); + return s.toString(); + } + +} diff --git a/vespajlib/src/main/java/com/yahoo/geo/package-info.java b/vespajlib/src/main/java/com/yahoo/geo/package-info.java new file mode 100644 index 00000000000..2e515809012 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/geo/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.geo; + +import com.yahoo.osgi.annotation.ExportPackage; |