diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /vespajlib/src/main/java/com/yahoo/net |
Publish
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/net')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/net/HostName.java | 40 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/net/LinuxInetAddress.java | 84 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/net/URI.java | 819 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/net/UriTools.java | 42 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/net/Url.java | 253 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/net/UrlToken.java | 103 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/net/UrlTokenizer.java | 178 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/net/package-info.java | 7 |
8 files changed, 1526 insertions, 0 deletions
// diff --git a/vespajlib/src/main/java/com/yahoo/net/HostName.java b/vespajlib/src/main/java/com/yahoo/net/HostName.java new file mode 100644 index 00000000000..3fb1fe49efd --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/net/HostName.java @@ -0,0 +1,40 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.net;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Utilities for getting the hostname on a system running with the JVM. This is moved here from the old
 * HostSystem#getHostName in config-model.
 *
 * @author lulf
 */
public class HostName {

    /** The cached host name; only assigned after the 'hostname' command has completed successfully. */
    private static String myHost = null;

    /**
     * Static method that returns the name of localhost using shell
     * command "hostname". The result of the first successful invocation is cached
     * and returned by all later calls.
     *
     * @return the name of localhost.
     * @throws RuntimeException if executing the command 'hostname' fails, exits with a
     *                          non-zero status, or produces no output.
     */
    public static synchronized String getLocalhost() {
        if (myHost == null) {
            // Assign only on success, so a failed attempt never leaves a bogus value in the cache
            myHost = runHostnameCommand();
        }
        return myHost;
    }

    /** Runs the 'hostname' command and returns the first line it prints. */
    private static String runHostnameCommand() {
        try {
            Process p = Runtime.getRuntime().exec("hostname");
            String firstLine;
            // Close the reader (and with it the process' stdout) even if reading fails
            try (BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
                firstLine = in.readLine();
            }
            p.waitFor();
            if (p.exitValue() != 0) {
                throw new RuntimeException("Command 'hostname' failed: exit("+p.exitValue()+")");
            }
            if (firstLine == null) {
                throw new RuntimeException("Command 'hostname' produced no output");
            }
            return firstLine;
        } catch (IOException e) {
            throw new RuntimeException("Failed when executing command 'hostname'", e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe the interruption
            Thread.currentThread().interrupt();
            throw new RuntimeException("Failed when executing command 'hostname'", e);
        }
    }
}
// diff --git a/vespajlib/src/main/java/com/yahoo/net/LinuxInetAddress.java b/vespajlib/src/main/java/com/yahoo/net/LinuxInetAddress.java new file mode 100644 index 00000000000..540f8300f95 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/net/LinuxInetAddress.java @@ -0,0 +1,84 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.net;

import java.net.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

/**
 * Utilities for returning localhost addresses on Linux.
 * See
 * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4665037
 * on why this is necessary.
 *
 * @author bratseth
 */
public class LinuxInetAddress {

    private static Logger log = Logger.getLogger(LinuxInetAddress.class.getName());

    /**
     * Returns an InetAddress representing the address of the localhost.
     * A non-loopback address is preferred if available.
     * IPv4 is preferred over IPv6 if available.
     * If the JDK cannot resolve the local host at all, the loopback address is returned.
     *
     * @return a localhost address
     * @throws UnknownHostException if the local host resolves to a loopback address and the
     *                              network interfaces cannot be listed to look for a better one
     */
    public static InetAddress getLocalHost() throws UnknownHostException {
        InetAddress localAddress;
        try {
            localAddress = InetAddress.getLocalHost();
        } catch (UnknownHostException e) {
            return InetAddress.getLoopbackAddress();
        }

        if ( ! localAddress.isLoopbackAddress()) return localAddress;

        List<InetAddress> nonLoopbackAddresses =
                getAllLocalFromNetwork().stream().filter(a -> ! a.isLoopbackAddress()).collect(Collectors.toList());
        if (nonLoopbackAddresses.isEmpty()) return localAddress;

        List<InetAddress> ipV4NonLoopbackAddresses =
                nonLoopbackAddresses.stream().filter(a -> a instanceof Inet4Address).collect(Collectors.toList());
        if ( ! ipV4NonLoopbackAddresses.isEmpty()) return ipV4NonLoopbackAddresses.get(0);

        return nonLoopbackAddresses.get(0);
    }

    /**
     * Returns all local addresses of this host.
     *
     * @return an array of the addresses of this
     * @throws UnknownHostException if we cannot access the network
     */
    public static InetAddress[] getAllLocal() throws UnknownHostException {
        InetAddress[] localInetAddresses = InetAddress.getAllByName("127.0.0.1");
        // NOTE(review): "127.0.0.1" is expected to always resolve to a loopback address,
        // so this early return looks unreachable in practice - confirm the intent.
        if ( ! localInetAddresses[0].isLoopbackAddress()) return localInetAddresses;
        return getAllLocalFromNetwork().toArray(new InetAddress[0]);
    }

    /**
     * Returns the addresses of all network interfaces of this host.
     *
     * @return a list of the addresses of this, empty if no interfaces are reported
     * @throws UnknownHostException if we cannot access the network; the underlying
     *                              SocketException is attached as the cause
     */
    private static List<InetAddress> getAllLocalFromNetwork() throws UnknownHostException {
        try {
            List<InetAddress> addresses = new ArrayList<>();
            Enumeration<NetworkInterface> interfaces = NetworkInterface.getNetworkInterfaces();
            // Older JDKs return null rather than an empty enumeration when no interface is found
            if (interfaces == null) return addresses;
            for (NetworkInterface networkInterface : Collections.list(interfaces))
                addresses.addAll(Collections.list(networkInterface.getInetAddresses()));
            return addresses;
        }
        catch (SocketException ex) {
            // UnknownHostException has no cause-taking constructor, so attach the cause explicitly
            UnknownHostException unknownHost = new UnknownHostException("127.0.0.1");
            unknownHost.initCause(ex);
            throw unknownHost;
        }
    }

}
// diff --git a/vespajlib/src/main/java/com/yahoo/net/URI.java b/vespajlib/src/main/java/com/yahoo/net/URI.java new file mode 100644 index 00000000000..1f9baa36c06 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/net/URI.java @@ -0,0 +1,819 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.net;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import static com.yahoo.text.Lowercase.toLowerCase;

/**
 * <p>An URI. This is a pure (immutable) value object.</p>
 *
 * <p>This does more normalization of hierarchical URIs (URLs) than
 * described in the RFC and allows hosts with underscores.</p>
 *
 * @author <a href="mailto:bratseth@fast.no">Jon S Bratseth</a>
 */
public class URI implements Cloneable, java.io.Serializable, Comparable<URI> {

    private static final long serialVersionUID = 2271558213498856909L;

    /** The uri string */
    private String uri;

    /** The scheme of the uri */
    private String scheme = null;

    /** The host part of the uri */
    private String host = null;

    /** The port number of the uri, or -1 if no port is explicitly given */
    private int port = -1;

    /** The part of the uri following the host (host and port) */
    private String rest = null;

    /** Splits on every character which is not a word character or a dash */
    private static final Pattern tokenizePattern = Pattern.compile("[^\\w\\-]");

    // Lazily computed components. Each "parsedX" flag records that X has been computed,
    // which is needed because null is a legal computed value.
    private boolean parsedDomain = false;
    private String domain = null;

    private boolean parsedMainTld = false;
    private String mainTld = null;

    private boolean parsedPath = false;
    private String path = null;

    private boolean parsedParams = false;
    private String params = null;

    private boolean parsedFilename = false;
    private String filename = null;

    private boolean parsedExtension = false;
    private String extension = null;

    private boolean parsedQuery = false;
    private String query = null;

    private boolean parsedFragment = false;
    private String fragment = null;


    /** The explanation of why this uri is invalid, or null if it is valid */
    private String invalidExplanation = null;

    /** True if this uri is opaque, false if it is hierarchical */
    private boolean opaque = true;

    /**
     * <p>Creates an URI without keeping the fragment (the part starting by #).
     * If the uri is hierarchical, it is normalized and incorrect hierarchical uris
     * which looks like urls are attempted repaired.</p>
     *
     * <p>Relative uris are not supported.</p>
     *
     * @param uriString the uri string
     * @throws NullPointerException if the given uriString is null
     */
    public URI(String uriString) {
        this(uriString, false);
    }

    /**
     * Creates an URI, optionally keeping the fragment (the part starting by #).
     * If the uri is hierarchical, it is normalized and incorrect hierarchical uris
     * which looks like urls are attempted repaired.
     *
     * <p>Relative uris are not supported.</p>
     *
     * @param uriString the uri string
     * @param keepFragment true to keep the fragment
     * @throws NullPointerException if the given uriString is null
     */
    public URI(String uriString, boolean keepFragment) {
        this(uriString, keepFragment, false);
    }

    /**
     * Creates an URI, optionally keeping the fragment (the part starting by #).
     * If the uri is hierarchical, it is normalized and incorrect hierarchical uris
     * which looks like urls are attempted repaired.
     *
     * <p>Relative uris are not supported.</p>
     *
     * <p>A uri which cannot be normalized does not cause an exception; it becomes
     * invalid, see {@link #isValid()} and {@link #getInvalidExplanation()}.</p>
     *
     * @param uriString the uri string
     * @param keepFragment true to keep the fragment
     * @param hierarchicalOnly will force any uri string given to be parsed as
     *        a hierarchical one, causing the uri to be invalid if it isn't
     * @throws NullPointerException if the given uriString is null
     */
    public URI(String uriString, boolean keepFragment, boolean hierarchicalOnly) {
        if (uriString == null) {
            throw new NullPointerException("Can not create an uri from null");
        }

        if (!keepFragment) {
            int fragmentIndex = uriString.indexOf("#");

            if (fragmentIndex >= 0) {
                uriString = uriString.substring(0, fragmentIndex);
            }
        }

        try {
            this.uri = uriString.trim();
            opaque = isOpaque(uri);

            // No further parsing of opaque uris
            if (isOpaque() && !hierarchicalOnly) {
                return;
            }
            opaque = false;
            normalizeHierarchical();
        } catch (IllegalArgumentException e) {
            // Pick the most specific message available as the explanation
            if (e.getMessage() != null) {
                invalidExplanation = e.getMessage();
            } else {
                Throwable t = e.getCause();
                if (t != null && t.getMessage() != null) {
                    invalidExplanation = t.getMessage();
                } else {
                    invalidExplanation = "Invalid uri: " + e;
                }
            }
        }
    }

    /**
     * Creates an url type uri from its parts. The parts are recombined and then normalized.
     *
     * @throws IllegalArgumentException if the recombined uri can not be normalized
     */
    public URI(String scheme, String host, int port, String rest) {
        this.scheme = scheme;
        this.host = host;
        this.port = port;
        this.rest = rest;
        recombine();
        normalizeHierarchical();
        opaque = false;
    }

    /** Returns whether an url is opaque (true) or hierarchical (false): hierarchical means the first ":" is followed by "/" */
    private boolean isOpaque(String uri) {
        int colonIndex = uri.indexOf(":");

        if (colonIndex < 0) {
            return true;
        } else {
            return !(uri.length() > colonIndex + 1
                     && uri.charAt(colonIndex + 1) == '/');
        }
    }

    /**
     * Returns whether this is a valid URI (after normalizing).
     * All non-hierarchical uri's containing a scheme is valid.
     */
    public boolean isValid() {
        return invalidExplanation == null;
    }

    /**
     * Normalizes this hierarchical uri according to RFC 2396 and the Overture
     * standard. Before normalizing, some simple heuristics are used to make
     * the uri complete if needed. After normalizing, the scheme,
     * host, port and rest of this uri is set if defined.
     *
     * @throws IllegalArgumentException if this uri can not be normalized into a legal uri
     */
    private void normalizeHierarchical() {
        complete();
        escapeNonAscii();
        unescapeHtmlEntities();
        decompose();
        lowCaseHost();
        removeDefaultPortNumber();
        removeTrailingHostDot();
        makeDoubleSlashesSingle();
        recombine();
    }

    /** Applies simple heuristics to complete this uri if needed */
    private void complete() {
        if (uri.startsWith("www.")) {
            uri = "http://" + uri;
        } else if (uri.startsWith("WWW")) {
            uri = "http://" + uri;
        } else if (uri.startsWith("/http:")) {
            uri = uri.substring(1);
        } else if (isFileURIShortHand(uri)) {
            uri = "file://" + uri;
        }
    }

    /** Returns true for strings which look like a Windows path: "x:\..." for any x, or "c:/..." and "d:/..." */
    private boolean isFileURIShortHand(String uri) {
        if (uri.indexOf(":\\") == 1) {
            return true;
        }
        if (uri.indexOf("c:/") == 0) {
            return true;
        }
        if (uri.indexOf("d:/") == 0) {
            return true;
        }
        return false;
    }

    /**
     * Decomposes this uri into scheme, host, port and rest.
     *
     * @throws IllegalArgumentException if the uri can not be parsed or has no host
     */
    private void decompose() {
        java.net.URI neturi = java.net.URI.create(uri).normalize();

        scheme = neturi.getScheme();

        host = neturi.getHost();
        boolean portAlreadyParsed = false;

        // No host if the host contains underscores
        if (host == null) {
            host = neturi.getAuthority();
            if (host != null) {
                int colonPos = host.lastIndexOf(":");
                // Null-safe comparison: the scheme is null when a scheme-less string is forced hierarchical
                if (!"file".equals(scheme) && colonPos > -1) {
                    //we probably have an (illegal) URI of type http://under_score.com:5000/
                    try {
                        port = Integer.parseInt(host.substring(colonPos + 1, host.length()));
                        host = host.substring(0, colonPos);
                        portAlreadyParsed = true;
                    } catch (NumberFormatException nfe) {
                        // Not a port number after all: keep the authority as the host
                    }
                }
            }
        }

        if ("file".equalsIgnoreCase(scheme)) {
            if (host == null) {
                host = "localhost";
            } else {
                host = repairWindowsDrive(host, uri);
            }
        }
        if (host == null) {
            throw new IllegalArgumentException(
                    "A complete uri must specify a host");
        }
        if (!portAlreadyParsed) {
            port = neturi.getPort();
        }
        rest = (neturi.getRawPath() != null ? neturi.getRawPath() : "")
               + (neturi.getRawQuery() != null
                  ? ("?" + neturi.getRawQuery())
                  : "")
               + (neturi.getRawFragment() != null
                  ? ("#" + neturi.getRawFragment())
                  : "");
    }

    /** c: turns to c when interpreted by URI. Repair it */
    private String repairWindowsDrive(String host, String uri) {
        if (host.length() != 1) {
            return host;
        }
        int driveIndex = uri.indexOf(host + ":");

        if (driveIndex == 5 || driveIndex == 7) { // file:<drive> or file://<drive>
            return host + ":";
        } else {
            return host;
        }
    }

    /**
     * Replaces every character at or above 0x80, and every double quote (0x22), by
     * "%" + the lower case hex value of the character + ";".
     * For example the character U+00E6 becomes {@code %e6;}.
     */
    private void escapeNonAscii() {
        char[] uriChars = uri.toCharArray();
        StringBuilder result = new StringBuilder(uri.length());

        for (char uriChar : uriChars) {
            if (uriChar >= 0x80 || uriChar == 0x22) {
                result.append("%");
                result.append(Integer.toHexString(uriChar));
                result.append(";");
            } else {
                result.append(uriChar);
            }
        }
        uri = result.toString();
    }

    /**
     * Replaces each HTML-escaped ampersand, {@code &amp;}, by a plain {@code &}.
     * Currently ampersand only. Plain ampersands, such as those separating
     * query parameters, are left untouched.
     */
    private void unescapeHtmlEntities() {
        // Match the whole entity: matching a bare "&" and skipping five characters
        // mangles ordinary query strings and can index past the end of the uri.
        uri = uri.replace("&amp;", "&");
    }

    /** "http://A" → "http://a" (only the host is lower cased) */
    private void lowCaseHost() {
        host = toLowerCase(host);
    }

    /** "http://a:80" → "http://a" and "https://a:443" → "https://a" */
    private void removeDefaultPortNumber() {
        // Constant-first equals: the scheme may be null
        if (port == 80 && "http".equals(scheme)) {
            port = -1;
        } else if (port == 443 && "https".equals(scheme)) {
            port = -1;
        }
    }

    /** "http://a./b" → "http://a/b" */
    private void removeTrailingHostDot() {
        if (host.endsWith(".")) {
            host = host.substring(0, host.length() - 1);
        }
    }

    /** "http://a//b" → "http://a/b" */
    private void makeDoubleSlashesSingle() {
        StringBuilder result = new StringBuilder(rest.length());
        char[] restChars = rest.toCharArray();

        for (int i = 0; i < restChars.length; i++) {
            // Drop a slash whenever the next character is also a slash
            if (!(i + 1 < restChars.length && restChars[i] == '/'
                  && restChars[i + 1] == '/')) {
                result.append(restChars[i]);
            }
        }
        rest = result.toString();
    }

    /** Recombines the uri from the scheme, host, port and rest */
    private void recombine() {
        StringBuilder recombined = new StringBuilder(100);

        recombined.append(scheme);
        recombined.append("://");
        recombined.append(host);
        if (port > -1) {
            recombined.append(":").append(port);
        }
        if (rest != null) {
            if (!rest.startsWith("/")) {
                recombined.append("/");
            }
            recombined.append(rest);
        } else {
            recombined.append("/"); // RFC 2396 violation, as required by search
        }
        uri = recombined.toString();
    }

    /**
     * Returns the normalized scheme of this URI.
     *
     * @return the normalized scheme (protocol), or null if there is none,
     *         which may only be the case with non-hierarchical URIs
     */
    public String getScheme() {
        return scheme;
    }

    /**
     * Returns whether this URI is hierarchical or opaque.
     * A typical example of an hierarchical URI is an URL,
     * while opaque URI's are mailto, news and such.
     *
     * @return true if the url is opaque, false if it is hierarchical
     */
    public boolean isOpaque() {
        return opaque;
    }

    /**
     * Returns the normalized host of this URI.
     *
     * @return the normalized host, or null if there is none, which may
     *         only be the case if this is a non-hierarchical uri
     */
    public String getHost() {
        return host;
    }

    /** Returns the port number of this scheme if set explicitly, or -1 otherwise */
    public int getPort() {
        return port;
    }

    /**
     * Returns the <i>rest</i> of this uri, that is what is following the host or port.
     * This is path, query and fragment as defined in RFC 2396. Returns an empty string
     * if the rest is just "/", and null if no rest has been parsed (opaque uris).
     */
    public String getRest() {
        if (rest == null) {
            return null;
        } else if (rest.equals("/")) {
            return "";
        } else {
            return rest;
        }
    }

    /**
     * Returns the domain of the host: the host itself if it contains at most one ".",
     * otherwise everything after the first "." (www.host.com → host.com).
     *
     * @return the domain, or null if this has no host
     */
    public String getDomain() {
        if (parsedDomain) {
            return domain;
        }
        String host = getHost();
        if (host == null) return null;

        int firstDotPos = host.indexOf(".");
        int lastDotPos = host.lastIndexOf(".");

        String domain;
        if (firstDotPos < 0) {
            // "." was not found at all
            domain = host;
        } else if (firstDotPos == lastDotPos) {
            //there is only one "." in the host
            domain = host;
        } else {
            //for www.host.com return host.com
            //TODO: Must be corrected when implementing tldlist
            domain = host.substring(firstDotPos + 1, host.length());
        }

        this.parsedDomain = true;
        this.domain = domain;
        return domain;
    }

    /**
     * Returns what follows the last "." of the host (www.yahoo.co.uk → uk).
     *
     * @return the main tld, or null if there is no host, no "." in it, or nothing after the last "."
     */
    public String getMainTld() {
        if (parsedMainTld) {
            return mainTld;
        }
        String host = getHost();
        if (host == null) return null;

        int lastDotPos = host.lastIndexOf(".");

        String mainTld;
        if (lastDotPos < 0) {
            //no ".", no TLD
            mainTld = null;
        } else if (lastDotPos == host.length() - 1) {
            //the "." is the last character
            mainTld = null;
        } else {
            //for www.yahoo.co.uk return uk
            //TODO: Implement list of TLDs from config?
            mainTld = host.substring(lastDotPos + 1, host.length());
        }
        this.parsedMainTld = true;
        this.mainTld = mainTld;
        return mainTld;
    }

    /**
     * Returns the rest of this uri with the fragment and query removed.
     *
     * @return the path, or null if this has no rest
     */
    public String getPath() {
        if (parsedPath) {
            return path;
        }
        String rest = this.rest;
        if (rest == null) return null;

        rest = removeFragment(rest);

        int queryPos = rest.lastIndexOf("?");
        if (queryPos > -1) {
            rest = rest.substring(0, queryPos);
        }
        this.parsedPath = true;
        this.path = rest;
        return this.path;
    }

    /** Returns the given string cut before its last "#", or unchanged if it has none */
    private String removeFragment(String path) {
        int fragmentPos = path.lastIndexOf("#");
        return (fragmentPos > -1) ? path.substring(0, fragmentPos) : path;
    }

    /**
     * Returns what follows the last "/" of the path, after params (from the first ";") are removed.
     *
     * @return the file name, "" if the path ends by "/", or null if this has no path
     */
    public String getFilename() {
        if (parsedFilename) {
            return filename;
        }
        String path = getPath();
        if (path == null) return null;

        path = removeParams(path);

        int lastSlash = path.lastIndexOf("/");

        String filename;
        if (lastSlash < 0) {
            //there is no slash, return the path, excluding params
            filename = path;
        } else if (lastSlash == path.length() - 1) {
            //the slash is the last character, there is no filename here
            filename = "";
        } else {
            filename = path.substring(lastSlash + 1, path.length());
        }
        this.parsedFilename = true;
        this.filename = filename;
        return filename;
    }

    /** Returns the given string cut before its first ";", or unchanged if it has none */
    private String removeParams(String filename) {
        int firstSemicolon = filename.indexOf(";");

        if (firstSemicolon < 0) {
            //there are no params
            return filename;
        }
        return filename.substring(0, firstSemicolon);
    }

    /**
     * Returns what follows the last "." of the file name.
     *
     * @return the extension, or null if there is no file name, no "." in it, or nothing after the last "."
     */
    public String getExtension() {
        if (parsedExtension) {
            return extension;
        }
        String filename = getFilename();
        if (filename == null) return null;

        int lastDotPos = filename.lastIndexOf(".");

        String extension;
        if (lastDotPos < 0) {
            //there is no ".", there is no extension
            extension = null;
        } else if (lastDotPos == filename.length() - 1) {
            //the "." is the last character, there is no extension
            extension = null;
        } else {
            extension = filename.substring(lastDotPos + 1, filename.length());
        }
        this.parsedExtension = true;
        this.extension = extension;
        return extension;
    }

    /**
     * Returns what follows the last "?" of the rest, with any fragment removed.
     *
     * @return the query, or null if this has no rest or no "?"
     */
    public String getQuery() {
        if (parsedQuery) {
            return query;
        }
        String rest = this.rest;
        if (rest == null) return null;

        rest = removeFragment(rest);

        int queryPos = rest.lastIndexOf("?");
        String query = null;
        if (queryPos > -1) {
            //we have a query
            query = rest.substring(queryPos+1, rest.length());
        }
        this.parsedQuery = true;
        this.query = query;
        return query;
    }

    /**
     * Returns what follows the last "#" of the rest.
     *
     * @return the fragment, or null if this has no rest or no "#"
     */
    public String getFragment() {
        if (parsedFragment) {
            return fragment;
        }
        String path = this.rest;
        if (path == null) return null;

        int fragmentPos = path.lastIndexOf("#");
        String fragment = null;
        if (fragmentPos > -1) {
            //we have a fragment
            fragment = path.substring(fragmentPos+1, path.length());
        }
        this.parsedFragment = true;
        this.fragment = fragment;
        return fragment;
    }

    /**
     * Returns what follows the first ";" of the path.
     *
     * @return the params, or null if there is no path, no ";" in it, or nothing after the ";"
     */
    public String getParams() {
        if (parsedParams) {
            return params;
        }
        String path = getPath();
        if (path == null) return null;

        int semicolonPos = path.indexOf(";");
        String params;
        if (semicolonPos < 0) {
            //there is no semicolon, there are no params here
            params = null;
        } else if (semicolonPos == path.length() - 1) {
            //the semicolon is the last character, there are no params here
            params = null;
        } else {
            params = path.substring(semicolonPos + 1, path.length());
        }
        this.parsedParams = true;
        this.params = params;
        return params;
    }

    /** Splits the given string on every character which is not a word character or a dash. May return empty strings. */
    public static String[] tokenize(String item) {
        return tokenizePattern.split(item);
    }

    /** Returns the non-empty tokens of the scheme, host, port, path, query and fragment of this, in that order */
    public List<Token> tokenize() {
        List<Token> tokens = new ArrayList<>();

        tokens.addAll(tokenize(URLContext.URL_SCHEME, getScheme()));
        tokens.addAll(tokenize(URLContext.URL_HOST, getHost()));
        tokens.addAll(tokenize(URLContext.URL_PORT, getPort() > -1 ? "" + getPort() : null));
        tokens.addAll(tokenize(URLContext.URL_PATH, getPath()));
        tokens.addAll(tokenize(URLContext.URL_QUERY, getQuery()));
        tokens.addAll(tokenize(URLContext.URL_FRAGMENT, getFragment()));

        return tokens;
    }

    /** Returns the non-empty tokens of the given item tagged by the given context; an empty list if the item is null */
    private List<Token> tokenize(URLContext context, String item) {
        if (item == null) {
            return new ArrayList<>(0);
        }
        String[] tokenStrings = tokenize(item);
        List<Token> tokens = new ArrayList<>(tokenStrings.length);
        for (String tokenString : tokenStrings) {
            if (tokenString.length() > 0) {
                tokens.add(new Token(context, tokenString));
            }
        }
        return tokens;
    }

    /** Returns an explanation of why this uri is invalid, or null if it is valid */
    public String getInvalidExplanation() {
        return invalidExplanation;
    }

    @Override
    public int hashCode() {
        return uri.hashCode();
    }

    /** Two URIs are equal if their string forms are equal */
    @Override
    public boolean equals(Object object) {
        if (!(object instanceof URI)) {
            return false;
        }
        return (toString().equals(object.toString()));
    }

    @Override
    public int compareTo(URI object) {
        return toString().compareTo(object.toString());
    }

    @Override
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException("Someone made me unclonable!", e);
        }
    }

    /** Returns a new URI with a changed scheme */
    public URI setScheme(String scheme) {
        return new URI(scheme, host, port, rest);
    }

    /** Returns a new URI with a changed host (or authority) */
    public URI setHost(String host) {
        return new URI(scheme, host, port, rest);
    }

    /** Returns a new URI with a changed port */
    public URI setPort(int port) {
        return new URI(scheme, host, port, rest);
    }

    /** Returns a new URI with a changed rest */
    public URI setRest(String rest) {
        return new URI(scheme, host, port, rest);
    }

    /** Returns a new uri with an additional parameter appended to the rest. The name and value are appended as given. */
    public URI addParameter(String name, String value) {
        String newRest = rest;

        if (newRest == null) {
            newRest = "";
        }
        if (newRest.indexOf("?") < 0) {
            newRest += "?";
        } else {
            newRest += "&";
        }
        newRest += name + "=" + value;
        return new URI(scheme, host, port, newRest);
    }

    /** Returns this uri as a string */
    public String stringValue() {
        return uri;
    }

    /** Returns this URI as a string */
    @Override
    public String toString() {
        return uri;
    }

    /**
     * Returns the depth of this uri.
     * The depth of an hierarchical uri equals the number of slashes
     * which are not separating the protocol and the host, and not at the end.
     *
     * @return the depth of this uri if it is hierarchical, or 0 if it is opaque
     */
    public int getDepth() {
        if (opaque) {
            return 0;
        }
        int colonIndex = uri.indexOf(':');
        if (colonIndex < 0) {
            // Nothing separates a scheme from the rest, so there is no hierarchy to measure
            return 0;
        }

        // Count the slashes following the first colon
        int depth = 0;
        for (int slashIndex = uri.indexOf('/', colonIndex);
             slashIndex != -1;
             slashIndex = uri.indexOf('/', slashIndex + 1)) {
            depth++;
        }

        // Discount the (up to) two slashes separating the scheme from the host
        if (colonIndex + 1 < uri.length() && uri.charAt(colonIndex + 1) == '/') {
            depth--;
        }
        if (colonIndex + 2 < uri.length() && uri.charAt(colonIndex + 2) == '/') {
            depth--;
        }
        // Discount a trailing slash, unless it is one of the separating slashes
        if ((uri.charAt(uri.length() - 1) == '/')
            && ((uri.length() - 1) > (colonIndex + 2))) {
            depth--;
        }
        return depth;
    }


    /** A token of an URI, tagged by the part of the URI it was found in */
    public static class Token {
        private final URLContext context;
        private final String token;

        private Token(URLContext context, String token) {
            this.context = context;
            this.token = token;
        }

        public URLContext getContext() {
            return context;
        }

        public String getToken() {
            return token;
        }
    }

    /** The parts of an URI a token may stem from */
    public static enum URLContext {
        URL_SCHEME(0, "scheme"),
        URL_HOST(1, "host"),
        URL_DOMAIN(2, "domain"),
        URL_MAINTLD(3, "maintld"),
        URL_PORT(4, "port"),
        URL_PATH(5, "path"),
        URL_FILENAME(6, "filename"),
        URL_EXTENSION(7, "extension"),
        URL_PARAMS(8, "params"),
        URL_QUERY(9, "query"),
        URL_FRAGMENT(10, "fragment");

        public final int id;
        public final String name;

        private URLContext(int id, String name) {
            this.id = id;
            this.name = name;
        }
    }
}
// diff --git
a/vespajlib/src/main/java/com/yahoo/net/UriTools.java b/vespajlib/src/main/java/com/yahoo/net/UriTools.java new file mode 100644 index 00000000000..34d88713274 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/net/UriTools.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.net; + +import java.net.URI; + +/** + * Utility methods for working with URIs. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public final class UriTools { + private UriTools() { + } + + /** + * Build a string representation of the normalized form of the given URI, + * containg the path and optionally query and fragment parts. The query part + * will be delimeted from the preceding data with "?" and the fragment with + * "#". + * + * @param uri + * source for path, query and fragment in returned data + * @return a string containing path, and optionally query and fragment, + * delimited by question mark and hash + */ + public static String rawRequest(final URI uri) { + final String rawQuery = uri.getRawQuery(); + final String rawFragment = uri.getRawFragment(); + final StringBuilder rawRequest = new StringBuilder(); + + rawRequest.append(uri.getRawPath()); + if (rawQuery != null) { + rawRequest.append("?").append(rawQuery); + } + + if (rawFragment != null) { + rawRequest.append("#").append(rawFragment); + } + + return rawRequest.toString(); + } +} diff --git a/vespajlib/src/main/java/com/yahoo/net/Url.java b/vespajlib/src/main/java/com/yahoo/net/Url.java new file mode 100644 index 00000000000..33571f9eb34 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/net/Url.java @@ -0,0 +1,253 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.net; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class Url { + + private static final Pattern pattern = Pattern.compile( + //12 3 456 7 8 9ab c d e f g h i j + // 2 1 6 87 5 c b ed a4 f hg ji + "^(([^:/?#]+):)?(//((([^:@/?#]+)(:([^@/?#]+))?@))?(((\\[([^\\]]+)\\]|[^:/?#]+)(:([^/?#]+))?)))?([^?#]+)?(\\?([^#]*))?(#(.*))?"); + private final String image; + private final int schemeBegin; + private final int schemeEnd; + private final int userInfoBegin; + private final int userInfoEnd; + private final int passwordBegin; + private final int passwordEnd; + private final int hostBegin; + private final int hostEnd; + private final int portBegin; + private final int portEnd; + private final int pathBegin; + private final int pathEnd; + private final int queryBegin; + private final int queryEnd; + private final int fragmentBegin; + private final int fragmentEnd; + + public Url(String scheme, String user, String password, String host, Integer port, String path, String query, + String fragment) + { + StringBuilder image = new StringBuilder(); + schemeBegin = image.length(); + if (scheme != null) { + image.append(scheme); + schemeEnd = image.length(); + image.append(':'); + } else { + schemeEnd = schemeBegin; + } + if (host != null) { + image.append("//"); + } + userInfoBegin = image.length(); + if (user != null) { + image.append(user); + userInfoEnd = image.length(); + } else { + userInfoEnd = userInfoBegin; + } + if (password != null) { + image.append(':'); + passwordBegin = image.length(); + image.append(password); + passwordEnd = image.length(); + } else { + passwordBegin = image.length(); + passwordEnd = passwordBegin; + } + if (user != null || password != null) { + image.append('@'); + } + if (host != null) { + boolean esc = host.indexOf(':') >= 0; + if (esc) { + image.append('['); + } + hostBegin = image.length(); + image.append(host); + 
hostEnd = image.length(); + if (esc) { + image.append(']'); + } + } else { + hostBegin = image.length(); + hostEnd = hostBegin; + } + if (port != null) { + image.append(':'); + portBegin = image.length(); + image.append(port); + portEnd = image.length(); + } else { + portBegin = image.length(); + portEnd = portBegin; + } + pathBegin = image.length(); + if (path != null) { + image.append(path); + pathEnd = image.length(); + } else { + pathEnd = pathBegin; + } + if (query != null) { + image.append('?'); + queryBegin = image.length(); + image.append(query); + queryEnd = image.length(); + } else { + queryBegin = image.length(); + queryEnd = queryBegin; + } + if (fragment != null) { + image.append("#"); + fragmentBegin = image.length(); + image.append(fragment); + fragmentEnd = image.length(); + } else { + fragmentBegin = image.length(); + fragmentEnd = fragmentBegin; + } + this.image = image.toString(); + } + + public static Url fromString(String image) { + Matcher matcher = pattern.matcher(image); + if (!matcher.matches()) { + throw new IllegalArgumentException("Malformed URL."); + } + String host = matcher.group(12); + if (host == null) { + host = matcher.group(11); + } + if (host == null) { + host = matcher.group(9); + } + String port = matcher.group(14); + return new Url(matcher.group(2), matcher.group(6), matcher.group(8), host, + port != null ? 
Integer.valueOf(port) : null, matcher.group(15), matcher.group(17), + matcher.group(19)); + } + + public int getSchemeBegin() { + return schemeBegin; + } + + public int getSchemeEnd() { + return schemeEnd; + } + + public int getUserInfoBegin() { + return userInfoBegin; + } + + public int getUserInfoEnd() { + return userInfoEnd; + } + + public int getPasswordBegin() { + return passwordBegin; + } + + public int getPasswordEnd() { + return passwordEnd; + } + + public int getHostBegin() { + return hostBegin; + } + + public int getHostEnd() { + return hostEnd; + } + + public int getPortBegin() { + return portBegin; + } + + public int getPortEnd() { + return portEnd; + } + + public int getPathBegin() { + return pathBegin; + } + + public int getPathEnd() { + return pathEnd; + } + + public int getQueryBegin() { + return queryBegin; + } + + public int getQueryEnd() { + return queryEnd; + } + + public int getFragmentBegin() { + return fragmentBegin; + } + + public int getFragmentEnd() { + return fragmentEnd; + } + + public String getScheme() { + return schemeBegin < schemeEnd ? image.substring(schemeBegin, schemeEnd) : null; + } + + public String getUserInfo() { + return userInfoBegin < userInfoEnd ? image.substring(userInfoBegin, userInfoEnd) : null; + } + + public String getPassword() { + return passwordBegin < passwordEnd ? image.substring(passwordBegin, passwordEnd) : null; + } + + public String getHost() { + return hostBegin < hostEnd ? image.substring(hostBegin, hostEnd) : null; + } + + public Integer getPort() { + String str = getPortString(); + return str != null ? Integer.valueOf(str) : null; + } + + public String getPortString() { + return portBegin < portEnd ? image.substring(portBegin, portEnd) : null; + } + + public String getPath() { + return pathBegin < pathEnd ? image.substring(pathBegin, pathEnd) : null; + } + + public String getQuery() { + return queryBegin < queryEnd ? 
image.substring(queryBegin, queryEnd) : null; + } + + public String getFragment() { + return fragmentBegin < fragmentEnd ? image.substring(fragmentBegin, fragmentEnd) : null; + } + + @Override + public int hashCode() { + return image.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return (obj instanceof Url) && image.equals(((Url)obj).image); + } + + @Override + public String toString() { + return image; + } +} diff --git a/vespajlib/src/main/java/com/yahoo/net/UrlToken.java b/vespajlib/src/main/java/com/yahoo/net/UrlToken.java new file mode 100644 index 00000000000..785c3b1fe43 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/net/UrlToken.java @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.net; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class UrlToken { + + public enum Type { + SCHEME, + USERINFO, + PASSWORD, + HOST, + PORT, + PATH, + QUERY, + FRAGMENT + } + + private final Type type; + private final int offset; + private final String orig; + private final String term; + + public UrlToken(Type type, int offset, String orig, String term) { + if (type == null) { + throw new NullPointerException(); + } + this.type = type; + this.offset = offset; + this.orig = orig; + this.term = term; + } + + public Type getType() { + return type; + } + + public int getOffset() { + return offset; + } + + public int getLength() { + return orig != null ? orig.length() : 0; + } + + public String getOrig() { + return orig; + } + + public String getTerm() { + return term; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof UrlToken)) { + return false; + } + UrlToken rhs = (UrlToken)obj; + if (offset != rhs.offset) { + return false; + } + if (orig != null ? !orig.equals(rhs.orig) : rhs.orig != null) { + return false; + } + if (term != null ? 
!term.equals(rhs.term) : rhs.term != null) { + return false; + } + if (type != rhs.type) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int result = type != null ? type.hashCode() : 0; + result = 31 * result + offset; + result = 31 * result + (orig != null ? orig.hashCode() : 0); + result = 31 * result + (term != null ? term.hashCode() : 0); + return result; + } + + @Override + public String toString() { + StringBuilder ret = new StringBuilder("UrlToken("); + ret.append("type=").append(type).append(", "); + ret.append("offset=").append(offset).append(", "); + if (orig != null) { + ret.append("orig='").append(orig).append("', "); + } + if (term != null) { + ret.append("term='").append(term).append("', "); + } + ret.setLength(ret.length() - 2); + ret.append(")"); + return ret.toString(); + } +} diff --git a/vespajlib/src/main/java/com/yahoo/net/UrlTokenizer.java b/vespajlib/src/main/java/com/yahoo/net/UrlTokenizer.java new file mode 100644 index 00000000000..ec617607b8a --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/net/UrlTokenizer.java @@ -0,0 +1,178 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.net; + +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +public class UrlTokenizer { + + public static final String TERM_STARTHOST = "StArThOsT"; + public static final String TERM_ENDHOST = "EnDhOsT"; + + private static final Map<String, String> schemeToPort = new HashMap<>(); + private static final Map<String, String> portToScheme = new HashMap<>(); + private static final char TO_LOWER = (char)('A' - 'a'); + private final Url url; + + static { + registerScheme("ftp", 21); + registerScheme("gopher", 70); + registerScheme("http", 80); + registerScheme("https", 443); + registerScheme("imap", 143); + registerScheme("mailto", 25); + registerScheme("news", 119); + registerScheme("nntp", 119); + registerScheme("pop", 110); + registerScheme("rsync", 873); + registerScheme("rtsp", 554); + registerScheme("sftp", 22); + registerScheme("shttp", 443); + registerScheme("sip", 5060); + registerScheme("sips", 5061); + registerScheme("snmp", 161); + registerScheme("ssh", 22); + registerScheme("telnet", 23); + registerScheme("tftp", 69); + } + + public UrlTokenizer(String url) { + this(Url.fromString(url)); + } + + public UrlTokenizer(Url url) { + this.url = url; + } + + private String guessScheme(String port) { + String scheme = portToScheme.get(port); + if (scheme != null) { + return scheme; + } + return "http"; + } + + private String guessPort(String scheme) { + String port = schemeToPort.get(scheme); + if (port != null) { + return port; + } + return null; + } + + public List<UrlToken> tokenize() { + List<UrlToken> lst = new LinkedList<>(); + + int offset = 0; + String port = url.getPortString(); + String scheme = url.getScheme(); + if (scheme == null) { + scheme = guessScheme(port); + addTokens(lst, UrlToken.Type.SCHEME, offset, scheme, false); + } else { + addTokens(lst, UrlToken.Type.SCHEME, url.getSchemeBegin(), scheme, 
true); + offset = url.getSchemeEnd(); + } + + String userInfo = url.getUserInfo(); + if (userInfo != null) { + addTokens(lst, UrlToken.Type.USERINFO, url.getUserInfoBegin(), userInfo, true); + offset = url.getUserInfoEnd(); + } + + String password = url.getPassword(); + if (password != null) { + addTokens(lst, UrlToken.Type.PASSWORD, url.getPasswordBegin(), password, true); + offset = url.getPasswordEnd(); + } + + String host = url.getHost(); + if (host == null || host.isEmpty()) { + if (host != null) { + offset = url.getHostBegin(); + } + if ("file".equalsIgnoreCase(scheme)) { + addHostTokens(lst, offset, offset, "localhost", false); + } + } else { + addHostTokens(lst, url.getHostBegin(), url.getHostEnd(), host, true); + offset = url.getHostEnd(); + } + + port = url.getPortString(); + if (port == null) { + if ((port = guessPort(scheme)) != null) { + addTokens(lst, UrlToken.Type.PORT, offset, port, false); + } + } else { + addTokens(lst, UrlToken.Type.PORT, url.getPortBegin(), port, true); + } + + String path = url.getPath(); + if (path != null) { + addTokens(lst, UrlToken.Type.PATH, url.getPathBegin(), path, true); + } + + String query = url.getQuery(); + if (query != null) { + addTokens(lst, UrlToken.Type.QUERY, url.getQueryBegin(), query, true); + } + + String fragment = url.getFragment(); + if (fragment != null) { + addTokens(lst, UrlToken.Type.FRAGMENT, url.getFragmentBegin(), fragment, true); + } + + return lst; + } + + public static void addTokens(List<UrlToken> lst, UrlToken.Type type, int offset, String image, boolean orig) { + StringBuilder term = new StringBuilder(); + int prev = 0; + for (int skip, next = 0, len = image.length(); next < len; next += skip) { + char c = image.charAt(next); + if (c == '%') { + c = (char)Integer.parseInt(image.substring(next + 1, next + 3), 16); + skip = 3; + } else { + skip = 1; + } + if ((c >= '0' && c <= '9') || + (c >= 'a' && c <= 'z') || + (c == '-' || c == '_')) + { + term.append(c); + } else if (c >= 'A' && c <= 'Z') 
{ + term.append((char)(c - TO_LOWER)); + } else { + if (prev < next) { + lst.add(new UrlToken(type, offset + (orig ? prev : 0), orig ? image.substring(prev, next) : null, + term.toString())); + term = new StringBuilder(); + } + prev = next + skip; + } + } + if (term.length() > 0) { + lst.add(new UrlToken(type, offset + (orig ? prev : 0), orig ? image.substring(prev) : null, + term.toString())); + } + } + + private static void addHostTokens(List<UrlToken> lst, int begin, int end, String image, boolean orig) { + lst.add(new UrlToken(UrlToken.Type.HOST, begin, null, TERM_STARTHOST)); + addTokens(lst, UrlToken.Type.HOST, begin, image, orig); + lst.add(new UrlToken(UrlToken.Type.HOST, end, null, TERM_ENDHOST)); + } + + private static void registerScheme(String scheme, int port) { + String str = String.valueOf(port); + schemeToPort.put(scheme, str); + portToScheme.put(str, scheme); + } +} diff --git a/vespajlib/src/main/java/com/yahoo/net/package-info.java b/vespajlib/src/main/java/com/yahoo/net/package-info.java new file mode 100644 index 00000000000..ab474304da2 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/net/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.net; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; |