aboutsummaryrefslogtreecommitdiffstats
path: root/vespajlib/src/main/java/com/yahoo/net
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
committer: Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
commit: 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree: 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /vespajlib/src/main/java/com/yahoo/net
Publish
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/net')
-rw-r--r--vespajlib/src/main/java/com/yahoo/net/HostName.java40
-rw-r--r--vespajlib/src/main/java/com/yahoo/net/LinuxInetAddress.java84
-rw-r--r--vespajlib/src/main/java/com/yahoo/net/URI.java819
-rw-r--r--vespajlib/src/main/java/com/yahoo/net/UriTools.java42
-rw-r--r--vespajlib/src/main/java/com/yahoo/net/Url.java253
-rw-r--r--vespajlib/src/main/java/com/yahoo/net/UrlToken.java103
-rw-r--r--vespajlib/src/main/java/com/yahoo/net/UrlTokenizer.java178
-rw-r--r--vespajlib/src/main/java/com/yahoo/net/package-info.java7
8 files changed, 1526 insertions, 0 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/net/HostName.java b/vespajlib/src/main/java/com/yahoo/net/HostName.java
new file mode 100644
index 00000000000..3fb1fe49efd
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/net/HostName.java
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.net;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+/**
+ * Utilities for getting the hostname on a system running with the JVM. This is moved here from the old
+ * HostSystem#getHostName in config-model.
+ *
+ * @author lulf
+ */
+public class HostName {
+
+ private static String myHost = null;
+
+ /**
+ * Static method that returns the name of localhost using shell
+ * command "hostname".
+ *
+ * @return the name of localhost.
+ * @throws RuntimeException if executing the command 'hostname' fails.
+ */
+ public static synchronized String getLocalhost() {
+ if (myHost == null) {
+ try {
+ Process p = Runtime.getRuntime().exec("hostname");
+ BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()));
+ myHost = in.readLine();
+ p.waitFor();
+ if (p.exitValue() != 0) {
+ throw new RuntimeException("Command 'hostname' failed: exit("+p.exitValue()+")");
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Failed when executing command 'hostname'", e);
+ }
+ }
+ return myHost;
+ }
+}
diff --git a/vespajlib/src/main/java/com/yahoo/net/LinuxInetAddress.java b/vespajlib/src/main/java/com/yahoo/net/LinuxInetAddress.java
new file mode 100644
index 00000000000..540f8300f95
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/net/LinuxInetAddress.java
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.net;
+
+import java.net.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+/**
+ * Utilities for returning localhost addresses on Linux.
+ * See
+ * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4665037
+ * on why this is necessary.
+ *
+ * @author bratseth
+ */
+public class LinuxInetAddress {
+
+    private static final Logger log = Logger.getLogger(LinuxInetAddress.class.getName());
+
+    /**
+     * Returns an InetAddress representing the address of the localhost.
+     * A non-loopback address is preferred if available.
+     * IPv4 is preferred over IPv6 if available.
+     * If the local host name can not be resolved, the loopback address is returned.
+     *
+     * @return a localhost address
+     * @throws UnknownHostException if the network interfaces can not be listed
+     */
+    public static InetAddress getLocalHost() throws UnknownHostException {
+        InetAddress localAddress;
+        try {
+            localAddress = InetAddress.getLocalHost();
+        } catch (UnknownHostException e) {
+            return InetAddress.getLoopbackAddress();
+        }
+
+        if ( ! localAddress.isLoopbackAddress()) return localAddress;
+
+        // The resolved address is a loopback address: look for something better on the interfaces
+        List<InetAddress> nonLoopbackAddresses =
+                getAllLocalFromNetwork().stream().filter(a -> ! a.isLoopbackAddress()).collect(Collectors.toList());
+        if (nonLoopbackAddresses.isEmpty()) return localAddress;
+
+        // First IPv4 address if there is one, otherwise the first address of any kind
+        return nonLoopbackAddresses.stream()
+                .filter(a -> a instanceof Inet4Address)
+                .findFirst()
+                .orElse(nonLoopbackAddresses.get(0));
+    }
+
+    /**
+     * Returns all local addresses of this host.
+     *
+     * @return an array of the addresses of this host
+     * @throws UnknownHostException if we cannot access the network
+     */
+    public static InetAddress[] getAllLocal() throws UnknownHostException {
+        InetAddress[] localInetAddresses = InetAddress.getAllByName("127.0.0.1");
+        if ( ! localInetAddresses[0].isLoopbackAddress()) return localInetAddresses;
+        return getAllLocalFromNetwork().toArray(new InetAddress[0]);
+    }
+
+    /**
+     * Returns the addresses of all network interfaces of this host.
+     *
+     * @return a list of the addresses of this host, empty if no interfaces are reported
+     * @throws UnknownHostException if the network interfaces can not be listed;
+     *         the underlying SocketException is attached as the cause
+     */
+    private static List<InetAddress> getAllLocalFromNetwork() throws UnknownHostException {
+        try {
+            List<InetAddress> addresses = new ArrayList<>();
+            Enumeration<NetworkInterface> interfaces = NetworkInterface.getNetworkInterfaces();
+            // Documented for older JDKs: null rather than an empty enumeration when nothing is found
+            if (interfaces == null) return addresses;
+            for (NetworkInterface networkInterface : Collections.list(interfaces))
+                addresses.addAll(Collections.list(networkInterface.getInetAddresses()));
+            return addresses;
+        }
+        catch (SocketException ex) {
+            // UnknownHostException has no cause-taking constructor
+            UnknownHostException unknownHost = new UnknownHostException("127.0.0.1");
+            unknownHost.initCause(ex);
+            throw unknownHost;
+        }
+    }
+
+}
diff --git a/vespajlib/src/main/java/com/yahoo/net/URI.java b/vespajlib/src/main/java/com/yahoo/net/URI.java
new file mode 100644
index 00000000000..1f9baa36c06
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/net/URI.java
@@ -0,0 +1,819 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.net;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import static com.yahoo.text.Lowercase.toLowerCase;
+
+/**
+ * <p>An URI. This is a pure (immutable) value object.</p>
+ *
+ * <p>This does more normalization of hierarchical URIs (URLs) than
+ * described in the RFC and allows hosts with underscores.</p>
+ *
+ * @author <a href="mailto:bratseth@fast.no">Jon S Bratseth</a>
+ */
+public class URI implements Cloneable, java.io.Serializable, Comparable<URI> {
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = 2271558213498856909L;
+
+ /** The uri string */
+ private String uri;
+
+ /** The scheme of the uri */
+ private String scheme = null;
+
+ /** The host part of the uri */
+ private String host = null;
+
+ /** The port number of the uri, or -1 if no port is explicitly given */
+ private int port = -1;
+
+ /** The part of the uri following the host (host and port) */
+ private String rest = null;
+
+ private static final Pattern tokenizePattern = Pattern.compile("[^\\w\\-]");
+
+ private boolean parsedDomain = false;
+ private String domain = null;
+
+ private boolean parsedMainTld = false;
+ private String mainTld = null;
+
+ private boolean parsedPath = false;
+ private String path = null;
+
+ private boolean parsedParams = false;
+ private String params = null;
+
+ private boolean parsedFilename = false;
+ private String filename = null;
+
+ private boolean parsedExtension = false;
+ private String extension = null;
+
+ private boolean parsedQuery = false;
+ private String query = null;
+
+ private boolean parsedFragment = false;
+ private String fragment = null;
+
+
+ /** The explanation of why this uri is invalid, or null if it is valid */
+ private String invalidExplanation = null;
+
+ /** True if this uri is opaque, false if it is hierarchical */
+ private boolean opaque = true;
+
+ /**
+ * <p>Creates an URI without keeping the fragment (the part starting with #).
+ * If the uri is hierarchical, it is normalized, and an attempt is made to repair
+ * incorrect hierarchical uris which look like urls.</p>
+ *
+ * <p>Relative uris are not supported.</p>
+ *
+ * @param uriString the uri string
+ * @throws NullPointerException if the given uriString is null
+ */
+ public URI(String uriString) {
+ this(uriString, false);
+ }
+
+ /**
+ * Creates an URI, optionally keeping the fragment (the part starting with #).
+ * If the uri is hierarchical, it is normalized, and an attempt is made to repair
+ * incorrect hierarchical uris which look like urls.
+ *
+ * <p>Relative uris are not supported.</p>
+ *
+ * @param uriString the uri string
+ * @param keepFragment true to keep the fragment
+ * @throws NullPointerException if the given uriString is null
+ */
+ public URI(String uriString, boolean keepFragment) {
+ this(uriString, keepFragment, false);
+ }
+
+    /**
+     * Creates an URI, optionally keeping the fragment (the part starting with #).
+     * A hierarchical uri is normalized, and an attempt is made to repair
+     * incorrect hierarchical uris which look like urls. If that fails the uri
+     * is still created, but is marked invalid with an explanation.
+     *
+     * <p>Relative uris are not supported.</p>
+     *
+     * @param uriString the uri string
+     * @param keepFragment true to keep the fragment
+     * @param hierarchicalOnly will force any uri string given to be parsed as
+     *        a hierarchical one, causing the uri to be invalid if it isn't
+     * @throws NullPointerException if the given uriString is null
+     */
+    public URI(String uriString, boolean keepFragment, boolean hierarchicalOnly) {
+        if (uriString == null) {
+            throw new NullPointerException("Can not create an uri from null");
+        }
+
+        String source = uriString;
+        if (!keepFragment) {
+            int hashPos = source.indexOf("#");
+            source = hashPos >= 0 ? source.substring(0, hashPos) : source;
+        }
+
+        try {
+            this.uri = source.trim();
+            opaque = isOpaque(uri);
+
+            boolean parseAsHierarchical = hierarchicalOnly || !isOpaque();
+            if (!parseAsHierarchical) {
+                return; // opaque uris are kept as given
+            }
+            opaque = false;
+            normalizeHierarchical();
+        } catch (IllegalArgumentException e) {
+            // Prefer the message of the exception, then that of its cause, then a generic text
+            String explanation = e.getMessage();
+            if (explanation == null) {
+                Throwable cause = e.getCause();
+                boolean causeExplains = cause != null && cause.getMessage() != null;
+                explanation = causeExplains ? cause.getMessage() : "Invalid uri: " + e;
+            }
+            invalidExplanation = explanation;
+        }
+    }
+
+ /**
+ * Creates an url type uri from its parts. The parts are combined into an uri string,
+ * which is then normalized like any other hierarchical uri.
+ *
+ * @param scheme the scheme of the uri
+ * @param host the host of the uri
+ * @param port the port of the uri, or -1 to leave it out
+ * @param rest what follows the host and port, or null if nothing
+ * @throws IllegalArgumentException if the combined uri can not be normalized; unlike in the
+ *         constructors taking an uri string, the exception is not caught here
+ */
+ public URI(String scheme, String host, int port, String rest) {
+ this.scheme = scheme;
+ this.host = host;
+ this.port = port;
+ this.rest = rest;
+ // Build the uri string from the parts first: normalizing starts from the string
+ recombine();
+ normalizeHierarchical();
+ opaque = false;
+ }
+
+    /**
+     * Returns whether an uri string is opaque or hierarchical.
+     * It is hierarchical only if it has a colon directly followed by a slash.
+     */
+    private boolean isOpaque(String uri) {
+        int afterColon = uri.indexOf(":") + 1;
+        if (afterColon == 0) {
+            return true; // no colon at all
+        }
+        boolean slashFollowsColon = afterColon < uri.length() && uri.charAt(afterColon) == '/';
+        return !slashFollowsColon;
+    }
+
+ /**
+ * Returns whether this is a valid URI (after normalizing).
+ * All non-hierarchical uri's containing a scheme is valid.
+ */
+ public boolean isValid() {
+ return invalidExplanation == null;
+ }
+
+ /**
+ * Normalizes this hierarchical uri according to RFC 2396 and the Overture
+ * standard. Before normalizing, some simple heuristics are used to make
+ * the uri complete if needed. After normalizing, the scheme,
+ * host, port and rest of this uri are set if defined.
+ *
+ * @throws IllegalArgumentException if this uri can not be normalized into a legal uri
+ */
+ private void normalizeHierarchical() {
+ // These three steps rewrite the uri string
+ complete();
+ escapeNonAscii();
+ unescapeHtmlEntities();
+ // Splits the uri string into scheme, host, port and rest
+ decompose();
+ // These steps work on the decomposed parts
+ lowCaseHost();
+ removeDefaultPortNumber();
+ removeTrailingHostDot();
+ makeDoubleSlashesSingle();
+ // Rebuilds the uri string from the normalized parts
+ recombine();
+ }
+
+    /**
+     * Applies simple heuristics to complete this uri if needed: adds a missing
+     * http or file scheme, and drops a slash put in front of "http:".
+     */
+    private void complete() {
+        boolean lacksHttpScheme = uri.startsWith("www.") || uri.startsWith("WWW");
+        if (lacksHttpScheme) {
+            uri = "http://" + uri;
+            return;
+        }
+        if (uri.startsWith("/http:")) {
+            uri = uri.substring(1);
+            return;
+        }
+        if (isFileURIShortHand(uri)) {
+            uri = "file://" + uri;
+        }
+    }
+
+    /**
+     * Returns whether the uri looks like a Windows file path: a single character
+     * followed by ":\", or a string starting with "c:/" or "d:/".
+     */
+    private boolean isFileURIShortHand(String uri) {
+        return uri.indexOf(":\\") == 1
+               || uri.startsWith("c:/")
+               || uri.startsWith("d:/");
+    }
+
+    /**
+     * Decomposes this uri into scheme, host, port and rest.
+     * A "file" uri without a host gets the host "localhost".
+     *
+     * @throws IllegalArgumentException if the uri string can not be parsed,
+     *         or if no host can be found in it
+     */
+    private void decompose() {
+        java.net.URI neturi = java.net.URI.create(uri).normalize();
+
+        scheme = neturi.getScheme();
+
+        host = neturi.getHost();
+        boolean portAlreadyParsed = false;
+
+        // No host if the host contains underscores: fall back to the authority
+        if (host == null) {
+            host = neturi.getAuthority();
+            if (host != null) {
+                int colonPos = host.lastIndexOf(":");
+                // Constant-first equals: the scheme is null when the uri string has none
+                if (!"file".equals(scheme) && colonPos > -1) {
+                    // we probably have an (illegal) URI of type http://under_score.com:5000/
+                    try {
+                        port = Integer.parseInt(host.substring(colonPos + 1));
+                        host = host.substring(0, colonPos);
+                        portAlreadyParsed = true;
+                    } catch (NumberFormatException ignored) {
+                        // What follows the colon is not a port: keep the authority as the host
+                    }
+                }
+            }
+        }
+
+        if ("file".equalsIgnoreCase(scheme)) {
+            if (host == null) {
+                host = "localhost";
+            } else {
+                host = repairWindowsDrive(host, uri);
+            }
+        }
+        if (host == null) {
+            throw new IllegalArgumentException(
+                    "A complete uri must specify a host");
+        }
+        if (!portAlreadyParsed) {
+            port = neturi.getPort();
+        }
+
+        // The rest is the raw path, followed by the query and fragment if present
+        StringBuilder restBuilder = new StringBuilder();
+        if (neturi.getRawPath() != null) {
+            restBuilder.append(neturi.getRawPath());
+        }
+        if (neturi.getRawQuery() != null) {
+            restBuilder.append("?").append(neturi.getRawQuery());
+        }
+        if (neturi.getRawFragment() != null) {
+            restBuilder.append("#").append(neturi.getRawFragment());
+        }
+        rest = restBuilder.toString();
+    }
+
+    /**
+     * A drive such as "c:" turns into "c" when interpreted by URI. This puts the colon back
+     * when the host is a single character found as a drive in the uri string.
+     */
+    private String repairWindowsDrive(String host, String uri) {
+        if (host.length() == 1) {
+            int driveIndex = uri.indexOf(host + ":");
+            // file:<drive> or file://<drive>
+            boolean driveFollowsScheme = driveIndex == 5 || driveIndex == 7;
+            if (driveFollowsScheme) {
+                return host + ":";
+            }
+        }
+        return host;
+    }
+
+    /**
+     * "http://a/\u00E6" → "http://a/%e6;"
+     * Each character at or above 0x80, and each double quote, is replaced by
+     * a percent sign, its hexadecimal character code and a semicolon.
+     */
+    private void escapeNonAscii() {
+        int length = uri.length();
+        StringBuilder escaped = new StringBuilder(length);
+
+        for (int i = 0; i < length; i++) {
+            char current = uri.charAt(i);
+            boolean mustEscape = current >= 0x80 || current == 0x22;
+            if (mustEscape) {
+                escaped.append("%").append(Integer.toHexString(current)).append(";");
+            } else {
+                escaped.append(current);
+            }
+        }
+        uri = escaped.toString();
+    }
+
+    /**
+     * "http://a/&amp;amp;" → "http://a/&amp;" Currently ampersand only.
+     * Each escaped ampersand is replaced once, from left to right.
+     */
+    private void unescapeHtmlEntities() {
+        uri = uri.replace("&amp;", "&");
+    }
+
+ /** Lowercases the host only: "http://A" → "http://a" */
+ private void lowCaseHost() {
+ host = toLowerCase(host);
+ }
+
+    /** "http://a:80" → "http://a" and "https://a:443" → "https://a" */
+    private void removeDefaultPortNumber() {
+        // Constant-first equals: the scheme is null when the uri string has none
+        boolean defaultHttpPort = port == 80 && "http".equals(scheme);
+        boolean defaultHttpsPort = port == 443 && "https".equals(scheme);
+        if (defaultHttpPort || defaultHttpsPort) {
+            port = -1;
+        }
+    }
+
+    /** "http://a./b" → "http://a/b": removes one dot from the end of the host */
+    private void removeTrailingHostDot() {
+        int lastIndex = host.length() - 1;
+        boolean endsWithDot = lastIndex >= 0 && host.charAt(lastIndex) == '.';
+        if (endsWithDot) {
+            host = host.substring(0, lastIndex);
+        }
+    }
+
+    /**
+     * "http://a//b" → "http://a/b"
+     * Drops every slash in the rest which is directly followed by another slash.
+     */
+    private void makeDoubleSlashesSingle() {
+        int length = rest.length();
+        StringBuilder collapsed = new StringBuilder(length);
+
+        for (int i = 0; i < length; i++) {
+            char current = rest.charAt(i);
+            boolean slashBeforeSlash = current == '/'
+                                       && i + 1 < length
+                                       && rest.charAt(i + 1) == '/';
+            if (slashBeforeSlash) {
+                continue; // the last slash of the run is the one kept
+            }
+            collapsed.append(current);
+        }
+        rest = collapsed.toString();
+    }
+
+    /**
+     * Recombines the uri from the scheme, host, port and rest.
+     * The part after the host and port always starts with a slash.
+     */
+    private void recombine() {
+        StringBuilder recombined = new StringBuilder(100);
+
+        recombined.append(scheme).append("://").append(host);
+        if (port > -1) {
+            recombined.append(":").append(port);
+        }
+        // With no rest, this is a RFC 2396 violation, as required by search
+        boolean needsSlash = rest == null || !rest.startsWith("/");
+        if (needsSlash) {
+            recombined.append("/");
+        }
+        if (rest != null) {
+            recombined.append(rest);
+        }
+        uri = recombined.toString();
+    }
+
+ /**
+ * Returns the normalized scheme of this URI.
+ *
+ * @return the normalized scheme (protocol), or null if there is none,
+ * which may only be the case with non-hierarchical URIs
+ */
+ public String getScheme() {
+ return scheme;
+ }
+
+ /**
+ * Returns whether this URI is hierarchical or opaque.
+ * A typical example of a hierarchical URI is an URL,
+ * while opaque URIs are mailto, news and such.
+ *
+ * @return true if the url is opaque, false if it is hierarchical
+ */
+ public boolean isOpaque() {
+ return opaque;
+ }
+
+ /**
+ * Returns the normalized host of this URI.
+ *
+ * @return the normalized host, or null if there is none, which may
+ * only be the case if this is a non-hierarchical uri
+ */
+ public String getHost() {
+ return host;
+ }
+
+ /** Returns the port number of this scheme if set explicitly, or -1 otherwise */
+ public int getPort() {
+ return port;
+ }
+
+ /**
+ * Returns the <i>rest</i> of this uri, that is what is following the host or port.
+ * This is path, query and fragment as defined in RFC 2396.
+ *
+ * @return the rest, an empty string if the rest is just "/",
+ *         or null if no rest is set for this uri
+ */
+ public String getRest() {
+ if (rest == null) {
+ return null;
+ } else if (rest.equals("/")) {
+ return "";
+ } else {
+ return rest;
+ }
+ }
+
+    /**
+     * Returns the domain of this uri: the host without its first label when the host
+     * has more than one dot ("www.host.com" gives "host.com"), otherwise the whole host.
+     * The result is computed once and then reused.
+     *
+     * @return the domain, or null if this uri has no host
+     */
+    public String getDomain() {
+        if (parsedDomain) {
+            return domain;
+        }
+        String hostName = getHost();
+        if (hostName == null) return null;
+
+        int firstDot = hostName.indexOf(".");
+        //TODO: Must be corrected when implementing tldlist
+        boolean severalDots = firstDot >= 0 && firstDot != hostName.lastIndexOf(".");
+
+        this.domain = severalDots ? hostName.substring(firstDot + 1) : hostName;
+        this.parsedDomain = true;
+        return this.domain;
+    }
+
+    /**
+     * Returns the last label of the host: for www.yahoo.co.uk this is uk.
+     * The result is computed once and then reused.
+     *
+     * @return the main tld, or null if this uri has no host, or the host
+     *         has no dot or ends with a dot
+     */
+    public String getMainTld() {
+        if (parsedMainTld) {
+            return mainTld;
+        }
+        String hostName = getHost();
+        if (hostName == null) return null;
+
+        int lastDot = hostName.lastIndexOf(".");
+        //TODO: Implement list of TLDs from config?
+        boolean textFollowsDot = lastDot >= 0 && lastDot < hostName.length() - 1;
+
+        this.mainTld = textFollowsDot ? hostName.substring(lastDot + 1) : null;
+        this.parsedMainTld = true;
+        return this.mainTld;
+    }
+
+    /**
+     * Returns the path of this uri: the rest with the fragment and the query removed.
+     * Path parameters (following a semicolon) are kept.
+     * The result is computed once and then reused.
+     *
+     * @return the path, or null if no rest is set for this uri
+     */
+    public String getPath() {
+        if (parsedPath) {
+            return path;
+        }
+        String rest = this.rest;
+        if (rest == null) return null;
+
+        rest = removeFragment(rest);
+
+        // The query starts at the FIRST question mark: a query may itself contain question marks
+        int queryPos = rest.indexOf("?");
+        if (queryPos > -1) {
+            rest = rest.substring(0, queryPos);
+        }
+        this.parsedPath = true;
+        this.path = rest;
+        return this.path;
+    }
+
+    /** Returns the given string cut before its last "#", or unchanged if it has none */
+    private String removeFragment(String path) {
+        int hashPos = path.lastIndexOf("#");
+        if (hashPos < 0) {
+            return path;
+        }
+        return path.substring(0, hashPos);
+    }
+
+    /**
+     * Returns the filename of this uri: what follows the last slash of the path,
+     * after anything from the first semicolon on (the params) is removed.
+     * The result is computed once and then reused.
+     *
+     * @return the filename, empty if the path ends with a slash,
+     *         or null if this uri has no path
+     */
+    public String getFilename() {
+        if (parsedFilename) {
+            return filename;
+        }
+        String fullPath = getPath();
+        if (fullPath == null) return null;
+
+        String withoutParams = removeParams(fullPath);
+
+        // No slash: the index is -1 and everything is kept. Slash last: nothing is left.
+        int lastSlash = withoutParams.lastIndexOf("/");
+        this.filename = withoutParams.substring(lastSlash + 1);
+        this.parsedFilename = true;
+        return this.filename;
+    }
+
+    /** Returns the given string cut before its first semicolon, or unchanged if it has none */
+    private String removeParams(String filename) {
+        int firstSemicolon = filename.indexOf(";");
+        return firstSemicolon < 0 ? filename : filename.substring(0, firstSemicolon);
+    }
+
+    /**
+     * Returns the extension of the filename of this uri: what follows its last dot.
+     * The result is computed once and then reused.
+     *
+     * @return the extension, or null if there is no filename, or the filename
+     *         has no dot or ends with a dot
+     */
+    public String getExtension() {
+        if (parsedExtension) {
+            return extension;
+        }
+        String name = getFilename();
+        if (name == null) return null;
+
+        int lastDot = name.lastIndexOf(".");
+        boolean textFollowsDot = lastDot >= 0 && lastDot < name.length() - 1;
+
+        this.extension = textFollowsDot ? name.substring(lastDot + 1) : null;
+        this.parsedExtension = true;
+        return this.extension;
+    }
+
+    /**
+     * Returns the query of this uri: what follows the first question mark
+     * of the rest, with the fragment removed.
+     * The result is computed once and then reused.
+     *
+     * @return the query, or null if there is no question mark or no rest is set
+     */
+    public String getQuery() {
+        if (parsedQuery) {
+            return query;
+        }
+        String rest = this.rest;
+        if (rest == null) return null;
+
+        rest = removeFragment(rest);
+
+        // The query starts at the FIRST question mark: a query may itself contain question marks
+        int queryPos = rest.indexOf("?");
+        String query = null;
+        if (queryPos > -1) {
+            query = rest.substring(queryPos + 1);
+        }
+        this.parsedQuery = true;
+        this.query = query;
+        return query;
+    }
+
+    /**
+     * Returns the fragment of this uri: what follows the last "#" of the rest.
+     * The result is computed once and then reused.
+     *
+     * @return the fragment, or null if there is no "#" or no rest is set
+     */
+    public String getFragment() {
+        if (parsedFragment) {
+            return fragment;
+        }
+        String source = this.rest;
+        if (source == null) return null;
+
+        int hashPos = source.lastIndexOf("#");
+        this.fragment = hashPos > -1 ? source.substring(hashPos + 1) : null;
+        this.parsedFragment = true;
+        return this.fragment;
+    }
+
+    /**
+     * Returns the params of this uri: what follows the first semicolon of the path.
+     * The result is computed once and then reused.
+     *
+     * @return the params, or null if there is no path, or the path
+     *         has no semicolon or ends with a semicolon
+     */
+    public String getParams() {
+        if (parsedParams) {
+            return params;
+        }
+        String fullPath = getPath();
+        if (fullPath == null) return null;
+
+        int firstSemicolon = fullPath.indexOf(";");
+        boolean textFollowsSemicolon = firstSemicolon >= 0 && firstSemicolon < fullPath.length() - 1;
+
+        this.params = textFollowsSemicolon ? fullPath.substring(firstSemicolon + 1) : null;
+        this.parsedParams = true;
+        return this.params;
+    }
+
+ public static String[] tokenize(String item) {
+ return tokenizePattern.split(item);
+ }
+
+ /**
+ * Tokenizes the scheme, host, port, path, query and fragment of this uri, in that order.
+ * A part which is not set contributes no tokens. The port contributes tokens only when it
+ * is set (above -1). No tokens are produced for the domain, main tld, filename,
+ * extension and params contexts.
+ *
+ * @return the tokens of this uri, each tagged with the context it was found in
+ */
+ public List<Token> tokenize() {
+ List<Token> tokens = new ArrayList<>();
+
+ tokens.addAll(tokenize(URLContext.URL_SCHEME, getScheme()));
+ tokens.addAll(tokenize(URLContext.URL_HOST, getHost()));
+ tokens.addAll(tokenize(URLContext.URL_PORT, getPort() > -1 ? "" + getPort() : null));
+ tokens.addAll(tokenize(URLContext.URL_PATH, getPath()));
+ tokens.addAll(tokenize(URLContext.URL_QUERY, getQuery()));
+ tokens.addAll(tokenize(URLContext.URL_FRAGMENT, getFragment()));
+
+ return tokens;
+ }
+
+    /**
+     * Splits the item into tokens tagged with the given context, leaving out empty strings.
+     *
+     * @return the tokens, which is an empty list if the item is null
+     */
+    private List<Token> tokenize(URLContext context, String item) {
+        if (item == null) {
+            return new ArrayList<>(0);
+        }
+        String[] parts = tokenize(item);
+        List<Token> result = new ArrayList<>(parts.length);
+        for (String part : parts) {
+            if (part.length() == 0) {
+                continue; // produced by adjacent separator characters
+            }
+            result.add(new Token(context, part));
+        }
+        return result;
+    }
+
+ /** Returns an explanation of why this uri is invalid, or null if it is valid */
+ public String getInvalidExplanation() {
+ return invalidExplanation;
+ }
+
+ public int hashCode() {
+ return uri.hashCode();
+ }
+
+    /** Returns true if the given object is an URI having the same string value as this */
+    public boolean equals(Object object) {
+        return object instanceof URI && toString().equals(object.toString());
+    }
+
+ public int compareTo(URI object) {
+ return toString().compareTo(object.toString());
+ }
+
+ public Object clone() {
+ try {
+ return super.clone();
+ } catch (CloneNotSupportedException e) {
+ throw new RuntimeException("Someone made me unclonable!", e);
+ }
+ }
+
+ /** Returns a new URI with a changed scheme */
+ public URI setScheme(String scheme) {
+ return new URI(scheme, host, port, rest);
+ }
+
+ /** Returns a new URI with a changed host (or authority) */
+ public URI setHost(String host) {
+ return new URI(scheme, host, port, rest);
+ }
+
+ /** Returns a new URI with a changed port */
+ public URI setPort(int port) {
+ return new URI(scheme, host, port, rest);
+ }
+
+ /** Returns a new URI with a changed rest */
+ public URI setRest(String rest) {
+ return new URI(scheme, host, port, rest);
+ }
+
+ /** Returns a new uri with the an additional parameter */
+ public URI addParameter(String name, String value) {
+ String newRest = rest;
+
+ if (newRest == null) {
+ newRest = "";
+ }
+ if (newRest.indexOf("?") < 0) {
+ newRest += "?";
+ } else {
+ newRest += "&";
+ }
+ newRest += name + "=" + value;
+ return new URI(scheme, host, port, newRest);
+ }
+
+ /** Returns this uri as a string */
+ public String stringValue() {
+ return uri;
+ }
+
+ /** Returns this URI as a string */
+ public String toString() {
+ return uri;
+ }
+
+    /**
+     * Returns the depth of this uri.
+     * The depth of an hierarchical uri equals the number of slashes
+     * which are not separating the protocol and the host, and not at the end.
+     *
+     * @return the depth of this uri if it is hierarchical, or 0 if it is opaque
+     *         or has no colon
+     */
+    public int getDepth() {
+        if (isOpaque()) {
+            return 0;
+        }
+        int colonIndex = uri.indexOf(':');
+        if (colonIndex < 0) {
+            return 0; // nothing to count from, as in an invalid uri
+        }
+
+        // Count the slashes from the colon on
+        int depth = 0;
+        for (int i = uri.indexOf('/', colonIndex); i != -1; i = uri.indexOf('/', i + 1)) {
+            depth++;
+        }
+
+        // The slashes right after the colon separate the scheme and the host.
+        // The length checks protect against uris ending at, or right after, the colon.
+        if (uri.length() > colonIndex + 1 && uri.charAt(colonIndex + 1) == '/') {
+            depth--;
+        }
+        if (uri.length() > colonIndex + 2 && uri.charAt(colonIndex + 2) == '/') {
+            depth--;
+        }
+        // A slash at the end adds no depth
+        if ((uri.charAt(uri.length() - 1) == '/')
+            && ((uri.length() - 1) > (colonIndex + 2))) {
+            depth--;
+        }
+        return depth;
+    }
+
+
+ public static class Token {
+ private final URLContext context;
+ private final String token;
+
+ private Token(URLContext context, String token) {
+ this.context = context;
+ this.token = token;
+ }
+
+ public URLContext getContext() {
+ return context;
+ }
+
+ public String getToken() {
+ return token;
+ }
+ }
+
+    /** The parts of an url a token may come from, each with a numeric id and a lowercase name */
+    public enum URLContext {
+        URL_SCHEME(0, "scheme"),
+        URL_HOST(1, "host"),
+        URL_DOMAIN(2, "domain"),
+        URL_MAINTLD(3, "maintld"),
+        URL_PORT(4, "port"),
+        URL_PATH(5, "path"),
+        URL_FILENAME(6, "filename"),
+        URL_EXTENSION(7, "extension"),
+        URL_PARAMS(8, "params"),
+        URL_QUERY(9, "query"),
+        URL_FRAGMENT(10, "fragment");
+
+        /** The numeric id of this context */
+        public final int id;
+
+        /** The lowercase name of this context */
+        public final String name;
+
+        URLContext(int id, String name) {
+            this.id = id;
+            this.name = name;
+        }
+    }
+}
diff --git a/vespajlib/src/main/java/com/yahoo/net/UriTools.java b/vespajlib/src/main/java/com/yahoo/net/UriTools.java
new file mode 100644
index 00000000000..34d88713274
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/net/UriTools.java
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.net;
+
+import java.net.URI;
+
+/**
+ * Utility methods for working with URIs.
+ *
+ * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ */
+public final class UriTools {
+    private UriTools() {
+    }
+
+    /**
+     * Build a string representation of the given URI containing the raw path
+     * and optionally the raw query and fragment parts. The query part
+     * will be delimited from the preceding data with "?" and the fragment with
+     * "#". A part the URI does not have is left out, so an opaque URI such as
+     * "mailto:a@b.com", which has neither path, query nor fragment, gives an empty string.
+     *
+     * @param uri
+     *            source for path, query and fragment in returned data
+     * @return a string containing path, and optionally query and fragment,
+     *         delimited by question mark and hash
+     */
+    public static String rawRequest(final URI uri) {
+        final String rawPath = uri.getRawPath();
+        final String rawQuery = uri.getRawQuery();
+        final String rawFragment = uri.getRawFragment();
+        final StringBuilder rawRequest = new StringBuilder();
+
+        // The path is null for opaque URIs: appending it unchecked would add the text "null"
+        if (rawPath != null) {
+            rawRequest.append(rawPath);
+        }
+
+        if (rawQuery != null) {
+            rawRequest.append("?").append(rawQuery);
+        }
+
+        if (rawFragment != null) {
+            rawRequest.append("#").append(rawFragment);
+        }
+
+        return rawRequest.toString();
+    }
+}
diff --git a/vespajlib/src/main/java/com/yahoo/net/Url.java b/vespajlib/src/main/java/com/yahoo/net/Url.java
new file mode 100644
index 00000000000..33571f9eb34
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/net/Url.java
@@ -0,0 +1,253 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.net;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class Url {
+
+ private static final Pattern pattern = Pattern.compile(
+ //12 3 456 7 8 9ab c d e f g h i j
+ // 2 1 6 87 5 c b ed a4 f hg ji
+ "^(([^:/?#]+):)?(//((([^:@/?#]+)(:([^@/?#]+))?@))?(((\\[([^\\]]+)\\]|[^:/?#]+)(:([^/?#]+))?)))?([^?#]+)?(\\?([^#]*))?(#(.*))?");
+ private final String image;
+ private final int schemeBegin;
+ private final int schemeEnd;
+ private final int userInfoBegin;
+ private final int userInfoEnd;
+ private final int passwordBegin;
+ private final int passwordEnd;
+ private final int hostBegin;
+ private final int hostEnd;
+ private final int portBegin;
+ private final int portEnd;
+ private final int pathBegin;
+ private final int pathEnd;
+ private final int queryBegin;
+ private final int queryEnd;
+ private final int fragmentBegin;
+ private final int fragmentEnd;
+
+ /**
+ * Creates an url from its parts. The parts are written to the string image of the url
+ * in order, while the begin and end offsets of each part in the image are recorded.
+ * A part which is null is left out and gets an empty range (begin equal to end).
+ * The delimiters (":", "//", "@", "[", "]", "?" and "#") are outside the recorded ranges.
+ *
+ * @param scheme the scheme, or null
+ * @param user the user, or null
+ * @param password the password, or null
+ * @param host the host, or null; a host containing a colon is written inside brackets
+ * @param port the port, or null
+ * @param path the path, or null
+ * @param query the query, without the leading "?", or null
+ * @param fragment the fragment, without the leading "#", or null
+ */
+ public Url(String scheme, String user, String password, String host, Integer port, String path, String query,
+ String fragment)
+ {
+ StringBuilder image = new StringBuilder();
+ schemeBegin = image.length();
+ if (scheme != null) {
+ image.append(scheme);
+ schemeEnd = image.length();
+ image.append(':');
+ } else {
+ schemeEnd = schemeBegin;
+ }
+ // The "//" authority marker is written only when there is a host
+ if (host != null) {
+ image.append("//");
+ }
+ userInfoBegin = image.length();
+ if (user != null) {
+ image.append(user);
+ userInfoEnd = image.length();
+ } else {
+ userInfoEnd = userInfoBegin;
+ }
+ if (password != null) {
+ image.append(':');
+ passwordBegin = image.length();
+ image.append(password);
+ passwordEnd = image.length();
+ } else {
+ passwordBegin = image.length();
+ passwordEnd = passwordBegin;
+ }
+ if (user != null || password != null) {
+ image.append('@');
+ }
+ if (host != null) {
+ // A host containing a colon is bracketed; the brackets are not part of the host range
+ boolean esc = host.indexOf(':') >= 0;
+ if (esc) {
+ image.append('[');
+ }
+ hostBegin = image.length();
+ image.append(host);
+ hostEnd = image.length();
+ if (esc) {
+ image.append(']');
+ }
+ } else {
+ hostBegin = image.length();
+ hostEnd = hostBegin;
+ }
+ if (port != null) {
+ image.append(':');
+ portBegin = image.length();
+ image.append(port);
+ portEnd = image.length();
+ } else {
+ portBegin = image.length();
+ portEnd = portBegin;
+ }
+ pathBegin = image.length();
+ if (path != null) {
+ image.append(path);
+ pathEnd = image.length();
+ } else {
+ pathEnd = pathBegin;
+ }
+ if (query != null) {
+ image.append('?');
+ queryBegin = image.length();
+ image.append(query);
+ queryEnd = image.length();
+ } else {
+ queryBegin = image.length();
+ queryEnd = queryBegin;
+ }
+ if (fragment != null) {
+ image.append("#");
+ fragmentBegin = image.length();
+ image.append(fragment);
+ fragmentEnd = image.length();
+ } else {
+ fragmentBegin = image.length();
+ fragmentEnd = fragmentBegin;
+ }
+ this.image = image.toString();
+ }
+
+    /**
+     * Parses the given string into an url.
+     *
+     * @param image the string to parse
+     * @return an url made from the parts found in the string
+     * @throws IllegalArgumentException if the string does not match the url pattern, or
+     *         (as a NumberFormatException) if the port found is not an integer
+     */
+    public static Url fromString(String image) {
+        Matcher matcher = pattern.matcher(image);
+        if (!matcher.matches()) {
+            throw new IllegalArgumentException("Malformed URL.");
+        }
+        // Group 12 is the content of a bracketed host, group 11 the host as written.
+        // Group 11 takes part in every match of the enclosing host and port group,
+        // so no further fallback is needed.
+        String host = matcher.group(12);
+        if (host == null) {
+            host = matcher.group(11);
+        }
+        String port = matcher.group(14);
+        return new Url(matcher.group(2), matcher.group(6), matcher.group(8), host,
+                       port != null ? Integer.valueOf(port) : null, matcher.group(15), matcher.group(17),
+                       matcher.group(19));
+    }
+
+ public int getSchemeBegin() {
+ return schemeBegin;
+ }
+
+ public int getSchemeEnd() {
+ return schemeEnd;
+ }
+
+ public int getUserInfoBegin() {
+ return userInfoBegin;
+ }
+
+ public int getUserInfoEnd() {
+ return userInfoEnd;
+ }
+
+ public int getPasswordBegin() {
+ return passwordBegin;
+ }
+
+ public int getPasswordEnd() {
+ return passwordEnd;
+ }
+
+ public int getHostBegin() {
+ return hostBegin;
+ }
+
+ public int getHostEnd() {
+ return hostEnd;
+ }
+
+ public int getPortBegin() {
+ return portBegin;
+ }
+
+ public int getPortEnd() {
+ return portEnd;
+ }
+
+ public int getPathBegin() {
+ return pathBegin;
+ }
+
+ public int getPathEnd() {
+ return pathEnd;
+ }
+
+ public int getQueryBegin() {
+ return queryBegin;
+ }
+
+ public int getQueryEnd() {
+ return queryEnd;
+ }
+
+ public int getFragmentBegin() {
+ return fragmentBegin;
+ }
+
+ public int getFragmentEnd() {
+ return fragmentEnd;
+ }
+
+ public String getScheme() {
+ return schemeBegin < schemeEnd ? image.substring(schemeBegin, schemeEnd) : null;
+ }
+
+ public String getUserInfo() {
+ return userInfoBegin < userInfoEnd ? image.substring(userInfoBegin, userInfoEnd) : null;
+ }
+
+ public String getPassword() {
+ return passwordBegin < passwordEnd ? image.substring(passwordBegin, passwordEnd) : null;
+ }
+
+ public String getHost() {
+ return hostBegin < hostEnd ? image.substring(hostBegin, hostEnd) : null;
+ }
+
+ public Integer getPort() {
+ String str = getPortString();
+ return str != null ? Integer.valueOf(str) : null;
+ }
+
+ public String getPortString() {
+ return portBegin < portEnd ? image.substring(portBegin, portEnd) : null;
+ }
+
+ public String getPath() {
+ return pathBegin < pathEnd ? image.substring(pathBegin, pathEnd) : null;
+ }
+
+ public String getQuery() {
+ return queryBegin < queryEnd ? image.substring(queryBegin, queryEnd) : null;
+ }
+
+ public String getFragment() {
+ return fragmentBegin < fragmentEnd ? image.substring(fragmentBegin, fragmentEnd) : null;
+ }
+
+ @Override
+ public int hashCode() {
+ return image.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return (obj instanceof Url) && image.equals(((Url)obj).image);
+ }
+
+ @Override
+ public String toString() {
+ return image;
+ }
+}
diff --git a/vespajlib/src/main/java/com/yahoo/net/UrlToken.java b/vespajlib/src/main/java/com/yahoo/net/UrlToken.java
new file mode 100644
index 00000000000..785c3b1fe43
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/net/UrlToken.java
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.net;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class UrlToken {
+
+ public enum Type {
+ SCHEME,
+ USERINFO,
+ PASSWORD,
+ HOST,
+ PORT,
+ PATH,
+ QUERY,
+ FRAGMENT
+ }
+
+ private final Type type;
+ private final int offset;
+ private final String orig;
+ private final String term;
+
+ public UrlToken(Type type, int offset, String orig, String term) {
+ if (type == null) {
+ throw new NullPointerException();
+ }
+ this.type = type;
+ this.offset = offset;
+ this.orig = orig;
+ this.term = term;
+ }
+
+ public Type getType() {
+ return type;
+ }
+
+ public int getOffset() {
+ return offset;
+ }
+
+ public int getLength() {
+ return orig != null ? orig.length() : 0;
+ }
+
+ public String getOrig() {
+ return orig;
+ }
+
+ public String getTerm() {
+ return term;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (!(obj instanceof UrlToken)) {
+ return false;
+ }
+ UrlToken rhs = (UrlToken)obj;
+ if (offset != rhs.offset) {
+ return false;
+ }
+ if (orig != null ? !orig.equals(rhs.orig) : rhs.orig != null) {
+ return false;
+ }
+ if (term != null ? !term.equals(rhs.term) : rhs.term != null) {
+ return false;
+ }
+ if (type != rhs.type) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = type != null ? type.hashCode() : 0;
+ result = 31 * result + offset;
+ result = 31 * result + (orig != null ? orig.hashCode() : 0);
+ result = 31 * result + (term != null ? term.hashCode() : 0);
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder ret = new StringBuilder("UrlToken(");
+ ret.append("type=").append(type).append(", ");
+ ret.append("offset=").append(offset).append(", ");
+ if (orig != null) {
+ ret.append("orig='").append(orig).append("', ");
+ }
+ if (term != null) {
+ ret.append("term='").append(term).append("', ");
+ }
+ ret.setLength(ret.length() - 2);
+ ret.append(")");
+ return ret.toString();
+ }
+}
diff --git a/vespajlib/src/main/java/com/yahoo/net/UrlTokenizer.java b/vespajlib/src/main/java/com/yahoo/net/UrlTokenizer.java
new file mode 100644
index 00000000000..ec617607b8a
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/net/UrlTokenizer.java
@@ -0,0 +1,178 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.net;
+
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class UrlTokenizer {
+
+ public static final String TERM_STARTHOST = "StArThOsT";
+ public static final String TERM_ENDHOST = "EnDhOsT";
+
+ private static final Map<String, String> schemeToPort = new HashMap<>();
+ private static final Map<String, String> portToScheme = new HashMap<>();
+ private static final char TO_LOWER = (char)('A' - 'a');
+ private final Url url;
+
+ static {
+ registerScheme("ftp", 21);
+ registerScheme("gopher", 70);
+ registerScheme("http", 80);
+ registerScheme("https", 443);
+ registerScheme("imap", 143);
+ registerScheme("mailto", 25);
+ registerScheme("news", 119);
+ registerScheme("nntp", 119);
+ registerScheme("pop", 110);
+ registerScheme("rsync", 873);
+ registerScheme("rtsp", 554);
+ registerScheme("sftp", 22);
+ registerScheme("shttp", 443);
+ registerScheme("sip", 5060);
+ registerScheme("sips", 5061);
+ registerScheme("snmp", 161);
+ registerScheme("ssh", 22);
+ registerScheme("telnet", 23);
+ registerScheme("tftp", 69);
+ }
+
+ public UrlTokenizer(String url) {
+ this(Url.fromString(url));
+ }
+
+ public UrlTokenizer(Url url) {
+ this.url = url;
+ }
+
+ private String guessScheme(String port) {
+ String scheme = portToScheme.get(port);
+ if (scheme != null) {
+ return scheme;
+ }
+ return "http";
+ }
+
+ private String guessPort(String scheme) {
+ String port = schemeToPort.get(scheme);
+ if (port != null) {
+ return port;
+ }
+ return null;
+ }
+
+ public List<UrlToken> tokenize() {
+ List<UrlToken> lst = new LinkedList<>();
+
+ int offset = 0;
+ String port = url.getPortString();
+ String scheme = url.getScheme();
+ if (scheme == null) {
+ scheme = guessScheme(port);
+ addTokens(lst, UrlToken.Type.SCHEME, offset, scheme, false);
+ } else {
+ addTokens(lst, UrlToken.Type.SCHEME, url.getSchemeBegin(), scheme, true);
+ offset = url.getSchemeEnd();
+ }
+
+ String userInfo = url.getUserInfo();
+ if (userInfo != null) {
+ addTokens(lst, UrlToken.Type.USERINFO, url.getUserInfoBegin(), userInfo, true);
+ offset = url.getUserInfoEnd();
+ }
+
+ String password = url.getPassword();
+ if (password != null) {
+ addTokens(lst, UrlToken.Type.PASSWORD, url.getPasswordBegin(), password, true);
+ offset = url.getPasswordEnd();
+ }
+
+ String host = url.getHost();
+ if (host == null || host.isEmpty()) {
+ if (host != null) {
+ offset = url.getHostBegin();
+ }
+ if ("file".equalsIgnoreCase(scheme)) {
+ addHostTokens(lst, offset, offset, "localhost", false);
+ }
+ } else {
+ addHostTokens(lst, url.getHostBegin(), url.getHostEnd(), host, true);
+ offset = url.getHostEnd();
+ }
+
+ port = url.getPortString();
+ if (port == null) {
+ if ((port = guessPort(scheme)) != null) {
+ addTokens(lst, UrlToken.Type.PORT, offset, port, false);
+ }
+ } else {
+ addTokens(lst, UrlToken.Type.PORT, url.getPortBegin(), port, true);
+ }
+
+ String path = url.getPath();
+ if (path != null) {
+ addTokens(lst, UrlToken.Type.PATH, url.getPathBegin(), path, true);
+ }
+
+ String query = url.getQuery();
+ if (query != null) {
+ addTokens(lst, UrlToken.Type.QUERY, url.getQueryBegin(), query, true);
+ }
+
+ String fragment = url.getFragment();
+ if (fragment != null) {
+ addTokens(lst, UrlToken.Type.FRAGMENT, url.getFragmentBegin(), fragment, true);
+ }
+
+ return lst;
+ }
+
+ public static void addTokens(List<UrlToken> lst, UrlToken.Type type, int offset, String image, boolean orig) {
+ StringBuilder term = new StringBuilder();
+ int prev = 0;
+ for (int skip, next = 0, len = image.length(); next < len; next += skip) {
+ char c = image.charAt(next);
+ if (c == '%') {
+ c = (char)Integer.parseInt(image.substring(next + 1, next + 3), 16);
+ skip = 3;
+ } else {
+ skip = 1;
+ }
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'a' && c <= 'z') ||
+ (c == '-' || c == '_'))
+ {
+ term.append(c);
+ } else if (c >= 'A' && c <= 'Z') {
+ term.append((char)(c - TO_LOWER));
+ } else {
+ if (prev < next) {
+ lst.add(new UrlToken(type, offset + (orig ? prev : 0), orig ? image.substring(prev, next) : null,
+ term.toString()));
+ term = new StringBuilder();
+ }
+ prev = next + skip;
+ }
+ }
+ if (term.length() > 0) {
+ lst.add(new UrlToken(type, offset + (orig ? prev : 0), orig ? image.substring(prev) : null,
+ term.toString()));
+ }
+ }
+
+ private static void addHostTokens(List<UrlToken> lst, int begin, int end, String image, boolean orig) {
+ lst.add(new UrlToken(UrlToken.Type.HOST, begin, null, TERM_STARTHOST));
+ addTokens(lst, UrlToken.Type.HOST, begin, image, orig);
+ lst.add(new UrlToken(UrlToken.Type.HOST, end, null, TERM_ENDHOST));
+ }
+
+ private static void registerScheme(String scheme, int port) {
+ String str = String.valueOf(port);
+ schemeToPort.put(scheme, str);
+ portToScheme.put(str, scheme);
+ }
+}
diff --git a/vespajlib/src/main/java/com/yahoo/net/package-info.java b/vespajlib/src/main/java/com/yahoo/net/package-info.java
new file mode 100644
index 00000000000..ab474304da2
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/net/package-info.java
@@ -0,0 +1,7 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+@PublicApi
+package com.yahoo.net;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;