URI.java
资源名称:HTTP客户端.zip [点击查看]
上传用户:demmber
上传日期:2007-12-22
资源大小:717k
文件大小:56k
源码类别:
Java编程
开发平台:
Java
- /*
- * @(#)URI.java 0.3-3 06/05/2001
- *
- * This file is part of the HTTPClient package
- * Copyright (C) 1996-2001 Ronald Tschalär
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free
- * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307, USA
- *
- * For questions, suggestions, bug-reports, enhancement-requests etc.
- * I may be contacted at:
- *
- * ronald@innovation.ch
- *
- * The HTTPClient's home page is located at:
- *
- * http://www.innovation.ch/java/HTTPClient/
- *
- */
- package HTTPClient;
- import java.net.URL;
- import java.net.MalformedURLException;
- import java.util.BitSet;
- import java.util.Hashtable;
- /**
- * This class represents a generic URI, as defined in RFC-2396.
- * This is similar to java.net.URL, with the following enhancements:
- * <UL>
- * <LI>it doesn't require a URLStreamhandler to exist for the scheme; this
- * allows this class to be used to hold any URI, construct absolute
- * URIs from relative ones, etc.
- * <LI>it handles escapes correctly
- * <LI>equals() works correctly
- * <LI>relative URIs are correctly constructed
- * <LI>it has methods for accessing various fields such as userinfo,
- * fragment, params, etc.
- * <LI>it handles less common forms of resources such as the "*" used in
- * http URLs.
- * </UL>
- *
- * <P>The elements are always stored in escaped form.
- *
- * <P>While RFC-2396 distinguishes between just two forms of URI's, those that
- * follow the generic syntax and those that don't, this class knows about a
- * third form, named semi-generic, used by quite a few popular schemes.
- * Semi-generic syntax treats the path part as opaque, i.e. has the form
- * &lt;scheme&gt;://&lt;authority&gt;/&lt;opaque&gt; . Relative URI's of this
- * type are only resolved as far as absolute paths - relative paths do not
- * exist.
- *
- * <P>Ideally, java.net.URL should subclass URI.
- *
- * @see <A HREF="http://www.ics.uci.edu/pub/ietf/uri/rfc2396.txt">rfc-2396</A>
- * @version 0.3-3 06/05/2001
- * @author Ronald Tschalär
- * @since V0.3-1
- */
- public class URI
- {
- /**
- * If true, then the parser will resolve certain URI's in backwards
- * compatible (but technically incorrect) manner. Example:
- *
- *<PRE>
- * base = http://a/b/c/d;p?q
- * rel = http:g
- * result = http:g (correct)
- * result = http://a/b/c/g (backwards compatible)
- *</PRE>
- *
- * See rfc-2396, section 5.2, step 3, second paragraph.
- */
- public static final boolean ENABLE_BACKWARDS_COMPATIBILITY = true;
- protected static final Hashtable defaultPorts = new Hashtable(); // scheme name -> Integer default port; filled by the static initializer
- protected static final Hashtable usesGenericSyntax = new Hashtable(); // used as a set: keys are schemes parsed with the generic syntax
- protected static final Hashtable usesSemiGenericSyntax = new Hashtable(); // used as a set: keys are schemes parsed with the semi-generic syntax
- /* various character classes as defined in the draft; all are populated
- * once by the static initializer */
- protected static final BitSet alphanumChar; // '0'-'9', 'A'-'Z', 'a'-'z'
- protected static final BitSet markChar; // - _ . ! ~ * ' ( )
- protected static final BitSet reservedChar; // ; / ? : @ & = + $ ,
- protected static final BitSet unreservedChar; // alphanumChar plus markChar
- protected static final BitSet uricChar; // unreservedChar plus reservedChar plus '%'
- protected static final BitSet pcharChar; // unreservedChar plus % : @ & = + $ ,
- protected static final BitSet userinfoChar; // unreservedChar plus % ; : & = + $ ,
- protected static final BitSet schemeChar; // alphanumChar plus + - .
- protected static final BitSet hostChar; // alphanumChar plus - .
- protected static final BitSet opaqueChar; // same members as uricChar
- protected static final BitSet reg_nameChar; // unreservedChar plus $ , ; : @ & = +
- /* These are not directly in the spec, but used for escaping and
- * unescaping parts
- */
- /** list of characters which must not be unescaped when unescaping a scheme */
- public static final BitSet resvdSchemeChar;
- /** list of characters which must not be unescaped when unescaping a userinfo */
- public static final BitSet resvdUIChar;
- /** list of characters which must not be unescaped when unescaping a host */
- public static final BitSet resvdHostChar;
- /** list of characters which must not be unescaped when unescaping a path */
- public static final BitSet resvdPathChar;
- /** list of characters which must not be unescaped when unescaping a query string */
- public static final BitSet resvdQueryChar;
- /** list of characters which must not be escaped when escaping a path */
- public static final BitSet escpdPathChar;
- /** list of characters which must not be escaped when escaping a query string */
- public static final BitSet escpdQueryChar;
- /** list of characters which must not be escaped when escaping a fragment identifier */
- public static final BitSet escpdFragChar;
- /*
-  * Class initializer: fills the default-port and syntax tables and builds
-  * every character class declared above. The character classes are built
-  * from plain strings via charClass(), which avoids hand-written char
-  * literals (the single-quote literal in the mark class was malformed).
-  */
- static
- {
-     defaultPorts.put("http", new Integer(80));
-     defaultPorts.put("shttp", new Integer(80));
-     defaultPorts.put("http-ng", new Integer(80));
-     defaultPorts.put("coffee", new Integer(80));
-     defaultPorts.put("https", new Integer(443));
-     defaultPorts.put("ftp", new Integer(21));
-     defaultPorts.put("telnet", new Integer(23));
-     defaultPorts.put("nntp", new Integer(119));
-     defaultPorts.put("news", new Integer(119));
-     defaultPorts.put("snews", new Integer(563));
-     defaultPorts.put("hnews", new Integer(80));
-     defaultPorts.put("smtp", new Integer(25));
-     defaultPorts.put("gopher", new Integer(70));
-     defaultPorts.put("wais", new Integer(210));
-     defaultPorts.put("whois", new Integer(43));
-     defaultPorts.put("whois++", new Integer(63));
-     defaultPorts.put("rwhois", new Integer(4321));
-     defaultPorts.put("imap", new Integer(143));
-     defaultPorts.put("pop", new Integer(110));
-     defaultPorts.put("prospero", new Integer(1525));
-     defaultPorts.put("irc", new Integer(194));
-     defaultPorts.put("ldap", new Integer(389));
-     defaultPorts.put("nfs", new Integer(2049));
-     defaultPorts.put("z39.50r", new Integer(210));
-     defaultPorts.put("z39.50s", new Integer(210));
-     defaultPorts.put("vemmi", new Integer(575));
-     defaultPorts.put("videotex", new Integer(516));
-     defaultPorts.put("cmp", new Integer(829));
-
-     usesGenericSyntax.put("http", Boolean.TRUE);
-     usesGenericSyntax.put("https", Boolean.TRUE);
-     usesGenericSyntax.put("shttp", Boolean.TRUE);
-     usesGenericSyntax.put("coffee", Boolean.TRUE);
-     usesGenericSyntax.put("ftp", Boolean.TRUE);
-     usesGenericSyntax.put("file", Boolean.TRUE);
-     usesGenericSyntax.put("nntp", Boolean.TRUE);
-     usesGenericSyntax.put("news", Boolean.TRUE);
-     usesGenericSyntax.put("snews", Boolean.TRUE);
-     usesGenericSyntax.put("hnews", Boolean.TRUE);
-     usesGenericSyntax.put("imap", Boolean.TRUE);
-     usesGenericSyntax.put("wais", Boolean.TRUE);
-     usesGenericSyntax.put("nfs", Boolean.TRUE);
-     usesGenericSyntax.put("sip", Boolean.TRUE);
-     usesGenericSyntax.put("sips", Boolean.TRUE);
-     usesGenericSyntax.put("sipt", Boolean.TRUE);
-     usesGenericSyntax.put("sipu", Boolean.TRUE);
-     /* Note: schemes which definitely don't use the generic-URI syntax
-      * and must therefore never appear in the above list:
-      * "urn", "mailto", "sdp", "service", "tv", "gsm-sms", "tel", "fax",
-      * "modem", "eid", "cid", "mid", "data", "ldap"
-      */
-
-     usesSemiGenericSyntax.put("ldap", Boolean.TRUE);
-     usesSemiGenericSyntax.put("irc", Boolean.TRUE);
-     usesSemiGenericSyntax.put("gopher", Boolean.TRUE);
-     usesSemiGenericSyntax.put("videotex", Boolean.TRUE);
-     usesSemiGenericSyntax.put("rwhois", Boolean.TRUE);
-     usesSemiGenericSyntax.put("whois++", Boolean.TRUE);
-     usesSemiGenericSyntax.put("smtp", Boolean.TRUE);
-     usesSemiGenericSyntax.put("telnet", Boolean.TRUE);
-     usesSemiGenericSyntax.put("prospero", Boolean.TRUE);
-     usesSemiGenericSyntax.put("pop", Boolean.TRUE);
-     usesSemiGenericSyntax.put("vemmi", Boolean.TRUE);
-     usesSemiGenericSyntax.put("z39.50r", Boolean.TRUE);
-     usesSemiGenericSyntax.put("z39.50s", Boolean.TRUE);
-     usesSemiGenericSyntax.put("stream", Boolean.TRUE);
-     usesSemiGenericSyntax.put("cmp", Boolean.TRUE);
-
-     alphanumChar = new BitSet(128);
-     for (int ch='0'; ch<='9'; ch++) alphanumChar.set(ch);
-     for (int ch='A'; ch<='Z'; ch++) alphanumChar.set(ch);
-     for (int ch='a'; ch<='z'; ch++) alphanumChar.set(ch);
-
-     markChar     = charClass(null, "-_.!~*'()");
-     reservedChar = charClass(null, ";/?:@&=+$,");
-
-     unreservedChar = charClass(alphanumChar, "");
-     unreservedChar.or(markChar);
-
-     uricChar = charClass(unreservedChar, "%");
-     uricChar.or(reservedChar);
-
-     pcharChar    = charClass(unreservedChar, "%:@&=+$,");
-     userinfoChar = charClass(unreservedChar, "%;:&=+$,");
-
-     // this actually shouldn't contain uppercase letters...
-     schemeChar = charClass(alphanumChar, "+-.");
-
-     opaqueChar   = charClass(uricChar, "");
-     hostChar     = charClass(alphanumChar, "-.");
-     reg_nameChar = charClass(unreservedChar, "$,;:@&=+");
-
-     resvdSchemeChar = charClass(null, ":");
-     resvdUIChar     = charClass(null, "@");
-     resvdHostChar   = charClass(null, ":/?#");
-     resvdPathChar   = charClass(null, "/;?#");
-     resvdQueryChar  = charClass(null, "#");
-
-     escpdPathChar = charClass(pcharChar, "%/;");
-
-     escpdQueryChar = charClass(uricChar, "");
-     escpdQueryChar.clear('#');
-
-     escpdFragChar = charClass(uricChar, "");
- }
-
- /**
-  * Builds a fresh 128-bit character class.
-  *
-  * @param base  an existing class whose members are copied in, or null
-  * @param chars additional characters to include (may be empty)
-  * @return a new BitSet holding the union of <var>base</var> and
-  *         <var>chars</var>
-  */
- private static BitSet charClass(BitSet base, String chars)
- {
-     BitSet set = new BitSet(128);
-     if (base != null)
-         set.or(base);
-     for (int idx=0; idx<chars.length(); idx++)
-         set.set(chars.charAt(idx));
-     return set;
- }
- /* our uri in pieces */
- protected static final int OPAQUE = 0; // 'type' value: scheme-specific part is kept whole in 'opaque'
- protected static final int SEMI_GENERIC = 1; // 'type' value: authority is parsed, the path is kept opaque
- protected static final int GENERIC = 2; // 'type' value: full generic syntax (path, query, fragment)
- protected int type; // one of OPAQUE, SEMI_GENERIC, GENERIC
- protected String scheme;
- protected String opaque; // only assigned for OPAQUE URIs
- protected String userinfo;
- protected String host;
- protected int port = -1; // stays -1 when no port, or the scheme's default port, was given
- protected String path;
- protected String query;
- protected String fragment;
- /* cache the java.net.URL (filled lazily by toURL()) */
- protected URL url = null;
- // Constructors
- /**
- * Constructs a URI from the given string representation. The string
- * must be an absolute URI. This simply delegates to
- * {@link #URI(URI, String)} with a null base.
- *
- * @param uri a String containing an absolute URI
- * @exception ParseException if no scheme can be found or a specified
- * port cannot be parsed as a number
- */
- public URI(String uri) throws ParseException
- {
- this((URI) null, uri);
- }
- /**
-  * Constructs a URI from the given string representation, relative to
-  * the given base URI. Leading and trailing whitespace and a leading
-  * "url:" or "uri:" pseudo-scheme are ignored for every URI type
-  * (opaque, semi-generic and generic).
-  *
-  * @param base the base URI, relative to which <var>rel_uri</var>
-  * is to be parsed; may be null if <var>rel_uri</var>
-  * is absolute
-  * @param rel_uri a String containing a relative or absolute URI
-  * @exception ParseException if <var>base</var> is null and
-  * <var>rel_uri</var> is not an absolute URI, or
-  * if <var>base</var> is not null and the scheme
-  * is not known to use the generic syntax, or
-  * if a given port cannot be parsed as a number
-  */
- public URI(URI base, String rel_uri) throws ParseException
- {
-     /* Parsing is done according to the following RE:
-      *
-      *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-      *   12            3  4          5       6  7        8 9
-      *
-      * 2: scheme
-      * 4: authority
-      * 5: path
-      * 7: query
-      * 9: fragment
-      */
-     char[] uri = rel_uri.toCharArray();
-     int pos = 0, idx, len = uri.length;
-
-     // trim(): [pos, len) is the significant part of rel_uri from here on.
-     // The second loop stops at pos so that pos <= len always holds.
-     while (pos < len && Character.isWhitespace(uri[pos])) pos++;
-     while (len > pos && Character.isWhitespace(uri[len-1])) len--;
-
-     // strip the special "url" or "uri" scheme
-     if (pos < len-3 && uri[pos+3] == ':' &&
-         (uri[pos+0] == 'u' || uri[pos+0] == 'U') &&
-         (uri[pos+1] == 'r' || uri[pos+1] == 'R') &&
-         (uri[pos+2] == 'i' || uri[pos+2] == 'I' ||
-          uri[pos+2] == 'l' || uri[pos+2] == 'L'))
-         pos += 4;
-
-     // get scheme: (([^:/?#]+):)?
-     idx = pos;
-     while (idx < len && uri[idx] != ':' && uri[idx] != '/' &&
-            uri[idx] != '?' && uri[idx] != '#')
-         idx++;
-     if (idx < len && uri[idx] == ':')
-     {
-         scheme = rel_uri.substring(pos, idx).trim().toLowerCase();
-         pos = idx + 1;
-     }
-
-     // check and resolve scheme
-     String final_scheme = scheme;
-     if (scheme == null)
-     {
-         if (base == null)
-             throw new ParseException("No scheme found");
-         final_scheme = base.scheme;
-     }
-
-     // check for generic vs. opaque
-     type = usesGenericSyntax(final_scheme) ? GENERIC :
-            usesSemiGenericSyntax(final_scheme) ? SEMI_GENERIC : OPAQUE;
-     if (type == OPAQUE)
-     {
-         if (base != null && scheme == null)
-             throw new ParseException("Can't resolve relative URI for " +
-                                      "scheme " + final_scheme);
-         // use len, not the end of the string, so trailing whitespace
-         // is not turned into "%20"
-         opaque = escape(rel_uri.substring(pos, len), opaqueChar, true);
-         if (opaque.length() > 0 && opaque.charAt(0) == '/')
-             opaque = "%2F" + opaque.substring(1);
-         return;
-     }
-
-     // get authority: (//([^/?#]*))?
-     if (pos+1 < len && uri[pos] == '/' && uri[pos+1] == '/')
-     {
-         pos += 2;
-         idx = pos;
-         while (idx < len && uri[idx] != '/' && uri[idx] != '?' &&
-                uri[idx] != '#')
-             idx++;
-         parse_authority(rel_uri.substring(pos, idx), final_scheme);
-         pos = idx;
-     }
-
-     // handle semi-generic and generic uri's
-     if (type == SEMI_GENERIC)
-     {
-         // everything after the authority is the (opaque) path; again
-         // bounded by len so trailing whitespace is dropped
-         path = escape(rel_uri.substring(pos, len), uricChar, true);
-         if (path.length() > 0 && path.charAt(0) != '/')
-             path = '/' + path;
-     }
-     else
-     {
-         // get path: ([^?#]*)
-         idx = pos;
-         while (idx < len && uri[idx] != '?' && uri[idx] != '#')
-             idx++;
-         path = escape(rel_uri.substring(pos, idx), escpdPathChar, true);
-         pos = idx;
-
-         // get query: (\?([^#]*))?
-         if (pos < len && uri[pos] == '?')
-         {
-             pos += 1;
-             idx = pos;
-             while (idx < len && uri[idx] != '#')
-                 idx++;
-             this.query = escape(rel_uri.substring(pos, idx), escpdQueryChar, true);
-             pos = idx;
-         }
-
-         // get fragment: (#(.*))?
-         if (pos < len && uri[pos] == '#')
-             this.fragment = escape(rel_uri.substring(pos+1, len), escpdFragChar, true);
-     }
-
-     // now resolve the parts relative to the base
-     if (base != null)
-     {
-         if (scheme != null && // resolve scheme
-             !(scheme.equals(base.scheme) && ENABLE_BACKWARDS_COMPATIBILITY))
-             return;
-         scheme = base.scheme;
-
-         if (host != null) // resolve authority
-             return;
-         userinfo = base.userinfo;
-         host = base.host;
-         port = base.port;
-
-         if (type == SEMI_GENERIC) // can't resolve relative paths
-             return;
-
-         if (path.length() == 0 && query == null) // current doc
-         {
-             path = base.path;
-             query = base.query;
-             return;
-         }
-
-         if (path.length() == 0 || path.charAt(0) != '/') // relative path
-         {
-             // replace the last segment of the base path with our path
-             idx = (base.path != null) ? base.path.lastIndexOf('/') : -1;
-             if (idx < 0)
-                 path = '/' + path;
-             else
-                 path = base.path.substring(0, idx+1) + path;
-             path = canonicalizePath(path);
-         }
-     }
- }
- /**
-  * Remove all "/../" and "/./" from path, where possible. Leading "/../"'s
-  * are not removed. A trailing "/." or "/.." is handled as well.
-  *
-  * @param path the path to canonicalize
-  * @return the canonicalized path
-  */
- public static String canonicalizePath(String path)
- {
-     int idx, len = path.length();
-
-     // Fast exit only when there is no "/." at all. Inspecting just the
-     // first "/." is not enough: in "/a/.b/../c" the first hit is a plain
-     // dot-file segment but a later "/../" still has to be removed. The
-     // loop below skips segments that merely start with a dot.
-     if (path.indexOf("/.") == -1)
-         return path;
-
-     char[] p = new char[path.length()]; // clean path
-     path.getChars(0, p.length, p, 0);
-
-     int beg = 0; // segments before this index must not be removed
-     for (idx=1; idx<len; idx++)
-     {
-         if (p[idx] == '.' && p[idx-1] == '/')
-         {
-             int end; // destination index for the remaining tail
-             if (idx == len-1) // trailing "/."
-             {
-                 end = idx;
-                 idx += 1;
-             }
-             else if (p[idx+1] == '/') // "/./"
-             {
-                 end = idx - 1;
-                 idx += 1;
-             }
-             else if (p[idx+1] == '.' &&
-                      (idx == len-2 || p[idx+2] == '/')) // "/../"
-             {
-                 if (idx < beg + 2) // keep from backing up too much
-                 {
-                     beg = idx + 2;
-                     continue;
-                 }
-                 // back up to the '/' that starts the previous segment
-                 end = idx - 2;
-                 while (end > beg && p[end] != '/') end--;
-                 if (p[end] != '/') continue;
-                 if (idx == len-2) end++; // trailing "/..": keep the slash
-                 idx += 2;
-             }
-             else
-                 continue; // ordinary segment starting with '.'
-
-             // close the gap: move the tail [idx, len) down to 'end'
-             System.arraycopy(p, idx, p, end, len-idx);
-             len -= idx - end;
-             idx = end;
-         }
-     }
-     return new String(p, 0, len);
- }
- /**
- * Parse the authority specific part
- */
- private void parse_authority(String authority, String scheme)
- throws ParseException
- {
- /* The authority is further parsed according to:
- *
- * ^(([^@]*)@?)([[^]]*]|[^:]*)?(:(.*))?
- * 12 3 4 5
- *
- * 2: userinfo
- * 3: host
- * 5: port
- */
- char[] uri = authority.toCharArray();
- int pos = 0, idx, len = uri.length;
- // get userinfo: (([^@]*)@?)
- idx = pos;
- while (idx < len && uri[idx] != '@')
- idx++;
- if (idx < len && uri[idx] == '@')
- {
- this.userinfo = escape(authority.substring(pos, idx), userinfoChar, true);
- pos = idx + 1;
- }
- // get host: ([[^]]*]|[^:]*)?
- idx = pos;
- if (idx < len && uri[idx] == '[') // IPv6
- {
- while (idx < len && uri[idx] != ']')
- idx++;
- if (idx == len)
- throw new ParseException("No closing ']' found for opening '['"+
- " at position " + pos +
- " in authority `" + authority + "'");
- this.host = authority.substring(pos+1, idx);
- idx++;
- }
- else
- {
- while (idx < len && uri[idx] != ':')
- idx++;
- this.host = escape(authority.substring(pos, idx), uricChar, true);
- }
- pos = idx;
- // get port: (:(.*))?
- if (pos < (len-1) && uri[pos] == ':')
- {
- int p;
- try
- {
- p = Integer.parseInt(
- unescape(authority.substring(pos+1, len), null));
- if (p < 0) throw new NumberFormatException();
- }
- catch (NumberFormatException e)
- {
- throw new ParseException(authority.substring(pos+1, len) +
- " is an invalid port number");
- }
- if (p == defaultPort(scheme))
- this.port = -1;
- else
- this.port = p;
- }
- }
- /**
- * Construct a URI from the given URL by parsing the string returned
- * from <code>url.toExternalForm()</code> as an absolute URI.
- *
- * @param url the URL
- * @exception ParseException if <code>url.toExternalForm()</code> generates
- * an invalid string representation
- */
- public URI(URL url) throws ParseException
- {
- this((URI) null, url.toExternalForm());
- }
- /**
- * Constructs a URI from the given parts, using the default port for
- * this scheme (if known). The parts must be in unescaped form.
- * Delegates to the seven-argument constructor with a port of -1 and
- * null userinfo, query and fragment.
- *
- * @param scheme the scheme (sometimes known as protocol)
- * @param host the host
- * @param path the path part
- * @exception ParseException if <var>scheme</var> is null
- */
- public URI(String scheme, String host, String path) throws ParseException
- {
- this(scheme, null, host, -1, path, null, null);
- }
- /**
- * Constructs a URI from the given parts. The parts must be in unescaped
- * form. Delegates to the seven-argument constructor with null userinfo,
- * query and fragment.
- *
- * @param scheme the scheme (sometimes known as protocol)
- * @param host the host
- * @param port the port
- * @param path the path part
- * @exception ParseException if <var>scheme</var> is null
- */
- public URI(String scheme, String host, int port, String path)
- throws ParseException
- {
- this(scheme, null, host, port, path, null, null);
- }
- /**
-  * Builds a generic or semi-generic URI from its individual, unescaped
-  * components. Every component other than the scheme may be null. Each
-  * non-null component is trimmed and escaped before being stored; a host
-  * recognised as an IPv6 address is stored as is.
-  *
-  * @param scheme the scheme (sometimes known as protocol)
-  * @param userinfo the userinfo
-  * @param host the host
-  * @param port the port; not stored if it is the scheme's default
-  * @param path the path part
-  * @param query the query string
-  * @param fragment the fragment identifier
-  * @exception ParseException if <var>scheme</var> is null
-  */
- public URI(String scheme, String userinfo, String host, int port,
-            String path, String query, String fragment)
-         throws ParseException
- {
-     if (scheme == null)
-         throw new ParseException("missing scheme");
-
-     String normalized = scheme.trim().toLowerCase();
-     this.scheme = escape(normalized, schemeChar, true);
-
-     if (userinfo != null)
-         this.userinfo = escape(userinfo.trim(), userinfoChar, true);
-
-     if (host != null)
-     {
-         String trimmedHost = host.trim();
-         if (isIPV6Addr(trimmedHost))
-             this.host = trimmedHost;
-         else
-             this.host = escape(trimmedHost, hostChar, true);
-     }
-
-     // the port field starts out as -1, so the default port maps to -1
-     this.port = (port == defaultPort(normalized)) ? -1 : port;
-
-     if (path != null)
-         this.path = escape(path.trim(), escpdPathChar, true); // ???
-     if (query != null)
-         this.query = escape(query.trim(), escpdQueryChar, true);
-     if (fragment != null)
-         this.fragment = escape(fragment.trim(), escpdFragChar, true);
-
-     if (usesGenericSyntax(normalized))
-         type = GENERIC;
-     else
-         type = SEMI_GENERIC;
- }
- /**
-  * Heuristically decides whether the given host is a textual IPv6 address
-  * (without the surrounding brackets). The host must contain at least one
-  * ':' and consist solely of hexadecimal digits, ':' and '.' (the latter
-  * for an embedded dotted-quad IPv4 suffix such as "::ffff:1.2.3.4").
-  * Previously only decimal digits were accepted, so addresses such as
-  * "fe80::1" were not recognised and their colons were escaped.
-  *
-  * @param host the host to examine; must not be null
-  * @return true if <var>host</var> looks like an IPv6 address
-  */
- private static final boolean isIPV6Addr(String host)
- {
-     if (host.indexOf(':') < 0)
-         return false;
-
-     for (int idx=0; idx<host.length(); idx++)
-     {
-         char ch = host.charAt(idx);
-         boolean hexDigit = (ch >= '0' && ch <= '9') ||
-                            (ch >= 'a' && ch <= 'f') ||
-                            (ch >= 'A' && ch <= 'F');
-         if (!hexDigit && ch != ':' && ch != '.')
-             return false;
-     }
-
-     return true;
- }
- /**
-  * Builds an opaque URI, i.e. one consisting only of a scheme and an
-  * uninterpreted scheme-specific part. The scheme is trimmed and
-  * lower-cased; both parts are escaped before being stored.
-  *
-  * @param scheme the scheme (sometimes known as protocol)
-  * @param opaque the opaque part
-  * @exception ParseException if <var>scheme</var> is null
-  */
- public URI(String scheme, String opaque)
-         throws ParseException
- {
-     if (scheme == null)
-         throw new ParseException("missing scheme");
-
-     String normalized = scheme.trim().toLowerCase();
-     this.scheme = escape(normalized, schemeChar, true);
-     this.opaque = escape(opaque, opaqueChar, true);
-     this.type = OPAQUE;
- }
- // Class Methods
- /**
-  * Looks the scheme up (trimmed and lower-cased) in the table of schemes
-  * known to follow the generic-URI syntax.
-  *
-  * @param scheme the scheme to check; must not be null
-  * @return true if the scheme should be parsed according to the
-  * generic-URI syntax
-  */
- public static boolean usesGenericSyntax(String scheme)
- {
-     String key = scheme.trim().toLowerCase();
-     return usesGenericSyntax.containsKey(key);
- }
- /**
-  * Looks the scheme up (trimmed and lower-cased) in the table of schemes
-  * known to follow the semi-generic-URI syntax
-  * &lt;scheme&gt;://&lt;hostport&gt;/&lt;opaque&gt;
-  *
-  * @param scheme the scheme to check; must not be null
-  * @return true if the scheme should be parsed according to the
-  * semi-generic-URI syntax
-  */
- public static boolean usesSemiGenericSyntax(String scheme)
- {
-     String key = scheme.trim().toLowerCase();
-     return usesSemiGenericSyntax.containsKey(key);
- }
- /**
-  * Return the default port used by a given protocol. The protocol name
-  * is trimmed and lower-cased before the lookup.
-  *
-  * @param protocol the protocol; must not be null
-  * @return the port number, or 0 if unknown
-  */
- public final static int defaultPort(String protocol)
- {
-     Object known = defaultPorts.get(protocol.trim().toLowerCase());
-     if (known == null)
-         return 0;
-     return ((Integer) known).intValue();
- }
- // Instance Methods
- /**
- * Returns the stored scheme. The constructors in this class lower-case
- * the scheme before storing it.
- *
- * @return the scheme (often also referred to as protocol)
- */
- public String getScheme()
- {
- return scheme;
- }
- /**
- * Returns the scheme-specific part of an opaque URI, as stored.
- *
- * @return the opaque part, or null if this URI is generic
- */
- public String getOpaque()
- {
- return opaque;
- }
- /**
- * Returns the host as stored; IPv6 literals are stored without the
- * surrounding brackets.
- *
- * @return the host, or null if none was set
- */
- public String getHost()
- {
- return host;
- }
- /**
- * Returns the stored port. The constructors in this class leave the
- * field at -1 when no port was given or when the given port equals the
- * scheme's default port.
- *
- * @return the port, or -1 if it's the default port, or 0 if unknown
- * (NOTE(review): the "0 if unknown" case is not evident from
- * the constructors visible here - confirm)
- */
- public int getPort()
- {
- return port;
- }
- /**
- * Returns the userinfo part of the authority, as stored (escaped).
- *
- * @return the user info, or null if none was set
- */
- public String getUserinfo()
- {
- return userinfo;
- }
- /**
- * Returns the path, as stored (escaped).
- *
- * @return the path, or null if none was set
- */
- public String getPath()
- {
- return path;
- }
- /**
- * Returns the query string, as stored (escaped) and without the
- * leading '?'.
- *
- * @return the query string, or null if none was set
- */
- public String getQueryString()
- {
- return query;
- }
- /**
-  * Joins path and query with a '?'. Without a query just the path is
-  * returned (which may be null); without a path the result starts
-  * with the '?'.
-  *
-  * @return the path and query
-  */
- public String getPathAndQuery()
- {
-     if (query == null)
-         return path;
-
-     String prefix = (path == null) ? "" : path;
-     return prefix + "?" + query;
- }
- /**
- * Returns the fragment identifier, as stored (escaped) and without the
- * leading '#'.
- *
- * @return the fragment, or null if none was set
- */
- public String getFragment()
- {
- return fragment;
- }
- /**
- * Does the scheme specific part of this URI use the generic-URI syntax?
- *
- * <P>In general URIs are split into two categories: opaque-URI and
- * generic-URI. The generic-URI syntax is the syntax most are familiar
- * with from URLs such as ftp- and http-URLs, which is roughly:
- * <PRE>
- * generic-URI = scheme ":" [ "//" server ] [ "/" ] [ path_segments ] [ "?" query ]
- * </PRE>
- * (see RFC-2396 for exact syntax). Only URLs using the generic-URI syntax
- * can be used to create and resolve relative URIs.
- *
- * <P>Whether a given scheme is parsed according to the generic-URI
- * syntax or whether it is treated as opaque is determined by an internal
- * table of URI schemes.
- *
- * @return true if this URI's type is GENERIC
- * @see <A HREF="http://www.ics.uci.edu/pub/ietf/uri/rfc2396.txt">rfc-2396</A>
- */
- public boolean isGenericURI()
- {
- return (type == GENERIC);
- }
- /**
- * Does the scheme specific part of this URI use the semi-generic-URI syntax?
- *
- * <P>Many schemes which don't follow the full generic syntax actually
- * follow a reduced form where the path part is treated as opaque. This
- * is used for example by ldap, smtp, pop, etc, and is roughly
- * <PRE>
- * generic-URI = scheme ":" [ "//" server ] [ "/" [ opaque_path ] ]
- * </PRE>
- * I.e. parsing is identical to the generic-syntax, except that the path
- * part is not further parsed. URLs using the semi-generic-URI syntax can
- * be used to create and resolve relative URIs with the restriction that
- * all paths are treated as absolute.
- *
- * <P>Whether a given scheme is parsed according to the semi-generic-URI
- * syntax is determined by an internal table of URI schemes.
- *
- * @return true if this URI's type is SEMI_GENERIC
- * @see #isGenericURI()
- */
- public boolean isSemiGenericURI()
- {
- return (type == SEMI_GENERIC);
- }
- /**
-  * Will try to create a java.net.URL object from this URI. The URL is
-  * built on the first successful call and cached for later calls.
-  *
-  * @return the URL
-  * @exception MalformedURLException if no handler is available for the
-  * scheme
-  */
- public URL toURL() throws MalformedURLException
- {
-     if (url == null)
-     {
-         if (opaque != null)
-             url = new URL(scheme + ":" + opaque);
-         else
-         {
-             // userinfo, when present, is passed along in front of the host
-             String hostinfo = host;
-             if (userinfo != null)
-                 hostinfo = (host != null) ? userinfo + "@" + host
-                                           : userinfo + "@";
-
-             StringBuffer file = new StringBuffer(100);
-             assemblePath(file, true, true, false);
-             url = new URL(scheme, hostinfo, port, file.toString());
-         }
-     }
-     return url;
- }
- /**
-  * Appends path, query and (optionally) fragment to the given buffer.
-  *
-  * @param buf         the buffer to append to
-  * @param printEmpty  if true, a missing or empty path is written as "/"
-  * @param incFragment if true, the fragment (if any) is appended too
-  * @param unescape    if true, each part is unescaped before appending
-  */
- private final void assemblePath(StringBuffer buf, boolean printEmpty,
-                                 boolean incFragment, boolean unescape)
- {
-     boolean noPath = (path == null || path.length() == 0);
-     if (noPath && printEmpty)
-         buf.append('/');
-
-     if (path != null)
-     {
-         if (unescape)
-             buf.append(unescapeNoPE(path, resvdPathChar));
-         else
-             buf.append(path);
-     }
-
-     if (query != null)
-     {
-         buf.append('?');
-         if (unescape)
-             buf.append(unescapeNoPE(query, resvdQueryChar));
-         else
-             buf.append(query);
-     }
-
-     if (incFragment && fragment != null)
-     {
-         buf.append('#');
-         if (unescape)
-             buf.append(unescapeNoPE(fragment, null));
-         else
-             buf.append(fragment);
-     }
- }
- /**
-  * Assembles the string form of this URI from its parts.
-  *
-  * @param unescape if true, the parts are unescaped before assembly
-  *                 (a host containing ':' is always written as is,
-  *                 inside brackets)
-  * @return the assembled URI
-  */
- private final String stringify(boolean unescape)
- {
-     StringBuffer uri = new StringBuffer(100);
-
-     if (scheme != null)
-     {
-         if (unescape)
-             uri.append(unescapeNoPE(scheme, resvdSchemeChar));
-         else
-             uri.append(scheme);
-         uri.append(':');
-     }
-
-     if (opaque != null) // it's an opaque-uri
-     {
-         if (unescape)
-             uri.append(unescapeNoPE(opaque, null));
-         else
-             uri.append(opaque);
-         return uri.toString();
-     }
-
-     boolean hasAuthority = (userinfo != null || host != null || port != -1);
-     if (hasAuthority)
-         uri.append("//");
-
-     if (userinfo != null)
-     {
-         if (unescape)
-             uri.append(unescapeNoPE(userinfo, resvdUIChar));
-         else
-             uri.append(userinfo);
-         uri.append('@');
-     }
-
-     if (host != null)
-     {
-         if (host.indexOf(':') >= 0) // IPv6 literal: put the brackets back
-             uri.append('[').append(host).append(']');
-         else if (unescape)
-             uri.append(unescapeNoPE(host, resvdHostChar));
-         else
-             uri.append(host);
-     }
-
-     if (port != -1)
-     {
-         uri.append(':');
-         uri.append(port);
-     }
-
-     assemblePath(uri, false, true, unescape);
-     return uri.toString();
- }
- /**
- * Assembles the URI from its parts in their stored (escaped) form.
- *
- * @return a string representation of this URI suitable for use in
- * links, headers, etc.
- */
- public String toExternalForm()
- {
- return stringify(false);
- }
- /**
- * Return the URI as string. This differs from toExternalForm() in that
- * all elements are unescaped before assembly. This is <em>not suitable</em>
- * for passing to other apps or in header fields and such, and is usually
- * not what you want; use toExternalForm() for those purposes.
- *
- * @return the URI as a string
- * @see #toExternalForm()
- */
- public String toString()
- {
- return stringify(true);
- }
- /**
-  * Compares this URI with another URI or with a java.net.URL.
-  *
-  * <P>Against a URI, the schemes must match and then, depending on this
-  * URI's type, the opaque part, or userinfo/host/port/path (semi-generic),
-  * or additionally query and fragment (generic) must match. Hosts are
-  * compared ignoring case.
-  *
-  * <P>Against a URL, the URL's port is first normalized the same way
-  * this class stores ports (the scheme's default port becomes -1) and
-  * then compared for equality. Previously any URL which explicitly used
-  * the default port matched regardless of this URI's port.
-  *
-  * @param other the object to compare with
-  * @return true if <var>other</var> is either a URI or URL and it
-  * matches the current URI
-  */
- public boolean equals(Object other)
- {
-     if (other instanceof URI)
-     {
-         URI o = (URI) other;
-         return (scheme.equals(o.scheme) &&
-                 (
-                  type == OPAQUE && areEqual(opaque, o.opaque) ||
-
-                  type == SEMI_GENERIC &&
-                  areEqual(userinfo, o.userinfo) &&
-                  areEqualIC(host, o.host) &&
-                  port == o.port &&
-                  areEqual(path, o.path) ||
-
-                  type == GENERIC &&
-                  areEqual(userinfo, o.userinfo) &&
-                  areEqualIC(host, o.host) &&
-                  port == o.port &&
-                  pathsEqual(path, o.path) &&
-                  areEqual(query, o.query) &&
-                  areEqual(fragment, o.fragment)
-                 ));
-     }
-
-     if (other instanceof URL)
-     {
-         URL o = (URL) other;
-         String h, f;
-
-         if (userinfo != null)
-             h = userinfo + "@" + host;
-         else
-             h = host;
-
-         f = getPathAndQuery();
-
-         // our own port is -1 whenever it is the default, so bring the
-         // URL's port into the same form before comparing
-         int oPort = o.getPort();
-         if (oPort == defaultPort(scheme))
-             oPort = -1;
-
-         return (scheme.equalsIgnoreCase(o.getProtocol()) &&
-                 (type == OPAQUE && opaque.equals(o.getFile()) ||
-
-                  type == SEMI_GENERIC &&
-                  areEqualIC(h, o.getHost()) &&
-                  port == oPort &&
-                  areEqual(f, o.getFile()) ||
-
-                  type == GENERIC &&
-                  areEqualIC(h, o.getHost()) &&
-                  port == oPort &&
-                  pathsEqual(f, o.getFile()) &&
-                  areEqual(fragment, o.getRef())
-                 )
-                );
-     }
-
-     return false;
- }
- /**
-  * Null-safe comparison of two URI parts: two nulls are equal, a null
-  * and a non-null are not, and otherwise the strings match if they are
-  * identical or identical after unescaping.
-  */
- private static final boolean areEqual(String s1, String s2)
- {
-     if (s1 == null || s2 == null)
-         return (s1 == null && s2 == null);
-     if (s1.equals(s2))
-         return true;
-     return unescapeNoPE(s1, null).equals(unescapeNoPE(s2, null));
- }
- /**
-  * Same as areEqual(), except that the strings are compared ignoring
-  * case, both before and after unescaping.
-  */
- private static final boolean areEqualIC(String s1, String s2)
- {
-     if (s1 == null || s2 == null)
-         return (s1 == null && s2 == null);
-     if (s1.equalsIgnoreCase(s2))
-         return true;
-     return unescapeNoPE(s1, null).equalsIgnoreCase(unescapeNoPE(s2, null));
- }
- private static final boolean pathsEqual(String p1, String p2) // null-safe path comparison, done piece by piece between '/' and ';' separators
- {
- if (p1 == null && p2 == null)
- return true;
- if (p1 == null || p2 == null)
- return false;
- if (p1.equals(p2))
- return true;
- // ok, so it wasn't that simple. Let's split into parts and compare
- // unescaped. Both paths are walked in lock-step; a part ends at the
- // next '/' or ';' or at the end of the string.
- int pos1 = 0, end1 = p1.length(), pos2 = 0, end2 = p2.length();
- while (pos1 < end1 && pos2 < end2)
- {
- int start1 = pos1, start2 = pos2;
- char ch;
- while (pos1 < end1 && (ch = p1.charAt(pos1)) != '/' && ch != ';')
- pos1++;
- while (pos2 < end2 && (ch = p2.charAt(pos2)) != '/' && ch != ';')
- pos2++;
- if (pos1 == end1 && pos2 < end2 ||
- pos2 == end2 && pos1 < end1 ||
- pos1 < end1 && pos2 < end2 && p1.charAt(pos1) != p2.charAt(pos2))
- return false; // one path ended before the other, or the separators differ
- if ((!p1.regionMatches(start1, p2, start2, pos1-start1) || (pos1-start1) != (pos2-start2)) &&
- !unescapeNoPE(p1.substring(start1, pos1), null).equals(unescapeNoPE(p2.substring(start2, pos2), null)))
- return false; // the parts differ literally and also after unescaping
- pos1++; // step over the separator
- pos2++;
- }
- return (pos1 == end1 && pos2 == end2);
- }
- /* cached hash code; -1 means it has not been computed yet */
- private int hashCode = -1;
- /**
-  * The hash code is calculated over scheme, host, path, and query (or
-  * over scheme and opaque part for opaque URIs), each in unescaped form
-  * and with the host lower-cased. The value is cached.
-  *
-  * @return the hash code
-  */
- public int hashCode()
- {
-     if (hashCode != -1)
-         return hashCode;
-
-     int h = 0;
-     if (scheme != null)
-         h = unescapeNoPE(scheme, null).hashCode();
-
-     if (type == OPAQUE)
-     {
-         if (opaque != null)
-             h += unescapeNoPE(opaque, null).hashCode() * 7;
-     }
-     else
-     {
-         if (host != null)
-             h += unescapeNoPE(host, null).toLowerCase().hashCode() * 7;
-         if (path != null)
-             h += unescapeNoPE(path, null).hashCode() * 13;
-         if (query != null)
-             h += unescapeNoPE(query, null).hashCode() * 17;
-     }
-
-     hashCode = h;
-     return hashCode;
- }
- /**
- * Escape any character not in the given character class. This is a
- * convenience wrapper which converts the string to a character array,
- * calls {@link #escape(char[], BitSet, boolean)} and wraps the result
- * in a new String; see that method for how characters are encoded.
- *
- * @param elem the string to escape
- * @param allowed_char the BitSet of all allowed characters
- * @param utf8 if true, will first UTF-8 encode unallowed characters
- * @return the string with all characters not in allowed_char
- * escaped
- */
- public static String escape(String elem, BitSet allowed_char, boolean utf8)
- {
- return new String(escape(elem.toCharArray(), allowed_char, utf8));
- }
- /**
- * Escape any character not in the given character class. Characters
- * greater 255 are always escaped according to ??? .
- *
- * @param elem the array of characters to escape
- * @param allowed_char the BitSet of all allowed characters
- * @param utf8 if true, will first UTF-8 encode unallowed characters
- * @return the elem array with all characters not in allowed_char
- * escaped
- */
- public static char[] escape(char[] elem, BitSet allowed_char, boolean utf8)
- {
- int cnt=0;
- for (int idx=0; idx<elem.length; idx++)
- {
- if (!allowed_char.get(elem[idx]))
- {
- cnt += 2;
- if (utf8)
- {
- if (elem[idx] >= 0x0080)
- cnt += 3;
- if (elem[idx] >= 0x00800)
- cnt += 3;
- if ((elem[idx] & 0xFC00) == 0xD800 && idx+1 < elem.length &&
- (elem[idx+1] & 0xFC00) == 0xDC00)
- cnt -= 6;
- }
- }
- }
- if (cnt == 0) return elem;
- char[] tmp = new char[elem.length + cnt];
- for (int idx=0, pos=0; idx<elem.length; idx++)
- {
- char c = elem[idx];
- if (allowed_char.get(c))
- tmp[pos++] = c;
- else if (utf8)
- {
- /* We're UTF-8 encoding the chars first, as recommended in
- * the HTML 4.0 specification:
- * http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
- * Note that this doesn't change things for ASCII chars
- */
- if (c <= 0x007F)
- {
- pos = enc(tmp, pos, c);
- }
- else if (c <= 0x07FF)
- {
- pos = enc(tmp, pos, 0xC0 | ((c >> 6) & 0x1F));
- pos = enc(tmp, pos, 0x80 | ((c >> 0) & 0x3F));
- }
- else if (!((c & 0xFC00) == 0xD800 && idx+1 < elem.length &&
- (elem[idx+1] & 0xFC00) == 0xDC00))
- {
- pos = enc(tmp, pos, 0xE0 | ((c >> 12) & 0x0F));
- pos = enc(tmp, pos, 0x80 | ((c >> 6) & 0x3F));
- pos = enc(tmp, pos, 0x80 | ((c >> 0) & 0x3F));
- }
- else
- {
- int ch = ((c & 0x03FF) << 10) | (elem[++idx] & 0x03FF);
- ch += 0x10000;
- pos = enc(tmp, pos, 0xF0 | ((ch >> 18) & 0x07));
- pos = enc(tmp, pos, 0x80 | ((ch >> 12) & 0x3F));
- pos = enc(tmp, pos, 0x80 | ((ch >> 6) & 0x3F));
- pos = enc(tmp, pos, 0x80 | ((ch >> 0) & 0x3F));
- }
- }
- else
- pos = enc(tmp, pos, c);
- }
- return tmp;
- }
- private static final char[] hex =
- {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
- private static final int enc(char[] out, int pos, int c)
- {
- out[pos++] = '%';
- out[pos++] = hex[(c >> 4) & 0xf];
- out[pos++] = hex[c & 0xf];
- return pos;
- }
- /**
- * Unescape escaped characters (i.e. %xx) except reserved ones.
- *
- * @param str the string to unescape
- * @param reserved the characters which may not be unescaped, or null
- * @return the unescaped string
- * @exception ParseException if the two digits following a `%' are
- * not a valid hex number
- */
- public static final String unescape(String str, BitSet reserved)
- throws ParseException
- {
- if (str == null || str.indexOf('%') == -1)
- return str; // an optimization
- char[] buf = str.toCharArray();
- char[] res = new char[buf.length];
- char[] utf = new char[4];
- int utf_idx = 0, utf_len = -1;
- int didx = 0;
- for (int sidx=0; sidx<buf.length; sidx++)
- {
- if (buf[sidx] == '%')
- {
- int ch;
- try
- {
- if (sidx + 3 > buf.length)
- throw new NumberFormatException();
- ch = Integer.parseInt(str.substring(sidx+1,sidx+3), 16);
- if (ch < 0)
- throw new NumberFormatException();
- sidx += 2;
- }
- catch (NumberFormatException e)
- {
- /* Hmm, people not reading specs again, so we just
- * ignore it...
- throw new ParseException(str.substring(sidx,sidx+3) +
- " is an invalid code");
- */
- ch = buf[sidx];
- }
- // check if we're working on a utf-char
- if (utf_len > 0)
- {
- if ((ch & 0xC0) != 0x80) // oops, we misinterpreted
- {
- didx = copyBuf(utf, utf_idx, ch, res, didx, reserved, false);
- utf_len = -1;
- }
- else if (utf_idx == utf_len - 1) // end-of-char
- {
- if ((utf[0] & 0xE0) == 0xC0)
- ch = (utf[0] & 0x1F) << 6 |
- (ch & 0x3F);
- else if ((utf[0] & 0xF0) == 0xE0)
- ch = (utf[0] & 0x0F) << 12 |
- (utf[1] & 0x3F) << 6 |
- (ch & 0x3F);
- else
- ch = (utf[0] & 0x07) << 18 |
- (utf[1] & 0x3F) << 12 |
- (utf[2] & 0x3F) << 6 |
- (ch & 0x3F);
- if (reserved != null && reserved.get(ch))
- didx = copyBuf(utf, utf_idx, ch, res, didx, null, true);
- else if (utf_len < 4)
- res[didx++] = (char) ch;
- else
- {
- ch -= 0x10000;
- res[didx++] = (char) ((ch >> 10) | 0xD800);
- res[didx++] = (char) ((ch & 0x03FF) | 0xDC00);
- }
- utf_len = -1;
- }
- else // continue
- utf[utf_idx++] = (char) ch;
- }
- // check if this is the start of a utf-char
- else if ((ch & 0xE0) == 0xC0 || (ch & 0xF0) == 0xE0 ||
- (ch & 0xF8) == 0xF0)
- {
- if ((ch & 0xE0) == 0xC0)
- utf_len = 2;
- else if ((ch & 0xF0) == 0xE0)
- utf_len = 3;
- else
- utf_len = 4;
- utf[0] = (char) ch;
- utf_idx = 1;
- }
- // leave reserved alone
- else if (reserved != null && reserved.get(ch))
- {
- res[didx++] = buf[sidx];
- sidx -= 2;
- }
- // just use the decoded version
- else
- res[didx++] = (char) ch;
- }
- else if (utf_len > 0) // oops, we misinterpreted
- {
- didx = copyBuf(utf, utf_idx, buf[sidx], res, didx, reserved, false);
- utf_len = -1;
- }
- else
- res[didx++] = buf[sidx];
- }
- if (utf_len > 0) // oops, we misinterpreted
- didx = copyBuf(utf, utf_idx, -1, res, didx, reserved, false);
- return new String(res, 0, didx);
- }
- private static final int copyBuf(char[] utf, int utf_idx, int ch,
- char[] res, int didx, BitSet reserved,
- boolean escapeAll)
- {
- if (ch >= 0)
- utf[utf_idx++] = (char) ch;
- for (int idx=0; idx<utf_idx; idx++)
- {
- if (reserved != null && reserved.get(utf[idx]) || escapeAll)
- didx = enc(res, didx, utf[idx]);
- else
- res[didx++] = utf[idx];
- }
- return didx;
- }
- /**
- * Unescape escaped characters (i.e. %xx). If a ParseException would
- * be thrown then just return the original string.
- *
- * @param str the string to unescape
- * @param reserved the characters which may not be unescaped, or null
- * @return the unescaped string, or the original string if unescaping
- * would throw a ParseException
- * @see #unescape(java.lang.String, java.util.BitSet)
- */
- private static final String unescapeNoPE(String str, BitSet reserved)
- {
- try
- { return unescape(str, reserved); }
- catch (ParseException pe)
- { return str; }
- }
- /**
- * Run test set.
- *
- * @exception Exception if any test fails
- */
- public static void main(String args[]) throws Exception
- {
- System.err.println();
- System.err.println("*** URI Tests ...");
- /* Relative URI test set, taken from Section C of rfc-2396 and
- * Roy's test1. All Roy's URI parser tests can be found at
- * http://www.ics.uci.edu/~fielding/url/
- * The tests have been augmented by a few for the IPv6 syntax
- */
- URI base = new URI("http://a/b/c/d;p?q");
- // normal examples
- testParser(base, "g:h", "g:h");
- testParser(base, "g", "http://a/b/c/g");
- testParser(base, "./g", "http://a/b/c/g");
- testParser(base, "g/", "http://a/b/c/g/");
- testParser(base, "/g", "http://a/g");
- testParser(base, "//g", "http://g");
- testParser(base, "//[23:54]", "http://[23:54]");
- testParser(base, "?y", "http://a/b/c/?y");
- testParser(base, "g?y", "http://a/b/c/g?y");
- testParser(base, "#s", "http://a/b/c/d;p?q#s");
- testParser(base, "g#s", "http://a/b/c/g#s");
- testParser(base, "g?y#s", "http://a/b/c/g?y#s");
- testParser(base, ";x", "http://a/b/c/;x");
- testParser(base, "g;x", "http://a/b/c/g;x");
- testParser(base, "g;x?y#s", "http://a/b/c/g;x?y#s");
- testParser(base, ".", "http://a/b/c/");
- testParser(base, "./", "http://a/b/c/");
- testParser(base, "..", "http://a/b/");
- testParser(base, "../", "http://a/b/");
- testParser(base, "../g", "http://a/b/g");
- testParser(base, "../..", "http://a/");
- testParser(base, "../../", "http://a/");
- testParser(base, "../../g", "http://a/g");
- // abnormal examples
- testParser(base, "", "http://a/b/c/d;p?q");
- testParser(base, "/./g", "http://a/./g");
- testParser(base, "/../g", "http://a/../g");
- testParser(base, "../../../g", "http://a/../g");
- testParser(base, "../../../../g", "http://a/../../g");
- testParser(base, "g.", "http://a/b/c/g.");
- testParser(base, ".g", "http://a/b/c/.g");
- testParser(base, "g..", "http://a/b/c/g..");
- testParser(base, "..g", "http://a/b/c/..g");
- testParser(base, "./../g", "http://a/b/g");
- testParser(base, "./g/.", "http://a/b/c/g/");
- testParser(base, "g/./h", "http://a/b/c/g/h");
- testParser(base, "g/../h", "http://a/b/c/h");
- testParser(base, "g;x=1/./y", "http://a/b/c/g;x=1/y");
- testParser(base, "g;x=1/../y", "http://a/b/c/y");
- testParser(base, "g?y/./x", "http://a/b/c/g?y/./x");
- testParser(base, "g?y/../x", "http://a/b/c/g?y/../x");
- testParser(base, "g#s/./x", "http://a/b/c/g#s/./x");
- testParser(base, "g#s/../x", "http://a/b/c/g#s/../x");
- if (ENABLE_BACKWARDS_COMPATIBILITY)
- testParser(base, "http:g", "http://a/b/c/g");
- else
- testParser(base, "http:g", "http:g");
- if (ENABLE_BACKWARDS_COMPATIBILITY)
- testParser(base, "http:", "http://a/b/c/d;p?q");
- else
- testParser(base, "http:", "http:");
- testParser(base, "./g:h", "http://a/b/c/g:h");
- /* Roy's test2
- */
- base = new URI("http://a/b/c/d;p?q=1/2");
- testParser(base, "g", "http://a/b/c/g");
- testParser(base, "./g", "http://a/b/c/g");
- testParser(base, "g/", "http://a/b/c/g/");
- testParser(base, "/g", "http://a/g");
- testParser(base, "//g", "http://g");
- testParser(base, "//[23:54]","http://[23:54]");
- testParser(base, "?y", "http://a/b/c/?y");
- testParser(base, "g?y", "http://a/b/c/g?y");
- testParser(base, "g?y/./x", "http://a/b/c/g?y/./x");
- testParser(base, "g?y/../x", "http://a/b/c/g?y/../x");
- testParser(base, "g#s", "http://a/b/c/g#s");
- testParser(base, "g#s/./x", "http://a/b/c/g#s/./x");
- testParser(base, "g#s/../x", "http://a/b/c/g#s/../x");
- testParser(base, "./", "http://a/b/c/");
- testParser(base, "../", "http://a/b/");
- testParser(base, "../g", "http://a/b/g");
- testParser(base, "../../", "http://a/");
- testParser(base, "../../g", "http://a/g");
- /* Roy's test3
- */
- base = new URI("http://a/b/c/d;p=1/2?q");
- testParser(base, "g", "http://a/b/c/d;p=1/g");
- testParser(base, "./g", "http://a/b/c/d;p=1/g");
- testParser(base, "g/", "http://a/b/c/d;p=1/g/");
- testParser(base, "g?y", "http://a/b/c/d;p=1/g?y");
- testParser(base, ";x", "http://a/b/c/d;p=1/;x");
- testParser(base, "g;x", "http://a/b/c/d;p=1/g;x");
- testParser(base, "g;x=1/./y", "http://a/b/c/d;p=1/g;x=1/y");
- testParser(base, "g;x=1/../y", "http://a/b/c/d;p=1/y");
- testParser(base, "./", "http://a/b/c/d;p=1/");
- testParser(base, "../", "http://a/b/c/");
- testParser(base, "../g", "http://a/b/c/g");
- testParser(base, "../../", "http://a/b/");
- testParser(base, "../../g", "http://a/b/g");
- /* Roy's test4
- */
- base = new URI("fred:///s//a/b/c");
- testParser(base, "g:h", "g:h");
- /* we have to skip these, as usesGeneraicSyntax("fred") returns false
- * and we therefore don't parse relative URI's here. But test5 is
- * the same except that the http scheme is used.
- testParser(base, "g", "fred:///s//a/b/g");
- testParser(base, "./g", "fred:///s//a/b/g");
- testParser(base, "g/", "fred:///s//a/b/g/");
- testParser(base, "/g", "fred:///g");
- testParser(base, "//g", "fred://g");
- testParser(base, "//g/x", "fred://g/x");
- testParser(base, "///g", "fred:///g");
- testParser(base, "./", "fred:///s//a/b/");
- testParser(base, "../", "fred:///s//a/");
- testParser(base, "../g", "fred:///s//a/g");
- testParser(base, "../../", "fred:///s//");
- testParser(base, "../../g", "fred:///s//g");
- testParser(base, "../../../g", "fred:///s/g");
- testParser(base, "../../../../g", "fred:///g");
- */
- testPE(base, "g");
- /* Roy's test5
- */
- base = new URI("http:///s//a/b/c");
- testParser(base, "g:h", "g:h");
- testParser(base, "g", "http:///s//a/b/g");
- testParser(base, "./g", "http:///s//a/b/g");
- testParser(base, "g/", "http:///s//a/b/g/");
- testParser(base, "/g", "http:///g");
- testParser(base, "//g", "http://g");
- testParser(base, "//[23:54]", "http://[23:54]");
- testParser(base, "//g/x", "http://g/x");
- testParser(base, "///g", "http:///g");
- testParser(base, "./", "http:///s//a/b/");
- testParser(base, "../", "http:///s//a/");
- testParser(base, "../g", "http:///s//a/g");
- testParser(base, "../../", "http:///s//");
- testParser(base, "../../g", "http:///s//g");
- testParser(base, "../../../g", "http:///s/g");
- testParser(base, "../../../../g", "http:///g");
- /* Some additional parser tests
- */
- base = new URI("http://s");
- testParser(base, "ftp:h", "ftp:h");
- testParser(base, "ftp://h", "ftp://h");
- testParser(base, "//g", "http://g");
- testParser(base, "//g?h", "http://g?h");
- testParser(base, "g", "http://s/g");
- testParser(base, "./g", "http://s/g");
- testParser(base, "?g", "http://s/?g");
- testParser(base, "#g", "http://s#g");
- base = new URI("http:");
- testParser(base, "ftp:h", "ftp:h");
- testParser(base, "ftp://h", "ftp://h");
- testParser(base, "//g", "http://g");
- testParser(base, "g", "http:/g");
- testParser(base, "?g", "http:/?g");
- testParser(base, "#g", "http:#g");
- base = new URI("http://s/t");
- testParser(base, "ftp:/h", "ftp:/h");
- if (ENABLE_BACKWARDS_COMPATIBILITY)
- testParser(base, "http:/h", "http://s/h");
- else
- testParser(base, "http:/h", "http:/h");
- base = new URI("http://s/g?h/j");
- testParser(base, "k", "http://s/k");
- testParser(base, "k?l", "http://s/k?l");
- /* Parser tests for semi-generic syntax
- */
- base = new URI("ldap:");
- testParser(base, "ldap:", "ldap:");
- testParser(base, "ldap://a", "ldap://a");
- testParser(base, "ldap://a/b", "ldap://a/b");
- testParser(base, "ldap:/b", "ldap:/b");
- testParser(base, "ftp:h", "ftp:h");
- testParser(base, "ftp://h", "ftp://h");
- testParser(base, "//g", "ldap://g");
- testParser(base, "//g?h", "ldap://g/?h");
- testParser(base, "g", "ldap:/g");
- testParser(base, "./g", "ldap:/./g");
- testParser(base, "?g", "ldap:/?g");
- testParser(base, "#g", "ldap:/%23g");
- base = new URI("ldap://s");
- if (ENABLE_BACKWARDS_COMPATIBILITY)
- testParser(base, "ldap:", "ldap://s");
- else
- testParser(base, "ldap:", "ldap:");
- testParser(base, "ldap://a", "ldap://a");
- testParser(base, "ldap://a/b", "ldap://a/b");
- if (ENABLE_BACKWARDS_COMPATIBILITY)
- testParser(base, "ldap:/b", "ldap://s/b");
- else
- testParser(base, "ldap:/b", "ldap:/b");
- testParser(base, "ftp:h", "ftp:h");
- testParser(base, "ftp://h", "ftp://h");
- testParser(base, "//g", "ldap://g");
- testParser(base, "//g?h", "ldap://g/?h");
- testParser(base, "g", "ldap://s/g");
- testParser(base, "./g", "ldap://s/./g");
- testParser(base, "?g", "ldap://s/?g");
- testParser(base, "#g", "ldap://s/%23g");
- base = new URI("ldap://s/t");
- testParser(base, "ftp:/h", "ftp:/h");
- if (ENABLE_BACKWARDS_COMPATIBILITY)
- testParser(base, "ldap:/h", "ldap://s/h");
- else
- testParser(base, "ldap:/h", "ldap:/h");
- if (ENABLE_BACKWARDS_COMPATIBILITY)
- testParser(base, "ldap:", "ldap://s");
- else
- testParser(base, "ldap:", "ldap:");
- testParser(base, "ldap://a", "ldap://a");
- testParser(base, "ldap://a/b", "ldap://a/b");
- testParser(base, "ftp:h", "ftp:h");
- testParser(base, "ftp://h", "ftp://h");
- testParser(base, "//g", "ldap://g");
- testParser(base, "//g?h", "ldap://g/?h");
- testParser(base, "g", "ldap://s/g");
- testParser(base, "./g", "ldap://s/./g");
- testParser(base, "?g", "ldap://s/?g");
- testParser(base, "#g", "ldap://s/%23g");
- /* equality tests */
- // protocol
- testNotEqual("http://a/", "nntp://a/");
- testNotEqual("http://a/", "https://a/");
- testNotEqual("http://a/", "shttp://a/");
- testEqual("http://a/", "Http://a/");
- testEqual("http://a/", "hTTP://a/");
- testEqual("url:http://a/", "hTTP://a/");
- testEqual("urI:http://a/", "hTTP://a/");
- // host
- testEqual("http://a/", "Http://A/");
- testEqual("http://a.b.c/", "Http://A.b.C/");
- testEqual("http:///", "Http:///");
- testEqual("http://[]/", "Http:///");
- testNotEqual("http:///", "Http://a/");
- testNotEqual("http://[]/", "Http://a/");
- testPE(null, "ftp://[23::43:1/");
- testPE(null, "ftp://[/");
- // port
- testEqual("http://a.b.c/", "Http://A.b.C:80/");
- testEqual("http://a.b.c:/", "Http://A.b.C:80/");
- testEqual("http://[23::45:::5:]/", "Http://[23::45:::5:]:80/");
- testEqual("http://[23::45:::5:]:/", "Http://[23::45:::5:]:80/");
- testEqual("nntp://a", "nntp://a:119");
- testEqual("nntp://a:", "nntp://a:119");
- testEqual("nntp://a/", "nntp://a:119/");
- testNotEqual("nntp://a", "nntp://a:118");
- testNotEqual("nntp://a", "nntp://a:0");
- testNotEqual("nntp://a:", "nntp://a:0");
- testEqual("telnet://:23/", "telnet:///");
- testPE(null, "ftp://:a/");
- testPE(null, "ftp://:-1/");
- testPE(null, "ftp://::1/");
- // userinfo
- testNotEqual("ftp://me@a", "ftp://a");
- testNotEqual("ftp://me@a", "ftp://Me@a");
- testEqual("ftp://Me@a", "ftp://Me@a");
- testEqual("ftp://Me:My@a:21", "ftp://Me:My@a");
- testEqual("ftp://Me:My@a:", "ftp://Me:My@a");
- testNotEqual("ftp://Me:My@a:21", "ftp://Me:my@a");
- testNotEqual("ftp://Me:My@a:", "ftp://Me:my@a");
- // path
- testEqual("ftp://a/b%2b/", "ftp://a/b+/");
- testEqual("ftp://a/b%2b/", "ftp://a/b+/");
- testEqual("ftp://a/b%5E/", "ftp://a/b^/");
- testEqual("ftp://a/b%4C/", "ftp://a/bL/");
- testNotEqual("ftp://a/b/", "ftp://a//b/");
- testNotEqual("ftp://a/b/", "ftp://a/b//");
- testNotEqual("ftp://a/b%4C/", "ftp://a/bl/");
- testNotEqual("ftp://a/b%3f/", "ftp://a/b?/");
- testNotEqual("ftp://a/b%2f/", "ftp://a/b//");
- testNotEqual("ftp://a/b%2fc/", "ftp://a/b/c/");
- testNotEqual("ftp://a/bc/", "ftp://a/b//");
- testNotEqual("ftp://a/bc/", "ftp://a/b/");
- testNotEqual("ftp://a/bc//", "ftp://a/b/");
- testNotEqual("ftp://a/b/", "ftp://a/bc//");
- testNotEqual("ftp://a/b/", "ftp://a/bc/");
- testNotEqual("ftp://a/b//", "ftp://a/bc/");
- testNotEqual("ftp://a/b;fc/", "ftp://a/bf;c/");
- testNotEqual("ftp://a/b%3bfc/", "ftp://a/b;fc/");
- testEqual("ftp://a/b;/;/", "ftp://a/b;/;/");
- testNotEqual("ftp://a/b;/", "ftp://a/b//");
- testNotEqual("ftp://a/b//", "ftp://a/b;/");
- testNotEqual("ftp://a/b/;", "ftp://a/b//");
- testNotEqual("ftp://a/b//", "ftp://a/b/;");
- testNotEqual("ftp://a/b;/", "ftp://a/b;//");
- testNotEqual("ftp://a/b;//", "ftp://a/b;/");
- // escaping/unescaping
- testEscape("hellou1212there", "hello%E1%88%92there");
- testEscape("hellou0232there", "hello%C8%B2there");
- testEscape("hellouDA42uDD42there", "hello%F2%A0%A5%82there");
- testEscape("hellouDA42", "hello%ED%A9%82");
- testEscape("hellouDA42there", "hello%ED%A9%82there");
- testUnescape("hello%F2%A0%A5%82there", "hellouDA42uDD42there");
- testUnescape("hello%F2%A0%A5there", "hellou00F2u00A0u00A5there");
- testUnescape("hello%F2%A0there", "hellou00F2u00A0there");
- testUnescape("hello%F2there", "hellou00F2there");
- testUnescape("hello%F2%A0%A5%82", "hellouDA42uDD42");
- testUnescape("hello%F2%A0%A5", "hellou00F2u00A0u00A5");
- testUnescape("hello%F2%A0", "hellou00F2u00A0");
- testUnescape("hello%F2", "hellou00F2");
- testUnescape("hello%E1%88%92there", "hellou1212there");
- testUnescape("hello%E1%88there", "hellou00E1u0088there");
- testUnescape("hello%E1there", "hellou00E1there");
- testUnescape("hello%E1%71there", "hellou00E1qthere");
- testUnescape("hello%E1%88", "hellou00E1u0088");
- testUnescape("hello%E1%71", "hellou00E1q");
- testUnescape("hello%E1", "hellou00E1");
- testUnescape("hello%C8%B2there", "hellou0232there");
- testUnescape("hello%C8there", "hellou00C8there");
- testUnescape("hello%C8%71there", "hellou00C8qthere");
- testUnescape("hello%C8%71", "hellou00C8q");
- testUnescape("hello%C8", "hellou00C8");
- testUnescape("%71there", "qthere");
- testUnescape("%B1there", "u00B1there");
- System.err.println("*** Tests finished successfuly");
- }
- private static final String nl = System.getProperty("line.separator");
- private static void testParser(URI base, String relURI, String result)
- throws Exception
- {
- if (!(new URI(base, relURI).toExternalForm().equals(result)))
- {
- throw new Exception("Test failed: " + nl +
- " base-URI = <" + base + ">" + nl +
- " rel-URI = <" + relURI + ">" + nl+
- " expected <" + result + ">" + nl+
- " but got <" + new URI(base, relURI) + ">");
- }
- }
- private static void testEqual(String one, String two) throws Exception
- {
- URI u1 = new URI(one);
- URI u2 = new URI(two);
- if (!u1.equals(u2))
- {
- throw new Exception("Test failed: " + nl +
- " <" + one + "> != <" + two + ">");
- }
- if (u1.hashCode() != u2.hashCode())
- {
- throw new Exception("Test failed: " + nl +
- " hashCode <" + one + "> != hashCode <" + two + ">");
- }
- }
- private static void testNotEqual(String one, String two) throws Exception
- {
- URI u1 = new URI(one);
- URI u2 = new URI(two);
- if (u1.equals(u2))
- {
- throw new Exception("Test failed: " + nl +
- " <" + one + "> == <" + two + ">");
- }
- }
- private static void testPE(URI base, String uri) throws Exception
- {
- boolean got_pe = false;
- try
- { new URI(base, uri); }
- catch (ParseException pe)
- { got_pe = true; }
- if (!got_pe)
- {
- throw new Exception("Test failed: " + nl +
- " <" + uri + "> should be invalid");
- }
- }
- private static void testEscape(String raw, String escaped) throws Exception
- {
- String test = new String(escape(raw.toCharArray(), uricChar, true));
- if (!test.equals(escaped))
- throw new Exception("Test failed: " + nl +
- " raw-string: " + raw + nl +
- " escaped: " + test + nl +
- " expected: " + escaped);
- }
- private static void testUnescape(String escaped, String raw)
- throws Exception
- {
- if (!unescape(escaped, null).equals(raw))
- throw new Exception("Test failed: " + nl +
- " escaped-string: " + escaped + nl +
- " unescaped: " + unescape(escaped, null) + nl +
- " expected: " + raw);
- }
- }
English
