View Javadoc

1   /*
2    * @(#)URI.java						0.3-3 06/05/2001
3    *
4    *  This file is part of the HTTPClient package
5    *  Copyright (C) 1996-2001 Ronald Tschalär
6    *
7    *  This library is free software; you can redistribute it and/or
8    *  modify it under the terms of the GNU Lesser General Public
9    *  License as published by the Free Software Foundation; either
10   *  version 2 of the License, or (at your option) any later version.
11   *
12   *  This library is distributed in the hope that it will be useful,
13   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   *  Lesser General Public License for more details.
16   *
17   *  You should have received a copy of the GNU Lesser General Public
18   *  License along with this library; if not, write to the Free
19   *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
20   *  MA 02111-1307, USA
21   *
22   *  For questions, suggestions, bug-reports, enhancement-requests etc.
23   *  I may be contacted at:
24   *
25   *  ronald@innovation.ch
26   *
27   *  The HTTPClient's home page is located at:
28   *
29   *  http://www.innovation.ch/java/HTTPClient/ 
30   *
31   */
32  
33  package HTTPClient;
34  
35  import java.net.URL;
36  import java.net.MalformedURLException;
37  import java.util.BitSet;
38  import java.util.Hashtable;
39  
40  /**
41   * This class represents a generic URI, as defined in RFC-2396.
42   * This is similar to java.net.URL, with the following enhancements:
43   * <UL>
44   * <LI>it doesn't require a URLStreamhandler to exist for the scheme; this
45   *     allows this class to be used to hold any URI, construct absolute
46   *     URIs from relative ones, etc.
47   * <LI>it handles escapes correctly
48   * <LI>equals() works correctly
49   * <LI>relative URIs are correctly constructed
50   * <LI>it has methods for accessing various fields such as userinfo,
51   *     fragment, params, etc.
52   * <LI>it handles less common forms of resources such as the "*" used in
53   *     http URLs.
54   * </UL>
55   *
56   * <P>The elements are always stored in escaped form.
57   *
58   * <P>While RFC-2396 distinguishes between just two forms of URI's, those that
59   * follow the generic syntax and those that don't, this class knows about a
60   * third form, named semi-generic, used by quite a few popular schemes.
61   * Semi-generic syntax treats the path part as opaque, i.e. has the form
62   * &lt;scheme&gt;://&lt;authority&gt;/&lt;opaque&gt; . Relative URI's of this
63   * type are only resolved as far as absolute paths - relative paths do not
64   * exist.
65   *
66   * <P>Ideally, java.net.URL should subclass URI.
67   *
68   * @see		<A HREF="http://www.ics.uci.edu/pub/ietf/uri/rfc2396.txt">rfc-2396</A>
69   * @version	0.3-3  06/05/2001
70   * @author	Ronald Tschalär
71   * @since	V0.3-1
72   */
73  public class URI
74  {
75      /**
76       * If true, then the parser will resolve certain URI's in backwards
77       * compatible (but technically incorrect) manner. Example:
78       *
79       *<PRE>
80       * base   = http://a/b/c/d;p?q
81       * rel    = http:g
82       * result = http:g		(correct)
83       * result = http://a/b/c/g	(backwards compatible)
84       *</PRE>
85       *
86       * See rfc-2396, section 5.2, step 3, second paragraph.
87       */
88      public static final boolean ENABLE_BACKWARDS_COMPATIBILITY = true;
89  
    /*
     * Scheme lookup tables, keyed by the lower-case scheme name and filled
     * in by the static initializer. defaultPorts maps a scheme to its
     * default port (an Integer); the two syntax tables list the schemes
     * which are parsed according to the generic and the semi-generic
     * syntax, respectively (only the presence of the key is checked).
     */
90      protected static final Hashtable defaultPorts          = new Hashtable();
91      protected static final Hashtable usesGenericSyntax     = new Hashtable();
92      protected static final Hashtable usesSemiGenericSyntax = new Hashtable();
93  
94      /* various character classes as defined in the draft; each BitSet is
     * indexed by character code and is filled in by the static initializer */
95      protected static final BitSet alphanumChar;
96      protected static final BitSet markChar;
97      protected static final BitSet reservedChar;
98      protected static final BitSet unreservedChar;
99      protected static final BitSet uricChar;
100     protected static final BitSet pcharChar;
101     protected static final BitSet userinfoChar;
102     protected static final BitSet schemeChar;
103     protected static final BitSet hostChar;
104     protected static final BitSet opaqueChar;
105     protected static final BitSet reg_nameChar;
106 
107     /* These are not directly in the spec, but used for escaping and
108      * unescaping parts. The resvd* sets are handed to the unescaping
     * code and the escpd* sets to the escaping code.
109      */
110 
111     /** list of characters which must not be unescaped when unescaping a scheme */
112     public static final BitSet resvdSchemeChar;
113     /** list of characters which must not be unescaped when unescaping a userinfo */
114     public static final BitSet resvdUIChar;
115     /** list of characters which must not be unescaped when unescaping a host */
116     public static final BitSet resvdHostChar;
117     /** list of characters which must not be unescaped when unescaping a path */
118     public static final BitSet resvdPathChar;
119     /** list of characters which must not be unescaped when unescaping a query string */
120     public static final BitSet resvdQueryChar;
121     /** list of characters which must not be escaped when escaping a path */
122     public static final BitSet escpdPathChar;
123     /** list of characters which must not be escaped when escaping a query string */
124     public static final BitSet escpdQueryChar;
125     /** list of characters which must not be escaped when escaping a fragment identifier */
126     public static final BitSet escpdFragChar;
127 
128     static
129     {
130 	defaultPorts.put("http",      new Integer(80));
131 	defaultPorts.put("shttp",     new Integer(80));
132 	defaultPorts.put("http-ng",   new Integer(80));
133 	defaultPorts.put("coffee",    new Integer(80));
134 	defaultPorts.put("https",     new Integer(443));
135 	defaultPorts.put("ftp",       new Integer(21));
136 	defaultPorts.put("telnet",    new Integer(23));
137 	defaultPorts.put("nntp",      new Integer(119));
138 	defaultPorts.put("news",      new Integer(119));
139 	defaultPorts.put("snews",     new Integer(563));
140 	defaultPorts.put("hnews",     new Integer(80));
141 	defaultPorts.put("smtp",      new Integer(25));
142 	defaultPorts.put("gopher",    new Integer(70));
143 	defaultPorts.put("wais",      new Integer(210));
144 	defaultPorts.put("whois",     new Integer(43));
145 	defaultPorts.put("whois++",   new Integer(63));
146 	defaultPorts.put("rwhois",    new Integer(4321));
147 	defaultPorts.put("imap",      new Integer(143));
148 	defaultPorts.put("pop",       new Integer(110));
149 	defaultPorts.put("prospero",  new Integer(1525));
150 	defaultPorts.put("irc",       new Integer(194));
151 	defaultPorts.put("ldap",      new Integer(389));
152 	defaultPorts.put("nfs",       new Integer(2049));
153 	defaultPorts.put("z39.50r",   new Integer(210));
154 	defaultPorts.put("z39.50s",   new Integer(210));
155 	defaultPorts.put("vemmi",     new Integer(575));
156 	defaultPorts.put("videotex",  new Integer(516));
157 	defaultPorts.put("cmp",       new Integer(829));
158 
159 	usesGenericSyntax.put("http", Boolean.TRUE);
160 	usesGenericSyntax.put("https", Boolean.TRUE);
161 	usesGenericSyntax.put("shttp", Boolean.TRUE);
162 	usesGenericSyntax.put("coffee", Boolean.TRUE);
163 	usesGenericSyntax.put("ftp", Boolean.TRUE);
164 	usesGenericSyntax.put("file", Boolean.TRUE);
165 	usesGenericSyntax.put("nntp", Boolean.TRUE);
166 	usesGenericSyntax.put("news", Boolean.TRUE);
167 	usesGenericSyntax.put("snews", Boolean.TRUE);
168 	usesGenericSyntax.put("hnews", Boolean.TRUE);
169 	usesGenericSyntax.put("imap", Boolean.TRUE);
170 	usesGenericSyntax.put("wais", Boolean.TRUE);
171 	usesGenericSyntax.put("nfs", Boolean.TRUE);
172 	usesGenericSyntax.put("sip", Boolean.TRUE);
173 	usesGenericSyntax.put("sips", Boolean.TRUE);
174 	usesGenericSyntax.put("sipt", Boolean.TRUE);
175 	usesGenericSyntax.put("sipu", Boolean.TRUE);
176 	/* Note: schemes which definitely don't use the generic-URI syntax
177 	 * and must therefore never appear in the above list:
178 	 * "urn", "mailto", "sdp", "service", "tv", "gsm-sms", "tel", "fax",
179 	 * "modem", "eid", "cid", "mid", "data", "ldap"
180 	 */
181 
182 	usesSemiGenericSyntax.put("ldap", Boolean.TRUE);
183 	usesSemiGenericSyntax.put("irc", Boolean.TRUE);
184 	usesSemiGenericSyntax.put("gopher", Boolean.TRUE);
185 	usesSemiGenericSyntax.put("videotex", Boolean.TRUE);
186 	usesSemiGenericSyntax.put("rwhois", Boolean.TRUE);
187 	usesSemiGenericSyntax.put("whois++", Boolean.TRUE);
188 	usesSemiGenericSyntax.put("smtp", Boolean.TRUE);
189 	usesSemiGenericSyntax.put("telnet", Boolean.TRUE);
190 	usesSemiGenericSyntax.put("prospero", Boolean.TRUE);
191 	usesSemiGenericSyntax.put("pop", Boolean.TRUE);
192 	usesSemiGenericSyntax.put("vemmi", Boolean.TRUE);
193 	usesSemiGenericSyntax.put("z39.50r", Boolean.TRUE);
194 	usesSemiGenericSyntax.put("z39.50s", Boolean.TRUE);
195 	usesSemiGenericSyntax.put("stream", Boolean.TRUE);
196 	usesSemiGenericSyntax.put("cmp", Boolean.TRUE);
197 
198 	alphanumChar = new BitSet(128);
199 	for (int ch='0'; ch<='9'; ch++)  alphanumChar.set(ch);
200 	for (int ch='A'; ch<='Z'; ch++)  alphanumChar.set(ch);
201 	for (int ch='a'; ch<='z'; ch++)  alphanumChar.set(ch);
202 
203 	markChar = new BitSet(128);
204 	markChar.set('-');
205 	markChar.set('_');
206 	markChar.set('.');
207 	markChar.set('!');
208 	markChar.set('~');
209 	markChar.set('*');
210 	markChar.set('\'');
211 	markChar.set('(');
212 	markChar.set(')');
213 
214 	reservedChar = new BitSet(128);
215 	reservedChar.set(';');
216 	reservedChar.set('/');
217 	reservedChar.set('?');
218 	reservedChar.set(':');
219 	reservedChar.set('@');
220 	reservedChar.set('&');
221 	reservedChar.set('=');
222 	reservedChar.set('+');
223 	reservedChar.set('$');
224 	reservedChar.set(',');
225 
226 	unreservedChar = new BitSet(128);
227 	unreservedChar.or(alphanumChar);
228 	unreservedChar.or(markChar);
229 
230 	uricChar = new BitSet(128);
231 	uricChar.or(unreservedChar);
232 	uricChar.or(reservedChar);
233 	uricChar.set('%');
234 
235 	pcharChar = new BitSet(128);
236 	pcharChar.or(unreservedChar);
237 	pcharChar.set('%');
238 	pcharChar.set(':');
239 	pcharChar.set('@');
240 	pcharChar.set('&');
241 	pcharChar.set('=');
242 	pcharChar.set('+');
243 	pcharChar.set('$');
244 	pcharChar.set(',');
245 
246 	userinfoChar = new BitSet(128);
247 	userinfoChar.or(unreservedChar);
248 	userinfoChar.set('%');
249 	userinfoChar.set(';');
250 	userinfoChar.set(':');
251 	userinfoChar.set('&');
252 	userinfoChar.set('=');
253 	userinfoChar.set('+');
254 	userinfoChar.set('$');
255 	userinfoChar.set(',');
256 
257 	// this actually shouldn't contain uppercase letters...
258 	schemeChar = new BitSet(128);
259 	schemeChar.or(alphanumChar);
260 	schemeChar.set('+');
261 	schemeChar.set('-');
262 	schemeChar.set('.');
263 
264 	opaqueChar = new BitSet(128);
265 	opaqueChar.or(uricChar);
266 
267 	hostChar = new BitSet(128);
268 	hostChar.or(alphanumChar);
269 	hostChar.set('-');
270 	hostChar.set('.');
271 
272 	reg_nameChar = new BitSet(128);
273 	reg_nameChar.or(unreservedChar);
274 	reg_nameChar.set('$');
275 	reg_nameChar.set(',');
276 	reg_nameChar.set(';');
277 	reg_nameChar.set(':');
278 	reg_nameChar.set('@');
279 	reg_nameChar.set('&');
280 	reg_nameChar.set('=');
281 	reg_nameChar.set('+');
282 
283 	resvdSchemeChar = new BitSet(128);
284 	resvdSchemeChar.set(':');
285 
286 	resvdUIChar = new BitSet(128);
287 	resvdUIChar.set('@');
288 
289 	resvdHostChar = new BitSet(128);
290 	resvdHostChar.set(':');
291 	resvdHostChar.set('/');
292 	resvdHostChar.set('?');
293 	resvdHostChar.set('#');
294 
295 	resvdPathChar = new BitSet(128);
296 	resvdPathChar.set('/');
297 	resvdPathChar.set(';');
298 	resvdPathChar.set('?');
299 	resvdPathChar.set('#');
300 
301 	resvdQueryChar = new BitSet(128);
302 	resvdQueryChar.set('#');
303 
304 	escpdPathChar = new BitSet(128);
305 	escpdPathChar.or(pcharChar);
306 	escpdPathChar.set('%');
307 	escpdPathChar.set('/');
308 	escpdPathChar.set(';');
309 
310 	escpdQueryChar = new BitSet(128);
311 	escpdQueryChar.or(uricChar);
312 	escpdQueryChar.clear('#');
313 
314 	escpdFragChar = new BitSet(128);
315 	escpdFragChar.or(uricChar);
316     }
317 
318 
319     /* our uri in pieces */
320 
    /* the possible values of the type field: the kind of syntax according
     * to which the scheme specific part of this URI is treated */
321     protected static final int OPAQUE       = 0;
322     protected static final int SEMI_GENERIC = 1;
323     protected static final int GENERIC      = 2;
324 
    /* type holds one of OPAQUE, SEMI_GENERIC or GENERIC. The constructors
     * run the textual parts through escape() before storing them. A port
     * of -1 means that no port was given or that the given port is the
     * default port of the scheme. */
325     protected int     type;
326     protected String  scheme;
327     protected String  opaque;
328     protected String  userinfo;
329     protected String  host;
330     protected int     port = -1;
331     protected String  path;
332     protected String  query;
333     protected String  fragment;
334 
335 
336     /* cache the java.net.URL; set on the first successful toURL() call */
337 
338     protected URL     url = null;
339 
340 
341     // Constructors
342 
343     /**
344      * Constructs a URI from the given string representation. The string
345      * must be an absolute URI.
346      *
347      * @param uri a String containing an absolute URI
348      * @exception ParseException if no scheme can be found or a specified
349      *                           port cannot be parsed as a number
350      */
351     public URI(String uri)  throws ParseException
352     {
353 	this((URI) null, uri);
354     }
355 
356 
357     /**
358      * Constructs a URI from the given string representation, relative to
359      * the given base URI.
360      *
361      * @param base    the base URI, relative to which <var>rel_uri</var>
362      *                is to be parsed
363      * @param rel_uri a String containing a relative or absolute URI
364      * @exception ParseException if <var>base</var> is null and
365      *                           <var>rel_uri</var> is not an absolute URI, or
366      *                           if <var>base</var> is not null and the scheme
367      *                           is not known to use the generic syntax, or
368      *                           if a given port cannot be parsed as a number
369      */
370     public URI(URI base, String rel_uri)  throws ParseException
371     {
372 	/* Parsing is done according to the following RE:
373 	 *
374 	 *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
375 	 *   12            3  4          5       6  7        8 9
376 	 *
377 	 * 2: scheme
378 	 * 4: authority
379 	 * 5: path
380 	 * 7: query
381 	 * 9: fragment
382 	 */
383 
384 	char[] uri = rel_uri.toCharArray();
385 	int pos = 0, idx, len = uri.length;
386 
387 
388 	// trim()
389 
390 	while (pos < len  &&  Character.isWhitespace(uri[pos]))    pos++;
391 	while (len > 0    &&  Character.isWhitespace(uri[len-1]))  len--;
392 
393 
394 	// strip the special "url" or "uri" scheme
395 
396 	if (pos < len-3  &&  uri[pos+3] == ':'  &&
397 	    (uri[pos+0] == 'u'  ||  uri[pos+0] == 'U')  &&
398 	    (uri[pos+1] == 'r'  ||  uri[pos+1] == 'R')  &&
399 	    (uri[pos+2] == 'i'  ||  uri[pos+2] == 'I'  ||
400 	     uri[pos+2] == 'l'  ||  uri[pos+2] == 'L'))
401 	    pos += 4;
402 
403 
404 	// get scheme: (([^:/?#]+):)?
405 
406 	idx = pos;
407 	while (idx < len  &&  uri[idx] != ':'  &&  uri[idx] != '/'  &&
408 	       uri[idx] != '?'  &&  uri[idx] != '#')
409 	    idx++;
410 	if (idx < len  &&  uri[idx] == ':')
411 	{
412 	    scheme = rel_uri.substring(pos, idx).trim().toLowerCase();
413 	    pos = idx + 1;
414 	}
415 
416 
417 	// check and resolve scheme
418 
419 	String final_scheme = scheme;
420 	if (scheme == null)
421 	{
422 	    if (base == null)
423 		throw new ParseException("No scheme found");
424 	    final_scheme = base.scheme;
425 	}
426 
427 
428 	// check for generic vs. opaque
429 
430 	type = usesGenericSyntax(final_scheme) ? GENERIC :
431 	       usesSemiGenericSyntax(final_scheme) ? SEMI_GENERIC : OPAQUE;
432 	if (type == OPAQUE)
433 	{
434 	    if (base != null  &&  scheme == null)
435 		throw new ParseException("Can't resolve relative URI for " +
436 					 "scheme " + final_scheme);
437 
438 	    opaque = escape(rel_uri.substring(pos), opaqueChar, true);
439 	    if (opaque.length() > 0  &&  opaque.charAt(0) == '/')
440 		opaque = "%2F" + opaque.substring(1);
441 	    return;
442 	}
443 
444 
445 	// get authority: (//([^/?#]*))?
446 
447 	if (pos+1 < len  &&  uri[pos] == '/'  &&  uri[pos+1] == '/')
448 	{
449 	    pos += 2;
450 	    idx = pos;
451 	    while (idx < len  &&  uri[idx] != '/'  &&  uri[idx] != '?'  &&
452 		   uri[idx] != '#')
453 		idx++;
454 
455 	    parse_authority(rel_uri.substring(pos, idx), final_scheme);
456 	    pos = idx;
457 	}
458 
459 
460 	// handle semi-generic and generic uri's
461 	
462 	if (type == SEMI_GENERIC)
463 	{
464 	    path = escape(rel_uri.substring(pos), uricChar, true);
465 	    if (path.length() > 0  &&  path.charAt(0) != '/')
466 		path = '/' + path;
467 	}
468 	else
469 	{
470 	    // get path: ([^?#]*)
471 
472 	    idx = pos;
473 	    while (idx < len  &&  uri[idx] != '?'  &&  uri[idx] != '#')
474 		idx++;
475 	    path = escape(rel_uri.substring(pos, idx), escpdPathChar, true);
476 	    pos = idx;
477 
478 
479 	    // get query: (\?([^#]*))?
480 
481 	    if (pos < len  &&  uri[pos] == '?')
482 	    {
483 		pos += 1;
484 		idx = pos;
485 		while (idx < len  &&  uri[idx] != '#')
486 		    idx++;
487 		this.query = escape(rel_uri.substring(pos, idx), escpdQueryChar, true);
488 		pos = idx;
489 	    }
490 
491 
492 	    // get fragment: (#(.*))?
493 
494 	    if (pos < len  &&  uri[pos] == '#')
495 		this.fragment = escape(rel_uri.substring(pos+1, len), escpdFragChar, true);
496 	}
497 
498 
499 	// now resolve the parts relative to the base
500 
501 	if (base != null)
502 	{
503 	    if (scheme != null  &&			// resolve scheme
504 		!(scheme.equals(base.scheme)  &&  ENABLE_BACKWARDS_COMPATIBILITY))
505 	      return;
506 	    scheme = base.scheme;
507 
508 	    if (host != null)				// resolve authority
509 		return;
510 	    userinfo = base.userinfo;
511 	    host     = base.host;
512 	    port     = base.port;
513 
514 	    if (type == SEMI_GENERIC)			// can't resolve relative paths
515 		return;
516 
517 	    if (path.length() == 0  &&  query == null)	// current doc
518 	    {
519 		path  = base.path;
520 		query = base.query;
521 		return;
522 	    }
523 
524 	    if (path.length() == 0  ||  path.charAt(0) != '/')	// relative path
525 	    {
526 		idx = (base.path != null) ? base.path.lastIndexOf('/') : -1;
527 		if (idx < 0)
528 		    path = '/' + path;
529 		else
530 		    path = base.path.substring(0, idx+1) + path;
531 
532 		path = canonicalizePath(path);
533 	    }
534 	}
535     }
536 
537     /**
538      * Remove all "/../" and "/./" from path, where possible. Leading "/../"'s
539      * are not removed.
540      *
541      * @param path the path to canonicalize
542      * @return the canonicalized path
543      */
544     public static String canonicalizePath(String path)
545     {
546 	int idx, len = path.length();
547 	if (!((idx = path.indexOf("/.")) != -1  &&
548 	      (idx == len-2  ||  path.charAt(idx+2) == '/'  ||
549 	       (path.charAt(idx+2) == '.'  &&
550 		(idx == len-3  ||  path.charAt(idx+3) == '/')) )))
551 	    return path;
552 
553 	char[] p = new char[path.length()];		// clean path
554 	path.getChars(0, p.length, p, 0);
555 
556 	int beg = 0;
557 	for (idx=1; idx<len; idx++)
558 	{
559 	    if (p[idx] == '.'  &&  p[idx-1] == '/')
560 	    {
561 		int end;
562 		if (idx == len-1)		// trailing "/."
563 		{
564 		    end  = idx;
565 		    idx += 1;
566 		}
567 		else if (p[idx+1] == '/')	// "/./"
568 		{
569 		    end  = idx - 1;
570 		    idx += 1;
571 		}
572 		else if (p[idx+1] == '.'  &&
573 			 (idx == len-2  ||  p[idx+2] == '/')) // "/../"
574 		{
575 		    if (idx < beg + 2)	// keep from backing up too much
576 		    {
577 			beg = idx + 2;
578 			continue;
579 		    }
580 
581 		    end  = idx - 2;
582 		    while (end > beg  &&  p[end] != '/')  end--;
583 		    if (p[end] != '/')  continue;
584 		    if (idx == len-2) end++;
585 		    idx += 2;
586 		}
587 		else
588 		    continue;
589 		System.arraycopy(p, idx, p, end, len-idx);
590 		len -= idx - end;
591 		idx = end;
592 	    }
593 	}
594 
595 	return new String(p, 0, len);
596     }
597 
598     /**
599      * Parse the authority specific part
600      */
601     private void parse_authority(String authority, String scheme)
602 	    throws ParseException
603     {
604 	/* The authority is further parsed according to:
605 	 *
606 	 *  ^(([^@]*)@?)(\[[^]]*\]|[^:]*)?(:(.*))?
607 	 *   12         3       4 5
608 	 *
609 	 * 2: userinfo
610 	 * 3: host
611 	 * 5: port
612 	 */
613 
614 	char[] uri = authority.toCharArray();
615 	int pos = 0, idx, len = uri.length;
616 
617 
618 	// get userinfo: (([^@]*)@?)
619 
620 	idx = pos;
621 	while (idx < len  &&  uri[idx] != '@')
622 	    idx++;
623 	if (idx < len  &&  uri[idx] == '@')
624 	{
625 	    this.userinfo = escape(authority.substring(pos, idx), userinfoChar, true);
626 	    pos = idx + 1;
627 	}
628 
629 
630 	// get host: (\[[^]]*\]|[^:]*)?
631 
632 	idx = pos;
633 	if (idx < len  &&  uri[idx] == '[')	// IPv6
634 	{
635 	    while (idx < len  &&  uri[idx] != ']')
636 		idx++;
637 	    if (idx == len)
638 		throw new ParseException("No closing ']' found for opening '['"+
639 					 " at position " + pos +
640 					 " in authority `" + authority + "'");
641 	    this.host = authority.substring(pos+1, idx);
642 	    idx++;
643 	}
644 	else
645 	{
646 	    while (idx < len  &&  uri[idx] != ':')
647 		idx++;
648 	    this.host = escape(authority.substring(pos, idx), uricChar, true);
649 	}
650 	pos = idx;
651 
652 
653 	// get port: (:(.*))?
654 
655 	if (pos < (len-1)  &&  uri[pos] == ':')
656 	{
657 	    int p;
658 	    try
659 	    {
660 		p = Integer.parseInt(
661 			    unescape(authority.substring(pos+1, len), null));
662 		if (p < 0)  throw new NumberFormatException();
663 	    }
664 	    catch (NumberFormatException e)
665 	    {
666 		throw new ParseException(authority.substring(pos+1, len) +
667 					 " is an invalid port number");
668 	    }
669 	    if (p == defaultPort(scheme))
670 		this.port = -1;
671 	    else
672 		this.port = p;
673 	}
674     }
675 
676 
677     /**
678      * Construct a URI from the given URL.
679      *
680      * @param url the URL
681      * @exception ParseException if <code>url.toExternalForm()</code> generates
682      *                           an invalid string representation
683      */
684     public URI(URL url)  throws ParseException
685     {
686 	this((URI) null, url.toExternalForm());
687     }
688 
689 
690     /**
691      * Constructs a URI from the given parts, using the default port for
692      * this scheme (if known). The parts must be in unescaped form.
693      *
694      * @param scheme the scheme (sometimes known as protocol)
695      * @param host   the host
696      * @param path   the path part
697      * @exception ParseException if <var>scheme</var> is null
698      */
699     public URI(String scheme, String host, String path)  throws ParseException
700     {
701 	this(scheme, null, host, -1, path, null, null);
702     }
703 
704 
705     /**
706      * Constructs a URI from the given parts. The parts must be in unescaped
707      * form.
708      *
709      * @param scheme the scheme (sometimes known as protocol)
710      * @param host   the host
711      * @param port   the port
712      * @param path   the path part
713      * @exception ParseException if <var>scheme</var> is null
714      */
715     public URI(String scheme, String host, int port, String path)
716 	    throws ParseException
717     {
718 	this(scheme, null, host, port, path, null, null);
719     }
720 
721 
722     /**
723      * Constructs a URI from the given parts. Any part except for the
724      * the scheme may be null. The parts must be in unescaped form.
725      *
726      * @param scheme   the scheme (sometimes known as protocol)
727      * @param userinfo the userinfo
728      * @param host     the host
729      * @param port     the port
730      * @param path     the path part
731      * @param query    the query string
732      * @param fragment the fragment identifier
733      * @exception ParseException if <var>scheme</var> is null
734      */
735     public URI(String scheme, String userinfo, String host, int port,
736 	       String path, String query, String fragment)
737 	    throws ParseException
738     {
739 	if (scheme == null)
740 	    throw new ParseException("missing scheme");
741 	this.scheme = escape(scheme.trim().toLowerCase(), schemeChar, true);
742 	if (userinfo != null)
743 	    this.userinfo = escape(userinfo.trim(), userinfoChar, true);
744 	if (host != null)
745 	{
746 	    host = host.trim();
747 	    this.host = isIPV6Addr(host) ? host : escape(host, hostChar, true);
748 	}
749 	if (port != defaultPort(scheme))
750 	    this.port     = port;
751 	if (path != null)
752 	    this.path     = escape(path.trim(), escpdPathChar, true);	// ???
753 	if (query != null)
754 	    this.query    = escape(query.trim(), escpdQueryChar, true);
755 	if (fragment != null)
756 	    this.fragment = escape(fragment.trim(), escpdFragChar, true);
757 
758 	type = usesGenericSyntax(scheme) ? GENERIC : SEMI_GENERIC;
759     }
760 
761     private static final boolean isIPV6Addr(String host)
762     {
763 	if (host.indexOf(':') < 0)
764 	    return false;
765 
766 	for (int idx=0; idx<host.length(); idx++)
767 	{
768 	    char ch = host.charAt(idx);
769 	    if ((ch < '0'  ||  ch > '9')  &&  ch != ':')
770 		return false;
771 	}
772 
773 	return true;
774     }
775 
776 
777     /**
778      * Constructs an opaque URI from the given parts.
779      *
780      * @param scheme the scheme (sometimes known as protocol)
781      * @param opaque the opaque part
782      * @exception ParseException if <var>scheme</var> is null
783      */
784     public URI(String scheme, String opaque)
785 	    throws ParseException
786     {
787 	if (scheme == null)
788 	    throw new ParseException("missing scheme");
789 	this.scheme = escape(scheme.trim().toLowerCase(), schemeChar, true);
790 	this.opaque = escape(opaque, opaqueChar, true);
791 
792 	type = OPAQUE;
793     }
794 
795 
796     // Class Methods
797 
798     /**
799      * @return true if the scheme should be parsed according to the
800      *         generic-URI syntax
801      */
802     public static boolean usesGenericSyntax(String scheme)
803     {
804 	return usesGenericSyntax.containsKey(scheme.trim().toLowerCase());
805     }
806 
807 
808     /**
809      * @return true if the scheme should be parsed according to a
810      *         semi-generic-URI syntax &lt;scheme&tgt;://&lt;hostport&gt;/&lt;opaque&gt;
811      */
812     public static boolean usesSemiGenericSyntax(String scheme)
813     {
814 	return usesSemiGenericSyntax.containsKey(scheme.trim().toLowerCase());
815     }
816 
817 
818     /**
819      * Return the default port used by a given protocol.
820      *
821      * @param protocol the protocol
822      * @return the port number, or 0 if unknown
823      */
824     public final static int defaultPort(String protocol)
825     {
826 	Integer port = (Integer) defaultPorts.get(protocol.trim().toLowerCase());
827 	return (port != null) ? port.intValue() : 0;
828     }
829 
830 
831     // Instance Methods
832 
833     /**
834      * @return the scheme (often also referred to as protocol)
835      */
836     public String getScheme()
837     {
838 	return scheme;
839     }
840 
841 
842     /**
843      * @return the opaque part, or null if this URI is generic
844      */
845     public String getOpaque()
846     {
847 	return opaque;
848     }
849 
850 
851     /**
852      * @return the host
853      */
854     public String getHost()
855     {
856 	return host;
857     }
858 
859 
860     /**
861      * @return the port, or -1 if it's the default port, or 0 if unknown
862      */
863     public int getPort()
864     {
865 	return port;
866     }
867 
868 
869     /**
870      * @return the user info
871      */
872     public String getUserinfo()
873     {
874 	return userinfo;
875     }
876 
877 
878     /**
879      * @return the path
880      */
881     public String getPath()
882     {
883 	return path;
884     }
885 
886 
887     /**
888      * @return the query string
889      */
890     public String getQueryString()
891     {
892 	return query;
893     }
894 
895 
896     /**
897      * @return the path and query
898      */
899     public String getPathAndQuery()
900     {
901 	if (query == null)
902 	    return path;
903 	if (path == null)
904 	    return "?" + query;
905 	return path + "?" + query;
906     }
907 
908 
909     /**
910      * @return the fragment
911      */
912     public String getFragment()
913     {
914 	return fragment;
915     }
916 
917 
918     /**
919      * Does the scheme specific part of this URI use the generic-URI syntax?
920      *
921      * <P>In general URI are split into two categories: opaque-URI and
922      * generic-URI. The generic-URI syntax is the syntax most are familiar
923      * with from URLs such as ftp- and http-URLs, which is roughly:
924      * <PRE>
925      * generic-URI = scheme ":" [ "//" server ] [ "/" ] [ path_segments ] [ "?" query ]
926      * </PRE>
927      * (see RFC-2396 for exact syntax). Only URLs using the generic-URI syntax
928      * can be used to create and resolve relative URIs.
929      *
930      * <P>Whether a given scheme is parsed according to the generic-URI
931      * syntax or wether it is treated as opaque is determined by an internal
932      * table of URI schemes.
933      *
934      * @see <A HREF="http://www.ics.uci.edu/pub/ietf/uri/rfc2396.txt">rfc-2396</A>
935      */
936     public boolean isGenericURI()
937     {
938 	return (type == GENERIC);
939     }
940 
941     /**
942      * Does the scheme specific part of this URI use the semi-generic-URI syntax?
943      *
944      * <P>Many schemes which don't follow the full generic syntax actually
945      * follow a reduced form where the path part is treated is opaque. This
946      * is used for example by ldap, smtp, pop, etc, and is roughly
947      * <PRE>
948      * generic-URI = scheme ":" [ "//" server ] [ "/" [ opaque_path ] ]
949      * </PRE>
950      * I.e. parsing is identical to the generic-syntax, except that the path
951      * part is not further parsed. URLs using the semi-generic-URI syntax can
952      * be used to create and resolve relative URIs with the restriction that
953      * all paths are treated as absolute.
954      *
955      * <P>Whether a given scheme is parsed according to the semi-generic-URI
956      * syntax is determined by an internal table of URI schemes.
957      *
958      * @see #isGenericURI()
959      */
960     public boolean isSemiGenericURI()
961     {
962 	return (type == SEMI_GENERIC);
963     }
964 
965 
966     /**
967      * Will try to create a java.net.URL object from this URI.
968      *
969      * @return the URL
970      * @exception MalformedURLException if no handler is available for the
971      *            scheme
972      */
973     public URL toURL()  throws MalformedURLException
974     {
975 	if (url != null)  return url;
976 
977 	if (opaque != null)
978 	    return (url = new URL(scheme + ":" + opaque));
979 
980 	String hostinfo;
981 	if (userinfo != null  &&  host != null)
982 	    hostinfo = userinfo + "@" + host;
983 	else if (userinfo != null)
984 	    hostinfo = userinfo + "@";
985 	else
986 	    hostinfo = host;
987 
988 	StringBuffer file = new StringBuffer(100);
989 	assemblePath(file, true, true, false);
990 
991 	url = new URL(scheme, hostinfo, port, file.toString());
992 	return url;
993     }
994 
995 
996     private final void assemblePath(StringBuffer buf, boolean printEmpty,
997 				    boolean incFragment, boolean unescape)
998     {
999 	if ((path == null  ||  path.length() == 0)  &&  printEmpty)
1000 	    buf.append('/');
1001 
1002 	if (path != null)
1003 	    buf.append(unescape ? unescapeNoPE(path, resvdPathChar) : path);
1004 
1005 	if (query != null)
1006 	{
1007 	    buf.append('?');
1008 	    buf.append(unescape ? unescapeNoPE(query, resvdQueryChar) : query);
1009 	}
1010 
1011 	if (fragment != null  &&  incFragment)
1012 	{
1013 	    buf.append('#');
1014 	    buf.append(unescape ? unescapeNoPE(fragment, null) : fragment);
1015 	}
1016     }
1017 
1018 
1019     private final String stringify(boolean unescape)
1020     {
1021 	StringBuffer uri = new StringBuffer(100);
1022 
1023 	if (scheme != null)
1024 	{
1025 	    uri.append(unescape ? unescapeNoPE(scheme, resvdSchemeChar) : scheme);
1026 	    uri.append(':');
1027 	}
1028 
1029 	if (opaque != null)		// it's an opaque-uri
1030 	{
1031 	    uri.append(unescape ? unescapeNoPE(opaque, null) : opaque);
1032 	    return uri.toString();
1033 	}
1034 
1035 	if (userinfo != null  ||  host != null  ||  port != -1)
1036 	    uri.append("//");
1037 
1038 	if (userinfo != null)
1039 	{
1040 	    uri.append(unescape ? unescapeNoPE(userinfo, resvdUIChar) : userinfo);
1041 	    uri.append('@');
1042 	}
1043 
1044 	if (host != null)
1045 	{
1046 	    if (host.indexOf(':') < 0)
1047 		uri.append(unescape ? unescapeNoPE(host, resvdHostChar) : host);
1048 	    else
1049 		uri.append('[').append(host).append(']');
1050 	}
1051 
1052 	if (port != -1)
1053 	{
1054 	    uri.append(':');
1055 	    uri.append(port);
1056 	}
1057 
1058 	assemblePath(uri, false, true, unescape);
1059 
1060 	return uri.toString();
1061     }
1062 
1063 
1064     /**
1065      * @return a string representation of this URI suitable for use in
1066      *         links, headers, etc.
1067      */
1068     public String toExternalForm()
1069     {
1070 	return stringify(false);
1071     }
1072 
1073 
1074     /**
1075      * Return the URI as string. This differs from toExternalForm() in that
1076      * all elements are unescaped before assembly. This is <em>not suitable</em>
1077      * for passing to other apps or in header fields and such, and is usually
1078      * not what you want.
1079      *
1080      * @return the URI as a string
1081      * @see #toExternalForm()
1082      */
1083     public String toString()
1084     {
1085 	return stringify(true);
1086     }
1087 
1088 
1089     /**
1090      * @return true if <var>other</var> is either a URI or URL and it
1091      *         matches the current URI
1092      */
1093     public boolean equals(Object other)
1094     {
1095 	if (other instanceof URI)
1096 	{
1097 	    URI o = (URI) other;
1098 	    return (scheme.equals(o.scheme)  &&
1099 		    (
1100 		     type == OPAQUE  &&  areEqual(opaque, o.opaque)  ||
1101 
1102 		     type == SEMI_GENERIC  &&
1103 		      areEqual(userinfo, o.userinfo)  &&
1104 		      areEqualIC(host, o.host)  &&
1105 		      port == o.port  &&
1106 		      areEqual(path, o.path)  ||
1107 
1108 		     type == GENERIC  &&
1109 		      areEqual(userinfo, o.userinfo)  &&
1110 		      areEqualIC(host, o.host)  &&
1111 		      port == o.port  &&
1112 		      pathsEqual(path, o.path)  &&
1113 		      areEqual(query, o.query)  &&
1114 		      areEqual(fragment, o.fragment)
1115 		    ));
1116 	}
1117 
1118 	if (other instanceof URL)
1119 	{
1120 	    URL o = (URL) other;
1121 	    String h, f;
1122 
1123 	    if (userinfo != null)
1124 		h = userinfo + "@" + host;
1125 	    else
1126 		h = host;
1127 
1128 	    f = getPathAndQuery();
1129 
1130 	    return (scheme.equalsIgnoreCase(o.getProtocol())  &&
1131 		    (type == OPAQUE  &&  opaque.equals(o.getFile())  ||
1132 
1133 		     type == SEMI_GENERIC  &&
1134 		       areEqualIC(h, o.getHost())  &&
1135 		       (port == o.getPort()  ||
1136 			o.getPort() == defaultPort(scheme))  &&
1137 		       areEqual(f, o.getFile())  ||
1138 
1139 		     type == GENERIC  &&
1140 		       areEqualIC(h, o.getHost())  &&
1141 		       (port == o.getPort()  ||
1142 			o.getPort() == defaultPort(scheme))  &&
1143 		       pathsEqual(f, o.getFile())  &&
1144 		       areEqual(fragment, o.getRef())
1145 		    )
1146 		   );
1147 	}
1148 
1149 	return false;
1150     }
1151 
1152     private static final boolean areEqual(String s1, String s2)
1153     {
1154 	return (s1 == null  &&  s2 == null  ||
1155 		s1 != null  &&  s2 != null  &&
1156 		  (s1.equals(s2)  ||
1157 		   unescapeNoPE(s1, null).equals(unescapeNoPE(s2, null)))
1158 	       );
1159     }
1160 
1161     private static final boolean areEqualIC(String s1, String s2)
1162     {
1163 	return (s1 == null  &&  s2 == null  ||
1164 		s1 != null  &&  s2 != null  &&
1165 		  (s1.equalsIgnoreCase(s2)  ||
1166 		   unescapeNoPE(s1, null).equalsIgnoreCase(unescapeNoPE(s2, null)))
1167 	       );
1168     }
1169 
1170     private static final boolean pathsEqual(String p1, String p2)
1171     {
1172 	if (p1 == null  &&  p2 == null)
1173 	    return true;
1174 	if (p1 == null  ||  p2 == null)
1175 	    return false;
1176 	if (p1.equals(p2))
1177 	    return true;
1178 
1179 	// ok, so it wasn't that simple. Let's split into parts and compare
1180 	// unescaped.
1181 	int pos1 = 0, end1 = p1.length(), pos2 = 0, end2 = p2.length();
1182 	while (pos1 < end1  &&  pos2 < end2)
1183 	{
1184 	    int start1 = pos1, start2 = pos2;
1185 
1186 	    char ch;
1187 	    while (pos1 < end1  &&  (ch = p1.charAt(pos1)) != '/'  &&  ch != ';')
1188 		pos1++;
1189 	    while (pos2 < end2  &&  (ch = p2.charAt(pos2)) != '/'  &&  ch != ';')
1190 		pos2++;
1191 
1192 	    if (pos1 == end1  &&  pos2 < end2  ||
1193 		pos2 == end2  &&  pos1 < end1  ||
1194 		pos1 < end1  &&  pos2 < end2  &&  p1.charAt(pos1) != p2.charAt(pos2))
1195 		return false;
1196 
1197 	    if ((!p1.regionMatches(start1, p2, start2, pos1-start1)  ||  (pos1-start1) != (pos2-start2))  &&
1198 		!unescapeNoPE(p1.substring(start1, pos1), null).equals(unescapeNoPE(p2.substring(start2, pos2), null)))
1199 		return false;
1200 
1201 	    pos1++;
1202 	    pos2++;
1203 	}
1204 
1205 	return (pos1 == end1  &&  pos2 == end2);
1206     }
1207 
1208     private int hashCode = -1;
1209 
1210     /**
1211      * The hash code is calculated over scheme, host, path, and query.
1212      *
1213      * @return the hash code
1214      */
1215     public int hashCode()
1216     {
1217 	if (hashCode == -1)
1218 	    hashCode = (scheme != null ? unescapeNoPE(scheme, null).hashCode() : 0) + 
1219 		       (type == OPAQUE ?
1220 			  (opaque != null ? unescapeNoPE(opaque, null).hashCode() : 0) * 7
1221 			: (host != null ? unescapeNoPE(host, null).toLowerCase().hashCode() : 0) * 7 +
1222 			  (path != null ? unescapeNoPE(path, null).hashCode() : 0) * 13 +
1223 			  (query != null ? unescapeNoPE(query, null).hashCode() : 0) * 17);
1224 
1225 	return hashCode;
1226     }
1227 
1228 
1229     /**
1230      * Escape any character not in the given character class. Characters
1231      * greater 255 are always escaped according to ??? .
1232      *
1233      * @param elem         the string to escape
1234      * @param allowed_char the BitSet of all allowed characters
1235      * @param utf8         if true, will first UTF-8 encode unallowed characters
1236      * @return the string with all characters not in allowed_char
1237      *         escaped
1238      */
1239     public static String escape(String elem, BitSet allowed_char, boolean utf8)
1240     {
1241 	return new String(escape(elem.toCharArray(), allowed_char, utf8));
1242     }
1243 
1244     /**
1245      * Escape any character not in the given character class. Characters
1246      * greater 255 are always escaped according to ??? .
1247      *
1248      * @param elem         the array of characters to escape
1249      * @param allowed_char the BitSet of all allowed characters
1250      * @param utf8         if true, will first UTF-8 encode unallowed characters
1251      * @return the elem array with all characters not in allowed_char
1252      *         escaped
1253      */
1254     public static char[] escape(char[] elem, BitSet allowed_char, boolean utf8)
1255     {
1256 	int cnt=0;
1257 	for (int idx=0; idx<elem.length; idx++)
1258 	{
1259 	    if (!allowed_char.get(elem[idx]))
1260 	    {
1261 		cnt += 2;
1262 		if (utf8)
1263 		{
1264 		    if (elem[idx] >= 0x0080)
1265 			cnt += 3;
1266 		    if (elem[idx] >= 0x00800)
1267 			cnt += 3;
1268 		    if ((elem[idx] & 0xFC00) == 0xD800  &&  idx+1 < elem.length  &&
1269 			(elem[idx+1] & 0xFC00) == 0xDC00)
1270 		      cnt -= 6;
1271 		}
1272 	    }
1273 	}
1274 
1275 	if (cnt == 0)  return elem;
1276 
1277 	char[] tmp = new char[elem.length + cnt];
1278 	for (int idx=0, pos=0; idx<elem.length; idx++)
1279 	{
1280 	    char c = elem[idx];
1281 	    if (allowed_char.get(c))
1282 		tmp[pos++] = c;
1283 	    else if (utf8)
1284 	    {
1285 		/* We're UTF-8 encoding the chars first, as recommended in
1286 		 * the HTML 4.0 specification:
1287 		 * http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
1288 		 * Note that this doesn't change things for ASCII chars
1289 		 */
1290 		if (c <= 0x007F)
1291 		{
1292 		    pos = enc(tmp, pos, c);
1293 		}
1294 		else if (c <= 0x07FF)
1295 		{
1296 		    pos = enc(tmp, pos, 0xC0 | ((c >>  6) & 0x1F));
1297 		    pos = enc(tmp, pos, 0x80 | ((c >>  0) & 0x3F));
1298 		}
1299 		else if (!((c & 0xFC00) == 0xD800  &&  idx+1 < elem.length  &&
1300 			     (elem[idx+1] & 0xFC00) == 0xDC00))
1301 		{
1302 		    pos = enc(tmp, pos, 0xE0 | ((c >> 12) & 0x0F));
1303 		    pos = enc(tmp, pos, 0x80 | ((c >>  6) & 0x3F));
1304 		    pos = enc(tmp, pos, 0x80 | ((c >>  0) & 0x3F));
1305 		}
1306 		else
1307 		{
1308 		    int ch = ((c & 0x03FF) << 10) | (elem[++idx] & 0x03FF);
1309 		    ch += 0x10000;
1310 		    pos = enc(tmp, pos, 0xF0 | ((ch >> 18) & 0x07));
1311 		    pos = enc(tmp, pos, 0x80 | ((ch >> 12) & 0x3F));
1312 		    pos = enc(tmp, pos, 0x80 | ((ch >>  6) & 0x3F));
1313 		    pos = enc(tmp, pos, 0x80 | ((ch >>  0) & 0x3F));
1314 		}
1315 	    }
1316 	    else
1317 		pos = enc(tmp, pos, c);
1318 	}
1319 
1320 	return tmp;
1321     }
1322 
    /** upper-case hex digits, indexed by nibble value; used by enc() */
    private static final char[] hex =
            {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
1325 
1326     private static final int enc(char[] out, int pos, int c)
1327     {
1328 	out[pos++] = '%';
1329 	out[pos++] = hex[(c >> 4) & 0xf];
1330 	out[pos++] = hex[c & 0xf];
1331 	return pos;
1332     }
1333 
1334     /**
1335      * Unescape escaped characters (i.e. %xx) except reserved ones.
1336      *
1337      * @param str      the string to unescape
1338      * @param reserved the characters which may not be unescaped, or null
1339      * @return the unescaped string
1340      * @exception ParseException if the two digits following a `%' are
1341      *            not a valid hex number
1342      */
1343     public static final String unescape(String str, BitSet reserved)
1344 	    throws ParseException
1345     {
1346 	if (str == null  ||  str.indexOf('%') == -1)
1347 	    return str;  				// an optimization
1348 
1349 	char[] buf = str.toCharArray();
1350 	char[] res = new char[buf.length];
1351 
1352 	char[] utf = new char[4];
1353 	int utf_idx = 0, utf_len = -1;
1354 	int didx = 0;
1355 	for (int sidx=0; sidx<buf.length; sidx++)
1356 	{
1357 	    if (buf[sidx] == '%')
1358 	    {
1359 		int ch;
1360                 try
1361                 {
1362 		    if (sidx + 3 > buf.length)
1363 			throw new NumberFormatException();
1364 		    ch = Integer.parseInt(str.substring(sidx+1,sidx+3), 16);
1365 		    if (ch < 0)
1366 			throw new NumberFormatException();
1367 		    sidx += 2;
1368                 }
1369                 catch (NumberFormatException e)
1370                 {
1371 		    /* Hmm, people not reading specs again, so we just
1372 		     * ignore it...
1373                     throw new ParseException(str.substring(sidx,sidx+3) +
1374                                             " is an invalid code");
1375 		    */
1376 		    ch = buf[sidx];
1377                 }
1378 
1379 		// check if we're working on a utf-char
1380 		if (utf_len > 0)
1381 		{
1382 		    if ((ch & 0xC0) != 0x80)	// oops, we misinterpreted
1383 		    {
1384 			didx = copyBuf(utf, utf_idx, ch, res, didx, reserved, false);
1385 			utf_len = -1;
1386 		    }
1387 		    else if (utf_idx == utf_len - 1)	// end-of-char
1388 		    {
1389 			if ((utf[0] & 0xE0) == 0xC0)
1390 			    ch = (utf[0] & 0x1F) <<  6 |
1391 				 (ch & 0x3F);
1392 			else if ((utf[0] & 0xF0) == 0xE0)
1393 			    ch = (utf[0] & 0x0F) << 12 |
1394 				 (utf[1] & 0x3F) <<  6 |
1395 				 (ch & 0x3F);
1396 			else
1397 			    ch = (utf[0] & 0x07) << 18 |
1398 				 (utf[1] & 0x3F) << 12 |
1399 				 (utf[2] & 0x3F) <<  6 |
1400 				 (ch & 0x3F);
1401 			if (reserved != null  &&  reserved.get(ch))
1402 			    didx = copyBuf(utf, utf_idx, ch, res, didx, null, true);
1403 			else if (utf_len < 4)
1404 			    res[didx++] = (char) ch;
1405 			else
1406 			{
1407 			    ch -= 0x10000;
1408 			    res[didx++] = (char) ((ch >> 10)    | 0xD800);
1409 			    res[didx++] = (char) ((ch & 0x03FF) | 0xDC00);
1410 			}
1411 			utf_len = -1;
1412 		    }
1413 		    else				// continue
1414 			utf[utf_idx++] = (char) ch;
1415 		}
1416 		// check if this is the start of a utf-char
1417 		else if ((ch & 0xE0) == 0xC0  ||  (ch & 0xF0) == 0xE0  ||
1418 			 (ch & 0xF8) == 0xF0)
1419 		{
1420 		    if ((ch & 0xE0) == 0xC0)
1421 			utf_len = 2;
1422 		    else if ((ch & 0xF0) == 0xE0)
1423 			utf_len = 3;
1424 		    else
1425 			utf_len = 4;
1426 		    utf[0] = (char) ch;
1427 		    utf_idx = 1;
1428 		}
1429 		// leave reserved alone
1430 		else if (reserved != null  &&  reserved.get(ch))
1431 		{
1432 		    res[didx++] = buf[sidx];
1433 		    sidx -= 2;
1434 		}
1435 		// just use the decoded version
1436 		else
1437 		    res[didx++] = (char) ch;
1438 	    }
1439 	    else if (utf_len > 0)	// oops, we misinterpreted
1440 	    {
1441 		didx = copyBuf(utf, utf_idx, buf[sidx], res, didx, reserved, false);
1442 		utf_len = -1;
1443 	    }
1444 	    else
1445 		res[didx++] = buf[sidx];
1446 	}
1447 	if (utf_len > 0)	// oops, we misinterpreted
1448 	    didx = copyBuf(utf, utf_idx, -1, res, didx, reserved, false);
1449 
1450 	return new String(res, 0, didx);
1451     }
1452 
1453     private static final int copyBuf(char[] utf, int utf_idx, int ch,
1454 				     char[] res, int didx, BitSet reserved,
1455 				     boolean escapeAll)
1456     {
1457 	if (ch >= 0)
1458 	    utf[utf_idx++] = (char) ch;
1459 
1460 	for (int idx=0; idx<utf_idx; idx++)
1461 	{
1462 	    if (reserved != null  &&  reserved.get(utf[idx])  ||  escapeAll)
1463 		didx = enc(res, didx, utf[idx]);
1464 	    else
1465 		res[didx++] = utf[idx];
1466 	}
1467 
1468 	return didx;
1469     }
1470 
1471     /**
1472      * Unescape escaped characters (i.e. %xx). If a ParseException would
1473      * be thrown then just return the original string.
1474      *
1475      * @param str      the string to unescape
1476      * @param reserved the characters which may not be unescaped, or null
1477      * @return the unescaped string, or the original string if unescaping
1478      *         would throw a ParseException
1479      * @see #unescape(java.lang.String, java.util.BitSet)
1480      */
1481     private static final String unescapeNoPE(String str, BitSet reserved)
1482     {
1483 	try
1484 	    { return unescape(str, reserved); }
1485 	catch (ParseException pe)
1486 	    { return str; }
1487     }
1488 
1489 
    /**
     * Run test set. The built-in self test covers, in this order: resolving
     * relative URIs against several base URIs (generic and semi-generic
     * syntax), equality and hash codes of URIs differing in scheme, host,
     * port, userinfo and path, and escaping/unescaping including UTF-8
     * sequences. Start and end banners are written to System.err; the
     * first failing check aborts the run with an exception.
     *
     * @param args ignored
     * @exception Exception if any test fails
     */
    public static void main(String args[])  throws Exception
    {
        System.err.println();
        System.err.println("*** URI Tests ...");


        /* Relative URI test set, taken from Section C of rfc-2396 and
         * Roy's test1. All Roy's URI parser tests can be found at
         * http://www.ics.uci.edu/~fielding/url/
         * The tests have been augmented by a few for the IPv6 syntax
         */

        URI base = new URI("http://a/b/c/d;p?q");

        // normal examples
        testParser(base, "g:h",        "g:h");
        testParser(base, "g",          "http://a/b/c/g");
        testParser(base, "./g",        "http://a/b/c/g");
        testParser(base, "g/",         "http://a/b/c/g/");
        testParser(base, "/g",         "http://a/g");
        testParser(base, "//g",        "http://g");
        testParser(base, "//[23:54]",  "http://[23:54]");
        testParser(base, "?y",         "http://a/b/c/?y");
        testParser(base, "g?y",        "http://a/b/c/g?y");
        testParser(base, "#s",         "http://a/b/c/d;p?q#s");
        testParser(base, "g#s",        "http://a/b/c/g#s");
        testParser(base, "g?y#s",      "http://a/b/c/g?y#s");
        testParser(base, ";x",         "http://a/b/c/;x");
        testParser(base, "g;x",        "http://a/b/c/g;x");
        testParser(base, "g;x?y#s",    "http://a/b/c/g;x?y#s");
        testParser(base, ".",          "http://a/b/c/");
        testParser(base, "./",         "http://a/b/c/");
        testParser(base, "..",         "http://a/b/");
        testParser(base, "../",        "http://a/b/");
        testParser(base, "../g",       "http://a/b/g");
        testParser(base, "../..",      "http://a/");
        testParser(base, "../../",     "http://a/");
        testParser(base, "../../g",    "http://a/g");

        // abnormal examples
        testParser(base, "",              "http://a/b/c/d;p?q");
        testParser(base, "/./g",          "http://a/./g");
        testParser(base, "/../g",         "http://a/../g");
        testParser(base, "../../../g",    "http://a/../g");
        testParser(base, "../../../../g", "http://a/../../g");
        testParser(base, "g.",            "http://a/b/c/g.");
        testParser(base, ".g",            "http://a/b/c/.g");
        testParser(base, "g..",           "http://a/b/c/g..");
        testParser(base, "..g",           "http://a/b/c/..g");
        testParser(base, "./../g",        "http://a/b/g");
        testParser(base, "./g/.",         "http://a/b/c/g/");
        testParser(base, "g/./h",         "http://a/b/c/g/h");
        testParser(base, "g/../h",        "http://a/b/c/h");
        testParser(base, "g;x=1/./y",     "http://a/b/c/g;x=1/y");
        testParser(base, "g;x=1/../y",    "http://a/b/c/y");
        testParser(base, "g?y/./x",       "http://a/b/c/g?y/./x");
        testParser(base, "g?y/../x",      "http://a/b/c/g?y/../x");
        testParser(base, "g#s/./x",       "http://a/b/c/g#s/./x");
        testParser(base, "g#s/../x",      "http://a/b/c/g#s/../x");
        // the expected result of a relative URI repeating the base scheme
        // depends on the ENABLE_BACKWARDS_COMPATIBILITY setting
        if (ENABLE_BACKWARDS_COMPATIBILITY)
            testParser(base, "http:g",        "http://a/b/c/g");
        else
            testParser(base, "http:g",        "http:g");
        if (ENABLE_BACKWARDS_COMPATIBILITY)
            testParser(base, "http:",         "http://a/b/c/d;p?q");
        else
            testParser(base, "http:",         "http:");
        testParser(base, "./g:h",         "http://a/b/c/g:h");


        /* Roy's test2
         */
        base = new URI("http://a/b/c/d;p?q=1/2");

        testParser(base, "g",        "http://a/b/c/g");
        testParser(base, "./g",      "http://a/b/c/g");
        testParser(base, "g/",       "http://a/b/c/g/");
        testParser(base, "/g",       "http://a/g");
        testParser(base, "//g",      "http://g");
        testParser(base, "//[23:54]","http://[23:54]");
        testParser(base, "?y",       "http://a/b/c/?y");
        testParser(base, "g?y",      "http://a/b/c/g?y");
        testParser(base, "g?y/./x",  "http://a/b/c/g?y/./x");
        testParser(base, "g?y/../x", "http://a/b/c/g?y/../x");
        testParser(base, "g#s",      "http://a/b/c/g#s");
        testParser(base, "g#s/./x",  "http://a/b/c/g#s/./x");
        testParser(base, "g#s/../x", "http://a/b/c/g#s/../x");
        testParser(base, "./",       "http://a/b/c/");
        testParser(base, "../",      "http://a/b/");
        testParser(base, "../g",     "http://a/b/g");
        testParser(base, "../../",   "http://a/");
        testParser(base, "../../g",  "http://a/g");


        /* Roy's test3
         */
        base = new URI("http://a/b/c/d;p=1/2?q");

        testParser(base, "g",          "http://a/b/c/d;p=1/g");
        testParser(base, "./g",        "http://a/b/c/d;p=1/g");
        testParser(base, "g/",         "http://a/b/c/d;p=1/g/");
        testParser(base, "g?y",        "http://a/b/c/d;p=1/g?y");
        testParser(base, ";x",         "http://a/b/c/d;p=1/;x");
        testParser(base, "g;x",        "http://a/b/c/d;p=1/g;x");
        testParser(base, "g;x=1/./y",  "http://a/b/c/d;p=1/g;x=1/y");
        testParser(base, "g;x=1/../y", "http://a/b/c/d;p=1/y");
        testParser(base, "./",         "http://a/b/c/d;p=1/");
        testParser(base, "../",        "http://a/b/c/");
        testParser(base, "../g",       "http://a/b/c/g");
        testParser(base, "../../",     "http://a/b/");
        testParser(base, "../../g",    "http://a/b/g");


        /* Roy's test4
         */
        base = new URI("fred:///s//a/b/c");

        testParser(base, "g:h",           "g:h");
        /* we have to skip these, as usesGeneraicSyntax("fred") returns false
         * and we therefore don't parse relative URI's here. But test5 is
         * the same except that the http scheme is used.
        testParser(base, "g",             "fred:///s//a/b/g");
        testParser(base, "./g",           "fred:///s//a/b/g");
        testParser(base, "g/",            "fred:///s//a/b/g/");
        testParser(base, "/g",            "fred:///g");
        testParser(base, "//g",           "fred://g");
        testParser(base, "//g/x",         "fred://g/x");
        testParser(base, "///g",          "fred:///g");
        testParser(base, "./",            "fred:///s//a/b/");
        testParser(base, "../",           "fred:///s//a/");
        testParser(base, "../g",          "fred:///s//a/g");
        testParser(base, "../../",        "fred:///s//");
        testParser(base, "../../g",       "fred:///s//g");
        testParser(base, "../../../g",    "fred:///s/g");
        testParser(base, "../../../../g", "fred:///g");
         */
        // instead, resolving a relative URI against this base must fail
        testPE(base, "g");


        /* Roy's test5
         */
        base = new URI("http:///s//a/b/c");

        testParser(base, "g:h",           "g:h");
        testParser(base, "g",             "http:///s//a/b/g");
        testParser(base, "./g",           "http:///s//a/b/g");
        testParser(base, "g/",            "http:///s//a/b/g/");
        testParser(base, "/g",            "http:///g");
        testParser(base, "//g",           "http://g");
        testParser(base, "//[23:54]",     "http://[23:54]");
        testParser(base, "//g/x",         "http://g/x");
        testParser(base, "///g",          "http:///g");
        testParser(base, "./",            "http:///s//a/b/");
        testParser(base, "../",           "http:///s//a/");
        testParser(base, "../g",          "http:///s//a/g");
        testParser(base, "../../",        "http:///s//");
        testParser(base, "../../g",       "http:///s//g");
        testParser(base, "../../../g",    "http:///s/g");
        testParser(base, "../../../../g", "http:///g");


        /* Some additional parser tests
         */
        base = new URI("http://s");

        testParser(base, "ftp:h",         "ftp:h");
        testParser(base, "ftp://h",       "ftp://h");
        testParser(base, "//g",           "http://g");
        testParser(base, "//g?h",         "http://g?h");
        testParser(base, "g",             "http://s/g");
        testParser(base, "./g",           "http://s/g");
        testParser(base, "?g",            "http://s/?g");
        testParser(base, "#g",            "http://s#g");

        base = new URI("http:");

        testParser(base, "ftp:h",         "ftp:h");
        testParser(base, "ftp://h",       "ftp://h");
        testParser(base, "//g",           "http://g");
        testParser(base, "g",             "http:/g");
        testParser(base, "?g",            "http:/?g");
        testParser(base, "#g",            "http:#g");

        base = new URI("http://s/t");

        testParser(base, "ftp:/h",        "ftp:/h");
        if (ENABLE_BACKWARDS_COMPATIBILITY)
            testParser(base, "http:/h",       "http://s/h");
        else
            testParser(base, "http:/h",       "http:/h");

        base = new URI("http://s/g?h/j");
        testParser(base, "k",             "http://s/k");
        testParser(base, "k?l",           "http://s/k?l");


        /* Parser tests for semi-generic syntax
         */
        base = new URI("ldap:");

        testParser(base, "ldap:",         "ldap:");
        testParser(base, "ldap://a",      "ldap://a");
        testParser(base, "ldap://a/b",    "ldap://a/b");
        testParser(base, "ldap:/b",       "ldap:/b");

        testParser(base, "ftp:h",         "ftp:h");
        testParser(base, "ftp://h",       "ftp://h");
        testParser(base, "//g",           "ldap://g");
        testParser(base, "//g?h",         "ldap://g/?h");
        testParser(base, "g",             "ldap:/g");
        testParser(base, "./g",           "ldap:/./g");
        testParser(base, "?g",            "ldap:/?g");
        testParser(base, "#g",            "ldap:/%23g");

        base = new URI("ldap://s");

        if (ENABLE_BACKWARDS_COMPATIBILITY)
            testParser(base, "ldap:",         "ldap://s");
        else
            testParser(base, "ldap:",         "ldap:");
        testParser(base, "ldap://a",      "ldap://a");
        testParser(base, "ldap://a/b",    "ldap://a/b");
        if (ENABLE_BACKWARDS_COMPATIBILITY)
            testParser(base, "ldap:/b",       "ldap://s/b");
        else
            testParser(base, "ldap:/b",       "ldap:/b");

        testParser(base, "ftp:h",         "ftp:h");
        testParser(base, "ftp://h",       "ftp://h");
        testParser(base, "//g",           "ldap://g");
        testParser(base, "//g?h",         "ldap://g/?h");
        testParser(base, "g",             "ldap://s/g");
        testParser(base, "./g",           "ldap://s/./g");
        testParser(base, "?g",            "ldap://s/?g");
        testParser(base, "#g",            "ldap://s/%23g");

        base = new URI("ldap://s/t");

        testParser(base, "ftp:/h",        "ftp:/h");
        if (ENABLE_BACKWARDS_COMPATIBILITY)
            testParser(base, "ldap:/h",       "ldap://s/h");
        else
            testParser(base, "ldap:/h",       "ldap:/h");

        if (ENABLE_BACKWARDS_COMPATIBILITY)
            testParser(base, "ldap:",         "ldap://s");
        else
            testParser(base, "ldap:",         "ldap:");
        testParser(base, "ldap://a",      "ldap://a");
        testParser(base, "ldap://a/b",    "ldap://a/b");

        testParser(base, "ftp:h",         "ftp:h");
        testParser(base, "ftp://h",       "ftp://h");
        testParser(base, "//g",           "ldap://g");
        testParser(base, "//g?h",         "ldap://g/?h");
        testParser(base, "g",             "ldap://s/g");
        testParser(base, "./g",           "ldap://s/./g");
        testParser(base, "?g",            "ldap://s/?g");
        testParser(base, "#g",            "ldap://s/%23g");


        /* equality tests */

        // protocol
        testNotEqual("http://a/", "nntp://a/");
        testNotEqual("http://a/", "https://a/");
        testNotEqual("http://a/", "shttp://a/");
        testEqual("http://a/", "Http://a/");
        testEqual("http://a/", "hTTP://a/");
        testEqual("url:http://a/", "hTTP://a/");
        testEqual("urI:http://a/", "hTTP://a/");

        // host
        testEqual("http://a/", "Http://A/");
        testEqual("http://a.b.c/", "Http://A.b.C/");
        testEqual("http:///", "Http:///");
        testEqual("http://[]/", "Http:///");
        testNotEqual("http:///", "Http://a/");
        testNotEqual("http://[]/", "Http://a/");
        testPE(null, "ftp://[23::43:1/");
        testPE(null, "ftp://[/");

        // port
        testEqual("http://a.b.c/", "Http://A.b.C:80/");
        testEqual("http://a.b.c:/", "Http://A.b.C:80/");
        testEqual("http://[23::45:::5:]/", "Http://[23::45:::5:]:80/");
        testEqual("http://[23::45:::5:]:/", "Http://[23::45:::5:]:80/");
        testEqual("nntp://a", "nntp://a:119");
        testEqual("nntp://a:", "nntp://a:119");
        testEqual("nntp://a/", "nntp://a:119/");
        testNotEqual("nntp://a", "nntp://a:118");
        testNotEqual("nntp://a", "nntp://a:0");
        testNotEqual("nntp://a:", "nntp://a:0");
        testEqual("telnet://:23/", "telnet:///");
        testPE(null, "ftp://:a/");
        testPE(null, "ftp://:-1/");
        testPE(null, "ftp://::1/");

        // userinfo
        testNotEqual("ftp://me@a", "ftp://a");
        testNotEqual("ftp://me@a", "ftp://Me@a");
        testEqual("ftp://Me@a", "ftp://Me@a");
        testEqual("ftp://Me:My@a:21", "ftp://Me:My@a");
        testEqual("ftp://Me:My@a:", "ftp://Me:My@a");
        testNotEqual("ftp://Me:My@a:21", "ftp://Me:my@a");
        testNotEqual("ftp://Me:My@a:", "ftp://Me:my@a");

        // path
        testEqual("ftp://a/b%2b/", "ftp://a/b+/");
        testEqual("ftp://a/b%2b/", "ftp://a/b+/");
        testEqual("ftp://a/b%5E/", "ftp://a/b^/");
        testEqual("ftp://a/b%4C/", "ftp://a/bL/");
        testNotEqual("ftp://a/b/", "ftp://a//b/");
        testNotEqual("ftp://a/b/", "ftp://a/b//");
        testNotEqual("ftp://a/b%4C/", "ftp://a/bl/");
        testNotEqual("ftp://a/b%3f/", "ftp://a/b?/");
        testNotEqual("ftp://a/b%2f/", "ftp://a/b//");
        testNotEqual("ftp://a/b%2fc/", "ftp://a/b/c/");
        testNotEqual("ftp://a/bc/", "ftp://a/b//");
        testNotEqual("ftp://a/bc/", "ftp://a/b/");
        testNotEqual("ftp://a/bc//", "ftp://a/b/");
        testNotEqual("ftp://a/b/", "ftp://a/bc//");
        testNotEqual("ftp://a/b/", "ftp://a/bc/");
        testNotEqual("ftp://a/b//", "ftp://a/bc/");

        // path parameters (';')
        testNotEqual("ftp://a/b;fc/", "ftp://a/bf;c/");
        testNotEqual("ftp://a/b%3bfc/", "ftp://a/b;fc/");
        testEqual("ftp://a/b;/;/", "ftp://a/b;/;/");
        testNotEqual("ftp://a/b;/", "ftp://a/b//");
        testNotEqual("ftp://a/b//", "ftp://a/b;/");
        testNotEqual("ftp://a/b/;", "ftp://a/b//");
        testNotEqual("ftp://a/b//", "ftp://a/b/;");
        testNotEqual("ftp://a/b;/", "ftp://a/b;//");
        testNotEqual("ftp://a/b;//", "ftp://a/b;/");

        // escaping/unescaping
        testEscape("hello\u1212there", "hello%E1%88%92there");
        testEscape("hello\u0232there", "hello%C8%B2there");
        testEscape("hello\uDA42\uDD42there", "hello%F2%A0%A5%82there");
        testEscape("hello\uDA42", "hello%ED%A9%82");
        testEscape("hello\uDA42there", "hello%ED%A9%82there");
        testUnescape("hello%F2%A0%A5%82there", "hello\uDA42\uDD42there");
        testUnescape("hello%F2%A0%A5there", "hello\u00F2\u00A0\u00A5there");
        testUnescape("hello%F2%A0there", "hello\u00F2\u00A0there");
        testUnescape("hello%F2there", "hello\u00F2there");
        testUnescape("hello%F2%A0%A5%82", "hello\uDA42\uDD42");
        testUnescape("hello%F2%A0%A5", "hello\u00F2\u00A0\u00A5");
        testUnescape("hello%F2%A0", "hello\u00F2\u00A0");
        testUnescape("hello%F2", "hello\u00F2");
        testUnescape("hello%E1%88%92there", "hello\u1212there");
        testUnescape("hello%E1%88there", "hello\u00E1\u0088there");
        testUnescape("hello%E1there", "hello\u00E1there");
        testUnescape("hello%E1%71there", "hello\u00E1qthere");
        testUnescape("hello%E1%88", "hello\u00E1\u0088");
        testUnescape("hello%E1%71", "hello\u00E1q");
        testUnescape("hello%E1", "hello\u00E1");
        testUnescape("hello%C8%B2there", "hello\u0232there");
        testUnescape("hello%C8there", "hello\u00C8there");
        testUnescape("hello%C8%71there", "hello\u00C8qthere");
        testUnescape("hello%C8%71", "hello\u00C8q");
        testUnescape("hello%C8", "hello\u00C8");
        testUnescape("%71there", "qthere");
        testUnescape("%B1there", "\u00B1there");

        System.err.println("*** Tests finished successfuly");
    }
1861 
    /** the platform's line separator, used to format test failure messages */
    private static final String nl = System.getProperty("line.separator");
1863 
1864     private static void testParser(URI base, String relURI, String result)
1865 	    throws Exception
1866     {
1867 	if (!(new URI(base, relURI).toExternalForm().equals(result)))
1868 	{
1869 	    throw new Exception("Test failed: " + nl +
1870 				"  base-URI = <" + base + ">" + nl +
1871 				"  rel-URI  = <" + relURI + ">" + nl+
1872 				"  expected   <" + result + ">" + nl+
1873 				"  but got    <" + new URI(base, relURI) + ">");
1874 	}
1875     }
1876 
1877     private static void testEqual(String one, String two)  throws Exception
1878     {
1879 	URI u1 = new URI(one);
1880 	URI u2 = new URI(two);
1881 
1882 	if (!u1.equals(u2))
1883 	{
1884 	    throw new Exception("Test failed: " + nl +
1885 				"  <" + one + "> != <" + two + ">");
1886 	}
1887 	if (u1.hashCode() != u2.hashCode())
1888 	{
1889 	    throw new Exception("Test failed: " + nl +
1890 				"  hashCode <" + one + "> != hashCode <" + two + ">");
1891 	}
1892     }
1893 
1894     private static void testNotEqual(String one, String two)  throws Exception
1895     {
1896 	URI u1 = new URI(one);
1897 	URI u2 = new URI(two);
1898 
1899 	if (u1.equals(u2))
1900 	{
1901 	    throw new Exception("Test failed: " + nl +
1902 				"  <" + one + "> == <" + two + ">");
1903 	}
1904     }
1905 
1906     private static void testPE(URI base, String uri)  throws Exception
1907     {
1908 	boolean got_pe = false;
1909 	try
1910 	    { new URI(base, uri); }
1911 	catch (ParseException pe)
1912 	    { got_pe = true; }
1913 	if (!got_pe)
1914 	{
1915 	    throw new Exception("Test failed: " + nl +
1916 				"  <" + uri + "> should be invalid");
1917 	}
1918     }
1919 
1920     private static void testEscape(String raw, String escaped)  throws Exception
1921     {
1922 	String test = new String(escape(raw.toCharArray(), uricChar, true));
1923 	if (!test.equals(escaped))
1924 	    throw new Exception("Test failed: " + nl +
1925 				"  raw-string: " + raw + nl +
1926 				"  escaped:    " + test + nl +
1927 				"  expected:   " + escaped);
1928     }
1929 
1930     private static void testUnescape(String escaped, String raw)
1931 	throws Exception
1932     {
1933 	if (!unescape(escaped, null).equals(raw))
1934 	    throw new Exception("Test failed: " + nl +
1935 				"  escaped-string: " + escaped + nl +
1936 				"  unescaped:      " + unescape(escaped, null) + nl +
1937 				"  expected:       " + raw);
1938     }
1939 }