/**
 * @fileoverview An RFC 3986 compliant, scheme extendable URI parsing/validating/resolving library for JavaScript.
 * @author <a href="mailto:gary.court@gmail.com">Gary Court</a>
 * @see http://github.com/garycourt/uri-js
 */
/**
 * Copyright 2011 Gary Court. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GARY COURT ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARY COURT OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those of the
 * authors and should not be interpreted as representing official policies, either expressed
 * or implied, of Gary Court.
 */
37 import URI_PROTOCOL from "./regexps-uri";
38 import IRI_PROTOCOL from "./regexps-iri";
39 import punycode from "punycode";
40 import { toUpperCase, typeOf, assign } from "./util";
/**
 * The individual pieces of a URI/IRI reference, as filled in by `parse()` and read by `serialize()`.
 *
 * NOTE(review): the member list is reconstructed from the properties that the functions in this
 * file read and write on component objects — confirm against the canonical declaration.
 */
export interface URIComponents {
	scheme?:string;
	userinfo?:string;
	host?:string;
	/** A number when the port parses as a base-10 integer; otherwise `parse()` stores the raw matched text. */
	port?:number|string;
	path?:string;
	query?:string;
	fragment?:string;
	/** Reference type assigned by `parse()`: "same-document", "relative", "absolute" or "uri". */
	reference?:string;
	/** First error message recorded while parsing or serializing, if any. */
	error?:string;
}
/**
 * Options accepted by the parsing, serializing and resolving functions in this file.
 *
 * NOTE(review): `scheme`, `reference`, `tolerant`, `iri` and `domainHost` are reconstructed from
 * how the functions in this file read `options` — confirm against the canonical declaration.
 */
export interface URIOptions {
	/** Scheme used to look up the scheme handler instead of the parsed scheme; also prefixed to "suffix" references by `parse()`. */
	scheme?:string;
	/** Expected reference type; `parse()` records an error on mismatch. The value "suffix" is handled specially. */
	reference?:string;
	/** Relaxes how `resolveComponents()` treats a scheme on the relative reference. */
	tolerant?:boolean;
	/** When true, `serialize()` does not remove dot segments from the path. */
	absolutePath?:boolean;
	/** Selects between the IRI and URI regular-expression sets. */
	iri?:boolean;
	/** When true, `parse()` normalizes with the selected protocol instead of forcing the URI one. */
	unicodeSupport?:boolean;
	/** When true, the host is treated as a domain name and converted via punycode. */
	domainHost?:boolean;
}
/**
 * Contract for a scheme-specific extension stored in `SCHEMES`.
 *
 * `parse()` calls `handler.parse(components, options)` after generic parsing, and `serialize()`
 * calls `handler.serialize(components, options)` before generic serialization. The boolean flags
 * act as per-scheme defaults for the option of the same name.
 */
export interface URISchemeHandler<Components extends URIComponents = URIComponents, Options extends URIOptions = URIOptions, ParentComponents extends URIComponents = URIComponents> {
	// NOTE(review): `scheme` is not referenced anywhere in this file; presumably it is the key the
	// handler is registered under in SCHEMES — confirm against the scheme handler modules.
	scheme:string;
	parse(components:ParentComponents, options:Options):Components;
	serialize(components:Components, options:Options):ParentComponents;
	unicodeSupport?:boolean;
	domainHost?:boolean;
	absolutePath?:boolean;
}
/**
 * The set of regular expressions that the functions in this file expect from a protocol
 * definition (`URI_PROTOCOL` / `IRI_PROTOCOL`).
 *
 * The `NOT_*` patterns are used with `String.prototype.replace` to percent-encode (or, for
 * `NOT_SCHEME`, strip) whatever they match in the corresponding component.
 */
export interface URIRegExps {
	NOT_SCHEME : RegExp,
	NOT_USERINFO : RegExp,
	NOT_HOST : RegExp,
	NOT_PATH : RegExp,
	NOT_PATH_NOSCHEME : RegExp,
	NOT_QUERY : RegExp,
	NOT_FRAGMENT : RegExp,
	ESCAPE : RegExp,
	UNRESERVED : RegExp,
	// NOTE(review): OTHER_CHARS is not referenced in this file — confirm it belongs here.
	OTHER_CHARS : RegExp,
	PCT_ENCODED : RegExp,
	IPV4ADDRESS : RegExp,
	IPV6ADDRESS : RegExp,
}
/**
 * Registry of scheme handlers. `parse()` and `serialize()` look handlers up here by
 * lower-cased scheme name; it starts out empty.
 */
export const SCHEMES:{[scheme:string]:URISchemeHandler} = {};
/**
 * Percent-encodes one UTF-16 code unit as its UTF-8 byte sequence, with upper-case hex digits.
 *
 * Only the first code unit of `chr` is read, so the function is suited to being a
 * `String.prototype.replace` callback for single-character matches.
 *
 * @param chr String whose first code unit is encoded.
 * @returns One to three `%XX` triplets, or "" when `chr` is empty.
 */
export function pctEncChar(chr:string):string {
	//an empty string has no code unit; without this guard NaN would be encoded as "%E0%80%80"
	if (chr.length === 0) return "";

	const c = chr.charCodeAt(0);
	const pct = (byte:number):string => "%" + (byte < 16 ? "0" : "") + byte.toString(16).toUpperCase();

	//one byte: 0xxxxxxx
	if (c < 128) return pct(c);

	//two bytes: 110xxxxx 10xxxxxx
	if (c < 2048) return pct((c >> 6) | 192) + pct((c & 63) | 128);

	//three bytes: 1110xxxx 10xxxxxx 10xxxxxx
	return pct((c >> 12) | 224) + pct(((c >> 6) & 63) | 128) + pct((c & 63) | 128);
}
/**
 * Decodes a run of percent-encoded UTF-8 bytes (`%XX%XX…`) into the characters they represent.
 *
 * The input is consumed triplet by triplet; the lead byte decides how many triplets form one
 * character. A sequence that is cut short, or whose lead byte is not a valid UTF-8 lead
 * (0x80–0xC1, 0xF8 and above), is copied through still encoded. Continuation bytes are not validated.
 *
 * @param str Text made of `%XX` triplets.
 * @returns The decoded text.
 */
export function pctDecChars(str:string):string {
	let newStr = "";
	let i = 0;
	const il = str.length;
	const byteAt = (offset:number):number => parseInt(str.slice(offset + 1, offset + 3), 16);

	while (i < il) {
		const c = byteAt(i);

		if (c < 128) {
			//single byte (ASCII)
			newStr += String.fromCharCode(c);
			i += 3;
		}
		else if (c >= 194 && c < 224) {
			//two-byte sequence
			if ((il - i) >= 6) {
				const c2 = byteAt(i + 3);
				newStr += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
			} else {
				newStr += str.slice(i, i + 6);
			}
			i += 6;
		}
		else if (c >= 224 && c < 240) {
			//three-byte sequence
			if ((il - i) >= 9) {
				const c2 = byteAt(i + 3);
				const c3 = byteAt(i + 6);
				newStr += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
			} else {
				newStr += str.slice(i, i + 9);
			}
			i += 9;
		}
		else if (c >= 240 && c < 248) {
			//four-byte sequence: a code point above U+FFFF, emitted as a surrogate pair
			const raw = str.slice(i, i + 12);
			if ((il - i) >= 12) {
				const c2 = byteAt(i + 3);
				const c3 = byteAt(i + 6);
				const c4 = byteAt(i + 9);
				const codePoint = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
				if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
					const offset = codePoint - 0x10000;
					newStr += String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
				} else {
					//overlong or out-of-range value: leave it encoded
					newStr += raw;
				}
			} else {
				newStr += raw;
			}
			i += 12;
		}
		else {
			//stray continuation byte, invalid lead byte or unparsable hex: copy the triplet through
			newStr += str.slice(i, i + 3);
			i += 3;
		}
	}

	return newStr;
}
/**
 * Normalizes the percent-encoding of every component in place.
 *
 * For each component that is present: percent-encoded triplets that decode to unreserved
 * characters are decoded, characters matched by the component's `NOT_*` pattern are
 * percent-encoded, and the remaining triplets are passed through `toUpperCase`. The scheme and
 * host are additionally lower-cased, and characters matching `NOT_SCHEME` are removed from the scheme.
 *
 * @param components Components to normalize; mutated.
 * @param protocol Regular-expression set to normalize against.
 * @returns The same `components` object.
 */
function _normalizeComponentEncoding(components:URIComponents, protocol:URIRegExps) {
	//decode a percent-encoded run only when the result is an unreserved character
	function decodeUnreserved(str:string):string {
		const decStr = pctDecChars(str);
		return (!decStr.match(protocol.UNRESERVED) ? str : decStr);
	}

	if (components.scheme) components.scheme = String(components.scheme).replace(protocol.PCT_ENCODED, decodeUnreserved).toLowerCase().replace(protocol.NOT_SCHEME, "");
	if (components.userinfo !== undefined) components.userinfo = String(components.userinfo).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_USERINFO, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
	if (components.host !== undefined) components.host = String(components.host).replace(protocol.PCT_ENCODED, decodeUnreserved).toLowerCase().replace(protocol.NOT_HOST, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
	//the scheme was normalized above, so its presence decides which path pattern applies
	if (components.path !== undefined) components.path = String(components.path).replace(protocol.PCT_ENCODED, decodeUnreserved).replace((components.scheme ? protocol.NOT_PATH : protocol.NOT_PATH_NOSCHEME), pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
	if (components.query !== undefined) components.query = String(components.query).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_QUERY, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
	if (components.fragment !== undefined) components.fragment = String(components.fragment).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_FRAGMENT, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);

	return components;
}
/**
 * Removes leading "0" characters from a numeric field.
 *
 * @param str Field text, e.g. one IPv4 octet or one IPv6 group.
 * @returns The field without leading zeros, or "0" when nothing is left.
 */
function _stripLeadingZeros(str:string):string {
	return str.replace(/^0*(.*)/, "$1") || "0";
}
/**
 * Normalizes an IPv4 host by stripping leading zeros from each dot-separated field.
 *
 * @param host Host text.
 * @param protocol Regular-expression set; the first capture group of `IPV4ADDRESS` is taken as the address.
 * @returns The normalized address, or `host` unchanged when it does not match `IPV4ADDRESS`.
 */
function _normalizeIPv4(host:string, protocol:URIRegExps):string {
	const matches = host.match(protocol.IPV4ADDRESS) || [];
	const [, address] = matches;

	if (address) {
		return address.split(".").map(_stripLeadingZeros).join(".");
	} else {
		return host;
	}
}
/**
 * Normalizes an IPv6 host: lower-cases it, strips leading zeros from each field, expands any
 * "::" to explicit fields, then re-compresses the longest run of two or more zero fields to "::".
 * A trailing embedded IPv4 address is normalized with `_normalizeIPv4`, and a zone identifier,
 * when captured, is re-appended after "%".
 *
 * @param host Host text.
 * @param protocol Regular-expression set; capture groups 1 and 2 of `IPV6ADDRESS` are taken as address and zone.
 * @returns The normalized address, or `host` unchanged when it does not match `IPV6ADDRESS`.
 */
function _normalizeIPv6(host:string, protocol:URIRegExps):string {
	const matches = host.match(protocol.IPV6ADDRESS) || [];
	const [, address, zone] = matches;

	if (address) {
		//split around "::"; reversing makes `last` the only part when there is no "::"
		const [last, first] = address.toLowerCase().split('::').reverse();
		const firstFields = first ? first.split(":").map(_stripLeadingZeros) : [];
		const lastFields = last.split(":").map(_stripLeadingZeros);
		const isLastFieldIPv4Address = protocol.IPV4ADDRESS.test(lastFields[lastFields.length - 1]);
		//an embedded IPv4 address takes the place of the final two 16-bit fields
		const fieldCount = isLastFieldIPv4Address ? 7 : 8;
		const lastFieldsStart = lastFields.length - fieldCount;
		const fields = Array<string>(fieldCount);

		//lay the leading fields out from the left and the trailing fields right-aligned
		for (let x = 0; x < fieldCount; ++x) {
			fields[x] = firstFields[x] || lastFields[lastFieldsStart + x] || '';
		}

		if (isLastFieldIPv4Address) {
			fields[fieldCount - 1] = _normalizeIPv4(fields[fieldCount - 1], protocol);
		}

		//collect every run of consecutive zero (or empty) fields
		const allZeroFields = fields.reduce<Array<{index:number,length:number}>>((acc, field, index) => {
			if (!field || field === "0") {
				const lastLongest = acc[acc.length - 1];
				if (lastLongest && lastLongest.index + lastLongest.length === index) {
					lastLongest.length++;
				} else {
					acc.push({ index, length : 1 });
				}
			}
			return acc;
		}, []);

		const longestZeroFields = allZeroFields.sort((a, b) => b.length - a.length)[0];

		let newHost:string;
		if (longestZeroFields && longestZeroFields.length > 1) {
			const newFirst = fields.slice(0, longestZeroFields.index) ;
			const newLast = fields.slice(longestZeroFields.index + longestZeroFields.length);
			newHost = newFirst.join(":") + "::" + newLast.join(":");
		} else {
			newHost = fields.join(":");
		}

		if (zone) {
			newHost += "%" + zone;
		}

		return newHost;
	} else {
		return host;
	}
}
/**
 * Splits a URI reference into its parts. Capture groups:
 * 1 scheme, 2 authority, 3 userinfo, 4 host, 5 port, 6 path, 7 query, 8 fragment.
 */
const URI_PARSE = /^(?:([^:\/?#]+):)?(?:\/\/((?:([^\/?#@]*)@)?(\[[^\/?#\]]+\]|[^\/?#:]*)(?:\:(\d*))?))?([^?#]*)(?:\?([^#]*))?(?:#((?:.|\n|\r)*))?/i;

/**
 * True when the regular-expression engine reports a capture group that did not participate in
 * the match as `undefined`. `parse()` uses its fallback branch when this is false.
 */
const NO_MATCH_IS_UNDEFINED = (("").match(/(){0}/) as RegExpMatchArray)[1] === undefined;
/**
 * Parses a URI/IRI reference string into its components.
 *
 * After splitting the string, IP hosts are normalized, the reference type is classified, the
 * scheme handler (if any) is looked up in `SCHEMES`, component encodings are normalized, and the
 * handler's `parse` hook is invoked. Problems are reported through `components.error`; the first
 * error recorded wins.
 *
 * @param uriString Text to parse.
 * @param options Parsing options. With `reference: "suffix"` the text is treated as everything
 *                after "scheme://" and is prefixed accordingly before matching.
 * @returns The parsed components.
 */
export function parse(uriString:string, options:URIOptions = {}):URIComponents {
	const components:URIComponents = {};
	const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);

	if (options.reference === "suffix") uriString = (options.scheme ? options.scheme + ":" : "") + "//" + uriString;

	const matches = uriString.match(URI_PARSE);

	if (matches) {
		if (NO_MATCH_IS_UNDEFINED) {
			//store each component
			components.scheme = matches[1];
			components.userinfo = matches[3];
			components.host = matches[4];
			components.port = parseInt(matches[5], 10);
			components.path = matches[6] || "";
			components.query = matches[7];
			components.fragment = matches[8];

			//fix port number: keep the raw text (empty or undefined) when it is not numeric
			if (isNaN(components.port)) {
				components.port = matches[5];
			}
		} else { //IE FIX for improper RegExp matching
			//store each component; unmatched groups are not undefined here, so probe the input for each delimiter
			components.scheme = matches[1] || undefined;
			components.userinfo = (uriString.indexOf("@") !== -1 ? matches[3] : undefined);
			components.host = (uriString.indexOf("//") !== -1 ? matches[4] : undefined);
			components.port = parseInt(matches[5], 10);
			components.path = matches[6] || "";
			components.query = (uriString.indexOf("?") !== -1 ? matches[7] : undefined);
			components.fragment = (uriString.indexOf("#") !== -1 ? matches[8] : undefined);

			//fix port number: an authority ending in ":" has an empty port, which is capture group 5
			//(group 4 is the host and must not be stored as the port)
			if (isNaN(components.port)) {
				components.port = (uriString.match(/\/\/(?:.|\n)*\:(?:\/|\?|\#|$)/) ? matches[5] : undefined);
			}
		}

		if (components.host) {
			//normalize IP hosts
			components.host = _normalizeIPv6(_normalizeIPv4(components.host, protocol), protocol);
		}

		//determine reference type
		if (components.scheme === undefined && components.userinfo === undefined && components.host === undefined && components.port === undefined && !components.path && components.query === undefined) {
			components.reference = "same-document";
		} else if (components.scheme === undefined) {
			components.reference = "relative";
		} else if (components.fragment === undefined) {
			components.reference = "absolute";
		} else {
			components.reference = "uri";
		}

		//check for reference errors
		if (options.reference && options.reference !== "suffix" && options.reference !== components.reference) {
			components.error = components.error || "URI is not a " + options.reference + " reference.";
		}

		//find scheme handler
		const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];

		//check if scheme can't handle IRIs
		if (!options.unicodeSupport && (!schemeHandler || !schemeHandler.unicodeSupport)) {
			//if host component is a domain name
			if (components.host && (options.domainHost || (schemeHandler && schemeHandler.domainHost))) {
				//convert Unicode IDN -> ASCII IDN
				try {
					components.host = punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase());
				} catch (e) {
					components.error = components.error || "Host's domain name can not be converted to ASCII via punycode: " + e;
				}
			}
			//convert IRI -> URI
			_normalizeComponentEncoding(components, URI_PROTOCOL);
		} else {
			//normalize encodings
			_normalizeComponentEncoding(components, protocol);
		}

		//perform scheme specific parsing
		if (schemeHandler && schemeHandler.parse) {
			schemeHandler.parse(components, options);
		}
	} else {
		components.error = components.error || "URI can not be parsed.";
	}

	return components;
}
/**
 * Builds the authority part ("userinfo@host:port") from the components.
 *
 * IP hosts are normalized; an IPv6 host is wrapped in brackets and its zone separator is
 * written as "%25". The port is emitted only when it is a number.
 *
 * @param components Components to read.
 * @param options Options; `iri` selects the regular-expression set used for host normalization.
 * @returns The authority text, or `undefined` when userinfo, host and port all contribute nothing.
 */
function _recomposeAuthority(components:URIComponents, options:URIOptions):string|undefined {
	const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);
	const uriTokens:Array<string> = [];

	if (components.userinfo !== undefined) {
		uriTokens.push(components.userinfo);
		uriTokens.push("@");
	}

	if (components.host !== undefined) {
		//normalize IP hosts, add brackets and escape zone separator for IPv6
		uriTokens.push(_normalizeIPv6(_normalizeIPv4(String(components.host), protocol), protocol).replace(protocol.IPV6ADDRESS, (_, $1, $2) => "[" + $1 + ($2 ? "%25" + $2 : "") + "]"));
	}

	if (typeof components.port === "number") {
		uriTokens.push(":");
		uriTokens.push(components.port.toString(10));
	}

	return uriTokens.length ? uriTokens.join("") : undefined;
}
//patterns for the dot-segment removal steps
const RDS1 = /^\.\.?\//;                 //leading "./" or "../"
const RDS2 = /^\/\.(\/|$)/;              //leading "/./" or a final "/."
const RDS3 = /^\/\.\.(\/|$)/;            //leading "/../" or a final "/.."
const RDS4 = /^\.\.?$/;                  //exactly "." or ".." (not used by removeDotSegments, which compares directly)
const RDS5 = /^\/?(?:.|\n)*?(?=\/|$)/;   //first path segment, including its leading "/"

/**
 * Removes "." and ".." segments from a path by repeatedly consuming the front of `input` and
 * building up an output buffer of segments.
 *
 * @param input Path to process.
 * @returns The path with dot segments resolved.
 * @throws Error when the remaining input matches none of the expected shapes.
 */
export function removeDotSegments(input:string):string {
	const output:Array<string> = [];

	while (input.length) {
		if (input.match(RDS1)) {
			//drop a leading "./" or "../"
			input = input.replace(RDS1, "");
		} else if (input.match(RDS2)) {
			//"/./" or trailing "/." collapses to "/"
			input = input.replace(RDS2, "/");
		} else if (input.match(RDS3)) {
			//"/../" or trailing "/.." collapses to "/" and discards the previous output segment
			input = input.replace(RDS3, "/");
			output.pop();
		} else if (input === "." || input === "..") {
			input = "";
		} else {
			//move the first segment (with its leading "/") to the output
			const im = input.match(RDS5);
			if (im) {
				const s = im[0];
				input = input.slice(s.length);
				output.push(s);
			} else {
				throw new Error("Unexpected dot segment condition");
			}
		}
	}

	return output.join("");
}
/**
 * Serializes URI components back into a string.
 *
 * The scheme handler's `serialize` hook runs first, then a domain-name host is converted via
 * punycode, component encodings are normalized, and the parts are joined. Note that
 * `components` is modified in place by these steps; conversion failures are recorded in
 * `components.error`.
 *
 * @param components Components to serialize; mutated.
 * @param options Serialization options. With `reference: "suffix"` the scheme and the leading
 *                "//" are omitted.
 * @returns The serialized reference.
 */
export function serialize(components:URIComponents, options:URIOptions = {}):string {
	const protocol = (options.iri ? IRI_PROTOCOL : URI_PROTOCOL);
	const uriTokens:Array<string> = [];

	//find scheme handler
	const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];

	//perform scheme specific serialization
	if (schemeHandler && schemeHandler.serialize) schemeHandler.serialize(components, options);

	if (components.host) {
		//if host component is an IPv6 address
		if (protocol.IPV6ADDRESS.test(components.host)) {
			//TODO: normalize IPv6 address as per RFC 5952
		}

		//if host component is a domain name
		else if (options.domainHost || (schemeHandler && schemeHandler.domainHost)) {
			//convert IDN via punycode
			try {
				components.host = (!options.iri ? punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase()) : punycode.toUnicode(components.host));
			} catch (e) {
				components.error = components.error || "Host's domain name can not be converted to " + (!options.iri ? "ASCII" : "Unicode") + " via punycode: " + e;
			}
		}
	}

	//normalize encoding
	_normalizeComponentEncoding(components, protocol);

	if (options.reference !== "suffix" && components.scheme) {
		uriTokens.push(components.scheme);
		uriTokens.push(":");
	}

	const authority = _recomposeAuthority(components, options);
	if (authority !== undefined) {
		if (options.reference !== "suffix") {
			uriTokens.push("//");
		}

		uriTokens.push(authority);

		//a non-empty path that follows an authority must begin with "/"
		if (components.path && components.path.charAt(0) !== "/") {
			uriTokens.push("/");
		}
	}

	if (components.path !== undefined) {
		let s = components.path;

		if (!options.absolutePath && (!schemeHandler || !schemeHandler.absolutePath)) {
			s = removeDotSegments(s);
		}

		if (authority === undefined) {
			s = s.replace(/^\/\//, "/%2F"); //don't allow the path to start with "//"
		}

		uriTokens.push(s);
	}

	if (components.query !== undefined) {
		uriTokens.push("?");
		uriTokens.push(components.query);
	}

	if (components.fragment !== undefined) {
		uriTokens.push("#");
		uriTokens.push(components.fragment);
	}

	return uriTokens.join(""); //merge tokens into a string
}
/**
 * Resolves a relative reference against a base, both given as components.
 *
 * @param base Components of the base URI.
 * @param relative Components of the reference to resolve.
 * @param options Options; with `tolerant` set, a scheme on `relative` that equals the base
 *                scheme is ignored, so the rest of the reference is resolved as if relative.
 * @param skipNormalization When true, `base` and `relative` are used as given instead of being
 *                          round-tripped through `serialize()`/`parse()` first.
 * @returns A new components object for the resolved target.
 */
export function resolveComponents(base:URIComponents, relative:URIComponents, options:URIOptions = {}, skipNormalization?:boolean):URIComponents {
	const target:URIComponents = {};

	//default the options before first use (the parameter default does not cover a null argument)
	options = options || {};

	if (!skipNormalization) {
		base = parse(serialize(base, options), options); //normalize base components
		relative = parse(serialize(relative, options), options); //normalize relative components
	}

	//tolerant mode only drops the relative scheme when it is the same as the base scheme;
	//a different scheme always makes the reference stand on its own
	const sameScheme = base.scheme !== undefined && relative.scheme !== undefined && String(relative.scheme).toLowerCase() === String(base.scheme).toLowerCase();
	const ignoreRelativeScheme = !!options.tolerant && sameScheme;

	if (relative.scheme && !ignoreRelativeScheme) {
		target.scheme = relative.scheme;
		//target.authority = relative.authority;
		target.userinfo = relative.userinfo;
		target.host = relative.host;
		target.port = relative.port;
		target.path = removeDotSegments(relative.path || "");
		target.query = relative.query;
	} else {
		if (relative.userinfo !== undefined || relative.host !== undefined || relative.port !== undefined) {
			//target.authority = relative.authority;
			target.userinfo = relative.userinfo;
			target.host = relative.host;
			target.port = relative.port;
			target.path = removeDotSegments(relative.path || "");
			target.query = relative.query;
		} else {
			if (!relative.path) {
				target.path = base.path;
				if (relative.query !== undefined) {
					target.query = relative.query;
				} else {
					target.query = base.query;
				}
			} else {
				if (relative.path.charAt(0) === "/") {
					target.path = removeDotSegments(relative.path);
				} else {
					//merge the relative path onto the base path
					if ((base.userinfo !== undefined || base.host !== undefined || base.port !== undefined) && !base.path) {
						target.path = "/" + relative.path;
					} else if (!base.path) {
						target.path = relative.path;
					} else {
						target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative.path;
					}
					target.path = removeDotSegments(target.path);
				}
				target.query = relative.query;
			}
			//target.authority = base.authority;
			target.userinfo = base.userinfo;
			target.host = base.host;
			target.port = base.port;
		}
		target.scheme = base.scheme;
	}

	target.fragment = relative.fragment;

	return target;
}
/**
 * Resolves `relativeURI` against `baseURI` and returns the result as a string.
 *
 * Both inputs are parsed, resolved and serialized with a copy of `options` whose `scheme`
 * defaults to 'null' unless the caller supplies one; since the inputs were just parsed, the
 * extra normalization pass in `resolveComponents()` is skipped.
 *
 * @param baseURI Base URI text.
 * @param relativeURI Reference to resolve.
 * @param options Optional options, merged over the default scheme.
 * @returns The resolved URI text.
 */
export function resolve(baseURI:string, relativeURI:string, options?:URIOptions):string {
	const schemelessOptions = assign({ scheme : 'null' }, options);
	return serialize(resolveComponents(parse(baseURI, schemelessOptions), parse(relativeURI, schemelessOptions), schemelessOptions, true), schemelessOptions);
}
/**
 * Normalizes a URI by round-tripping it through `parse()` and `serialize()`.
 *
 * A string is parsed then serialized and comes back as a string; a components object is
 * serialized then parsed and comes back as components. Any other value is returned unchanged.
 *
 * @param uri URI text or components.
 * @param options Options passed to both `parse()` and `serialize()`.
 * @returns The normalized value, of the same kind as the input.
 */
export function normalize(uri:string, options?:URIOptions):string;
export function normalize(uri:URIComponents, options?:URIOptions):URIComponents;
export function normalize(uri:any, options?:URIOptions):any {
	if (typeof uri === "string") {
		uri = serialize(parse(uri, options), options);
	} else if (typeOf(uri) === "object") {
		uri = parse(serialize(<URIComponents>uri, options), options);
	}

	return uri;
}
/**
 * Tests whether two URIs are equal after normalization.
 *
 * Each argument is reduced to its serialized form (strings are parsed first, components objects
 * are serialized directly) and the results are compared with `===`. Values that are neither
 * strings nor objects are compared as given.
 *
 * @param uriA First URI, as text or components.
 * @param uriB Second URI, as text or components.
 * @param options Options passed to `parse()` and `serialize()`.
 * @returns True when both normalize to the same string.
 */
export function equal(uriA:string, uriB:string, options?: URIOptions):boolean;
export function equal(uriA:URIComponents, uriB:URIComponents, options?:URIOptions):boolean;
export function equal(uriA:any, uriB:any, options?:URIOptions):boolean {
	if (typeof uriA === "string") {
		uriA = serialize(parse(uriA, options), options);
	} else if (typeOf(uriA) === "object") {
		uriA = serialize(<URIComponents>uriA, options);
	}

	if (typeof uriB === "string") {
		uriB = serialize(parse(uriB, options), options);
	} else if (typeOf(uriB) === "object") {
		uriB = serialize(<URIComponents>uriB, options);
	}

	return uriA === uriB;
}
/**
 * Percent-encodes every character of `str` matched by the protocol's `ESCAPE` pattern.
 *
 * @param str Text to escape; a falsy value is returned as is.
 * @param options When `options.iri` is truthy the IRI pattern is used, otherwise the URI one.
 * @returns The escaped text.
 */
export function escapeComponent(str:string, options?:URIOptions):string {
	return str && str.toString().replace((!options || !options.iri ? URI_PROTOCOL.ESCAPE : IRI_PROTOCOL.ESCAPE), pctEncChar);
}
/**
 * Decodes every percent-encoded sequence of `str` matched by the protocol's `PCT_ENCODED` pattern.
 *
 * @param str Text to unescape; a falsy value is returned as is.
 * @param options When `options.iri` is truthy the IRI pattern is used, otherwise the URI one.
 * @returns The unescaped text.
 */
export function unescapeComponent(str:string, options?:URIOptions):string {
	return str && str.toString().replace((!options || !options.iri ? URI_PROTOCOL.PCT_ENCODED : IRI_PROTOCOL.PCT_ENCODED), pctDecChars);
}