/**
 * @fileoverview An RFC 3986 compliant, scheme extendable URI parsing/validating/resolving library for JavaScript.
 * @author <a href="mailto:gary.court@gmail.com">Gary Court</a>
 * @see http://github.com/garycourt/uri-js
 */
/**
 * Copyright 2011 Gary Court. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of
 *       conditions and the following disclaimer.
 *
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list
 *       of conditions and the following disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GARY COURT ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARY COURT OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those of the
 * authors and should not be interpreted as representing official policies, either expressed
 * or implied, of Gary Court.
 */
35 import URI_PROTOCOL from "./regexps-uri";
36 import IRI_PROTOCOL from "./regexps-iri";
37 import punycode from "punycode";
38 import { toUpperCase, typeOf, assign } from "./util";
/**
 * Registry of scheme handlers, keyed by lower-case scheme name.
 *
 * `parse()` and `serialize()` look a handler up here and, when one is found,
 * consult its optional `parse`, `serialize`, `unicodeSupport`, `domainHost`
 * and `absolutePath` members.
 */
export const SCHEMES = {};
/**
 * Percent-encodes one character as its UTF-8 byte sequence.
 *
 * Only the first code point of `chr` is encoded; anything after it is ignored.
 * A lone UTF-16 code unit (including an unpaired surrogate) is encoded as a
 * one-, two- or three-byte sequence. A full surrogate pair is encoded as a
 * single four-byte sequence rather than having its low surrogate dropped.
 *
 * @param {string} chr - The character to encode.
 * @returns {string} Upper-case "%XX" triplets, one per UTF-8 byte.
 */
export function pctEncChar(chr) {
    const code = chr.codePointAt(0);
    // "%" + two upper-case hex digits for a single byte value.
    const pct = (byte) => "%" + (byte < 16 ? "0" : "") + byte.toString(16).toUpperCase();
    if (code >= 0x10000) {
        return pct((code >> 18) | 240) + pct(((code >> 12) & 63) | 128) + pct(((code >> 6) & 63) | 128) + pct((code & 63) | 128);
    }
    if (code < 128) {
        return pct(code);
    }
    if (code < 2048) {
        return pct((code >> 6) | 192) + pct((code & 63) | 128);
    }
    // Also reached for an empty string (code is undefined), which yields "%E0%80%80".
    return pct((code >> 12) | 224) + pct(((code >> 6) & 63) | 128) + pct((code & 63) | 128);
}
/**
 * Decodes a run of percent-encoded UTF-8 bytes ("%XX%XX...") into characters.
 *
 * The input is walked one lead byte at a time:
 *  - a byte below 0x80 becomes one character;
 *  - a lead byte 0xC2..0xDF consumes two triplets, 0xE0 and above consumes three;
 *    when the string is too short for that, the remaining text is copied through;
 *  - a well-formed four-byte sequence (lead 0xF0..0xF4 followed by three
 *    continuation bytes, encoding U+10000..U+10FFFF) is decoded to that code point;
 *  - any other lead byte (stray continuation byte, 0xC0/0xC1, non-hex digits)
 *    is copied through unchanged as three characters.
 *
 * @param {string} str - A string consisting of percent-encoded triplets.
 * @returns {string} The decoded text.
 */
export function pctDecChars(str) {
    const len = str.length;
    // Value of the two hex digits following the "%" at `pos` (NaN when they are not hex).
    const byteAt = (pos) => Number.parseInt(str.slice(pos + 1, pos + 3), 16);
    // True when a "%XX" triplet encoding a UTF-8 continuation byte (10xxxxxx) starts at `pos`.
    const isContinuation = (pos) => str.charAt(pos) === "%" && (byteAt(pos) & 0xC0) === 0x80;
    // Code point of a well-formed four-byte sequence starting at `pos`, or -1 when there is none.
    const fourByteCodePoint = (pos, lead) => {
        if (lead < 240 || lead > 244 || !isContinuation(pos + 3) || !isContinuation(pos + 6) || !isContinuation(pos + 9)) {
            return -1;
        }
        const codePoint = ((lead & 7) << 18) | ((byteAt(pos + 3) & 63) << 12) | ((byteAt(pos + 6) & 63) << 6) | (byteAt(pos + 9) & 63);
        return codePoint >= 0x10000 && codePoint <= 0x10FFFF ? codePoint : -1;
    };
    let newStr = "";
    let i = 0;
    while (i < len) {
        const c = byteAt(i);
        if (c < 128) {
            newStr += String.fromCharCode(c);
            i += 3;
        }
        else if (c >= 194 && c < 224) {
            if ((len - i) >= 6) {
                const c2 = byteAt(i + 3);
                newStr += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
            }
            else {
                newStr += str.slice(i, i + 6);
            }
            i += 6;
        }
        else if (c >= 224) {
            const astral = fourByteCodePoint(i, c);
            if (astral !== -1) {
                newStr += String.fromCodePoint(astral);
                i += 12;
            }
            else {
                if ((len - i) >= 9) {
                    const c2 = byteAt(i + 3);
                    const c3 = byteAt(i + 6);
                    newStr += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
                }
                else {
                    newStr += str.slice(i, i + 9);
                }
                i += 9;
            }
        }
        else {
            newStr += str.slice(i, i + 3);
            i += 3;
        }
    }
    return newStr;
}
/**
 * Normalizes the percent-encoding of every defined URI component, in place.
 *
 * For each component: percent-encoded sequences that decode to an unreserved
 * character are decoded, characters the component does not allow are
 * percent-encoded, and the hex digits of remaining escapes are upper-cased
 * via `toUpperCase`. The scheme and host are additionally lower-cased, and
 * characters not allowed in a scheme are removed rather than encoded.
 *
 * @param {object} components - Parsed URI components; mutated.
 * @param {object} protocol - Regular-expression set (`URI_PROTOCOL` or `IRI_PROTOCOL`).
 * @returns {object} The same `components` object.
 */
function _normalizeComponentEncoding(components, protocol) {
    // Keep the escape as written unless its decoded form matches protocol.UNRESERVED.
    function decodeUnreserved(str) {
        const decStr = pctDecChars(str);
        return (!decStr.match(protocol.UNRESERVED) ? str : decStr);
    }
    if (components.scheme) {
        components.scheme = String(components.scheme).replace(protocol.PCT_ENCODED, decodeUnreserved).toLowerCase().replace(protocol.NOT_SCHEME, "");
    }
    if (components.userinfo !== undefined) {
        components.userinfo = String(components.userinfo).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_USERINFO, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    if (components.host !== undefined) {
        components.host = String(components.host).replace(protocol.PCT_ENCODED, decodeUnreserved).toLowerCase().replace(protocol.NOT_HOST, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    if (components.path !== undefined) {
        // A path without a scheme uses the stricter NOT_PATH_NOSCHEME character set.
        components.path = String(components.path).replace(protocol.PCT_ENCODED, decodeUnreserved).replace((components.scheme ? protocol.NOT_PATH : protocol.NOT_PATH_NOSCHEME), pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    if (components.query !== undefined) {
        components.query = String(components.query).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_QUERY, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    if (components.fragment !== undefined) {
        components.fragment = String(components.fragment).replace(protocol.PCT_ENCODED, decodeUnreserved).replace(protocol.NOT_FRAGMENT, pctEncChar).replace(protocol.PCT_ENCODED, toUpperCase);
    }
    return components;
}
/**
 * Removes leading "0" characters from a numeric field.
 *
 * @param {string} str - One field of an IP address.
 * @returns {string} The field without leading zeros, or "0" when nothing is left.
 */
function _stripLeadingZeros(str) {
    return str.replace(/^0*(.*)/, "$1") || "0";
}
/**
 * Normalizes an IPv4 host by stripping leading zeros from each dotted field.
 *
 * @param {string} host - Host text to examine.
 * @param {object} protocol - Regular-expression set providing `IPV4ADDRESS`.
 * @returns {string} The normalized address, or `host` unchanged when
 *     `protocol.IPV4ADDRESS` does not capture an address from it.
 */
function _normalizeIPv4(host, protocol) {
    const matches = host.match(protocol.IPV4ADDRESS) || [];
    const [, address] = matches;
    if (address) {
        return address.split(".").map(_stripLeadingZeros).join(".");
    }
    else {
        return host;
    }
}
/**
 * Normalizes an IPv6 host: lower-cases it, strips leading zeros from every
 * field, and collapses the longest run of two or more zero fields to "::".
 * A zone identifier captured by the pattern is re-appended after "%".
 *
 * @param {string} host - Host text to examine.
 * @param {object} protocol - Regular-expression set providing `IPV6ADDRESS` and `IPV4ADDRESS`.
 * @returns {string} The normalized address, or `host` unchanged when
 *     `protocol.IPV6ADDRESS` does not capture an address from it.
 */
function _normalizeIPv6(host, protocol) {
    const matches = host.match(protocol.IPV6ADDRESS) || [];
    const [, address, zone] = matches;
    if (address) {
        // After reverse(): `last` is the part after "::" (or the whole address), `first` the part before it.
        const [last, first] = address.toLowerCase().split('::').reverse();
        const firstFields = first ? first.split(":").map(_stripLeadingZeros) : [];
        const lastFields = last.split(":").map(_stripLeadingZeros);
        // A trailing dotted-quad occupies one slot, so only 7 fields are laid out.
        const isLastFieldIPv4Address = protocol.IPV4ADDRESS.test(lastFields[lastFields.length - 1]);
        const fieldCount = isLastFieldIPv4Address ? 7 : 8;
        const lastFieldsStart = lastFields.length - fieldCount;
        const fields = Array(fieldCount);
        // Left-align the leading fields, right-align the trailing ones; the gap stays ''.
        for (let x = 0; x < fieldCount; ++x) {
            fields[x] = firstFields[x] || lastFields[lastFieldsStart + x] || '';
        }
        if (isLastFieldIPv4Address) {
            fields[fieldCount - 1] = _normalizeIPv4(fields[fieldCount - 1], protocol);
        }
        // Collect every run of consecutive empty/"0" fields as { index, length }.
        const allZeroFields = fields.reduce((acc, field, index) => {
            if (!field || field === "0") {
                const lastLongest = acc[acc.length - 1];
                if (lastLongest && lastLongest.index + lastLongest.length === index) {
                    lastLongest.length++;
                }
                else {
                    acc.push({ index, length: 1 });
                }
            }
            return acc;
        }, []);
        const longestZeroFields = allZeroFields.sort((a, b) => b.length - a.length)[0];
        let newHost;
        if (longestZeroFields && longestZeroFields.length > 1) {
            const newFirst = fields.slice(0, longestZeroFields.index);
            const newLast = fields.slice(longestZeroFields.index + longestZeroFields.length);
            newHost = newFirst.join(":") + "::" + newLast.join(":");
        }
        else {
            newHost = fields.join(":");
        }
        if (zone) {
            newHost += "%" + zone;
        }
        return newHost;
    }
    else {
        return host;
    }
}
// Splits a URI reference into its parts. Capture groups:
//   1 scheme, 2 authority, 3 userinfo, 4 host, 5 port, 6 path, 7 query, 8 fragment.
const URI_PARSE = /^(?:([^:\/?#]+):)?(?:\/\/((?:([^\/?#@]*)@)?(\[[^\/?#\]]+\]|[^\/?#:]*)(?:\:(\d*))?))?([^?#]*)(?:\?([^#]*))?(?:#((?:.|\n|\r)*))?/i;
// True when the engine reports a non-participating capture group as undefined
// (rather than ""); parse() uses this to pick between its two extraction paths.
const NO_MATCH_IS_UNDEFINED = ("").match(/(){0}/)[1] === undefined;
/**
 * Parses a URI/IRI reference string into its components.
 *
 * @param {string} uriString - The reference to parse.
 * @param {object} [options] - Options read here:
 *     `iri` (any value other than `false` selects the IRI patterns),
 *     `reference` ("suffix" prepends `scheme:` and "//" before matching; any other
 *     value is the reference type the result must have),
 *     `scheme` (overrides the scheme used to look up the handler),
 *     `unicodeSupport` and `domainHost` (override the handler's flags).
 * @returns {object} Components `scheme`, `userinfo`, `host`, `port`, `path`,
 *     `query`, `fragment` and `reference`; `error` is set when something went wrong.
 */
export function parse(uriString, options = {}) {
    const components = {};
    const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);
    if (options.reference === "suffix") {
        uriString = (options.scheme ? options.scheme + ":" : "") + "//" + uriString;
    }
    const matches = uriString.match(URI_PARSE);
    if (matches) {
        if (NO_MATCH_IS_UNDEFINED) {
            //store each component
            components.scheme = matches[1];
            components.userinfo = matches[3];
            components.host = matches[4];
            components.port = parseInt(matches[5], 10);
            components.path = matches[6] || "";
            components.query = matches[7];
            components.fragment = matches[8];
            //fix port number: keep the raw capture ("" or undefined) when it is not numeric
            if (Number.isNaN(components.port)) {
                components.port = matches[5];
            }
        }
        else { //IE FIX for improper RegExp matching
            //store each component
            components.scheme = matches[1] || undefined;
            components.userinfo = (uriString.indexOf("@") !== -1 ? matches[3] : undefined);
            components.host = (uriString.indexOf("//") !== -1 ? matches[4] : undefined);
            components.port = parseInt(matches[5], 10);
            components.path = matches[6] || "";
            components.query = (uriString.indexOf("?") !== -1 ? matches[7] : undefined);
            components.fragment = (uriString.indexOf("#") !== -1 ? matches[8] : undefined);
            //fix port number: use the port capture (group 5), mirroring the branch above;
            //group 4 is the host and must not end up in `port`
            if (Number.isNaN(components.port)) {
                components.port = (uriString.match(/\/\/(?:.|\n)*\:(?:\/|\?|\#|$)/) ? matches[5] : undefined);
            }
        }
        if (components.host) {
            //normalize IP hosts
            components.host = _normalizeIPv6(_normalizeIPv4(components.host, protocol), protocol);
        }
        //determine reference type
        if (components.scheme === undefined && components.userinfo === undefined && components.host === undefined && components.port === undefined && !components.path && components.query === undefined) {
            components.reference = "same-document";
        }
        else if (components.scheme === undefined) {
            components.reference = "relative";
        }
        else if (components.fragment === undefined) {
            components.reference = "absolute";
        }
        else {
            components.reference = "uri";
        }
        //check for reference errors
        if (options.reference && options.reference !== "suffix" && options.reference !== components.reference) {
            components.error = components.error || "URI is not a " + options.reference + " reference.";
        }
        //find scheme handler
        const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];
        //check if scheme can't handle IRIs
        if (!options.unicodeSupport && (!schemeHandler || !schemeHandler.unicodeSupport)) {
            //if host component is a domain name
            if (components.host && (options.domainHost || (schemeHandler && schemeHandler.domainHost))) {
                //convert Unicode IDN -> ASCII IDN
                try {
                    components.host = punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase());
                }
                catch (e) {
                    components.error = components.error || "Host's domain name can not be converted to ASCII via punycode: " + e;
                }
            }
            //convert IRI -> URI
            _normalizeComponentEncoding(components, URI_PROTOCOL);
        }
        else {
            //normalize encodings
            _normalizeComponentEncoding(components, protocol);
        }
        //perform scheme specific parsing
        if (schemeHandler && schemeHandler.parse) {
            schemeHandler.parse(components, options);
        }
    }
    else {
        components.error = components.error || "URI can not be parsed.";
    }
    return components;
}
/**
 * Builds the authority part ("userinfo@host:port") from components.
 *
 * @param {object} components - Components providing `userinfo`, `host` and `port`.
 * @param {object} options - Options; any `iri` value other than `false` selects the IRI patterns.
 * @returns {string|undefined} The authority text, or `undefined` when none of
 *     the three parts contributes anything.
 */
function _recomposeAuthority(components, options) {
    const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);
    const uriTokens = [];
    if (components.userinfo !== undefined) {
        uriTokens.push(components.userinfo);
        uriTokens.push("@");
    }
    if (components.host !== undefined) {
        //normalize IP hosts, add brackets and escape zone separator for IPv6
        uriTokens.push(_normalizeIPv6(_normalizeIPv4(String(components.host), protocol), protocol).replace(protocol.IPV6ADDRESS, (_, $1, $2) => "[" + $1 + ($2 ? "%25" + $2 : "") + "]"));
    }
    // Only a numeric port is emitted; a non-number port (e.g. the raw "" kept by parse) is skipped.
    if (typeof components.port === "number") {
        uriTokens.push(":");
        uriTokens.push(components.port.toString(10));
    }
    return uriTokens.length ? uriTokens.join("") : undefined;
}
// Patterns for the dot-segment removal loop below.
const RDS1 = /^\.\.?\//; // leading "../" or "./"
const RDS2 = /^\/\.(\/|$)/; // leading "/./" or a final "/."
const RDS3 = /^\/\.\.(\/|$)/; // leading "/../" or a final "/.."
const RDS4 = /^\.\.?$/; // input that is exactly "." or ".." (not referenced by removeDotSegments, which compares literally)
const RDS5 = /^\/?(?:.|\n)*?(?=\/|$)/; // first path segment, including its leading "/"
/**
 * Removes "." and ".." segments from a path.
 *
 * The input is consumed from the left: dot prefixes are dropped, "/../" also
 * discards the most recently emitted segment, and every other leading segment
 * is moved to the output.
 *
 * @param {string} input - The path to clean.
 * @returns {string} The path without dot segments.
 * @throws {Error} If no leading segment can be extracted from non-empty input.
 */
export function removeDotSegments(input) {
    const output = [];
    while (input.length) {
        if (input.match(RDS1)) {
            input = input.replace(RDS1, "");
        }
        else if (input.match(RDS2)) {
            input = input.replace(RDS2, "/");
        }
        else if (input.match(RDS3)) {
            input = input.replace(RDS3, "/");
            output.pop();
        }
        else if (input === "." || input === "..") {
            input = "";
        }
        else {
            const im = input.match(RDS5);
            if (im) {
                const s = im[0];
                input = input.slice(s.length);
                output.push(s);
            }
            else {
                throw new Error("Unexpected dot segment condition");
            }
        }
    }
    return output.join("");
}
/**
 * Serializes URI components back into a string.
 *
 * Note that `components` is mutated: the scheme handler's `serialize` hook may
 * modify it, the host may be converted via punycode, and every component's
 * percent-encoding is normalized in place.
 *
 * @param {object} components - The components to serialize.
 * @param {object} [options] - Options read here:
 *     `iri` (truthy selects the IRI patterns and Unicode host output),
 *     `scheme` (overrides the scheme used to look up the handler),
 *     `domainHost` and `absolutePath` (override the handler's flags),
 *     `reference` ("suffix" omits the scheme and the "//" prefix).
 * @returns {string} The serialized URI reference.
 */
export function serialize(components, options = {}) {
    const protocol = (options.iri ? IRI_PROTOCOL : URI_PROTOCOL);
    const uriTokens = [];
    //find scheme handler
    const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];
    //perform scheme specific serialization
    if (schemeHandler && schemeHandler.serialize) {
        schemeHandler.serialize(components, options);
    }
    if (components.host) {
        //if host component is an IPv6 address
        if (protocol.IPV6ADDRESS.test(components.host)) {
            //TODO: normalize IPv6 address as per RFC 5952
        }
        //if host component is a domain name
        else if (options.domainHost || (schemeHandler && schemeHandler.domainHost)) {
            //convert IDN via punycode
            try {
                components.host = (!options.iri ? punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase()) : punycode.toUnicode(components.host));
            }
            catch (e) {
                components.error = components.error || "Host's domain name can not be converted to " + (!options.iri ? "ASCII" : "Unicode") + " via punycode: " + e;
            }
        }
    }
    //normalize encoding
    _normalizeComponentEncoding(components, protocol);
    if (options.reference !== "suffix" && components.scheme) {
        uriTokens.push(components.scheme);
        uriTokens.push(":");
    }
    const authority = _recomposeAuthority(components, options);
    if (authority !== undefined) {
        if (options.reference !== "suffix") {
            uriTokens.push("//");
        }
        uriTokens.push(authority);
        // a non-empty path after an authority must start with "/"
        if (components.path && components.path.charAt(0) !== "/") {
            uriTokens.push("/");
        }
    }
    if (components.path !== undefined) {
        let s = components.path;
        if (!options.absolutePath && (!schemeHandler || !schemeHandler.absolutePath)) {
            s = removeDotSegments(s);
        }
        if (authority === undefined) {
            s = s.replace(/^\/\//, "/%2F"); //don't allow the path to start with "//"
        }
        uriTokens.push(s);
    }
    if (components.query !== undefined) {
        uriTokens.push("?");
        uriTokens.push(components.query);
    }
    if (components.fragment !== undefined) {
        uriTokens.push("#");
        uriTokens.push(components.fragment);
    }
    return uriTokens.join(""); //merge tokens into a string
}
/**
 * Resolves a relative reference against a base, both given as components.
 *
 * @param {object} base - Components of the base URI.
 * @param {object} relative - Components of the reference to resolve.
 * @param {object} [options] - Passed to `serialize`/`parse` when normalizing;
 *     a truthy `tolerant` makes a scheme on `relative` be ignored so that the
 *     reference is still resolved against `base`. `null` is treated as `{}`.
 * @param {boolean} [skipNormalization] - When truthy, `base` and `relative`
 *     are used as given instead of being round-tripped through serialize/parse.
 * @returns {object} A new components object for the resolved target.
 */
export function resolveComponents(base, relative, options = {}, skipNormalization) {
    // Normalize `options` before its first use: the default parameter only covers
    // `undefined`, and serialize()/parse() dereference it immediately.
    options = options || {};
    const target = {};
    if (!skipNormalization) {
        base = parse(serialize(base, options), options); //normalize base components
        relative = parse(serialize(relative, options), options); //normalize relative components
    }
    if (!options.tolerant && relative.scheme) {
        // the reference carries its own scheme: take it wholesale
        target.scheme = relative.scheme;
        //target.authority = relative.authority;
        target.userinfo = relative.userinfo;
        target.host = relative.host;
        target.port = relative.port;
        target.path = removeDotSegments(relative.path || "");
        target.query = relative.query;
    }
    else {
        if (relative.userinfo !== undefined || relative.host !== undefined || relative.port !== undefined) {
            // the reference has an authority: only the scheme comes from the base
            //target.authority = relative.authority;
            target.userinfo = relative.userinfo;
            target.host = relative.host;
            target.port = relative.port;
            target.path = removeDotSegments(relative.path || "");
            target.query = relative.query;
        }
        else {
            if (!relative.path) {
                target.path = base.path;
                if (relative.query !== undefined) {
                    target.query = relative.query;
                }
                else {
                    target.query = base.query;
                }
            }
            else {
                if (relative.path.charAt(0) === "/") {
                    target.path = removeDotSegments(relative.path);
                }
                else {
                    // merge the relative path onto the base path
                    if ((base.userinfo !== undefined || base.host !== undefined || base.port !== undefined) && !base.path) {
                        target.path = "/" + relative.path;
                    }
                    else if (!base.path) {
                        target.path = relative.path;
                    }
                    else {
                        target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative.path;
                    }
                    target.path = removeDotSegments(target.path);
                }
                target.query = relative.query;
            }
            //target.authority = base.authority;
            target.userinfo = base.userinfo;
            target.host = base.host;
            target.port = base.port;
        }
        target.scheme = base.scheme;
    }
    target.fragment = relative.fragment;
    return target;
}
/**
 * Resolves a relative URI string against a base URI string.
 *
 * Both inputs are parsed and the result serialized with `scheme: 'null'`
 * merged in as a default ahead of the caller's options.
 *
 * @param {string} baseURI - The base URI.
 * @param {string} relativeURI - The reference to resolve.
 * @param {object} [options] - Options forwarded to parse/resolveComponents/serialize.
 * @returns {string} The resolved URI.
 */
export function resolve(baseURI, relativeURI, options) {
    const schemelessOptions = assign({ scheme: 'null' }, options);
    const baseComponents = parse(baseURI, schemelessOptions);
    const relativeComponents = parse(relativeURI, schemelessOptions);
    // both inputs were just parsed, so resolveComponents can skip its own normalization pass
    return serialize(resolveComponents(baseComponents, relativeComponents, schemelessOptions, true), schemelessOptions);
}
/**
 * Normalizes a URI given either as a string or as a components object.
 *
 * @param {string|object} uri - A URI string or a components object.
 * @param {object} [options] - Options forwarded to `parse` and `serialize`.
 * @returns {string|object} A normalized string for string input, a normalized
 *     components object when `typeOf(uri)` is "object"; any other value is
 *     returned unchanged.
 */
export function normalize(uri, options) {
    if (typeof uri === "string") {
        uri = serialize(parse(uri, options), options);
    }
    else if (typeOf(uri) === "object") {
        uri = parse(serialize(uri, options), options);
    }
    return uri;
}
/**
 * Tests whether two URIs are equal once both are in serialized, normalized form.
 *
 * @param {string|object} uriA - A URI string or a components object.
 * @param {string|object} uriB - A URI string or a components object.
 * @param {object} [options] - Options forwarded to `parse` and `serialize`.
 * @returns {boolean} `true` when both serialize to the same string.
 */
export function equal(uriA, uriB, options) {
    if (typeof uriA === "string") {
        uriA = serialize(parse(uriA, options), options);
    }
    else if (typeOf(uriA) === "object") {
        uriA = serialize(uriA, options);
    }
    if (typeof uriB === "string") {
        uriB = serialize(parse(uriB, options), options);
    }
    else if (typeOf(uriB) === "object") {
        uriB = serialize(uriB, options);
    }
    return uriA === uriB;
}
/**
 * Percent-encodes every character of `str` matched by the protocol's `ESCAPE` pattern.
 *
 * @param {*} str - Value to escape; falsy values are returned unchanged.
 * @param {object} [options] - A truthy `iri` selects the IRI pattern instead of the URI one.
 * @returns {*} The escaped string, or `str` itself when it is falsy.
 */
export function escapeComponent(str, options) {
    return str && str.toString().replace((!options || !options.iri ? URI_PROTOCOL.ESCAPE : IRI_PROTOCOL.ESCAPE), pctEncChar);
}
/**
 * Decodes every percent-encoded sequence of `str` matched by the protocol's `PCT_ENCODED` pattern.
 *
 * @param {*} str - Value to unescape; falsy values are returned unchanged.
 * @param {object} [options] - A truthy `iri` selects the IRI pattern instead of the URI one.
 * @returns {*} The unescaped string, or `str` itself when it is falsy.
 */
export function unescapeComponent(str, options) {
    return str && str.toString().replace((!options || !options.iri ? URI_PROTOCOL.PCT_ENCODED : IRI_PROTOCOL.PCT_ENCODED), pctDecChars);
}
480 //# sourceMappingURL=uri.js.map