index.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  'use strict';

  var required = require('requires-port');
  var qs = require('querystringify');

  //
  // Leading run of control characters and whitespace; removed by `trimLeft`.
  //
  var controlOrWhitespace = /^[\x00-\x20\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+/;

  //
  // Line feed, carriage return and tab; removed from every address before it
  // is parsed.
  //
  var CRHTLF = /[\n\r\t]/g;

  //
  // Matches a scheme followed by "//" at the start of a string. The hyphen is
  // escaped on purpose: an unescaped `+-.` inside a character class is a range
  // that also contains ",", which is not a valid scheme character.
  //
  var slashes = /^[A-Za-z][A-Za-z0-9+\-.]*:\/\//;

  //
  // Matches a trailing ":<digits>" port suffix.
  //
  var port = /:\d+$/;

  //
  // Splits an address into: optional scheme (with colon), optional "//",
  // optional run of further slashes or backslashes, and everything else.
  //
  var protocolre = /^([a-z][a-z0-9.+-]*:)?(\/\/)?([\\/]+)?([\S\s]*)/i;

  //
  // Matches a Windows drive letter prefix such as "C:".
  //
  var windowsDriveLetter = /^[a-zA-Z]:/;
  /**
   * Strip leading control characters and whitespace from a value.
   *
   * Falsy input yields an empty string; anything else is converted with its
   * own `toString()` before the leading run is removed.
   *
   * @param {Object|String} str Value to trim.
   * @returns {String} The string form of `str` without its leading control
   * characters and whitespace.
   * @public
   */
  function trimLeft(str) {
    if (!str) return '';

    return str.toString().replace(controlOrWhitespace, '');
  }
  /**
   * Ordered parse instructions consumed by the `Url` constructor. Each entry
   * is either a function that rewrites the remaining address, or an array:
   *
   * 0. What to look for: a string is located with `indexOf` (`lastIndexOf`
   *    for "@"), a RegExp is run with `exec`, and `NaN` means "assign whatever
   *    is left of the address".
   * 1. Name of the property that receives the extracted value.
   * 2. When a number, the part before the match is extracted and this many
   *    characters are skipped; otherwise the part from the match onwards is
   *    extracted and the address keeps what came before it.
   * 3. When truthy, an empty value is inherited from the base location for
   *    relative URLs.
   * 4. When truthy, the resulting value is lowercased.
   *
   * The constructor replaces the entry at index 3 (the "/" pathname rule) when
   * the URL has no authority component, so the order of entries matters.
   */
  var rules = [
    // Everything from the first "#" onwards.
    ['#', 'hash'],
    // Everything from the first "?" onwards.
    ['?', 'query'],
    // Normalise backslashes to forward slashes, for special schemes only.
    function sanitize(address, url) {
      if (!isSpecial(url.protocol)) return address;

      return address.replace(/\\/g, '/');
    },
    // Everything from the first "/" onwards.
    ['/', 'pathname'],
    // Everything before the last "@"; the "@" itself is dropped.
    ['@', 'auth', 1],
    // What is left is the host, including any port.
    [NaN, 'host', undefined, 1, 1],
    // Trailing ":<digits>" is the port and is removed from the address.
    [/:(\d*)$/, 'port', undefined, 1],
    // What is left after the port is removed is the hostname.
    [NaN, 'hostname', undefined, 1, 1]
  ];

  /**
   * Properties that are never copied or inherited from a base location.
   *
   * @type {Object}
   * @private
   */
  var ignore = {
    hash: 1,
    query: 1
  };
  /**
   * Build the "default location" object used to resolve relative URLs.
   *
   * When no location is supplied, the `location` property of the first
   * available global object (`window`, `global`, `self`) is used. Three input
   * shapes are handled:
   *
   * - An object whose protocol is "blob:": its unescaped `pathname` is parsed
   *   as a URL and that result is returned.
   * - A string: it is parsed as a URL and the ignored properties (hash and
   *   query) are removed from the result.
   * - Any other object: its properties are copied, except the ignored ones,
   *   and `slashes` is derived from `href` when it is not already set.
   *
   * Anything else yields an empty object.
   *
   * @param {Object|String} loc Optional default location object.
   * @returns {Object} lolcation object.
   * @public
   */
  function lolcation(loc) {
    var scope;

    if (typeof window !== 'undefined') scope = window;
    else if (typeof global !== 'undefined') scope = global;
    else if (typeof self !== 'undefined') scope = self;
    else scope = {};

    var fallback = scope.location || {};
    loc = loc || fallback;

    var kind = typeof loc;
    var result = {};
    var name;

    if (loc.protocol === 'blob:') {
      return new Url(unescape(loc.pathname), {});
    }

    if (kind === 'string') {
      result = new Url(loc, {});
      for (name in ignore) delete result[name];
      return result;
    }

    if (kind === 'object') {
      for (name in loc) {
        if (!(name in ignore)) result[name] = loc[name];
      }

      if (result.slashes === undefined) {
        result.slashes = slashes.test(loc.href);
      }
    }

    return result;
  }
  /**
   * Tell whether a protocol scheme is one of the special schemes: file, ftp,
   * http, https, ws or wss. The comparison is exact, so the scheme must be
   * lowercase and include its trailing colon.
   *
   * @param {String} scheme The protocol scheme of the URL
   * @return {Boolean} `true` if the protocol scheme is special, else `false`
   * @private
   */
  function isSpecial(scheme) {
    switch (scheme) {
      case 'file:':
      case 'ftp:':
      case 'http:':
      case 'https:':
      case 'ws:':
      case 'wss:':
        return true;

      default:
        return false;
    }
  }
  /**
   * @typedef ProtocolExtract
   * @type Object
   * @property {String} protocol Protocol matched in the URL, in lowercase.
   * @property {Boolean} slashes `true` if protocol is followed by "//" or the
   * protocol is special, else `false`.
   * @property {Number} slashesCount Number of slashes and backslashes found
   * directly after the protocol.
   * @property {String} rest Rest of the URL that is not part of the protocol.
   */

  /**
   * Split the protocol, and the slashes that follow it, off a URL.
   *
   * Leading whitespace/control characters and any CR, LF or tab are removed
   * from the address first. How many of the slashes stay in `rest` depends on
   * the scheme; see the comments in the body.
   *
   * @param {String} address URL we want to extract from.
   * @param {Object} location Base location; only its `protocol` is read, and
   * only when the address has no protocol of its own.
   * @return {ProtocolExtract} Extracted information.
   * @private
   */
  function extractProtocol(address, location) {
    address = trimLeft(address).replace(CRHTLF, '');
    location = location || {};

    var parts = protocolre.exec(address);
    var scheme = parts[1] ? parts[1].toLowerCase() : '';
    var hasDoubleSlash = !!parts[2];
    var doubleSlash = parts[2] || '';
    var extraSlashes = parts[3] || '';
    var remainder = parts[4];
    var slashesCount = doubleSlash.length + extraSlashes.length;

    // Start from "all slashes kept" and trim according to the scheme.
    var rest = doubleSlash + extraSlashes + remainder;

    if (scheme === 'file:') {
      // Drop two slashes when at least two are present; keep any beyond that.
      if (slashesCount >= 2) rest = rest.slice(2);
    } else if (isSpecial(scheme)) {
      // Special schemes ignore every slash and backslash after the colon.
      rest = remainder;
    } else if (scheme) {
      // Other schemes only drop a literal "//".
      if (hasDoubleSlash) rest = rest.slice(2);
    } else if (slashesCount >= 2 && isSpecial(location.protocol)) {
      // No scheme given, two or more slashes, and the base scheme is special.
      rest = remainder;
    }

    return {
      protocol: scheme,
      slashes: hasDoubleSlash || isSpecial(scheme),
      slashesCount: slashesCount,
      rest: rest
    };
  }
  /**
   * Resolve a relative pathname against the pathname of a base URL.
   *
   * The last segment of the base is replaced by the relative path, after
   * which "." segments are dropped and each ".." removes the nearest
   * preceding segment. An empty relative path returns the base unchanged.
   *
   * @param {String} relative Pathname of the relative URL.
   * @param {String} base Pathname of the base URL.
   * @return {String} Resolved pathname.
   * @private
   */
  function resolve(relative, base) {
    if (relative === '') return base;

    var segments = (base || '/').split('/');
    segments.pop();
    segments = segments.concat(relative.split('/'));

    var tail = segments[segments.length - 1];
    var kept = [];
    var pendingUps = 0;
    var rootDropped = false;

    // Walk from the end so every ".." cancels the closest segment before it.
    for (var idx = segments.length - 1; idx >= 0; idx -= 1) {
      var segment = segments[idx];

      if (segment === '.') continue;

      if (segment === '..') {
        pendingUps += 1;
        continue;
      }

      if (pendingUps > 0) {
        // A ".." consumed the leading segment; the leading "/" is restored below.
        if (idx === 0) rootDropped = true;
        pendingUps -= 1;
        continue;
      }

      kept.unshift(segment);
    }

    if (rootDropped) kept.unshift('');

    // A path ending in "." or ".." names a directory: keep the trailing slash.
    if (tail === '.' || tail === '..') kept.push('');

    return kept.join('/');
  }
  /**
   * The URL instance constructor. It can be called with or without `new`.
   *
   * The name `Url` (rather than `URL`) is deliberate so that it does not clash
   * with the global `URL` constructor available in browsers.
   *
   * After construction the instance carries: `slashes`, `protocol`, `hash`,
   * `query`, `pathname`, `auth`, `host`, `port`, `hostname`, `username`,
   * `password`, `origin` and `href`.
   *
   * @constructor
   * @param {String} address URL we want to parse.
   * @param {Object|String} [location] Location defaults for relative paths.
   * @param {Boolean|Function} [parser] Parser for the query string.
   * @private
   */
  function Url(address, location, parser) {
    address = trimLeft(address).replace(CRHTLF, '');

    if (!(this instanceof Url)) {
      return new Url(address, location, parser);
    }

    var instance = this;
    var steps = rules.slice();
    var locationType = typeof location;

    //
    // Two calling conventions are supported:
    //
    // 1. Node.js `url.parse` style: (address, boolean|function), where the
    //    second argument asks for the query string to be parsed as well.
    //
    // 2. Browser `URL` style: (address, object|string), where the second
    //    argument supplies the defaults used for relative URLs.
    //
    // A second argument that is neither an object nor a string is therefore
    // treated as the parser.
    //
    if (locationType !== 'object' && locationType !== 'string') {
      parser = location;
      location = null;
    }

    // Any truthy non-function parser selects the default query string parser.
    if (parser && typeof parser !== 'function') parser = qs.parse;

    location = lolcation(location);

    //
    // Split off the protocol before the instructions run.
    //
    var extracted = extractProtocol(address || '', location);
    var relative = !extracted.protocol && !extracted.slashes;

    instance.slashes = extracted.slashes || relative && location.slashes;
    instance.protocol = extracted.protocol || location.protocol || '';
    address = extracted.rest;

    //
    // Without an authority component the URL starts with its path, so the
    // "/" pathname rule is swapped for one that captures everything left.
    //
    var fileWithoutAuthority = extracted.protocol === 'file:' && (
      extracted.slashesCount !== 2 || windowsDriveLetter.test(address)
    );
    var plainWithoutAuthority = !extracted.slashes && (
      extracted.protocol ||
      extracted.slashesCount < 2 ||
      !isSpecial(instance.protocol)
    );

    if (fileWithoutAuthority || plainWithoutAuthority) {
      steps[3] = [/(.*)/, 'pathname'];
    }

    for (var i = 0; i < steps.length; i++) {
      var step = steps[i];

      // Function steps rewrite the remaining address and set nothing.
      if (typeof step === 'function') {
        address = step(address, instance);
        continue;
      }

      var matcher = step[0];
      var key = step[1];

      if (matcher !== matcher) {
        // Only NaN differs from itself: take whatever is left of the address.
        instance[key] = address;
      } else if (typeof matcher === 'string') {
        var at = matcher === '@'
          ? address.lastIndexOf(matcher)
          : address.indexOf(matcher);

        if (at !== -1) {
          if (typeof step[2] === 'number') {
            // Value sits before the match; skip `step[2]` characters after it.
            instance[key] = address.slice(0, at);
            address = address.slice(at + step[2]);
          } else {
            // Value runs from the match to the end of the address.
            instance[key] = address.slice(at);
            address = address.slice(0, at);
          }
        }
      } else {
        var found = matcher.exec(address);

        if (found) {
          instance[key] = found[1];
          address = address.slice(0, found.index);
        }
      }

      // Empty values may be inherited from the base location of relative URLs.
      instance[key] = instance[key] || (
        relative && step[3] ? location[key] || '' : ''
      );

      //
      // Host and hostname are lowercased so a proper `origin` can be built.
      //
      if (step[4]) instance[key] = instance[key].toLowerCase();
    }

    //
    // Turn the query string into an object when a parser was requested.
    //
    if (parser) instance.query = parser(instance.query);

    //
    // Resolve the pathname of a relative URL against the base pathname.
    //
    if (
      relative
      && location.slashes
      && instance.pathname.charAt(0) !== '/'
      && (instance.pathname !== '' || location.pathname !== '')
    ) {
      instance.pathname = resolve(instance.pathname, location.pathname);
    }

    //
    // Special schemes always get a pathname that starts with a "/".
    //
    if (instance.pathname.charAt(0) !== '/' && isSpecial(instance.protocol)) {
      instance.pathname = '/' + instance.pathname;
    }

    //
    // A port that is the default for the protocol is dropped. `host` carries
    // the port as well, so it is reset to the bare hostname.
    //
    if (!required(instance.port, instance.protocol)) {
      instance.host = instance.hostname;
      instance.port = '';
    }

    //
    // Split `auth` into a normalised username and password.
    //
    instance.username = instance.password = '';

    if (instance.auth) {
      var colon = instance.auth.indexOf(':');

      if (colon !== -1) {
        instance.username = instance.auth.slice(0, colon);
        instance.username = encodeURIComponent(decodeURIComponent(instance.username));

        instance.password = instance.auth.slice(colon + 1);
        instance.password = encodeURIComponent(decodeURIComponent(instance.password));
      } else {
        instance.username = encodeURIComponent(decodeURIComponent(instance.auth));
      }

      instance.auth = instance.password
        ? instance.username + ':' + instance.password
        : instance.username;
    }

    // Only special, non-file schemes with a host have a real origin.
    if (instance.protocol !== 'file:' && isSpecial(instance.protocol) && instance.host) {
      instance.origin = instance.protocol + '//' + instance.host;
    } else {
      instance.origin = 'null';
    }

    //
    // The href is the compiled result of everything above.
    //
    instance.href = instance.toString();
  }
  /**
   * Change one part of the URL instance and keep the derived properties
   * (`host`, `hostname`, `port`, `auth`, `origin`, `href`) in sync with it.
   *
   * Parts with special handling: `query`, `port`, `hostname`, `host`,
   * `protocol`, `pathname`, `hash`, `username`, `password` and `auth`. Any
   * other `part` only triggers the recomputation of the derived properties.
   *
   * @param {String} part Property we need to adjust.
   * @param {Mixed} value The newly assigned value.
   * @param {Boolean|Function} fn When setting the query, it will be the function
   * used to parse the query. When setting the protocol, double slash will be
   * removed from the final url if it is true.
   * @returns {Url} URL instance for chaining.
   * @public
   */
  function set(part, value, fn) {
    var url = this;

    switch (part) {
      case 'query':
        // Non-empty strings are parsed; everything else is stored as given.
        if ('string' === typeof value && value.length) {
          value = (fn || qs.parse)(value);
        }

        url[part] = value;
        break;

      case 'port':
        url[part] = value;

        if (!required(value, url.protocol)) {
          // Default port for the protocol: drop it from both port and host.
          url.host = url.hostname;
          url[part] = '';
        } else if (value) {
          url.host = url.hostname +':'+ value;
        }

        break;

      case 'hostname':
        url[part] = value;

        if (url.port) value += ':'+ url.port;
        url.host = value;
        break;

      case 'host':
        url[part] = value;

        if (port.test(value)) {
          // Split the trailing ":<digits>" off into `port`.
          value = value.split(':');
          url.port = value.pop();
          url.hostname = value.join(':');
        } else {
          url.hostname = value;
          url.port = '';
        }

        break;

      case 'protocol':
        url.protocol = value.toLowerCase();
        url.slashes = !fn;
        break;

      case 'pathname':
      case 'hash':
        if (value) {
          // Make sure the value starts with its delimiter.
          var char = part === 'pathname' ? '/' : '#';
          url[part] = value.charAt(0) !== char ? char + value : value;
        } else {
          url[part] = value;
        }
        break;

      case 'username':
      case 'password':
        url[part] = encodeURIComponent(value);
        break;

      case 'auth':
        var index = value.indexOf(':');

        if (~index) {
          url.username = value.slice(0, index);
          url.username = encodeURIComponent(decodeURIComponent(url.username));

          url.password = value.slice(index + 1);
          url.password = encodeURIComponent(decodeURIComponent(url.password));
        } else {
          url.username = encodeURIComponent(decodeURIComponent(value));
          //
          // No ":" means no password. Clear any previous one, otherwise it
          // would be stitched back into `auth` and `href` below.
          //
          url.password = '';
        }

        break;
    }

    // Re-apply lowercasing to the properties whose rule asks for it.
    for (var i = 0; i < rules.length; i++) {
      var ins = rules[i];

      if (ins[4]) url[ins[1]] = url[ins[1]].toLowerCase();
    }

    url.auth = url.password ? url.username +':'+ url.password : url.username;

    url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host
      ? url.protocol +'//'+ url.host
      : 'null';

    url.href = url.toString();

    return url;
  }
  /**
   * Compile the properties of the instance back into a URL string.
   *
   * @param {Function} stringify Optional query stringify function; anything
   * that is not a function selects the default one.
   * @returns {String} Compiled version of the URL.
   * @public
   */
  function toString(stringify) {
    if (typeof stringify !== 'function') stringify = qs.stringify;

    var url = this;
    var host = url.host;
    var scheme = url.protocol;

    // Tolerate a protocol that was stored without its trailing colon.
    if (scheme && scheme.charAt(scheme.length - 1) !== ':') scheme += ':';

    var needsSlashes = (url.protocol && url.slashes) || isSpecial(url.protocol);
    var result = scheme + (needsSlashes ? '//' : '');

    // `null` means "no userinfo section at all".
    var userinfo = null;

    if (url.username) {
      userinfo = url.password ? url.username + ':' + url.password : url.username;
    } else if (url.password) {
      userinfo = ':' + url.password;
    } else if (
      url.protocol !== 'file:' &&
      isSpecial(url.protocol) &&
      !host &&
      url.pathname !== '/'
    ) {
      //
      // Emit an empty userinfo. Without it an invalid URL that has no host
      // could be turned into a valid one with `url.pathname` as its host.
      //
      userinfo = '';
    }

    if (userinfo !== null) result += userinfo + '@';

    //
    // Parsing strips one trailing colon from the host. When the host still
    // ends in a colon, or the hostname still looks like "name:digits" while no
    // port is set, put that colon back so an invalid URL stays invalid.
    //
    if (host[host.length - 1] === ':' || (port.test(url.hostname) && !url.port)) {
      host += ':';
    }

    result += host + url.pathname;

    var query = typeof url.query === 'object' ? stringify(url.query) : url.query;

    if (query) {
      result += query.charAt(0) !== '?' ? '?' + query : query;
    }

    if (url.hash) result += url.hash;

    return result;
  }
  //
  // Instance API. The prototype object is replaced as a whole, so it only
  // carries the two methods listed here.
  //
  Url.prototype = {
    set: set,
    toString: toString
  };

  //
  // Expose the URL parser together with a few helpers that can be useful to
  // consumers or in tests.
  //
  Url.extractProtocol = extractProtocol;
  Url.location = lolcation;
  Url.trimLeft = trimLeft;
  Url.qs = qs;

  module.exports = Url;