1 /*
 * Database - Database abstraction layer for the D programming language.
3  *
4  * Copyright (C) 2017  Shanghai Putao Technology Co., Ltd
5  *
6  * Developer: HuntLabs
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module database.url;
13 
14 import std.algorithm;
15 import std.array;
16 import std.conv;
17 import std.encoding;
18 import std..string;
19 import std.utf;
20 
21 @safe:
22 
/**
 * Thrown when a URL, or one of its percent-encoded components, is malformed.
 */
class URLException : Exception {
    /**
     * Params:
     *   msg = description of the problem.
     *   file = source file to report; defaults to the file constructing the exception.
     *   line = source line to report; defaults to the line constructing the exception.
     *   next = optional exception to chain.
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null) {
        // Forward the location so the error points at the throw site rather than at this
        // constructor.
        super(msg, file, line, next);
    }
}
26 
/// Default port for each known URL scheme, keyed by scheme name.
/// Filled in by the module constructor below; read by URL.port and URL.toString.
ushort[string] schemeToDefaultPort;

// Module constructor: populates schemeToDefaultPort.
static this() 
{
    schemeToDefaultPort = [
        "aaa": 3868,
        "aaas": 5658,
        "acap": 674,
        "amqp": 5672,
        "cap": 1026,
        "coap": 5683,
        "coaps": 5684,
        "dav": 443,
        "dict": 2628,
        "ftp": 21,
        "git": 9418,
        "go": 1096,
        "gopher": 70,
        "http": 80,
        "https": 443,
        "ws": 80,
        "wss": 443,
        "iac": 4569,
        "icap": 1344,
        "imap": 143,
        "ipp": 631,
        "ipps": 631,  // yes, they're both mapped to port 631
        "irc": 6667,  // De facto default port, not the IANA reserved port.
        "ircs": 6697,
        "iris": 702,  // defaults to iris.beep
        "iris.beep": 702,
        "iris.lwz": 715,
        "iris.xpc": 713,
        "iris.xpcs": 714,
        "jabber": 5222,  // client-to-server
        "ldap": 389,
        "ldaps": 636,
        "msrp": 2855,
        "msrps": 2855,
        "mtqp": 1038,
        "mupdate": 3905,
        "news": 119,
        "nfs": 2049,
        "pop": 110,
        "redis": 6379,
        "reload": 6084,
        "rsync": 873,
        "rtmfp": 1935,
        "rtsp": 554,
        "shttp": 80,
        "sieve": 4190,
        "sip": 5060,
        "sips": 5061,
        "smb": 445,
        "smtp": 25,
        "snews": 563,
        "snmp": 161,
        "soap.beep": 605,
        "ssh": 22,
        "stun": 3478,
        "stuns": 5349,
        "svn": 3690,
        "teamspeak": 9987,
        "telnet": 23,
        "tftp": 69,
        "tip": 3372,
        "mysql": 3306,
        "postgresql": 5432
    ];
}
97 
/**
 * A collection of query parameters.
 *
 * This is effectively a multimap of string -> strings. Parameters are stored in the order in
 * which they were added.
 */
struct QueryParams {
    import std.typecons;
    alias Tuple!(string, "key", string, "value") Param;

    /// The stored key/value pairs, in insertion order.
    Param[] params;

    /// The number of stored parameters; repeated keys are counted once per occurrence.
    @property size_t length() const {
        return params.length;
    }

    /// Get a range over the query parameter values for the given key.
    /// The range is empty when no parameter has that key.
    auto opIndex(string key) {
        // filter (not find): find would return everything from the first match onward,
        // including the values of unrelated keys that follow it.
        return params.filter!(x => x.key == key).map!(x => x.value);
    }

    /// Add a query parameter with the given key and value.
    /// If one already exists, there will now be two query parameters with the given name.
    void add(string key, string value) {
        params ~= Param(key, value);
    }

    /// Add a query parameter with the given key and value.
    /// If there are any existing parameters with the same key, they are removed and overwritten.
    /// The relative order of the remaining parameters is preserved; the new one goes last.
    void overwrite(string key, string value) {
        // Stable in-place compaction: copy every non-matching entry down over the removed
        // ones. Each entry is examined exactly once, so adjacent or trailing duplicates of
        // `key` cannot be skipped.
        size_t kept = 0;
        foreach (i; 0 .. params.length) {
            if (params[i].key != key) {
                params[kept] = params[i];
                kept++;
            }
        }
        params.length = kept;
        params ~= Param(key, value);
    }

    // Forward range over the stored parameters. `front` yields a (key, value) tuple, which is
    // what lets `foreach (key, value; ...)` unpack both parts.
    private struct QueryParamRange {
        size_t i;
        const(Param)[] params;
        bool empty() { return i >= params.length; }
        void popFront() { i++; }
        Param front() { return params[i]; }
    }

    /**
     * A range over the query parameters.
     *
     * Usage:
     * ---
     * foreach (key, value; url.queryParams) {}
     * ---
     */
    auto range() {
        return QueryParamRange(0, this.params);
    }
    /// ditto
    alias range this;
}
157 
/**
 * A Uniform Resource Locator.
 *
 * URLs can be parsed (see parseURL) and implicitly convert to strings.
 */
struct URL {
    /// The URL scheme. For instance, ssh, ftp, or https.
    string scheme;

    /// The username in this URL. Usually absent. If present, there will also be a password.
    string user;

    /// The password in this URL. Usually absent.
    string pass;

    /// The hostname.
    string host;

    /**
     * The port.
     *
     * This is inferred from the scheme if it isn't present in the URL itself.
     * If the scheme is not known and the port is not present, the port will be given as 0.
     * For some schemes, port will not be sensible -- for instance, file or chrome-extension.
     *
     * If you explicitly need to detect whether the user provided a port, check the providedPort
     * field.
     */
    @property ushort port() {
        if (providedPort != 0) {
            return providedPort;
        }
        if (auto p = scheme in schemeToDefaultPort) {
            return *p;
        }
        return 0;
    }

    /**
     * Set the port.
     *
     * This sets the providedPort field and is provided for convenience.
     */
    @property ushort port(ushort value) {
        return providedPort = value;
    }

    /// The port that was explicitly provided in the URL. Zero means "not provided".
    ushort providedPort;

    /**
     * The path.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
     * "/news/story/17774".
     */
    string path;

    /**
     * Deprecated: this disallows multiple values for the same query string. Please use queryParams
     * instead.
     * 
     * The query string elements.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the query string
     * elements will be ["visited": "false"].
     *
     * Similarly, in the URL https://bbc.co.uk/news?item, the query string elements will be
     * ["item": ""].
     *
     * This field is mutable, so be cautious.
     */
    string[string] query;

    /**
     * The query parameters associated with this URL.
     *
     * When this is non-empty it is what toString serializes; `query` is only used as a fallback.
     */
    QueryParams queryParams;

    /**
     * The fragment. In web documents, this typically refers to an anchor element.
     * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
     */
    string fragment;

    /**
     * Convert this URL to a string.
     * The string is properly formatted and usable for, eg, a web request.
     */
    string toString() {
        return toString(false);
    }

    /**
     * Convert this URL to a string.
     * The string is intended to be human-readable rather than machine-readable: user, password
     * and path are left unencoded and the host is not converted to Punycode. Query parameters
     * and the fragment are still percent-encoded.
     */
    string toHumanReadableString() {
        return toString(true);
    }

    // Shared serializer behind toString and toHumanReadableString.
    private string toString(bool humanReadable) {
        Appender!string s;
        s ~= scheme;
        s ~= "://";
        // Test for content rather than non-null, so an empty user does not emit a stray ":@".
        if (user.length) {
            s ~= humanReadable ? user : user.percentEncode;
            s ~= ":";
            s ~= humanReadable ? pass : pass.percentEncode;
            s ~= "@";
        }
        s ~= humanReadable ? host : host.toPuny;
        if (providedPort) {
            // Omit the port when it is the scheme's default.
            auto defaultPort = scheme in schemeToDefaultPort;
            if (defaultPort is null || *defaultPort != providedPort) {
                s ~= ":";
                s ~= providedPort.to!string;
            }
        }
        string p = path;
        if (p.length == 0 || p == "/") {
            s ~= '/';
        } else {
            // Drop one leading slash; each branch below puts the separator back.
            if (p[0] == '/') {
                p = p[1..$];
            }
            if (humanReadable) {
                // Restore the separator between host and path that was stripped above.
                s ~= '/';
                s ~= p;
            } else {
                // Encode each segment separately so the '/' separators stay literal.
                foreach (part; p.split('/')) {
                    s ~= '/';
                    s ~= part.percentEncode;
                }
            }
        }
        if (queryParams.length) {
            bool first = true;
            s ~= '?';
            foreach (k, v; queryParams) {
                if (!first) {
                    s ~= '&';
                }
                first = false;
                s ~= k.percentEncode;
                // A parameter with an empty value is written as a bare key.
                if (v.length > 0) {
                    s ~= '=';
                    s ~= v.percentEncode;
                }
            }
        } else if (query.length) {
            // Legacy fallback: serialize the deprecated associative array.
            s ~= '?';
            bool first = true;
            foreach (k, v; query) {
                if (!first) {
                    s ~= '&';
                }
                first = false;
                s ~= k.percentEncode;
                if (v.length > 0) {
                    s ~= '=';
                    s ~= v.percentEncode;
                }
            }
        }
        if (fragment.length) {
            s ~= '#';
            s ~= fragment.percentEncode;
        }
        return s.data;
    }

    /// Implicitly convert URLs to strings.
    alias toString this;

    /**
     * The append operator (~).
     *
     * The append operator for URLs returns a new URL with the given string appended as a path
     * element to the URL's path. It only adds new path elements (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Query elements are copied.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * auto randInt = random ~ "int";
     * writeln(randInt);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opBinary(string op : "~")(string subsequentPath) {
        URL other = this;
        other ~= subsequentPath;
        // A struct copy shares the associative array and the parameter storage with the
        // original; duplicate both so later edits to one URL cannot leak into the other.
        if (query !is null) {
            other.query = other.query.dup;
        }
        other.queryParams.params = other.queryParams.params.dup;
        return other;
    }

    /**
     * The append-in-place operator (~=).
     *
     * The append operator for URLs adds a path element to this URL. It only adds new path elements
     * (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * random ~= "int";
     * writeln(random);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opOpAssign(string op : "~")(string subsequentPath) {
        if (path.endsWith("/")) {
            // Path already ends with a separator: avoid doubling it.
            if (subsequentPath.startsWith("/")) {
                path ~= subsequentPath[1..$];
            } else {
                path ~= subsequentPath;
            }
        } else {
            // No trailing separator: make sure exactly one sits between the two parts.
            if (!subsequentPath.startsWith("/")) {
                path ~= '/';
            }
            path ~= subsequentPath;
        }
        return this;
    }
}
388 
/**
 * Parse a URL from a string, reporting malformed input through the return value.
 *
 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
 * may be made. However, any URL in a correct format will be parsed correctly.
 *
 * Params:
 *   value = the text to parse. If it contains no "//", the scheme is taken to be "http".
 *   url = receives the result. It is reset to URL.init first; on failure it may be left
 *         partially filled in.
 *
 * Returns: true on success; false if a percent-encoded component is invalid or the port is not
 *          a valid ushort.
 */
bool tryParseURL(string value, out URL url) {
    url = URL.init;
    // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
    // Scheme is optional in common use. We infer 'http' if it's not given.
    auto i = value.indexOf("//");
    if (i > -1) {
        // i-1 drops the ':' that precedes the "//". A leading "//" leaves the scheme empty.
        if (i > 1) {
            url.scheme = value[0..i-1];
        }
        value = value[i+2 .. $];
    } else {
        url.scheme = "http";
    }
    // [user:password@]host[:port]][/]path[?query][#fragment
    i = value.indexOfAny([':', '/']);
    if (i == -1) {
        // Just a hostname.
        // NOTE(review): fromPuny is defined outside this block; whether it can throw on a
        // malformed host is not visible here.
        url.host = value.fromPuny;
        return true;
    }

    if (value[i] == ':') {
        // This could be between username and password, or it could be between host and port.
        auto j = value.indexOfAny(['@', '/']);
        if (j > -1 && value[j] == '@') {
            try {
                url.user = value[0..i].percentDecode;
                url.pass = value[i+1 .. j].percentDecode;
            } catch (URLException) {
                return false;
            }
            value = value[j+1 .. $];
        }
    }

    // It's trying to be a host/port, not a user/pass.
    i = value.indexOfAny([':', '/']);
    if (i == -1) {
        url.host = value.fromPuny;
        return true;
    }
    url.host = value[0..i].fromPuny;
    value = value[i .. $];
    if (value[0] == ':') {
        // The port runs up to the next '/', or to the end of the input.
        auto end = value.indexOf('/');
        if (end == -1) {
            end = value.length;
        }
        try {
            url.port = value[1 .. end].to!ushort;
        } catch (ConvException) {
            return false;
        }
        value = value[end .. $];
        if (value.length == 0) {
            return true;
        }
    }

    i = value.indexOfAny("?#");
    if (i == -1) {
        // Path only. A bad escape must yield `false` like every other component, not throw.
        try {
            url.path = value.percentDecode;
        } catch (URLException) {
            return false;
        }
        return true;
    }

    try {
        url.path = value[0..i].percentDecode;
    } catch (URLException) {
        return false;
    }
    auto c = value[i];
    value = value[i + 1 .. $];
    if (c == '?') {
        // Split the query from an optional trailing fragment.
        i = value.indexOf('#');
        string query;
        if (i < 0) {
            query = value;
            value = null;
        } else {
            query = value[0..i];
            value = value[i + 1 .. $];
        }
        auto queries = query.split('&');
        foreach (q; queries) {
            auto j = q.indexOf('=');
            string key, val;
            if (j < 0) {
                // Bare key, e.g. "?item": the value stays empty.
                key = q;
            } else {
                key = q[0..j];
                val = q[j + 1 .. $];
            }
            try {
                key = key.percentDecode;
                val = val.percentDecode;
            } catch (URLException) {
                return false;
            }
            // Populate both the deprecated map and the multimap.
            url.query[key] = val;
            url.queryParams.add(key, val);
        }
    }

    // Whatever remains is the fragment (empty when there was none).
    try {
        url.fragment = value.percentDecode;
    } catch (URLException) {
        return false;
    }

    return true;
}
506 
// URL.toString driven by the deprecated `query` associative array (queryParams stays empty).
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            query["hello"] = "world";
            query["gibe"] = "clay";
            fragment = "frag";
        }
        assert(
                // Not sure what order it'll come out in.
                url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            query["❄"] = "❀";
            query["["] = "]";
            fragment = "ş";
        }
        assert(
                // Not sure what order it'll come out in.
                url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
                url.toString == "https://dhasenan:itsasecret@example.org:17/",
                url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            query["hi"] = "bye";
        }
        assert(
                url.toString == "https://example.org/?hi=bye",
                url.toString);
    }
}
569 
// A leading "//" with no scheme: host and path are still extracted.
unittest
{
    auto url = "//foo/bar".parseURL;
    assert(url.host == "foo", "expected host foo, got " ~ url.host);
    assert(url.path == "/bar");
}
576 
// host:port input without a scheme, then path elements appended with ~.
unittest
{
    auto url = "localhost:5984".parseURL;
    auto url2 = url ~ "db1";
    assert(url2.toString == "http://localhost:5984/db1", url2.toString);
    auto url3 = url2 ~ "_all_docs";
    assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString);
}
585 
///
// URL.toString driven by queryParams, including repeated keys.
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            queryParams.add("hello", "world");
            queryParams.add("gibe", "clay");
            fragment = "frag";
        }
        assert(
                // queryParams are serialized in the order added; either order is accepted.
                url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                url.toString);
    }
    {
        // Two values for the same key.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            queryParams.add("hello", "world");
            queryParams.add("hello", "aether");
            fragment = "frag";
        }
        assert(
                // queryParams are serialized in the order added; either order is accepted.
                url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag" ||
                url.toString == "https://example.org/foo/bar?hello=aether&hello=world#frag",
                url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            queryParams.add("❄", "❀");
            queryParams.add("[", "]");
            fragment = "ş";
        }
        assert(
                // queryParams are serialized in the order added; either order is accepted.
                url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
                url.toString == "https://dhasenan:itsasecret@example.org:17/",
                url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            queryParams.add("hi", "bye");
        }
        assert(
                url.toString == "https://example.org/?hi=bye",
                url.toString);
    }
}
666 
// Every component is percent-decoded on parse and re-encoded identically by toString.
unittest {
    // Percent decoding.

    // http://#:!:@
    auto urlString = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash";
    auto url = urlString.parseURL;
    assert(url.user == "#");
    assert(url.pass == "!:");
    assert(url.host == "example.org");
    assert(url.path == "/{/}");
    assert(url.queryParams[";"].front == "");
    assert(url.queryParams["&"].front == "=");
    assert(url.fragment == "#hash");

    // Round trip.
    assert(urlString == urlString.parseURL.toString, urlString.parseURL.toString);
    assert(urlString == urlString.parseURL.toString.parseURL.toString);
}
685 
// A Punycode host is stored in its Unicode form after parsing.
unittest {
    auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
    assert(url.host == "☂.☃.org", url.host);
}
690 
// toString re-encodes the host as Punycode; toHumanReadableString keeps it in Unicode.
unittest {
    auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
    assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString);
    // On failure, report the value that was actually compared.
    assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toHumanReadableString);
}
696 
// A Unicode host given directly is serialized as Punycode by toString.
unittest {
    auto url = "https://☂.☃.org/?hi=bye".parseURL;
    assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye");
}
701 
///
// Separator handling of the ~ and ~= operators for each combination of existing path and
// appended element.
unittest {
    // There's an existing path.
    auto url = parseURL("http://example.org/foo");
    URL url2;
    // No slash? Assume it needs a slash.
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // With slash? Don't add another.
    url2 = url ~ "/bar";
    assert(url2.toString == "http://example.org/foo/bar", url2.toString);
    url ~= "bar";
    assert(url.toString == "http://example.org/foo/bar");

    // Path already ends with a slash; don't add another.
    url = parseURL("http://example.org/foo/");
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // Still don't add one even if you're appending with a slash.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "/bar";
    assert(url.toString == "http://example.org/foo/bar");

    // No path.
    url = parseURL("http://example.org");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar");

    // Path is just a slash.
    url = parseURL("http://example.org/");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar", url.toString);

    // No path, just fragment.
    url = "ircs://irc.freenode.com/#d".parseURL;
    assert(url.toString == "ircs://irc.freenode.com/#d", url.toString);
}
741 
// Compile-time check only: a URL must be accepted where std.net.curl.get takes its argument.
// Nothing is fetched, because the call appears only inside typeof.
unittest {
    import std.net.curl;
    auto url = "http://example.org".parseURL;
    assert(is(typeof(std.net.curl.get(url))));
}
747 
/**
 * Parse the input string as a URL, throwing on failure.
 *
 * Thin wrapper around tryParseURL for callers that prefer exceptions to a boolean result.
 *
 * Params:
 *   value = the text to parse.
 *
 * Returns: the parsed URL.
 *
 * Throws:
 *   URLException if the string was in an incorrect format.
 */
URL parseURL(string value) {
    URL parsed;
    immutable succeeded = tryParseURL(value, parsed);
    if (!succeeded) {
        throw new URLException("failed to parse URL " ~ value);
    }
    return parsed;
}
761 
///
// parseURL component extraction: scheme inference, default and explicit ports, user info,
// query string, fragment and percent-decoding.
unittest {
    {
        // Infer scheme
        auto u1 = parseURL("example.org");
        assert(u1.scheme == "http");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 80);
        assert(u1.providedPort == 0);
        assert(u1.fragment == "");
    }
    {
        // Simple host and scheme
        auto u1 = parseURL("https://example.org");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With path
        auto u1 = parseURL("https://example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With explicit port
        auto u1 = parseURL("https://example.org:1021/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 1021);
        assert(u1.providedPort == 1021);
    }
    {
        // With user
        auto u1 = parseURL("https://bob:secret@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
    }
    {
        // With user, URL-encoded
        auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob!");
        assert(u1.pass == "secret!?");
    }
    {
        // With user and port and path
        auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 2210);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
        assert(u1.fragment == "");
    }
    {
        // With query string
        auto u1 = parseURL("https://example.org/?login=true");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.queryParams["login"].front == "true");
        assert(u1.fragment == "");
    }
    {
        // With query string and fragment
        auto u1 = parseURL("https://example.org/?login=true#justkidding");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.queryParams["login"].front == "true");
        assert(u1.fragment == "justkidding");
    }
    {
        // With URL-encoded values
        auto u1 = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/☃", "expected path: /☃ actual path: " ~ u1.path);
        assert(u1.queryParams["❄"].front == "=");
        assert(u1.fragment == "^");
    }
}
860 
// Port handling: scheme defaults, explicit ports, and omission of a default port by toString.
unittest {
    assert(parseURL("http://example.org").port == 80);
    assert(parseURL("http://example.org:5326").port == 5326);

    auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
    assert(url.scheme == "redis");
    assert(url.user == "admin");
    assert(url.pass == "password");

    assert(parseURL("example.org").toString == "http://example.org/");
    // An explicit port equal to the scheme's default is not written back out.
    assert(parseURL("http://example.org:80").toString == "http://example.org/");

    assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
}
875 
/**
 * Percent-encode a string.
 *
 * Only ASCII letters, digits and the four marks -._~ are copied through unchanged. Every other
 * code point is written as the octets of its UTF-8 encoding, each rendered as '%' followed by
 * two uppercase hexadecimal digits.
 *
 * Params:
 *   raw = the text to encode.
 *
 * Returns: the encoded text.
 */
string percentEncode(string raw) {
    static immutable hexDigits = "0123456789ABCDEF";

    Appender!string result;
    foreach (dchar ch; raw) {
        immutable isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
        immutable isDigit = ch >= '0' && ch <= '9';
        immutable isMark = ch == '-' || ch == '.' || ch == '_' || ch == '~';
        if (isLetter || isDigit || isMark) {
            // Unreserved character: copied verbatim.
            result ~= cast(char) ch;
            continue;
        }

        // Everything else, ASCII or not: escape each UTF-8 octet of the code point.
        char[4] utf8;
        immutable count = std.utf.encode(utf8, ch);
        foreach (k; 0 .. count) {
            immutable ubyte octet = utf8[k];
            result ~= '%';
            result ~= hexDigits[octet >> 4];
            result ~= hexDigits[octet & 0x0F];
        }
    }
    return result.data;
}
910 
///
// percentEncode leaves unreserved characters alone and escapes the UTF-8 octets of the rest.
unittest {
    assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
    assert(percentEncode("~~--..__") == "~~--..__");
    assert(percentEncode("0123456789") == "0123456789");

    string e;

    e = percentEncode("☃");
    assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e);
}
922 
/**
 * Percent-decode a string.
 *
 * The input is decoded to raw octets by percentDecodeRaw and the result is then checked for
 * being a valid string before it is returned.
 *
 * Params:
 *   encoded = the percent-encoded text.
 *
 * Returns: the decoded text.
 *
 * Throws:
 *   URLException if the decoded octets do not form valid UTF data, or if percentDecodeRaw
 *   rejects the input.
 */
@trusted string percentDecode(string encoded) {
    // Reinterpreting ubyte[] as string is a pointer cast, which @safe code may not perform.
    // Both element types are one byte wide and hold no references, and the validity check
    // below runs before the value is handed to any caller.
    auto decoded = cast(string) percentDecodeRaw(encoded);
    if (decoded.isValid) {
        return decoded;
    }
    throw new URLException("input contains invalid UTF data");
}
944 
///
// percentDecode passes unescaped text through and reassembles escaped UTF-8 octets.
unittest {
    assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
    assert(percentDecode("~~--..__") == "~~--..__");
    assert(percentDecode("0123456789") == "0123456789");

    string e;

    e = percentDecode("%E2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);
}
956 
/**
 * Percent-decode a string into a ubyte array.
 *
 * Each "%XY" sequence, where X and Y are hexadecimal digits in either case, becomes the single
 * octet 0xXY. All other bytes are copied through unchanged.
 *
 * This yields a ubyte array and will not perform validation on the output. However, an improperly
 * formatted input string will result in a URLException.
 *
 * Params:
 *   encoded = the percent-encoded text.
 *
 * Returns: the decoded octets.
 *
 * Throws:
 *   URLException if a '%' is not followed by exactly two hexadecimal digits.
 */
ubyte[] percentDecodeRaw(string encoded) {
    // Value of one hexadecimal digit, or -1 if `ch` is not a hexadecimal digit.
    static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        return -1;
    }

    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    Appender!(ubyte[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= cast(ubyte) encoded[i];
            continue;
        }
        // Phrased as "bytes remaining" so the unsigned subtraction cannot wrap around on
        // inputs shorter than three bytes (i is always < encoded.length here).
        if (encoded.length - i < 3) {
            throw new URLException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        immutable hi = hexValue(encoded[i + 1]);
        immutable lo = hexValue(encoded[i + 2]);
        if (hi < 0 || lo < 0) {
            // Reject bad digits explicitly instead of decoding them to an arbitrary octet.
            throw new URLException("Invalid percent encoded value: expected two hexadecimal " ~
                    "digits after percent symbol. Error at index " ~ i.to!string);
        }
        app ~= cast(ubyte)((hi << 4) | lo);
        i += 2;
    }
    return app.data;
}
986 
/*
 * Convert a hostname to its ASCII form, Punycode-encoding each dot-separated label when needed.
 *
 * The hostname is scanned code point by code point. The scan stops at the first code point above
 * 0x80, which marks the name as needing encoding; code points after that one are not inspected.
 * Every code point seen before that must be one of: ',' '-' '.' '/', a digit, or an ASCII letter.
 *
 * Returns: the hostname unchanged if no code point above 0x80 was found, otherwise the hostname
 *          with every label passed through punyEncode.
 *
 * Throws: URLException if a disallowed character is found during the scan.
 */
private string toPuny(string unicodeHostname) {
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            mustEncode = true;
            break;
        }
        // Rejected ranges: everything below ',', then ':'..'@', '['..'`', and '{' upwards
        // (which also covers 0x7F and 0x80, since those did not take the branch above).
        // The upper bound of the second range is 0x40 ('@'); written as decimal 40 the range
        // would be empty and ':' through '@' would slip through.
        if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new URLException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    return unicodeHostname.split('.').map!punyEncode.join(".");
}
1007 
// Decode a hostname label by label: split on '.', run each label through punyDecode, and glue
// the results back together with '.'.
private string fromPuny(string hostname) {
    auto labels = hostname.split('.');
    return labels.map!(label => punyDecode(label)).join(".");
}
1011 
private {
    // Constants shared by punyEncode and punyDecode.
    enum delimiter = '-';          // separates the literal basic characters from the encoded part
    enum marker = "xn--";          // prefix identifying a Punycode-encoded label
    enum ulong damp = 700;
    enum ulong tmin = 1;
    enum ulong tmax = 26;
    enum ulong skew = 38;
    enum ulong base = 36;
    enum ulong initialBias = 72;
    enum dchar initialN = cast(dchar)128;

    /*
     * Compute the next bias value from the delta that was just encoded or decoded.
     *
     * Params:
     *   delta = the delta just processed
     *   numPoints = number of code points handled so far (used as a divisor, so must be non-zero)
     *   firstTime = true for the first adaptation, which divides delta by damp instead of by 2
     *
     * Returns: the new bias.
     */
    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        enum ulong span = base - tmin;
        enum ulong threshold = (span * tmax) / 2;

        ulong scaled = delta / (firstTime ? damp : 2);
        scaled += scaled / numPoints;

        // Repeatedly shrink the delta until it fits under the threshold, adding one `base` to the
        // result for every division performed.
        ulong offset = 0;
        for (; scaled > threshold; offset += base) {
            scaled /= span;
        }
        return offset + (((span + 1) * scaled) / (scaled + skew));
    }
}
1038 
1039 /**
1040  * Encode the input string using the Punycode algorithm.
1041  *
1042  * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
1043  * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
1044  * in Punycode, you will get "xn--m3h.xn--n3h.com".
1045  *
1046  * In order to puny-encode a domain name, you must split it into its components. The following will
1047  * typically suffice:
1048  * ---
1049  * auto domain = "☂.☃.com";
1050  * auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
1051  * ---
1052  */
1053 string punyEncode(string input) {
1054     ulong delta = 0;
1055     dchar n = initialN;
1056     auto i = 0;
1057     auto bias = initialBias;
1058     Appender!string output;
1059     output ~= marker;
1060     auto pushed = 0;
1061     auto codePoints = 0;
1062     foreach (dchar c; input) {
1063         codePoints++;
1064         if (c <= initialN) {
1065             output ~= c;
1066             pushed++;
1067         }
1068     }
1069     if (pushed < codePoints) {
1070         if (pushed > 0) {
1071             output ~= delimiter;
1072         }
1073     } else {
1074         // No encoding to do.
1075         return input;
1076     }
1077     bool first = true;
1078     while (pushed < codePoints) {
1079         auto best = dchar.max;
1080         foreach (dchar c; input) {
1081             if (n <= c && c < best) {
1082                 best = c;
1083             }
1084         }
1085         if (best == dchar.max) {
1086             throw new URLException("failed to find a new codepoint to process during punyencode");
1087         }
1088         delta += (best - n) * (pushed + 1);
1089         if (delta > uint.max) {
1090             // TODO better error message
1091             throw new URLException("overflow during punyencode");
1092         }
1093         n = best;
1094         foreach (dchar c; input) {
1095             if (c < n) {
1096                 delta++;
1097             }
1098             if (c == n) {
1099                 ulong q = delta;
1100                 auto k = base;
1101                 while (true) {
1102                     ulong t;
1103                     if (k <= bias) {
1104                         t = tmin;
1105                     } else if (k >= bias + tmax) {
1106                         t = tmax;
1107                     } else {
1108                         t = k - bias;
1109                     }
1110                     if (q < t) {
1111                         break;
1112                     }
1113                     output ~= digitToBasic(t + ((q - t) % (base - t)));
1114                     q = (q - t) / (base - t);
1115                     k += base;
1116                 }
1117                 output ~= digitToBasic(q);
1118                 pushed++;
1119                 bias = adapt(delta, pushed, first);
1120                 first = false;
1121                 delta = 0;
1122             }
1123         }
1124         delta++;
1125         n++;
1126     }
1127     return cast(string)output.data;
1128 }
1129 
1130 /**
1131  * Decode the input string using the Punycode algorithm.
1132  *
1133  * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
1134  * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
1135  * in Punycode, you will get "xn--m3h.xn--n3h.com".
1136  *
1137  * In order to puny-decode a domain name, you must split it into its components. The following will
1138  * typically suffice:
1139  * ---
1140  * auto domain = "xn--m3h.xn--n3h.com";
1141  * auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
1142  * ---
1143  */
1144 string punyDecode(string input) {
1145     if (!input.startsWith(marker)) {
1146         return input;
1147     }
1148     input = input[marker.length..$];
1149 
1150     // let n = initial_n
1151     dchar n = cast(dchar)128;
1152 
1153     // let i = 0
1154     // let bias = initial_bias
1155     // let output = an empty string indexed from 0
1156     size_t i = 0;
1157     auto bias = initialBias;
1158     dchar[] output;
1159     // This reserves a bit more than necessary, but it should be more efficient overall than just
1160     // appending and inserting volo-nolo.
1161     output.reserve(input.length);
1162 
1163     // consume all code points before the last delimiter (if there is one)
1164     //   and copy them to output, fail on any non-basic code point
1165     // if more than zero code points were consumed then consume one more
1166     //   (which will be the last delimiter)
1167     auto end = input.lastIndexOf(delimiter);
1168     if (end > -1) {
1169         foreach (dchar c; input[0..end]) {
1170             output ~= c;
1171         }
1172         input = input[end+1 .. $];
1173     }
1174 
1175     // while the input is not exhausted do begin
1176     size_t pos = 0;
1177     while (pos < input.length) {
1178         //   let oldi = i
1179         //   let w = 1
1180         auto oldi = i;
1181         auto w = 1;
1182         //   for k = base to infinity in steps of base do begin
1183         for (ulong k = base; k < uint.max; k += base) {
1184             //     consume a code point, or fail if there was none to consume
1185             // Note that the input is all ASCII, so we can simply index the input string bytewise.
1186             auto c = input[pos];
1187             pos++;
1188             //     let digit = the code point's digit-value, fail if it has none
1189             auto digit = basicToDigit(c);
1190             //     let i = i + digit * w, fail on overflow
1191             i += digit * w;
1192             //     let t = tmin if k <= bias {+ tmin}, or
1193             //             tmax if k >= bias + tmax, or k - bias otherwise
1194             ulong t;
1195             if (k <= bias) {
1196                 t = tmin;
1197             } else if (k >= bias + tmax) {
1198                 t = tmax;
1199             } else {
1200                 t = k - bias;
1201             }
1202             //     if digit < t then break
1203             if (digit < t) {
1204                 break;
1205             }
1206             //     let w = w * (base - t), fail on overflow
1207             w *= (base - t);
1208             //   end
1209         }
1210         //   let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
1211         bias = adapt(i - oldi, output.length + 1, oldi == 0);
1212         //   let n = n + i div (length(output) + 1), fail on overflow
1213         n += i / (output.length + 1);
1214         //   let i = i mod (length(output) + 1)
1215         i %= (output.length + 1);
1216         //   {if n is a basic code point then fail}
1217         // (We aren't actually going to fail here; it's clear what this means.)
1218         //   insert n into output at position i
1219         (() @trusted { output.insertInPlace(i, cast(dchar)n); })();  // should be @safe but isn't marked
1220         //   increment i
1221         i++;
1222         // end
1223     }
1224     return output.to!string;
1225 }
1226 
// Lifted from punycode.js.
// Maps a digit value to its ASCII character: values below 26 land on 'a' onwards, and all other
// values are shifted by 22 (so 26..35 land on '0'..'9').
private dchar digitToBasic(ulong digit) {
    immutable ulong offset = digit < 26 ? 'a' : 22;
    return cast(dchar)(digit + offset);
}
1231 
// Lifted from punycode.js.
// Maps an ASCII character to its digit value: '0'..'9' give 26..35, and letters of either case
// give 0..25. Any other character yields `base`, which is not a valid digit value.
private uint basicToDigit(char c) {
    immutable uint codePoint = c;
    switch (c) {
        case '0': .. case '9':
            return codePoint - 22;
        case 'A': .. case 'Z':
            return codePoint - 'A';
        case 'a': .. case 'z':
            return codePoint - 'a';
        default:
            return base;
    }
}
1246 
unittest {
    // Each case encodes one label and reports the actual output if the expectation fails.
    {
        auto b = punyEncode("b\u00FCcher");
        assert(b == "xn--bcher-kva", b);
    }
    {
        auto b = punyEncode("b\u00FCc\u00FCher");
        assert(b == "xn--bcher-kvab", b);
    }
    {
        auto b = punyEncode("ýbücher");
        assert(b == "xn--bcher-kvaf", b);
    }
    {
        auto b = punyEncode("mañana");
        assert(b == "xn--maana-pta", b);
    }
    {
        // A label made entirely of non-ASCII code points: no literal part, no delimiter.
        auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
            ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
        auto b = punyEncode(a);
        assert(b == "xn--egbpdaj6bu4bxfgehfvwxn", b);
    }
}
1275 
unittest {
    // A label without a literal ASCII part.
    auto arabic = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
    assert(arabic == "ليهمابتكلموشعربي؟", arabic);

    // A label with literal ASCII characters before the delimiter.
    auto spanish = punyDecode("xn--maana-pta");
    assert(spanish == "mañana");
}
1285 
unittest {
    import std.string, std.algorithm, std.array, std.range;
    // Whole domain names are processed one dot-separated label at a time.
    {
        auto domain = "xn--m3h.xn--n3h.com";
        auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
        assert(decodedDomain == "☂.☃.com", decodedDomain);
    }
    {
        auto domain = "☂.☃.com";
        auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
        assert(encodedDomain == "xn--m3h.xn--n3h.com", encodedDomain);
    }
}