String line to examine and find URLs in.
Maximum number of URLs to find. Default is uint.max.
A string[] array of found URLs. These include fragment identifiers.
// Replaces the following:
// enum stephenhay = `\bhttps?://[^\s/$.?#].[^\s]*`;
// static urlRegex = ctRegex!stephenhay;

string[] urls = findURLs("blah https://google.com http://facebook.com httpx://wefpokwe");
assert(urls.length == 2);
import std.conv : to;

{
    const urls = findURLs("http://google.com");
    assert((urls.length == 1), urls.to!string);
    assert((urls[0] == "http://google.com"), urls[0]);
}
{
    const urls = findURLs("blah https://a.com http://b.com shttps://c https://d.asdf.asdf.asdf ");
    assert((urls.length == 3), urls.to!string);
    assert((urls == [ "https://a.com", "http://b.com", "https://d.asdf.asdf.asdf" ]), urls.to!string);
}
{
    // max 2
    const urls = findURLs("blah https://a.com http://b.com shttps://c https://d.asdf.asdf.asdf ", max: 2);
    assert((urls.length == 2), urls.to!string);
    assert((urls == [ "https://a.com", "http://b.com" ]), urls.to!string);
}
{
    const urls = findURLs("http:// http://asdf https:// asdfhttpasdf http://google.com");
    assert((urls.length == 1), urls.to!string);
}
{
    // max 0
    const urls = findURLs("http:// http://asdf https:// asdfhttpasdf http://google.com", max: 0);
    assert(!urls.length, urls.to!string);
}
{
    const urls = findURLs("http://a.sehttp://a.shttp://a.http://http:");
    assert(!urls.length, urls.to!string);
}
{
    const urls = findURLs("blahblah https://motorbörsen.se blhblah");
    assert(urls.length, urls.to!string);
}
{
    // Let dlang-requests attempt complex URLs, don't validate more than necessary
    const urls = findURLs("blahblah https://高所恐怖症。co.jp blhblah");
    assert(urls.length, urls.to!string);
}
{
    const urls = findURLs("nyaa is now at https://nyaa.si, https://nyaa.si? " ~
        "https://nyaa.si. https://nyaa.si! \nand you should use it https://nyaa.si:");

    foreach (immutable url; urls)
    {
        assert((url == "https://nyaa.si"), url);
    }
}
{
    const urls = findURLs("https://google.se httpx://google.se https://google.se");
    assert((urls == [ "https://google.se", "https://google.se" ]), urls.to!string);
}
{
    const urls = findURLs("https:// ");
    assert(!urls.length, urls.to!string);
}
{
    const urls = findURLs("http:// ");
    assert(!urls.length, urls.to!string);
}
Finds URLs in a string, returning an array of them. Does not filter out duplicates.
Replacement for regex matching that uses much less memory when compiling (around 300 MB).
To consider: does this need a dstring?