Modul:URLutil: Unterschied zwischen den Versionen
w>PerfektesChaos (Mehr Funktionen) |
K (51 Versionen von wikivoyage:Modul:URLutil importiert) |
||
(38 dazwischenliegende Versionen von 12 Benutzern werden nicht angezeigt) | |||
Zeile 1: | Zeile 1: | ||
--[=[ | local URLutil = { suite = "URLutil", | ||
serial = "2022-04-05", | |||
item = 10859193 } | |||
--[=[ | |||
Utilities for URL etc. on www. | Utilities for URL etc. on www. | ||
* decode() | |||
* encode() | |||
* getAuthority() | * getAuthority() | ||
* getFragment() | |||
* getHost() | * getHost() | ||
* getLocation() | |||
* getNormalized() | |||
* getPath() | |||
* getPort() | * getPort() | ||
* getQuery() | |||
* getQueryTable() | |||
* getRelativePath() | |||
* getScheme() | * getScheme() | ||
* getSortkey() | |||
* getTLD() | |||
* getTop2domain() | |||
* getTop3domain() | |||
* isAuthority() | * isAuthority() | ||
* isDomain() | * isDomain() | ||
* isDomainExample() | |||
* isDomainInt() | |||
* isHost() | * isHost() | ||
* isHostPathResource() | |||
* isIP() | * isIP() | ||
* isIPlocal() | |||
* isIPv4() | * isIPv4() | ||
* isIPv6() | * isIPv6() | ||
* isMailAddress() | * isMailAddress() | ||
* isMailLink() | * isMailLink() | ||
* isProtocolWiki | * isProtocolDialog() | ||
* | * isProtocolWiki() | ||
* isResourceURL() | |||
* isSuspiciousURL() | * isSuspiciousURL() | ||
* isUnescapedURL() | * isUnescapedURL() | ||
* isWebURL() | * isWebURL() | ||
Only [[dotted decimal]] notation for IPv4 | * wikiEscapeURL() | ||
* failsafe() | |||
Only [[dotted decimal]] notation for IPv4 expected. | |||
Does not support dotted hexadecimal, dotted octal, or single-number formats. | Does not support dotted hexadecimal, dotted octal, or single-number formats. | ||
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. | IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. | ||
]=] | |||
local Failsafe = URLutil | |||
-- Characters that must remain percent-encoded, per URL component:
--   F = fragment, P = path, Q = query, X = any component
-- The backslash is written as "\\": a bare "\]" silently loses the
-- backslash in Lua 5.1 and is a syntax error in later Lua versions.
local decodeComponentProtect = { F = "\"#%<>[\\]^`{|}",
                                 P = "\"#%<>[\\]^`{|}/?",
                                 Q = "\"#%<>[\\]^`{|}&=+;,",
                                 X = "\"#%<>[\\]^`{|}&=+;,/?" }



local decodeComponentEscape = function ( averse, adapt )
    -- Shall this character code stay percent-encoded?
    -- Precondition:
    --     averse  -- string, component key: "F", "P", "Q" or "X"
    --     adapt   -- number, character code
    -- Postcondition:
    --     Returns boolean, true if the character must not be decoded
    -- Control characters, space (32 = 0x20) and DEL (127) are never
    -- decoded; the former test against decimal 20 missed the space.
    if adapt <= 32 or adapt == 127 then
        return true
    end
    return decodeComponentProtect[ averse ]:find( string.char( adapt ),
                                                  1,
                                                  true ) ~= nil
end -- decodeComponentEscape()
local decodeComponentML = function ( ask )
    -- Resolve numeric character references within a URL
    -- Precondition:
    --     ask  -- string, URL
    -- Postcondition:
    --     Returns string; references below 128 are replaced by the
    --     plain character, or by %HH if that character needs to stay
    --     escaped; references from 128 up are rewritten as decimal &#N;
    local i = 1
    local j, n, s
    while ( i ) do
        -- Accept decimal (&#38;) as well as hexadecimal (&#x26;) forms;
        -- the former pattern required "x", which left the decimal
        -- branch below unreachable.
        i = ask:find( "&#[xX]?%x+;", i )
        if i then
            j = ask:find( ";", i + 3, true )
            s = ask:sub( i + 2, j - 1 ):upper()
            if s:byte( 1, 1 ) == 88 then    -- "X"
                n = tonumber( s:sub( 2 ), 16 )
            elseif s:match( "^%d+$" ) then
                n = tonumber( s )
            else
                -- hex digits without "x" prefix: not a valid reference
                n = false
            end
            if n then
                if n >= 128 then
                    s = string.format( "&#%d;", n )
                elseif decodeComponentEscape( "X", n ) then
                    s = string.format( "%%%02X", n )
                else
                    s = string.format( "%c", n )
                end
                j = j + 1
                if i == 1 then
                    ask = s .. ask:sub( j )
                else
                    ask = string.format( "%s%s%s",
                                         ask:sub( 1, i - 1 ),
                                         s,
                                         ask:sub( j ) )
                end
            end
            -- continue behind the current position in any case
            i = i + 1
        end
    end -- while i
    return ask
end -- decodeComponentML()
local decodeComponentPercent = function ( ask, averse )
    -- Decode %HH sequences in the range %20...%7F where permitted
    -- Precondition:
    --     ask     -- string, URL component
    --     averse  -- string, component key passed on to
    --                decodeComponentEscape()
    -- Postcondition:
    --     Returns string; sequences that stay encoded get an uppercase
    --     second hex digit
    local i = 1
    local j, k, m, n
    while ( i ) do
        i = ask:find( "%%[2-7]%x", i )
        if i then
            j = i + 1    -- position of first hex digit
            k = j + 1    -- position of second hex digit
            n = ask:byte( k, k )
            k = k + 1    -- now: first position behind the sequence
            m = ( n > 96 )    -- second hex digit is lowercase
            if m then
                n = n - 32
                m = n    -- keep uppercase character code for rewriting
            end
            -- convert second hex digit into its numeric value
            if n > 57 then
                n = n - 55
            else
                n = n - 48
            end
            n = ( ask:byte( j, j ) - 48 )  *  16   +   n
            if n == 39 and
               ask:sub( i + 3, i + 5 ) == "%27" then
                -- run of two or more encoded apostrophes: leave as is
                -- and continue behind the run
                j = i + 6
                while ( ask:sub( j, j + 2 ) == "%27" ) do
                    j = j + 3
                end -- while "%27"
            elseif decodeComponentEscape( averse, n ) then
                -- stays encoded; only normalize the hex digit case
                if m then
                    ask = string.format( "%s%c%s",
                                         ask:sub( 1, j ),
                                         m,
                                         ask:sub( k ) )
                end
            elseif i == 1 then
                ask = string.format( "%c%s", n, ask:sub( k ) )
            else
                ask = string.format( "%s%c%s",
                                     ask:sub( 1, i - 1 ),
                                     n,
                                     ask:sub( k ) )
            end
            i = j
        end
    end -- while i
    return ask
end -- decodeComponentPercent()
local getTopDomain = function ( url, mode )
    -- Retrieve the trailing labels of the host name
    -- Precondition:
    --     url   -- string, URL
    --     mode  -- number, 3 for three labels, otherwise two
    -- Postcondition:
    --     Returns string with the trailing labels, or false
    local host = URLutil.getHost( url )
    if not host then
        return false
    end
    local labels = "[%w%%%-]+%.%a[%w%-]*%a)$"
    if mode == 3 then
        labels = "[%w%%%-]+%." .. labels
    end
    -- a leading dot guarantees that only complete labels are matched
    return mw.ustring.match( "." .. host,  "%.(" .. labels )  or  false
end -- getTopDomain()
local getHash = function ( url )
    -- Find the position of the fragment delimiter
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns number, position of the first "#" that is not part
    --     of a numeric character reference (&#38; &#x26;), or nil
    -- Every "#" is examined in order, so that a genuine delimiter
    -- in front of a later reference is not lost any more.
    local start = 1
    local i
    while true do
        i = url:find( "#", start, true )
        if not i then
            return nil
        end
        if i > 1  and  url:byte( i - 1 ) == 38  and    -- "&"
           ( url:match( "^%d+;", i + 1 )  or
             url:match( "^[xX]%x+;", i + 1 ) ) then
            -- numeric character reference; keep looking
            start = i + 1
        else
            return i
        end
    end -- while true
end -- getHash()
URLutil.decode = function ( url, enctype )
    -- Decode a percent-encoded string and make it safe for wikitext
    -- Precondition:
    --     url      -- string, encoded
    --     enctype  -- string, encoding type for mw.uri.decode(), or not
    -- Postcondition:
    --     Returns string; the result of mw.text.encode(), with
    --     brackets and pipe as numeric character references
    local r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.text.encode( mw.uri.decode( url, s ) )
    if r:find( "[%[|%]]" ) then
        -- Replacing a character by itself would be a no-op;
        -- use the numeric character references instead.
        r = r:gsub( "%[", "&#91;" )
             :gsub( "|", "&#124;" )
             :gsub( "%]", "&#93;" )
    end
    return r
end -- URLutil.decode()
URLutil.encode = function ( url, enctype )
    -- Percent-encode a string, safe for wikitext
    -- Precondition:
    --     url      -- string
    --     enctype  -- string, encoding type for mw.uri.encode(), or not
    -- Postcondition:
    --     Returns string; a leading * : ; is encoded, as well as
    --     any remaining bracket or pipe
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    if -- k == 35  or      -- #
          k == 42  or      -- *
          k == 58  or      -- :
          k == 59 then     -- ;
        -- these would start a list or definition at line begin
        r = string.format( "%%%X%s", k, r:sub( 2 ) )
    end
    if r:find( "[%[|%]]" ) then
        -- "%" needs doubling within a gsub() replacement string;
        -- a bare "%5" would be read as reference to capture 5.
        r = r:gsub( "%[", "%%5B" )
             :gsub( "|", "%%7C" )
             :gsub( "%]", "%%5D" )
    end
    return r
end -- URLutil.encode()
URLutil.getAuthority = function ( url )
    -- Retrieve host, with port if specified, in lowercase
    -- Precondition:
    --     url  -- string, URL
    -- Postcondition:
    --     Returns string "host" or "host:port",
    --             nil if there is no valid authority,
    --             false if url is not a string
    if type( url ) ~= "string" then
        return false
    end
    local text = mw.text.decode( url )
    local hash = text:find( "#", 6, true )
    if hash then
        text = text:sub( 1, hash - 1 )
    end
    -- the appended slash terminates the authority for the pattern
    local host, colon, port =
              mw.ustring.match( text .. "/",
                                "^%s*%w*:?//([%w%.%%_-]+)(:?)([%d]*)/" )
    if not URLutil.isHost( host ) then
        return nil
    end
    host = mw.ustring.lower( host )
    if colon == ":" then
        if port:find( "^[1-9]" ) then
            return host .. ":" .. port
        end
    elseif #port == 0 then
        return host
    end
    return nil
end -- URLutil.getAuthority()
URLutil.getFragment = function ( url, decode )
    -- Retrieve the fragment, without leading "#"
    -- Precondition:
    --     url     -- string, URL
    --     decode  -- string "%" or "WIKI" to request decoding, or not
    -- Postcondition:
    --     Returns string, false if there is no "#",
    --             nil if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local at = getHash( url )
    if not at then
        return false
    end
    local r = mw.text.trim( url:sub( at ) ):sub( 2 )
    if type( decode ) == "string" then
        local encoding = mw.text.trim( decode )
        local launch   = ( encoding == "%" )
        if encoding == "WIKI" then
            -- section anchors: .HH becomes %HH, underscore becomes space
            r = r:gsub( "%.(%x%x)", "%%%1" )
                 :gsub( "_", " " )
            launch = true
        end
        if launch then
            r = mw.uri.decode( r, "PATH" )
        end
    end
    return r
end -- URLutil.getFragment()
URLutil.getHost = function ( url )
    -- Retrieve the host from the authority, dropping any port
    -- Postcondition:
    --     Returns string, or the falsy result of getAuthority()
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    return ( mw.ustring.match( authority, "^([%w%.%%_%-]+):?[%d]*$" ) )
end -- URLutil.getHost()
URLutil.getLocation = function ( url )
    -- Retrieve the trimmed URL without fragment
    -- Precondition:
    --     url  -- string, URL
    -- Postcondition:
    --     Returns string,
    --             false if nothing is left in front of the fragment,
    --             nil if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local r = mw.text.trim( url )
    if r == "" then
        return false
    end
    local at = getHash( r )
    if at == 1 then
        return false
    elseif at then
        r = r:sub( 1, at - 1 )
    end
    return r
end -- URLutil.getLocation()
URLutil.getNormalized = function ( url )
    -- Normalize a URL: resolve character references, decode
    -- percent-encoding where decodeComponentPercent() permits,
    -- lowercase the part in front of the path, and finally
    -- percent-encode space, brackets, pipe and angle brackets
    -- Precondition:
    --     url  -- string, URL
    -- Postcondition:
    --     Returns string, or false if url is no string or blank
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            r = decodeComponentML( r )
        end
    else
        r = false
    end
    if r then
        local k = r:find( "//", 1, true )
        if k then
            -- j: first slash behind the "//", if any
            local j = r:find( "/",  k + 2,  true )
            -- fragment, path and query, kept apart from lowercasing
            local sF, sP, sQ
            if r:find( "%%[2-7]%x" ) then
                -- something to decode; split off components from the end
                local i = getHash( r )
                if i then
                    sF = r:sub( i + 1 )
                    r  = r:sub( 1,  i - 1 )
                    if sF == "" then
                        -- empty fragment: the "#" is dropped
                        sF = false
                    else
                        sF = decodeComponentPercent( sF, "F" )
                    end
                end
                i = r:find( "?", 1, true )
                if i then
                    -- sQ keeps its leading "?"
                    sQ = r:sub( i )
                    r  = r:sub( 1,  i - 1 )
                    sQ = decodeComponentPercent( sQ, "Q" )
                end
                if j then
                    if #r > j then
                        sP = r:sub( j + 1 )
                        sP = decodeComponentPercent( sP, "P" )
                    end
                    r = r:sub( 1,  j - 1 )
                end
            elseif j then
                -- nothing to decode; only strip a dangling "#"
                -- and keep everything behind slash j unchanged
                local n = #r
                if r:byte( n, n ) == 35 then    -- '#'
                    n = n - 1
                    r = r:sub( 1, n )
                end
                if n > j then
                    sP = r:sub( j + 1 )
                end
                r = r:sub( 1,  j - 1 )
            end
            -- whatever is left in r gets lowercased
            r = mw.ustring.lower( r ) .. "/"
            if sP then
                r = r .. sP
            end
            if sQ then
                r = r .. sQ
            end
            if sF then
                r = string.format( "%s#%s", r, sF )
            end
        end
        -- "%%" yields a literal "%" in a gsub() replacement
        r = r:gsub( " ",  "%%20" )
             :gsub( "%[", "%%5B" )
             :gsub( "|",  "%%7C" )
             :gsub( "%]", "%%5D" )
             :gsub( "%<", "%%3C" )
             :gsub( "%>", "%%3E" )
    end
    return r
end -- URLutil.getNormalized()
URLutil.getPath = function ( url )
    -- Retrieve the path, without query and fragment
    -- Postcondition:
    --     Returns string, or the falsy result of getRelativePath()
    local r = URLutil.getRelativePath( url )
    if r then
        -- an empty capture is still truthy and replaces r
        r = r:match( "^([^%?]*)%?" )  or  r
        r = r:match( "^([^#]*)#" )  or  r
    end
    return r
end -- URLutil.getPath()
URLutil.getPort = function ( url )
    -- Retrieve the port number
    -- Postcondition:
    --     Returns number,
    --             false if the authority has no port,
    --             or the falsy result of getAuthority()
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    local digits = authority:match( ":([1-9][0-9]*)$" )
    if digits then
        return tonumber( digits )
    end
    return false
end -- URLutil.getPort()
URLutil.getQuery = function ( url, key, separator )
    -- Retrieve the query, or the value of one query parameter
    -- Precondition:
    --     url        -- string, URL
    --     key        -- string, name of a single parameter, or not
    --     separator  -- string, one of & ; , / -- default: &
    -- Postcondition:
    --     Returns string, with entire query (no leading "?") or
    --                     with the value assigned to key
    --             false if not present,
    --             or the falsy result of getLocation()
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r then
            if type( key ) == "string" then
                -- The trimmed key is used, and any punctuation is
                -- escaped, since the key becomes part of a pattern;
                -- a key like "a.b" or "x%" shall be taken literally.
                local single = mw.text.trim( key ):gsub( "%p", "%%%0" )
                local sep    = "&"
                local s, scan
                if type( separator ) == "string" then
                    s = mw.text.trim( separator )
                    if s:match( "^[&;,/]$" ) then
                        sep = s
                    end
                end
                -- enclose in separators, to match first and last too
                s    = string.format( "%s%s%s", sep, r, sep )
                scan = string.format( "%s%s=([^%s]*)%s",
                                      sep, single, sep, sep )
                r = s:match( scan )
            end
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()
URLutil.getQueryTable = function ( url, separator )
    -- Retrieve all query parameters as table
    -- Precondition:
    --     url        -- string, URL
    --     separator  -- string, one of & ; , / -- default: &
    -- Postcondition:
    --     Returns table, mapping each name to its value string,
    --                    or to false if there is no assignment,
    --             or the falsy result of getQuery()
    local r = URLutil.getQuery( url )
    if r then
        local sep = "&"
        if type( separator ) == "string" then
            local s = mw.text.trim( separator )
            if s:match( "^[&;,/]$" ) then
                sep = s
            end
        end
        local parts = mw.text.split( r, sep, true )
        r = { }
        for i = 1, #parts do
            local item = parts[ i ]
            if item:find( "=", 2, true ) then
                local name, value = item:match( "^([^=]+)=(.*)$" )
                if name then
                    r[ name ] = value
                end
            else
                r[ item ] = false
            end
        end -- for i
    end
    return r
end -- URLutil.getQueryTable()
URLutil.getRelativePath = function ( url )
    -- Retrieve everything from the path on, beginning with "/"
    -- Precondition:
    --     url  -- string, URL or path
    -- Postcondition:
    --     Returns string,
    --             "/" for a resource URL without path,
    --             false if nothing appropriate,
    --             nil if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local s = url:match( "^%s*[a-zA-Z]*://(.*)$" )
    if s then
        -- skip the authority
        s = s:match( "[^/]+(/.*)$" )
    else
        local x
        x, s = url:match( "^%s*(/?)(/.*)$" )
        if x == "/" then
            -- protocol-relative: skip the authority
            s = s:match( "/[^/]+(/.*)$" )
        end
    end
    if s then
        return mw.text.trim( s )
    end
    if URLutil.isResourceURL( url ) then
        return "/"
    end
    return false
end -- URLutil.getRelativePath()
URLutil.getScheme = function ( url )
    -- Retrieve the scheme, in lowercase and with "://"
    -- Precondition:
    --     url  -- string, URL
    -- Postcondition:
    --     Returns string, like "https://",
    --                     or "//" if protocol-relative,
    --             false if there is no acceptable scheme,
    --             nil if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if slashes ~= "//" then
        return false
    end
    if colon == ":" then
        if #prot > 2 then
            return prot:lower() .. "://"
        end
    elseif #prot == 0 then
        return "//"
    end
    return false
end -- URLutil.getScheme()
URLutil.getSortkey = function ( url )
    -- Build a sortkey, with the domain labels in reverse order
    -- Precondition:
    --     url  -- string, URL
    -- Postcondition:
    --     Returns string, like "///org.example.www 8080 https: path"
    --             or url unchanged, if not a string or not suitable
    -- Protocol-relative URLs are supported now (the former test for
    -- position 0 could never succeed), and the port is converted
    -- into a number before being formatted by %d (the string ":8080"
    -- made string.format() fail).
    local r = url
    if type( url ) == "string" then
        local scheme, colon, rest =
                         url:match( "^%s*([a-zA-Z]*)(:?)//(.*)$" )
        if scheme  and  ( colon == ":"  or  scheme == "" ) then
            local suffix = ""
            local slash  = rest:find( "/", 1, true )
            local site, port, comps, labels
            if slash  and  slash > 1 then
                suffix = rest:sub( slash + 1 )
                rest   = rest:sub( 1,  slash - 1 )
                suffix = suffix:gsub( "#", " " )
            end
            site, port = rest:match( "^(.+):(%d+)$" )
            if port then
                port = tonumber( port )
            else
                site = rest
                port = 0
            end
            comps  = mw.text.split( site:lower(), ".", true )
            labels = { }
            for k = #comps, 1, -1 do
                labels[ #labels + 1 ] = comps[ k ]
            end -- for --k
            r = string.format( "///%s %d %s: %s",
                               table.concat( labels, "." ),
                               port,
                               scheme:lower(),
                               suffix )
        end
    end
    return r
end -- URLutil.getSortkey()
URLutil.getTLD = function ( url )
    -- Retrieve the top-level domain
    -- Postcondition:
    --     Returns string, false if the host has no such label,
    --             or the falsy result of getHost()
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    return mw.ustring.match( host, "%w+%.(%a[%w%-]*%a)$" )  or  false
end -- URLutil.getTLD()
URLutil.getTop2domain = function ( url )
    -- Retrieve the last two labels of the host, or false
    return getTopDomain( url, 2 )
end -- URLutil.getTop2domain()
URLutil.getTop3domain = function ( url )
    -- Retrieve the last three labels of the host, or false
    return getTopDomain( url, 3 )
end -- URLutil.getTop3domain()
URLutil.isAuthority = function ( s )
    -- Is this string a host, optionally followed by :port ?
    -- Precondition:
    --     s  -- string, candidate
    -- Postcondition:
    --     Returns the result of isHost() on the host part,
    --             false if the port is malformed,
    --             nil if s is not a string
    -- A malformed port is decisive now; formerly that verdict
    -- was overwritten by the subsequent host check.
    if type( s ) ~= "string" then
        return nil
    end
    local host, colon, port =
              mw.ustring.match( s, "^%s*([%w%.%%_-]+)(:?)(%d*)%s*$" )
    if not host then
        return false
    end
    if colon == ":" then
        -- a colon demands a port number without leading zero
        if not port:match( "^[1-9][0-9]*$" ) then
            return false
        end
    elseif port ~= "" then
        return false
    end
    return URLutil.isHost( host )
end -- URLutil.isAuthority()
URLutil.isDomain = function ( s )
    -- Is this string shaped like a domain name?
    -- Precondition:
    --     s  -- string, candidate
    -- Postcondition:
    --     Returns true,
    --             false if there are adjacent dots,
    --             nil otherwise, including non-string input
    if type( s ) ~= "string" then
        return nil
    end
    local name = mw.ustring.match( s,
                           "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$" )
    if type( name ) == "string"  and
       mw.ustring.find( name, "^%w" ) then
        return not mw.ustring.find( name, "..", 1, true )
    end
    return nil
end -- URLutil.isDomain()
URLutil.isDomainExample = function ( url )
    -- Is the host one of the reserved example domains?
    -- RFC 2606: example.com example.net example.org example.edu
    -- Postcondition:
    --     Returns boolean, or false if there is no top domain
    local r = getTopDomain( url, 2 )
    if r then
        local tld = r:lower():match( "^example%.([a-z][a-z][a-z])$" )
        -- any comparison against nil is simply false
        r = ( tld == "com"  or
              tld == "edu"  or
              tld == "net"  or
              tld == "org" )
    end
    return r
end -- URLutil.isDomainExample()
URLutil.isDomainInt = function ( url )
    -- Is the host an Internationalized Domain Name?
    -- Postcondition:
    --     Returns true  if there are characters outside printable
    --                   ASCII, or a label starts with xn-- (Punycode)
    --             false if plain,
    --             or the falsy result of getHost()
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    if not host:match( "^[!-~]+$" ) then
        return true
    end
    -- a leading dot makes the first label detectable as well
    return ( "." .. host ):find( ".xn--", 1, true ) ~= nil
end -- URLutil.isDomainInt()
URLutil.isHost = function ( s )
    -- Is this a domain name or an IP address?
    -- Returns the truthy result of isDomain(), else that of isIP()
    return URLutil.isDomain( s ) or URLutil.isIP( s )
end -- URLutil.isHost()
URLutil.isHostPathResource = function ( s )
    -- Is this a resource URL, even if the scheme has been omitted?
    -- A second attempt is made with "//" in front of the trimmed s
    local r = URLutil.isResourceURL( s )
    if r  or  not s then
        return r
    end
    return URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
end -- URLutil.isHostPathResource()
URLutil.isIP = function ( s )
    -- Is this an IP address?
    -- Returns 4 for IPv4, 6 for IPv6, false otherwise
    return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()
URLutil.isIPlocal = function ( s )
    -- Is this an IPv4 address from a local or private range?
    -- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735)
    -- Precondition:
    --     s  -- string, candidate
    -- Postcondition:
    --     Returns boolean; false for non-string input
    -- 192.168.*.* is detected now (the branch compared an undefined
    -- variable), and 169.254.*.* is evaluated instead of skipped.
    local r = false
    if type( s ) == "string" then
        local num = s:match( "^ *([01][0-9]*)%." )
        if num then
            local sub = s:match( "^ *[0-9]+%.([0-9]+)%." )
            num = tonumber( num )
            if sub then
                sub = tonumber( sub )
            end
            if num == 0 then
                r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
            elseif num == 10  or  num == 127 then
                -- loopback; private/local host: 127.0.0.1
                r = URLutil.isIPv4( s )
            elseif num == 169 then
                -- 169.254.*.*
                if sub == 254 then
                    r = URLutil.isIPv4( s )
                end
            elseif num == 172 then
                -- 172.(16...31).*.*
                if sub  and  sub >= 16  and  sub <= 31 then
                    r = URLutil.isIPv4( s )
                end
            elseif num == 192 then
                -- 192.168.*.*
                if sub == 168 then
                    r = URLutil.isIPv4( s )
                end
            end
        end
    end
    -- coerce a match result into a genuine boolean
    if r then
        r = true
    else
        r = false
    end
    return r
end -- URLutil.isIPlocal()
URLutil.isIPv4 = function ( s )
    -- Is this an IPv4 address in dotted decimal notation?
    -- Precondition:
    --     s  -- string, candidate
    -- Postcondition:
    --     Returns boolean
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" ) }
    if #octets ~= 4 then
        -- no match at all
        return false
    end
    for i = 1, 4 do
        if tonumber( octets[ i ] ) >= 256 then
            return false
        end
    end -- for i
    return true
end -- URLutil.isIPv4()
function | URLutil.isIPv6 = function ( s ) | ||
local dcolon, groups | local dcolon, groups | ||
if type( s ) ~= "string" | if type( s ) ~= "string" | ||
Zeile 178: | Zeile 821: | ||
( dcolon == 0 and groups == 8 ) ) | ( dcolon == 0 and groups == 8 ) ) | ||
and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with :: | and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with :: | ||
end -- | end -- URLutil.isIPv6() | ||
URLutil.isMailAddress = function ( s )
    -- Is this an e-mail address, local part and domain?
    -- Precondition:
    --     s  -- string, candidate
    -- Postcondition:
    --     Returns the result of isDomain() on the part behind "@",
    --             false if s is not a string
    if type( s ) ~= "string" then
        return false
    end
    local domain = mw.ustring.match( s,
                               "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
    return URLutil.isDomain( domain )
end -- URLutil.isMailAddress()
URLutil.isMailLink = function ( s )
    -- Is this a mailto: link with an e-mail address?
    -- Precondition:
    --     s  -- string, candidate
    -- Postcondition:
    --     Returns the result of isMailAddress() on the address,
    --             false if there is no mailto: link
    if type( s ) ~= "string" then
        return false
    end
    local prot, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
    if type( prot ) == "string"  and  prot:lower() == "mailto" then
        return URLutil.isMailAddress( addr )
    end
    return false
end -- URLutil.isMailLink()
function | local function isProtocolAccepted( prot, supplied ) | ||
if type( prot ) == "string" then | if type( prot ) == "string" then | ||
local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" ) | local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" ) | ||
Zeile 227: | Zeile 859: | ||
end | end | ||
elseif colon == ":" or slashes == "" then | elseif colon == ":" or slashes == "" then | ||
local s = | local s = supplied:match( " " .. scheme:lower() .. " " ) | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
return true | |||
end | end | ||
end | end | ||
Zeile 236: | Zeile 867: | ||
end | end | ||
return false | return false | ||
end -- | end -- isProtocolAccepted() | ||
URLutil.isProtocolDialog = function ( prot )
    -- Check prot against the list: mailto irc ircs ssh telnet
    return isProtocolAccepted( prot, " mailto irc ircs ssh telnet " )
end -- URLutil.isProtocolDialog()
URLutil.isProtocolWiki = function ( prot )
    -- Check prot against the list:
    -- ftp ftps git http https nntp sftp svn worldwind
    return isProtocolAccepted( prot,
                   " ftp ftps git http https nntp sftp svn worldwind " )
end -- URLutil.isProtocolWiki()
URLutil.isResourceURL = function ( url ) | |||
local scheme = URLutil.getScheme( url ) | |||
if scheme then | if scheme then | ||
local s = " // http:// https:// ftp:// " | local s = " // http:// https:// ftp:// sftp:// " | ||
s = s:find( " " | s = s:find( string.format( " %s ", scheme ) ) | ||
if s then | if s then | ||
if | if URLutil.getAuthority( url ) then | ||
if not url:match( "%S%s+%S" ) then | if not url:match( "%S%s+%S" ) then | ||
return true | local s1, s2 = url:match( "^([^#]+)(#.*)$" ) | ||
if s2 then | |||
if url:match( "^%s*[a-zA-Z]*:?//(.+)/" ) then | |||
return true | |||
end | |||
else | |||
return true | |||
end | |||
end | end | ||
end | end | ||
Zeile 254: | Zeile 905: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isResourceURL() | ||
function | URLutil.isSuspiciousURL = function ( url ) | ||
if | if URLutil.isResourceURL( url ) then | ||
local s = | local s = URLutil.getAuthority( url ) | ||
local pat = "[%[|%]" .. | local pat = "[%[|%]" .. | ||
mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 ) | mw.ustring.char( 34, | ||
8201, 45, 8207, | |||
8234, 45, 8239, | |||
8288 ) | |||
.. "]" | .. "]" | ||
if s:find( "@" ) | if s:find( "@" ) | ||
Zeile 270: | Zeile 924: | ||
return true | return true | ||
end | end | ||
-- TODO zero width character | -- TODO zero width character ?? | ||
return false | return false | ||
end | end | ||
return true | return true | ||
end -- | end -- URLutil.isSuspiciousURL() | ||
function | URLutil.isUnescapedURL = function ( url, trailing ) | ||
if type( trailing ) ~= "string" then | if type( trailing ) ~= "string" then | ||
if | if URLutil.isWebURL( url ) then | ||
if url:match( "[%[|%]]" ) then | if url:match( "[%[|%]]" ) then | ||
return true | return true | ||
Zeile 287: | Zeile 941: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isUnescapedURL() | ||
URLutil.isWebURL = function ( url )
    -- Is this a URL with scheme and authority, free of inner
    -- whitespace and of two adjacent apostrophes?
    -- Postcondition:
    --     Returns boolean
    if not ( URLutil.getScheme( url )  and
             URLutil.getAuthority( url ) ) then
        return false
    end
    if url:find( "%S%s+%S" ) then
        return false
    end
    if url:find( "''", 1, true ) then
        return false
    end
    return true
end -- URLutil.isWebURL()
URLutil.wikiEscapeURL = function ( url )
    -- Escape brackets and pipe, for use within wikitext
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string, with [ | ] as numeric character references
    if url:find( "[%[|%]]" ) then
        -- Replacing a character by itself would be a no-op;
        -- use the numeric character references instead.
        url = url:gsub( "%[", "&#91;" )
                 :gsub( "|", "&#124;" )
                 :gsub( "%]", "&#93;" )
    end
    return url
end -- URLutil.wikiEscapeURL()
Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or wikidata|item|~|@ or false
    -- Postcondition:
    --     Returns  string  -- with queried version/item, also if problem
    --              false   -- if appropriate
    -- 2020-08-17
    local since  = atleast
    local last   = ( since == "~" )
    local linked = ( since == "@" )
    local link   = ( since == "item" )
    local r
    if last  or  link  or  linked  or  since == "wikidata" then
        local item = Failsafe.item
        -- a keyword is no version requirement
        since = false
        if type( item ) == "number"  and  item > 0 then
            local suited = string.format( "Q%d", item )
            if link then
                -- "item": the identifier itself is requested
                r = suited
            else
                local entity = mw.wikibase.getEntity( suited )
                if type( entity ) == "table" then
                    local seek = Failsafe.serialProperty or "P348"
                    local vsn  = entity:formatPropertyValues( seek )
                    if type( vsn ) == "table"  and
                       type( vsn.value ) == "string"  and
                       vsn.value ~= "" then
                        if last  and  vsn.value == Failsafe.serial then
                            -- "~": local serial equals registered value
                            r = false
                        elseif linked then
                            -- "@": false if current page is the sitelink
                            if mw.title.getCurrentTitle().prefixedText
                               ==  mw.wikibase.getSitelink( suited ) then
                                r = false
                            else
                                r = suited
                            end
                        else
                            r = vsn.value
                        end
                    end
                end
            end
        end
    end
    if type( r ) == "nil" then
        -- nothing determined above; compare strings against local serial
        if not since  or  since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()
local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --     frame   -- object
    --     action  -- string, with function name
    --     amount  -- number, of args if > 1
    -- Postcondition:
    --     Return string or not
    local params = { }
    for i = 1, amount or 1 do
        local s = frame.args[ i ]
        if s then
            s = mw.text.trim( s )
            if s ~= "" then
                params[ i ] = s
            end
        end
    end -- for i
    if not params[ 1 ] then
        -- nothing to do without a first parameter
        return nil
    end
    -- parentheses limit the result to a single value
    return ( URLutil[ action ]( params[ 1 ], params[ 2 ], params[ 3 ] ) )
end -- Template()
-- Export for #invoke: template wrappers around the URLutil functions
local p = {}

-- Functions delivering their result as is, or "" if there is none;
-- the number is the count of template parameters passed on.
-- wikiEscapeURL is included here, so that it cannot return nil
-- any more if the parameter is missing.
local exportText = { decode          = 2,
                     encode          = 2,
                     getAuthority    = 1,
                     getHost         = 1,
                     getLocation     = 1,
                     getNormalized   = 1,
                     getPath         = 1,
                     getPort         = 1,
                     getRelativePath = 1,
                     getScheme       = 1,
                     getSortkey      = 1,
                     getTLD          = 1,
                     getTop2domain   = 1,
                     getTop3domain   = 1,
                     isIP            = 1,
                     wikiEscapeURL   = 1 }

-- Functions delivering "1" if the result is truthy, else ""
local exportFlag = { isAuthority        = 1,
                     isDomain           = 1,
                     isDomainExample    = 1,
                     isDomainInt        = 1,
                     isHost             = 1,
                     isHostPathResource = 1,
                     isIPlocal          = 1,
                     isIPv4             = 1,
                     isIPv6             = 1,
                     isMailAddress      = 1,
                     isMailLink         = 1,
                     isProtocolDialog   = 1,
                     isProtocolWiki     = 1,
                     isResourceURL      = 1,
                     isSuspiciousURL    = 1,
                     isUnescapedURL     = 2,
                     isWebURL           = 1 }

for name, amount in pairs( exportText ) do
    p[ name ] = function ( frame )
        return Template( frame, name, amount )  or  ""
    end
end -- for name, amount

for name, amount in pairs( exportFlag ) do
    p[ name ] = function ( frame )
        return Template( frame, name, amount )  and  "1"   or   ""
    end
end -- for name, amount

function p.getFragment( frame )
    -- the fragment is returned with its leading "#"
    local r = Template( frame, "getFragment", 2 )
    if r then
        r = "#" .. r
    else
        r = ""
    end
    return r
end
function p.getQuery( frame )
    -- the entire query is returned with its leading "?",
    -- the value of a single key without
    local r = Template( frame, "getQuery", 3 )
    if r then
        local key = frame.args[ 2 ]
        if key then
            key = mw.text.trim( key )
            if key == "" then
                key = nil
            end
        end
        if not key then
            r = "?" .. r
        end
    else
        r = ""
    end
    return r
end

p.failsafe = function ( frame )
    -- Versioning interface
    local s = type( frame )
    local since
    if s == "table" then
        since = frame.args[ 1 ]
    elseif s == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            since = false
        end
    end
    return Failsafe.failsafe( since )  or  ""
end

function p.URLutil()
    -- Access for other Lua modules
    return URLutil
end

return p
Aktuelle Version vom 11. Februar 2023, 21:01 Uhr
Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden
local URLutil = { suite  = "URLutil",
                  serial = "2022-04-05",
                  item   = 10859193 }
--[=[
Utilities for URL etc. on www.
* decode()
* encode()
* getAuthority()
* getFragment()
* getHost()
* getLocation()
* getNormalized()
* getPath()
* getPort()
* getQuery()
* getQueryTable()
* getRelativePath()
* getScheme()
* getSortkey()
* getTLD()
* getTop2domain()
* getTop3domain()
* isAuthority()
* isDomain()
* isDomainExample()
* isDomainInt()
* isHost()
* isHostPathResource()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
* failsafe()
Only [[dotted decimal]] notation for IPv4 expected.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wiki syntax escaping anyway.
]=]
local Failsafe = URLutil



-- Characters that must stay percent-encoded, per URL component:
--     F -- fragment, P -- path, Q -- query, X -- any component
-- NOTE(review): earlier revisions spelled the bracket pair as `[\]`, which
--               Lua 5.1 reads as `[]`; a literal backslash is therefore not
--               in these lists -- confirm whether that was intended.
local decodeComponentProtect = { F = "\"#%<>[]^`{|}",
                                 P = "\"#%<>[]^`{|}/?",
                                 Q = "\"#%<>[]^`{|}&=+;,",
                                 X = "\"#%<>[]^`{|}&=+;,/?" }



local decodeComponentEscape = function ( averse, adapt )
    -- Tell whether a character code needs to remain escaped
    -- Precondition:
    --     averse  -- string, key into decodeComponentProtect (F, P, Q, X)
    --     adapt   -- number, character code below 128
    -- Postcondition:
    --     Returns truthy if the character is to be kept escaped
    -- NOTE(review): 20 is decimal (DC4); possibly 0x20 (space) was meant.
    return adapt == 20  or  adapt == 127  or
           decodeComponentProtect[ averse ]:find( string.char( adapt ),
                                                  1,
                                                  true )
end -- decodeComponentEscape()



local decodeComponentML = function ( ask )
    -- Resolve numeric character references of the shape &#x...;
    -- Precondition:
    --     ask  -- string, URL
    -- Postcondition:
    --     Returns string; codes >= 128 become decimal references,
    --     protected ASCII becomes percent-escaped, other ASCII is literal
    local i = 1
    local j, n, s
    while ( i ) do
        i = ask:find( "&#[xX]%x%x+;", i )
        if i then
            j = ask:find( ";",  i + 3,  true )
            s = ask:sub( i + 2,  j - 1 ):upper()
            n = s:byte( 1, 1 )
            if n == 88 then    -- 'X'
                n = tonumber( s:sub( 2 ),  16 )
            elseif s:match( "^%d+$" ) then
                n = tonumber( s )
            else
                n = false
            end
            if n then
                if n >= 128 then
                    s = string.format( "&#%d;", n )
                elseif decodeComponentEscape( "X", n ) then
                    s = string.format( "%%%02X", n )
                else
                    s = string.format( "%c", n )
                end
                j = j + 1
                if i == 1 then
                    ask = s .. ask:sub( j )
                else
                    ask = string.format( "%s%s%s",
                                         ask:sub( 1,  i - 1 ),
                                         s,
                                         ask:sub( j ) )
                end
            end
            i = i + 1
        end
    end -- while i
    return ask
end -- decodeComponentML()



local decodeComponentPercent = function ( ask, averse )
    -- Decode percent-escapes %20...%7F that do not need protection
    -- Precondition:
    --     ask     -- string, URL component
    --     averse  -- string, component key (F, P, Q, X)
    -- Postcondition:
    --     Returns string; escapes kept for protection get their
    --     second hex digit upcased; runs of %27%27 are left alone
    local i = 1
    local j, k, m, n
    while ( i ) do
        i = ask:find( "%%[2-7]%x", i )
        if i then
            j = i + 1
            k = j + 1
            n = ask:byte( k, k )
            k = k + 1
            m = ( n > 96 )
            if m then
                -- lowercase hex digit: remember its uppercase code
                n = n - 32
                m = n
            end
            if n > 57 then
                n = n - 55
            else
                n = n - 48
            end
            n = ( ask:byte( j, j ) - 48 ) * 16   +   n
            if n == 39  and
               ask:sub( i + 3,  i + 5 ) == "%27" then
                -- two or more encoded apostrophes: skip the whole run
                j = i + 6
                while ( ask:sub( j,  j + 2 ) == "%27" ) do
                    j = j + 3
                end -- while "%27"
            elseif decodeComponentEscape( averse, n ) then
                if m then
                    ask = string.format( "%s%c%s",
                                         ask:sub( 1, j ),
                                         m,
                                         ask:sub( k ) )
                end
            elseif i == 1 then
                ask = string.format( "%c%s", n, ask:sub( k ) )
            else
                ask = string.format( "%s%c%s",
                                     ask:sub( 1,  i - 1 ),
                                     n,
                                     ask:sub( k ) )
            end
            i = j
        end
    end -- while i
    return ask
end -- decodeComponentPercent()



local getTopDomain = function ( url, mode )
    -- Retrieve the last two or three labels of the host
    -- Precondition:
    --     url   -- string, URL
    --     mode  -- number, 2 or 3
    -- Postcondition:
    --     Returns string, or false
    local r = URLutil.getHost( url )
    if r then
        local pattern = "[%w%%%-]+%.%a[%w%-]*%a)$"
        if mode == 3 then
            pattern = "[%w%%%-]+%." .. pattern
        end
        r = mw.ustring.match( "." .. r,  "%.(" .. pattern )
        if not r then
            r = false
        end
    else
        r = false
    end
    return r
end -- getTopDomain()



local getHash = function ( url )
    -- Locate the '#' that starts the fragment,
    -- ignoring '#' that belongs to a numeric character reference
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns number (position of '#'), or nil / false if none
    local r = url:find( "#", 1, true )
    if r then
        local i = url:find( "&#", 1, true )
        if i then
            local s
            while ( i ) do
                s = url:sub( i + 2 )
                if s:match( "^%d+;" )  or  s:match( "^x%x+;" ) then
                    r = url:find( "#",  i + 4,  true )
                    if r then
                        i = url:find( "&#",  i + 4,  true )
                    else
                        i = false
                    end
                else
                    r = i + 1
                    i = false
                end
            end -- while i
        end
    end
    return r
end -- getHash()



URLutil.decode = function ( url, enctype )
    -- Decode a percent-encoded string via mw.uri.decode()
    -- and make the result safe for wikitext
    -- Precondition:
    --     url      -- string
    --     enctype  -- string (passed uppercased to mw.uri.decode), or not
    -- Postcondition:
    --     Returns string, HTML-encoded, with [ | ] as character references
    local r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.text.encode( mw.uri.decode( url, s ) )
    if r:find( "[%[|%]]" ) then
        local k
        r, k = r:gsub( "%[", "&#91;" )
                :gsub(  "|", "&#124;" )
                :gsub( "%]", "&#93;" )
    end
    return r
end -- URLutil.decode()



URLutil.encode = function ( url, enctype )
    -- Percent-encode a string via mw.uri.encode()
    -- Precondition:
    --     url      -- string
    --     enctype  -- string (passed uppercased to mw.uri.encode), or not
    -- Postcondition:
    --     Returns string; a leading * : ; is escaped as well
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    if -- k == 35  or      -- #
       k == 42  or      -- *
       k == 58  or      -- :
       k == 59 then     -- ;
        r = string.format( "%%%X%s", k, r:sub( 2 ) )
    end
    if r:find( "[%[|%]]" ) then
        -- '%' has to be doubled in a gsub replacement string
        r, k = r:gsub( "%[", "%%5B" )
                :gsub(  "|", "%%7C" )
                :gsub( "%]", "%%5D" )
    end
    return r
end -- URLutil.encode()



URLutil.getAuthority = function ( url )
    -- Retrieve host, with port if any, in lowercase
    -- Precondition:
    --     url  -- string, URL with // before the authority
    -- Postcondition:
    --     Returns string -- host or host:port
    --             nil    -- string without valid authority
    --             false  -- url is not a string
    local r
    if type( url ) == "string" then
        local colon, host, port
        local pattern = "^%s*%w*:?//([%w%.%%_-]+)(:?)([%d]*)/"
        local s = mw.text.decode( url )
        local i = s:find( "#", 6, true )
        if i then
            s = s:sub( 1,  i - 1 )  ..  "/"
        else
            s = s .. "/"
        end
        host, colon, port = mw.ustring.match( s, pattern )
        if URLutil.isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                if port:find( "^[1-9]" ) then
                    r = ( host .. ":" .. port )
                end
            elseif #port == 0 then
                r = host
            end
        end
    else
        r = false
    end
    return r
end -- URLutil.getAuthority()



URLutil.getFragment = function ( url, decode )
    -- Retrieve the fragment, without leading '#'
    -- Precondition:
    --     url     -- string
    --     decode  -- "%" or "WIKI" to request decoding, or not
    -- Postcondition:
    --     Returns string, false if no fragment, nil if url is not a string
    local r
    if type( url ) == "string" then
        local i = getHash( url )
        if i then
            r = mw.text.trim( url:sub( i ) ):sub( 2 )
            if type( decode ) == "string" then
                local encoding = mw.text.trim( decode )
                local launch
                if encoding == "%" then
                    launch = true
                elseif encoding == "WIKI" then
                    -- wiki anchors: .XX instead of %XX, _ instead of space
                    r = r:gsub( "%.(%x%x)", "%%%1" )
                         :gsub( "_", " " )
                    launch = true
                end
                if launch then
                    r = mw.uri.decode( r, "PATH" )
                end
            end
        else
            r = false
        end
    else
        r = nil
    end
    return r
end -- URLutil.getFragment()



URLutil.getHost = function ( url )
    -- Retrieve the host, without port
    local r = URLutil.getAuthority( url )
    if r then
        r = mw.ustring.match( r, "^([%w%.%%_%-]+):?[%d]*$" )
    end
    return r
end -- URLutil.getHost()



URLutil.getLocation = function ( url )
    -- Retrieve the URL without fragment
    -- Postcondition:
    --     Returns string, false if nothing is left,
    --     nil if url is not a string
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            local i
            i = getHash( r )
            if i then
                if i == 1 then
                    r = false
                else
                    r = r:sub( 1,  i - 1 )
                end
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.getLocation()



URLutil.getNormalized = function ( url )
    -- Retrieve the URL in a normalized shape: character references and
    -- unnecessary percent-escapes resolved, scheme and authority lowercase,
    -- space [ | ] < > percent-encoded
    -- Postcondition:
    --     Returns string, or false
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            r = decodeComponentML( r )
        end
    else
        r = false
    end
    if r then
        local k = r:find( "//", 1, true )
        if k then
            local j = r:find( "/",  k + 2,  true )
            local sF, sP, sQ
            if r:find( "%%[2-7]%x" ) then
                -- split off fragment, query and path; decode each
                local i = getHash( r )
                if i then
                    sF = r:sub( i + 1 )
                    r  = r:sub( 1,  i - 1 )
                    if sF == "" then
                        sF = false
                    else
                        sF = decodeComponentPercent( sF, "F" )
                    end
                end
                i = r:find( "?", 1, true )
                if i then
                    sQ = r:sub( i )
                    r  = r:sub( 1,  i - 1 )
                    sQ = decodeComponentPercent( sQ, "Q" )
                end
                if j then
                    if #r > j then
                        sP = r:sub( j + 1 )
                        sP = decodeComponentPercent( sP, "P" )
                    end
                    r = r:sub( 1,  j - 1 )
                end
            elseif j then
                local n = #r
                if r:byte( n, n ) == 35 then    -- '#'
                    n = n - 1
                    r = r:sub( 1, n )
                end
                if n > j then
                    sP = r:sub( j + 1 )
                end
                r = r:sub( 1,  j - 1 )
            end
            r = mw.ustring.lower( r ) .. "/"
            if sP then
                r = r .. sP
            end
            if sQ then
                r = r .. sQ
            end
            if sF then
                r = string.format( "%s#%s", r, sF )
            end
        end
        r = r:gsub( " ",  "%%20" )
             :gsub( "%[", "%%5B" )
             :gsub( "|",  "%%7C" )
             :gsub( "%]", "%%5D" )
             :gsub( "%<", "%%3C" )
             :gsub( "%>", "%%3E" )
    end
    return r
end -- URLutil.getNormalized()



URLutil.getPath = function ( url )
    -- Retrieve the path, without query and fragment
    local r = URLutil.getRelativePath( url )
    if r then
        local s = r:match( "^([^%?]*)%?" )
        if s then
            r = s
        end
        s = r:match( "^([^#]*)#" )
        if s then
            r = s
        end
    end
    return r
end -- URLutil.getPath()



URLutil.getPort = function ( url )
    -- Retrieve the port
    -- Postcondition:
    --     Returns number, false if the authority has no port,
    --     or the falsy result of getAuthority()
    local r = URLutil.getAuthority( url )
    if r then
        r = r:match( ":([1-9][0-9]*)$" )
        if r then
            r = tonumber( r )
        else
            r = false
        end
    end
    return r
end -- URLutil.getPort()



URLutil.getQuery = function ( url, key, separator )
    -- Retrieve the query, or the value of one query parameter
    -- Precondition:
    --     url        -- string
    --     key        -- string, parameter name; or not
    --     separator  -- string, one of & ; , /  -- default: &
    -- Postcondition:
    --     Returns string, false if not found,
    --     or the falsy result of getLocation()
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r then
            if type( key ) == "string" then
                local single = mw.text.trim( key )
                local sep    = "&"
                local s, scan
                if type( separator ) == "string" then
                    s = mw.text.trim( separator )
                    if s:match( "^[&;,/]$" ) then
                        sep = s
                    end
                end
                -- the key is data, not a pattern: neutralize magic chars
                single = single:gsub( "%p", "%%%0" )
                s = string.format( "%s%s%s", sep, r, sep )
                scan = string.format( "%s%s=([^%s]*)%s",
                                      sep, single, sep, sep )
                r = s:match( scan )
            end
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()



URLutil.getQueryTable = function ( url, separator )
    -- Retrieve all query parameters
    -- Precondition:
    --     url        -- string
    --     separator  -- string, one of & ; , /  -- default: &
    -- Postcondition:
    --     Returns table, mapping name to value; false for a name
    --     without '='; or the falsy result of getQuery()
    local r = URLutil.getQuery( url )
    if r then
        local sep = "&"
        local n, parts, s, set
        if type( separator ) == "string" then
            s = mw.text.trim( separator )
            if s:match( "^[&;,/]$" ) then
                sep = s
            end
        end
        parts = mw.text.split( r, sep, true )
        n = #parts
        r = { }
        for i = 1, n do
            s = parts[ i ]
            if s:find( "=", 2, true ) then
                s, set = s:match( "^([^=]+)=(.*)$" )
                if s then
                    r[ s ] = set
                end
            else
                r[ s ] = false
            end
        end -- for i
    end
    return r
end -- URLutil.getQueryTable()



URLutil.getRelativePath = function ( url )
    -- Retrieve everything after the authority, starting with '/'
    -- Postcondition:
    --     Returns string; "/" for a resource URL without path;
    --     false if not available; nil if url is not a string
    local r
    if type( url ) == "string" then
        local s = url:match( "^%s*[a-zA-Z]*://(.*)$" )
        if s then
            s = s:match( "[^/]+(/.*)$" )
        else
            local x
            x, s = url:match( "^%s*(/?)(/.*)$" )
            if x == "/" then
                -- protocol-relative: drop the authority
                s = s:match( "/[^/]+(/.*)$" )
            end
        end
        if s then
            r = mw.text.trim( s )
        elseif URLutil.isResourceURL( url ) then
            r = "/"
        else
            r = false
        end
    else
        r = nil
    end
    return r
end -- URLutil.getRelativePath()



URLutil.getScheme = function ( url )
    -- Retrieve the scheme, lowercase, including "://"
    -- Postcondition:
    --     Returns string -- "scheme://", or "//" if protocol-relative
    --             false  -- not found
    --             nil    -- url is not a string
    local r
    if type( url ) == "string" then
        local pattern = "^%s*([a-zA-Z]*)(:?)(//)"
        local prot, colon, slashes = url:match( pattern )
        r = false
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    r = prot:lower() .. "://"
                end
            elseif #prot == 0 then
                r = "//"
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.getScheme()



URLutil.getSortkey = function ( url )
    -- Build a sort key: host labels reversed, then port, scheme, rest
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string; url itself if no key could be built
    local r = url
    if type( url ) == "string" then
        local i = url:find( "//" )
        if i then
            local scheme
            if i == 1 then
                -- protocol-relative URL; string.find() counts from 1
                scheme = ""
            else
                scheme = url:match( "^%s*([a-zA-Z]*)://" )
            end
            if scheme then
                local s = url:sub( i + 2 )
                local comps, site, m, suffix
                scheme = scheme:lower()
                i = s:find( "/" )
                if i  and  i > 1 then
                    suffix = s:sub( i + 1 )    -- mw.uri.encode()
                    s      = s:sub( 1,  i - 1 )
                    suffix = suffix:gsub( "#", " " )
                else
                    suffix = ""
                end
                -- capture the digits only, since %d needs a number
                site, m = s:match( "^(.+):(%d+)$" )
                if m then
                    m = tonumber( m )
                else
                    site = s
                    m    = 0
                end
                comps = mw.text.split( site:lower(), ".", true )
                r = "///"
                for k = #comps, 2, -1 do
                    r = string.format( "%s%s.", r, comps[ k ] )
                end -- for --k
                r = string.format( "%s%s %d %s: %s",
                                   r, comps[ 1 ], m, scheme, suffix )
            end
        end
    end
    return r
end -- URLutil.getSortkey()



URLutil.getTLD = function ( url )
    -- Retrieve the top level domain
    local r = URLutil.getHost( url )
    if r then
        r = mw.ustring.match( r, "%w+%.(%a[%w%-]*%a)$" )
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getTLD()



URLutil.getTop2domain = function ( url )
    return getTopDomain( url, 2 )
end -- URLutil.getTop2domain()



URLutil.getTop3domain = function ( url )
    return getTopDomain( url, 3 )
end -- URLutil.getTop3domain()



URLutil.isAuthority = function ( s )
    -- Is this a host, optionally followed by :port?
    -- Precondition:
    --     s  -- string
    -- Postcondition:
    --     Returns false for an invalid port or shape; otherwise the
    --     result of isHost(); nil if s is not a string
    local r
    if type( s ) == "string" then
        local pattern = "^%s*([%w%.%%_-]+)(:?)(%d*)%s*$"
        local host, colon, port = mw.ustring.match( s, pattern )
        if colon == ":" then
            if not port:match( "^[1-9][0-9]*$" ) then
                r = false
            end
        elseif port ~= "" then
            r = false
        end
        if r == nil then
            -- port is acceptable; the verdict depends on the host alone
            r = URLutil.isHost( host )
        end
    else
        r = nil
    end
    return r
end -- URLutil.isAuthority()



URLutil.isDomain = function ( s )
    -- Is this a domain name: labels, then a TLD starting and ending
    -- with a letter; no empty label?
    -- Postcondition:
    --     Returns true, false, or nil if not matching at all
    local r
    if type( s ) == "string" then
        local scan = "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$"
        s = mw.ustring.match( s, scan )
        if type( s ) == "string" then
            if mw.ustring.find( s, "^%w" ) then
                if mw.ustring.find( s, "..", 1, true ) then
                    r = false
                else
                    r = true
                end
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.isDomain()



URLutil.isDomainExample = function ( url )
    -- RFC 2606: example.com example.net example.org example.edu
    local r = getTopDomain( url, 2 )
    if r then
        local s = r:lower():match( "^example%.([a-z][a-z][a-z])$" )
        if s then
            r = ( s == "com"  or
                  s == "edu"  or
                  s == "net"  or
                  s == "org" )
        else
            r = false
        end
    end
    return r
end -- URLutil.isDomainExample()



URLutil.isDomainInt = function ( url )
    -- Internationalized Domain Name (Punycode)
    -- True if the host has a label starting with xn--,
    -- or contains anything beyond printable ASCII
    local r = URLutil.getHost( url )
    if r then
        if r:match( "^[!-~]+$" ) then
            local s = "." .. r
            if s:find( ".xn--", 1, true ) then
                r = true
            else
                r = false
            end
        else
            r = true
        end
    end
    return r
end -- URLutil.isDomainInt()



URLutil.isHost = function ( s )
    return URLutil.isDomain( s ) or URLutil.isIP( s )
end -- URLutil.isHost()



URLutil.isHostPathResource = function ( s )
    -- Is this a resource URL, even if the leading // is missing?
    local r = URLutil.isResourceURL( s )
    if not r  and  s then
        r = URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
    end
    return r
end -- URLutil.isHostPathResource()



URLutil.isIP = function ( s )
    -- Postcondition:
    --     Returns 4, 6, or false
    return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()



URLutil.isIPlocal = function ( s )
    -- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735)
    -- Precondition:
    --     s  -- string, IPv4 address in dotted decimal notation
    -- Postcondition:
    --     Returns boolean
    local r = false
    if type( s ) ~= "string" then
        return false
    end
    local num = s:match( "^ *([01][0-9]*)%." )
    if num then
        num = tonumber( num )
        if num == 0 then
            r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
        elseif num == 10  or  num == 127 then
            -- loopback; private/local host: 127.0.0.1
            r = URLutil.isIPv4( s )
        elseif num == 169 then
            -- 169.254.*.*
            -- link-local range is not reported as local here
        elseif num == 172 then
            -- 172.(16...31).*.*
            num = s:match( "^ *0*172%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num >= 16  and  num <= 31 then
                    r = URLutil.isIPv4( s )
                end
            end
        elseif num == 192 then
            -- 192.168.*.*
            num = s:match( "^ *0*192%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num == 168 then
                    r = URLutil.isIPv4( s )
                end
            end
        end
    end
    return ( r and true )  or  false
end -- URLutil.isIPlocal()



URLutil.isIPv4 = function ( s )
    -- Is this an IPv4 address in dotted decimal notation?
    -- The first octet must not start with 0; each octet is below 256
    local function legal( n )
              return ( tonumber( n ) < 256 )
          end
    local r = false
    if type( s ) == "string" then
        local p1, p2, p3, p4 = s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" )
        if p1 and p2 and p3 and p4 then
            r = legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
    end
    return r
end -- URLutil.isIPv4()



URLutil.isIPv6 = function ( s )
    -- Is this an IPv6 address (unbracketed, hex groups and colons only)?
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- trim before validating, so that padding does not count as illegal
    s = mw.text.trim( s )
    if s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed, upper
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()



URLutil.isMailAddress = function ( s )
    -- Is this local@domain with a valid domain?
    if type( s ) == "string" then
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return URLutil.isDomain( s )
    end
    return false
end -- URLutil.isMailAddress()



URLutil.isMailLink = function ( s )
    -- Is this mailto: followed by a valid mail address?
    if type( s ) == "string" then
        local addr
        s, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( s ) == "string" then
            if s:lower() == "mailto" then
                return URLutil.isMailAddress( addr )
            end
        end
    end
    return false
end -- URLutil.isMailLink()



local function isProtocolAccepted( prot, supplied )
    -- Is the scheme among the supplied ones?
    -- Precondition:
    --     prot      -- string, scheme, optionally with : or :// appended;
    --                  or // alone
    --     supplied  -- string, space separated and space enclosed schemes
    -- Postcondition:
    --     Returns boolean
    if type( prot ) == "string" then
        local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
        if slashes ~= "/" then
            if scheme == "" then
                if colon ~= ":"  and  slashes == "//" then
                    return true
                end
            elseif colon == ":"  or  slashes == "" then
                local s = supplied:match( " " .. scheme:lower() .. " " )
                if type( s ) == "string" then
                    return true
                end
            end
        end
    end
    return false
end -- isProtocolAccepted()



URLutil.isProtocolDialog = function ( prot )
    return isProtocolAccepted( prot, " mailto irc ircs ssh telnet " )
end -- URLutil.isProtocolDialog()



URLutil.isProtocolWiki = function ( prot )
    return isProtocolAccepted( prot,
                               " ftp ftps git http https nntp sftp svn worldwind " )
end -- URLutil.isProtocolWiki()



URLutil.isResourceURL = function ( url )
    -- Is this a URL with scheme // http ftp https sftp, a valid authority,
    -- no inner whitespace, and a '/' before any fragment?
    local scheme = URLutil.getScheme( url )
    if scheme then
        local s = " // http:// https:// ftp:// sftp:// "
        s = s:find( string.format( " %s ", scheme ) )
        if s then
            if URLutil.getAuthority( url ) then
                if not url:match( "%S%s+%S" ) then
                    local s1, s2 = url:match( "^([^#]+)(#.*)$" )
                    if s2 then
                        if url:match( "^%s*[a-zA-Z]*:?//(.+)/" ) then
                            return true
                        end
                    else
                        return true
                    end
                end
            end
        end
    end
    return false
end -- URLutil.isResourceURL()



URLutil.isSuspiciousURL = function ( url )
    -- Is this not a resource URL, or does it contain @ in the authority,
    -- '' or [ | ] " or special spaces / directional marks, or end in . , ?
    if URLutil.isResourceURL( url ) then
        local s = URLutil.getAuthority( url )
        local pat = "[%[|%]" ..
                    mw.ustring.char( 34,
                                     8201, 45, 8207,
                                     8234, 45, 8239,
                                     8288 )
                    .. "]"
        -- pat holds code point ranges; match by character, not by byte
        if s:find( "@" )
           or url:find( "''" )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character ??
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()



URLutil.isUnescapedURL = function ( url, trailing )
    -- Is this a web URL containing [ | ] ?
    -- Always false if trailing is a string
    if type( trailing ) ~= "string" then
        if URLutil.isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- URLutil.isUnescapedURL()



URLutil.isWebURL = function ( url )
    -- Is this a URL with scheme and authority,
    -- without inner whitespace and without '' ?
    if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then
        if not url:find( "%S%s+%S" )  and
           not url:find( "''", 1, true ) then
            return true
        end
    end
    return false
end -- URLutil.isWebURL()



URLutil.wikiEscapeURL = function ( url )
    -- Replace [ | ] by numeric character references
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string
    if url:find( "[%[|%]]" ) then
        local n
        url, n = url:gsub( "%[", "&#91;" )
                    :gsub(  "|", "&#124;" )
                    :gsub( "%]", "&#93;" )
    end
    return url
end -- URLutil.wikiEscapeURL()



Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or wikidata|item|~|@ or false
    -- Postcondition:
    --     Returns  string  -- with queried version/item, also if problem
    --              false   -- if appropriate
    -- 2020-08-17
    local since  = atleast
    local last   = ( since == "~" )
    local linked = ( since == "@" )
    local link   = ( since == "item" )
    local r
    if last  or  link  or  linked  or  since == "wikidata" then
        local item = Failsafe.item
        since = false
        if type( item ) == "number"  and  item > 0 then
            local suited = string.format( "Q%d", item )
            if link then
                r = suited
            else
                local entity = mw.wikibase.getEntity( suited )
                if type( entity ) == "table" then
                    local seek = Failsafe.serialProperty or "P348"
                    local vsn  = entity:formatPropertyValues( seek )
                    if type( vsn ) == "table"  and
                       type( vsn.value ) == "string"  and
                       vsn.value ~= "" then
                        if last  and  vsn.value == Failsafe.serial then
                            r = false
                        elseif linked then
                            if mw.title.getCurrentTitle().prefixedText
                               ==  mw.wikibase.getSitelink( suited ) then
                                r = false
                            else
                                r = suited
                            end
                        else
                            r = vsn.value
                        end
                    end
                end
            end
        end
    end
    if type( r ) == "nil" then
        if not since  or  since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()



local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --     frame   -- object
    --     action  -- string, with function name
    --     amount  -- number, of args if > 1
    -- Postcondition:
    --     Return string or not
    local n = amount or 1
    local v = { }
    local r, s
    for i = 1, n do
        s = frame.args[ i ]
        if s then
            s = mw.text.trim( s )
            if s ~= "" then
                v[ i ] = s
            end
        end
    end -- for i
    if v[ 1 ] then
        r = URLutil[ action ]( v[ 1 ], v[ 2 ], v[ 3 ] )
    end
    return r
end -- Template()



-- Export: template access.
-- Retrieving functions return a string, "" if nothing is available;
-- testing functions return "1" or "".
local p = {}

function p.decode( frame )
    return Template( frame, "decode", 2 ) or ""
end
function p.encode( frame )
    return Template( frame, "encode", 2 ) or ""
end
function p.getAuthority( frame )
    return Template( frame, "getAuthority" ) or ""
end
function p.getFragment( frame )
    local r = Template( frame, "getFragment", 2 )
    if r then
        r = "#" .. r
    else
        r = ""
    end
    return r
end
function p.getHost( frame )
    return Template( frame, "getHost" ) or ""
end
function p.getLocation( frame )
    return Template( frame, "getLocation" ) or ""
end
function p.getNormalized( frame )
    return Template( frame, "getNormalized" ) or ""
end
function p.getPath( frame )
    return Template( frame, "getPath" ) or ""
end
function p.getPort( frame )
    return Template( frame, "getPort" ) or ""
end
function p.getQuery( frame )
    local r = Template( frame, "getQuery", 3 )
    if r then
        local key = frame.args[ 2 ]
        if key then
            key = mw.text.trim( key )
            if key == "" then
                key = nil
            end
        end
        if not key then
            -- entire query requested: show it with its leading '?'
            r = "?" .. r
        end
    else
        r = ""
    end
    return r
end
function p.getRelativePath( frame )
    return Template( frame, "getRelativePath" ) or ""
end
function p.getScheme( frame )
    return Template( frame, "getScheme" ) or ""
end
function p.getSortkey( frame )
    return Template( frame, "getSortkey" ) or ""
end
function p.getTLD( frame )
    return Template( frame, "getTLD" ) or ""
end
function p.getTop2domain( frame )
    return Template( frame, "getTop2domain" ) or ""
end
function p.getTop3domain( frame )
    return Template( frame, "getTop3domain" ) or ""
end
function p.isAuthority( frame )
    return Template( frame, "isAuthority" ) and "1" or ""
end
function p.isDomain( frame )
    return Template( frame, "isDomain" ) and "1" or ""
end
function p.isDomainExample( frame )
    return Template( frame, "isDomainExample" ) and "1" or ""
end
function p.isDomainInt( frame )
    return Template( frame, "isDomainInt" ) and "1" or ""
end
function p.isHost( frame )
    return Template( frame, "isHost" ) and "1" or ""
end
function p.isHostPathResource( frame )
    return Template( frame, "isHostPathResource" ) and "1" or ""
end
function p.isIP( frame )
    return Template( frame, "isIP" ) or ""
end
function p.isIPlocal( frame )
    return Template( frame, "isIPlocal" ) and "1" or ""
end
function p.isIPv4( frame )
    return Template( frame, "isIPv4" ) and "1" or ""
end
function p.isIPv6( frame )
    return Template( frame, "isIPv6" ) and "1" or ""
end
function p.isMailAddress( frame )
    return Template( frame, "isMailAddress" ) and "1" or ""
end
function p.isMailLink( frame )
    return Template( frame, "isMailLink" ) and "1" or ""
end
function p.isProtocolDialog( frame )
    return Template( frame, "isProtocolDialog" ) and "1" or ""
end
function p.isProtocolWiki( frame )
    return Template( frame, "isProtocolWiki" ) and "1" or ""
end
function p.isResourceURL( frame )
    return Template( frame, "isResourceURL" ) and "1" or ""
end
function p.isSuspiciousURL( frame )
    return Template( frame, "isSuspiciousURL" ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return Template( frame, "isUnescapedURL", 2 ) and "1" or ""
end
function p.isWebURL( frame )
    return Template( frame, "isWebURL" ) and "1" or ""
end
function p.wikiEscapeURL( frame )
    return Template( frame, "wikiEscapeURL" )
end

p.failsafe = function ( frame )
    -- Versioning interface; accepts a frame or a plain string
    local s = type( frame )
    local since
    if s == "table" then
        since = frame.args[ 1 ]
    elseif s == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            since = false
        end
    end
    return Failsafe.failsafe( since )  or  ""
end

function p.URLutil()
    -- Export: Lua access to the library table
    return URLutil
end

return p