Modul:URLutil: Unterschied zwischen den Versionen
w>PerfektesChaos (REfix) |
w>PerfektesChaos (update) |
||
Zeile 1: | Zeile 1: | ||
--[=[ URLutil 2013- | --[=[ URLutil 2013-07-10 | ||
Utilities for URL etc. on www. | Utilities for URL etc. on www. | ||
* getAuthority() | * getAuthority() | ||
Zeile 7: | Zeile 7: | ||
* getTLD() | * getTLD() | ||
* getTop2domain() | * getTop2domain() | ||
* getTop3domain() | |||
* isAuthority() | * isAuthority() | ||
* isDomain() | * isDomain() | ||
* isDomainExample() | |||
* isHost() | * isHost() | ||
* isIP() | * isIP() | ||
* isIPlocal() | |||
* isIPv4() | * isIPv4() | ||
* isIPv6() | * isIPv6() | ||
Zeile 31: | Zeile 34: | ||
-- table for export | -- table for export | ||
local URLutil = {} | local URLutil = {} | ||
local getTopDomain = function ( url, mode ) | |||
local host = URLutil.getHost( url ) | |||
if host then | |||
local pattern = "[%w%%]+%.[a-z][a-z]+)$" | |||
if mode == 3 then | |||
pattern = "[%w%%]+%." .. pattern | |||
end | |||
host = mw.ustring.match( "." .. host, "%.(" .. pattern ) | |||
if host then | |||
return host | |||
end | |||
end | |||
return false | |||
end -- getTopDomain() | |||
Zeile 106: | Zeile 126: | ||
URLutil.getTop2domain = function ( url ) | URLutil.getTop2domain = function ( url ) | ||
return getTopDomain( url, 2 ) | |||
end -- URLutil.getTop2domain() | end -- URLutil.getTop2domain() | ||
URLutil.getTop3domain = function ( url ) | |||
return getTopDomain( url, 3 ) | |||
end -- URLutil.getTop3domain() | |||
Zeile 152: | Zeile 170: | ||
return false | return false | ||
end -- URLutil.isDomain() | end -- URLutil.isDomain() | ||
URLutil.isDomainExample = function ( url ) | |||
-- RFC 2606: example.com example.net example.org example.edu | |||
local r = getTopDomain( url, 2 ) | |||
if r then | |||
local s = r:lower():match( "^example%.([a-z][a-z][a-z])$" ) | |||
if s then | |||
r = ( s == "com" or | |||
s == "edu" or | |||
s == "net" or | |||
s == "org" ) | |||
else | |||
r = false | |||
end | |||
end | |||
return r | |||
end -- URLutil.isDomainExample() | |||
Zeile 164: | Zeile 201: | ||
return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6 | return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6 | ||
end -- URLutil.isIP() | end -- URLutil.isIP() | ||
URLutil.isIPlocal = function ( s ) | |||
-- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735) | |||
local r = false | |||
local num = s:match( "^ *([01][0-9]*)%." ) | |||
if num then | |||
num = tonumber( num ) | |||
if num == 0 then | |||
r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" ) | |||
elseif num == 10 or num == 127 then | |||
-- loopback; private/local host: 127.0.0.1 | |||
r = URLutil.isIPv4( s ) | |||
elseif num == 169 then | |||
-- 169.254.*.* | |||
elseif num == 172 then | |||
-- 172.(16...31).*.* | |||
num = s:match( "^ *0*172%.([0-9]+)%." ) | |||
if num then | |||
num = tonumber( num ) | |||
if num >= 16 and num <= 31 then | |||
r = URLutil.isIPv4( s ) | |||
end | |||
end | |||
elseif beg == 192 then | |||
-- 192.168.*.* | |||
num = s:match( "^ *0*192%.([0-9]+)%." ) | |||
if num then | |||
num = tonumber( num ) | |||
if num == 168 then | |||
r = URLutil.isIPv4( s ) | |||
end | |||
end | |||
end | |||
end | |||
if r then | |||
r = true | |||
end | |||
return r | |||
end -- URLutil.isIPlocal() | |||
Zeile 361: | Zeile 439: | ||
function p.getTop2domain( frame ) | function p.getTop2domain( frame ) | ||
return URLutil.getTop2domain( frame.args[ 1 ] ) or "" | return URLutil.getTop2domain( frame.args[ 1 ] ) or "" | ||
end | |||
function p.getTop3domain( frame ) | |||
return URLutil.getTop3domain( frame.args[ 1 ] ) or "" | |||
end | end | ||
function p.isAuthority( frame ) | function p.isAuthority( frame ) | ||
Zeile 367: | Zeile 448: | ||
function p.isDomain( frame ) | function p.isDomain( frame ) | ||
return URLutil.isDomain( frame.args[ 1 ] ) and "1" or "" | return URLutil.isDomain( frame.args[ 1 ] ) and "1" or "" | ||
end | |||
function p.isDomainExample( frame ) | |||
return URLutil.isDomainExample( frame.args[ 1 ] ) and "1" or "" | |||
end | end | ||
function p.isHost( frame ) | function p.isHost( frame ) | ||
Zeile 373: | Zeile 457: | ||
function p.isIP( frame ) | function p.isIP( frame ) | ||
return URLutil.isIP( frame.args[ 1 ] ) or "" | return URLutil.isIP( frame.args[ 1 ] ) or "" | ||
end | |||
function p.isIPlocal( frame ) | |||
return URLutil.isIPlocal( frame.args[ 1 ] ) and "1" or "" | |||
end | end | ||
function p.isIPv4( frame ) | function p.isIPv4( frame ) |
Version vom 11. Juli 2013, 20:45 Uhr
Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden
--[=[ URLutil 2013-07-10
Utilities for URL etc. on www.
* getAuthority()
* getHost()
* getPort()
* getScheme()
* getTLD()
* getTop2domain()
* getTop3domain()
* isAuthority()
* isDomain()
* isDomainExample()
* isHost()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog
* isProtocolWiki
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway.
]=]



-- table for export
local URLutil = {}



-- Extract the last 2 or 3 dot-separated labels of the host of a URL.
--     url   -- URL string; its host is obtained via URLutil.getHost()
--     mode  -- 3 to request three labels; any other value yields two
-- Returns the matched domain string, or false if there is no host or
-- the host does not end with the requested number of labels.
-- The final label must consist of at least two lowercase ASCII letters.
-- NOTE: the other labels are matched by [%w%%]+ and therefore must not
--       contain a hyphen directly in the requested part.
local getTopDomain = function ( url, mode )
    local host = URLutil.getHost( url )
    if host then
        -- The opening parenthesis of the capture is supplied below,
        -- so that an optional third label can be prepended first.
        local pattern = "[%w%%]+%.[a-z][a-z]+)$"
        if mode == 3 then
            pattern = "[%w%%]+%." .. pattern
        end
        -- A leading dot guarantees that the match starts at a label boundary.
        host = mw.ustring.match( "." .. host, "%.(" .. pattern )
        if host then
            return host
        end
    end
    return false
end -- getTopDomain()



-- Retrieve the authority part (host, optionally with ":port") of a URL.
--     url  -- URL with scheme or protocol-relative "//"
-- Returns the lowercased host, followed by ":port" if a port beginning
-- with a digit 1-9 was given; false for any other input.
URLutil.getAuthority = function ( url )
    if type( url ) == "string" then
        -- A slash is appended so that a URL without path still terminates.
        local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w%.%%-]+)(:?)([%d]*)/" )
        if URLutil.isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                if port:find( "^[1-9]" ) then
                    return ( host .. ":" .. port )
                end
            elseif #port == 0 then
                return host
            end
        end
    end
    return false
end -- URLutil.getAuthority()



-- Retrieve the host of a URL, without any port.
-- Returns the host string, or false if no authority could be found.
URLutil.getHost = function ( url )
    local auth = URLutil.getAuthority( url )
    if auth then
        return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
    end
    return false
end -- URLutil.getHost()



-- Retrieve the port of a URL.
-- Returns the port as a number, or false if no port was specified.
URLutil.getPort = function ( url )
    url = URLutil.getAuthority( url )
    if url then
        url = url:match( ":([1-9][0-9]*)$" )
        if type( url ) == "string" then
            return tonumber( url )
        end
    end
    return false
end -- URLutil.getPort()



-- Retrieve the scheme of a URL.
-- Returns the lowercased scheme followed by "://" if the scheme has more
-- than two letters, "//" for a protocol-relative URL, otherwise false.
URLutil.getScheme = function ( url )
    if type( url ) == "string" then
        local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    return prot:lower() .. "://"
                end
            elseif #prot == 0 then
                return "//"
            end
        end
    end
    return false
end -- URLutil.getScheme()



-- Retrieve the top level domain of the host of a URL.
-- Returns the TLD (at least two lowercase letters), or false.
URLutil.getTLD = function ( url )
    local host = URLutil.getHost( url )
    if host then
        host = mw.ustring.match( host, "[%w]+%.([a-z][a-z]+)$" )
        return host or false
    end
    return false
end -- URLutil.getTLD()



-- Retrieve the last two labels of the host, e.g. "example.org", or false.
URLutil.getTop2domain = function ( url )
    return getTopDomain( url, 2 )
end -- URLutil.getTop2domain()



-- Retrieve the last three labels of the host, e.g. "www.example.org",
-- or false.
URLutil.getTop3domain = function ( url )
    return getTopDomain( url, 3 )
end -- URLutil.getTop3domain()



-- Check whether a string is a host, optionally followed by ":port".
-- The port, if a colon is present, must be a number not starting with 0.
-- Returns the result of URLutil.isHost() for the host part, or false.
URLutil.isAuthority = function ( s )
    if type( s ) == "string" then
        local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
        if colon == ":" then
            port = port:match( "^[1-9][0-9]*$" )
            if type( port ) ~= "string" then
                return false
            end
        elseif port ~= "" then
            -- Also reached if the pattern did not match at all (port is nil).
            return false
        end
        return URLutil.isHost( host )
    end
    return false
end -- URLutil.isAuthority()



-- Check whether a string looks like a domain name.
-- Requires a final label of at least two ASCII letters, a leading
-- alphanumeric character, and no two consecutive dots.
-- Returns true or false.
URLutil.isDomain = function ( s )
    if type( s ) == "string" then
        -- Keep everything in front of the last label.
        s = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
        if type( s ) == "string" then
            if mw.ustring.find( s, "^%w" ) then
                if mw.ustring.find( s, "..", 1, true ) then
                    return false
                else
                    return true
                end
            end
        end
    end
    return false
end -- URLutil.isDomain()



-- Check whether the host of a URL is a reserved example domain.
-- RFC 2606: example.com example.net example.org example.edu
-- Returns true or false.
URLutil.isDomainExample = function ( url )
    local r = getTopDomain( url, 2 )
    if r then
        local s = r:lower():match( "^example%.([a-z][a-z][a-z])$" )
        if s then
            r = ( s == "com" or
                  s == "edu" or
                  s == "net" or
                  s == "org" )
        else
            r = false
        end
    end
    return r
end -- URLutil.isDomainExample()



-- Check whether a string is a domain name or an IP address.
-- Returns a true value (true, 4 or 6) or false.
URLutil.isHost = function ( s )
    return URLutil.isDomain( s ) or URLutil.isIP( s )
end -- URLutil.isHost()



-- Check whether a string is an IP address.
-- Returns 4 for IPv4, 6 for IPv6, otherwise false.
URLutil.isIP = function ( s )
    return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()



-- Check whether a string is a local / private IPv4 address.
-- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735);
-- link-local 169.254.*.* (RFC 3927).
--     s  -- string; anything else yields false
-- Returns true or false.
URLutil.isIPlocal = function ( s )
    local r = false
    if type( s ) ~= "string" then
        -- e.g. missing template parameter
        return false
    end
    -- All relevant first octets begin with digit 0 or 1.
    local num = s:match( "^ *([01][0-9]*)%." )
    if num then
        num = tonumber( num )
        if num == 0 then
            r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
        elseif num == 10 or num == 127 then
            -- loopback; private/local host: 127.0.0.1
            r = URLutil.isIPv4( s )
        elseif num == 169 then
            -- 169.254.*.*
            num = s:match( "^ *0*169%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num == 254 then
                    r = URLutil.isIPv4( s )
                end
            end
        elseif num == 172 then
            -- 172.(16...31).*.*
            num = s:match( "^ *0*172%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num >= 16 and num <= 31 then
                    r = URLutil.isIPv4( s )
                end
            end
        elseif num == 192 then
            -- 192.168.*.*
            num = s:match( "^ *0*192%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num == 168 then
                    r = URLutil.isIPv4( s )
                end
            end
        end
    end
    -- Normalize match results (string or nil) to a boolean.
    return ( r and true ) or false
end -- URLutil.isIPlocal()



-- Check whether a string is an IPv4 address in dotted decimal notation.
-- The first octet must not start with 0; every octet must be below 256.
-- Surrounding whitespace is tolerated.
-- Returns true or false.
URLutil.isIPv4 = function ( s )
    local function legal( n )
        return ( tonumber( n ) < 256 )
    end
    if type( s ) == "string" then
        local p1, p2, p3, p4 = s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" )
        if p1 and p2 and p3 and p4 then
            return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
    end
    return false
end -- URLutil.isIPv4()



-- Check whether a string is an IPv6 address (without brackets).
-- Returns true or false.
URLutil.isIPv6 = function ( s )
    local dcolon, groups
    if type( s ) ~= "string"
        or s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s = mw.text.trim( s )
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed, upper
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()



-- Check whether a string is an e-mail address: local part, "@", domain.
-- Returns the result of URLutil.isDomain() for the part behind "@",
-- or false if s is not a string.
URLutil.isMailAddress = function ( s )
    if type( s ) == "string" then
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return URLutil.isDomain( s )
    end
    return false
end -- URLutil.isMailAddress()



-- Check whether a string is a "mailto:" link with a valid e-mail address.
-- The scheme is matched case-insensitively.
-- Returns true or false.
URLutil.isMailLink = function ( s )
    if type( s ) == "string" then
        local addr
        s, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( s ) == "string" then
            if s:lower() == "mailto" then
                return URLutil.isMailAddress( addr )
            end
        end
    end
    return false
end -- URLutil.isMailLink()



-- Check whether a protocol specification is contained in a list.
--     prot      -- scheme, optionally followed by ":" and/or "//";
--                  a bare "//" (protocol-relative) is always accepted
--     supplied  -- space-separated list of lowercase scheme names,
--                  with a leading and a trailing space
-- Returns true or false.
local function isProtocolAccepted( prot, supplied )
    if type( prot ) == "string" then
        local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
        if slashes ~= "/" then
            if scheme == "" then
                if colon ~= ":" and slashes == "//" then
                    return true
                end
            elseif colon == ":" or slashes == "" then
                local s = supplied:match( " " .. scheme:lower() .. " " )
                if type( s ) == "string" then
                    return true
                end
            end
        end
    end
    return false
end -- isProtocolAccepted()



-- Check whether prot is one of: mailto irc ircs ssh telnet
URLutil.isProtocolDialog = function ( prot )
    return isProtocolAccepted( prot, " mailto irc ircs ssh telnet " )
end -- URLutil.isProtocolDialog()



-- Check whether prot is one of:
-- ftp ftps git http https nntp sftp svn worldwind
URLutil.isProtocolWiki = function ( prot )
    return isProtocolAccepted( prot, " ftp ftps git http https nntp sftp svn worldwind " )
end -- URLutil.isProtocolWiki()



-- Check whether a URL uses "//", "http://", "https://" or "ftp://",
-- has a valid authority, and contains no whitespace between
-- non-whitespace characters.
-- Returns true or false.
URLutil.isResourceURL = function ( url )
    local scheme = URLutil.getScheme( url )
    if scheme then
        local s = " // http:// https:// ftp:// "
        s = s:find( " " .. scheme .. " " )
        if s then
            if URLutil.getAuthority( url ) then
                if not url:match( "%S%s+%S" ) then
                    return true
                end
            end
        end
    end
    return false
end -- URLutil.isResourceURL()



-- Check whether a URL is not a resource URL, or contains syntax that
-- is likely to be an error: "@" in the authority, wiki markup
-- characters, certain Unicode space and direction marks, or a trailing
-- dot or comma.
-- Returns true if suspicious, false otherwise.
URLutil.isSuspiciousURL = function ( url )
    if URLutil.isResourceURL( url ) then
        local s = URLutil.getAuthority( url )
        -- Character class of code points: [ | ] U+2009-U+200F
        -- U+202A-U+202F U+2060 (45 is the hyphen forming the ranges).
        local pat = "[%[|%]" .. mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 ) .. "]"
        -- The class holds multi-byte characters, so it has to be matched
        -- by code point; a byte-wise find would read it as byte ranges.
        if s:find( "@" )
            or url:find( "''" )
            or mw.ustring.find( url, pat )
            or url:find( "[%.,]$" )
        then
            return true
        end
        -- TODO zero width character ??
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()



-- Check whether a web URL contains unescaped wiki syntax: [ | ]
--     url       -- URL to be checked
--     trailing  -- if this is a string, the result is always false
-- Returns true or false.
URLutil.isUnescapedURL = function ( url, trailing )
    if type( trailing ) ~= "string" then
        if URLutil.isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- URLutil.isUnescapedURL()



-- Check whether a URL has a scheme (or "//") and a valid authority,
-- and contains no whitespace between non-whitespace characters.
-- Returns true or false.
URLutil.isWebURL = function ( url )
    if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then
        if not url:match( "%S%s+%S" ) then
            return true
        end
    end
    return false
end -- URLutil.isWebURL()



-- Escape wiki syntax characters in a URL by numeric HTML entities:
--     [  ->  &#91;      |  ->  &#124;      ]  ->  &#93;
-- Returns the escaped URL; any non-string value is returned unchanged.
URLutil.wikiEscapeURL = function ( url )
    if type( url ) == "string" and url:find( "[%[|%]]" ) then
        -- Single assignment target: the replacement count is dropped.
        url = url:gsub( "%[", "&#91;" )
                 :gsub( "|", "&#124;" )
                 :gsub( "%]", "&#93;" )
    end
    return url
end -- URLutil.wikiEscapeURL()



-- Provide template access and expose URLutil table to require
-- Every wrapper reads the first unnamed parameter of the frame.
-- Failure is returned as an empty string; boolean checks return "1"
-- on success.
local p = {}

function p.getAuthority( frame )
    return URLutil.getAuthority( frame.args[ 1 ] ) or ""
end
function p.getHost( frame )
    return URLutil.getHost( frame.args[ 1 ] ) or ""
end
function p.getPort( frame )
    return URLutil.getPort( frame.args[ 1 ] ) or ""
end
function p.getScheme( frame )
    return URLutil.getScheme( frame.args[ 1 ] ) or ""
end
function p.getTLD( frame )
    return URLutil.getTLD( frame.args[ 1 ] ) or ""
end
function p.getTop2domain( frame )
    return URLutil.getTop2domain( frame.args[ 1 ] ) or ""
end
function p.getTop3domain( frame )
    return URLutil.getTop3domain( frame.args[ 1 ] ) or ""
end
function p.isAuthority( frame )
    return URLutil.isAuthority( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomain( frame )
    return URLutil.isDomain( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomainExample( frame )
    return URLutil.isDomainExample( frame.args[ 1 ] ) and "1" or ""
end
function p.isHost( frame )
    return URLutil.isHost( frame.args[ 1 ] ) and "1" or ""
end
function p.isIP( frame )
    -- yields 4 or 6 rather than "1"
    return URLutil.isIP( frame.args[ 1 ] ) or ""
end
function p.isIPlocal( frame )
    return URLutil.isIPlocal( frame.args[ 1 ] ) and "1" or ""
end
function p.isIPv4( frame )
    return URLutil.isIPv4( frame.args[ 1 ] ) and "1" or ""
end
function p.isIPv6( frame )
    return URLutil.isIPv6( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailAddress( frame )
    return URLutil.isMailAddress( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailLink( frame )
    return URLutil.isMailLink( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolDialog( frame )
    return URLutil.isProtocolDialog( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolWiki( frame )
    return URLutil.isProtocolWiki( frame.args[ 1 ] ) and "1" or ""
end
function p.isResourceURL( frame )
    return URLutil.isResourceURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isSuspiciousURL( frame )
    return URLutil.isSuspiciousURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return URLutil.isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or ""
end
function p.isWebURL( frame )
    return URLutil.isWebURL( frame.args[ 1 ] ) and "1" or ""
end
function p.wikiEscapeURL( frame )
    return URLutil.wikiEscapeURL( frame.args[ 1 ] )
end

-- Access to the library table for other modules.
function p.URLutil()
    return URLutil
end

return p