Modul:URLutil: Unterschied zwischen den Versionen
w>PerfektesChaos (update) |
w>PerfektesChaos (update) |
||
Zeile 1: | Zeile 1: | ||
--[=[ URLutil 2013-04- | --[=[ URLutil 2013-04-24 | ||
Utilities for URL etc. on www. | Utilities for URL etc. on www. | ||
* getAuthority() | * getAuthority() | ||
Zeile 15: | Zeile 15: | ||
* isProtocolDialog | * isProtocolDialog | ||
* isProtocolWiki | * isProtocolWiki | ||
* | * isResourceURL() | ||
* isSuspiciousURL() | * isSuspiciousURL() | ||
* isUnescapedURL() | * isUnescapedURL() | ||
Zeile 23: | Zeile 23: | ||
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. | IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. | ||
Other modules can require this module and call functions via an object. | |||
We return an object with small stub functions to call the real ones | We return an object with small stub functions to call the real ones | ||
so that the functions can be called from templates also. | so that the functions can be called from templates also. | ||
Zeile 34: | Zeile 33: | ||
-- table for export | |||
local URLutil = {} | |||
function | |||
URLutil.getAuthority = function ( url ) | |||
if type( url ) == "string" then | if type( url ) == "string" then | ||
local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" ) | local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" ) | ||
if | if URLutil.isHost( host ) then | ||
host = mw.ustring.lower( host ) | host = mw.ustring.lower( host ) | ||
if colon == ":" then | if colon == ":" then | ||
Zeile 50: | Zeile 53: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.getAuthority() | ||
function | URLutil.getHost = function ( url ) | ||
local auth = | local auth = URLutil.getAuthority( url ) | ||
if auth then | if auth then | ||
return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" ) | return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" ) | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.getHost() | ||
function | URLutil.getPort = function ( url ) | ||
url = | url = URLutil.getAuthority( url ) | ||
if url then | if url then | ||
url = url:match( ":([1-9][0-9]*)$" ) | url = url:match( ":([1-9][0-9]*)$" ) | ||
Zeile 73: | Zeile 76: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.getPort() | ||
function | URLutil.getScheme = function ( url ) | ||
if type( url ) == "string" then | if type( url ) == "string" then | ||
local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" ) | local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" ) | ||
Zeile 91: | Zeile 94: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.getScheme() | ||
function | URLutil.isAuthority = function ( s ) | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" ) | local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" ) | ||
Zeile 106: | Zeile 109: | ||
return false | return false | ||
end | end | ||
return | return URLutil.isHost( host ) | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isAuthority() | ||
function | URLutil.isDomain = function ( s ) | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
s = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" ) | s = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" ) | ||
Zeile 127: | Zeile 130: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isDomain() | ||
function | URLutil.isHost = function ( s ) | ||
return | return URLutil.isDomain( s ) or URLutil.isIP( s ) | ||
end -- | end -- URLutil.isHost() | ||
function | URLutil.isIP = function ( s ) | ||
return | return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6 | ||
end -- | end -- URLutil.isIP() | ||
function | URLutil.isIPv4 = function ( s ) | ||
local function legal( n ) | local function legal( n ) | ||
return ( tonumber( n ) < 256 ) | return ( tonumber( n ) < 256 ) | ||
Zeile 154: | Zeile 157: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isIPv4() | ||
function | URLutil.isIPv6 = function ( s ) | ||
local dcolon, groups | local dcolon, groups | ||
if type( s ) ~= "string" | if type( s ) ~= "string" | ||
Zeile 179: | Zeile 182: | ||
( dcolon == 0 and groups == 8 ) ) | ( dcolon == 0 and groups == 8 ) ) | ||
and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with :: | and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with :: | ||
end -- | end -- URLutil.isIPv6() | ||
function | URLutil.isMailAddress = function ( s ) | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" ) | s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" ) | ||
return | return URLutil.isDomain( s ) | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isMailAddress() | ||
function | URLutil.isMailLink = function ( s ) | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
local addr | local addr | ||
Zeile 199: | Zeile 202: | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
if s:lower() == "mailto" then | if s:lower() == "mailto" then | ||
return | return URLutil.isMailAddress( addr ) | ||
end | end | ||
end | end | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isMailLink() | ||
Zeile 229: | Zeile 232: | ||
function | URLutil.isProtocolDialog = function ( prot ) | ||
return isProtocolAccepted( prot, " mailto irc ircs telnet " ) | return isProtocolAccepted( prot, " mailto irc ircs telnet " ) | ||
end -- | end -- URLutil.isProtocolDialog() | ||
function | URLutil.isProtocolWiki = function ( prot ) | ||
return isProtocolAccepted( prot, | return isProtocolAccepted( prot, | ||
" ftp git http https nntp svn worldwind " ) | " ftp git http https nntp svn worldwind " ) | ||
end -- | end -- URLutil.isProtocolWiki() | ||
function | URLutil.isResourceURL = function ( url ) | ||
local scheme = | local scheme = URLutil.getScheme( url ) | ||
if scheme then | if scheme then | ||
local s = " // http:// https:// ftp:// " | local s = " // http:// https:// ftp:// " | ||
s = s:find( " " .. scheme .. " " ) | s = s:find( " " .. scheme .. " " ) | ||
if s then | if s then | ||
if | if URLutil.getAuthority( url ) then | ||
if not url:match( "%S%s+%S" ) then | if not url:match( "%S%s+%S" ) then | ||
return true | return true | ||
Zeile 256: | Zeile 259: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isResourceURL() | ||
function | URLutil.isSuspiciousURL = function ( url ) | ||
if | if URLutil.isResourceURL( url ) then | ||
local s = | local s = URLutil.getAuthority( url ) | ||
local pat = "[%[|%]" .. | local pat = "[%[|%]" .. | ||
mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 ) | mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 ) | ||
Zeile 276: | Zeile 279: | ||
end | end | ||
return true | return true | ||
end -- | end -- URLutil.isSuspiciousURL() | ||
function | URLutil.isUnescapedURL = function ( url, trailing ) | ||
if type( trailing ) ~= "string" then | if type( trailing ) ~= "string" then | ||
if | if URLutil.isWebURL( url ) then | ||
if url:match( "[%[|%]]" ) then | if url:match( "[%[|%]]" ) then | ||
return true | return true | ||
Zeile 289: | Zeile 292: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isUnescapedURL() | ||
function | URLutil.isWebURL = function ( url ) | ||
if | if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then | ||
if not url:match( "%S%s+%S" ) then | if not url:match( "%S%s+%S" ) then | ||
return true | return true | ||
Zeile 300: | Zeile 303: | ||
end | end | ||
return false | return false | ||
end -- | end -- URLutil.isWebURL() | ||
-- Provide template access | -- Provide template access and expose URLutil table to require | ||
local p = {} | local p = {} | ||
function p.getAuthority( frame ) | function p.getAuthority( frame ) | ||
return | return URLutil.getAuthority( frame.args[ 1 ] ) or "" | ||
end | end | ||
function p.getHost( frame ) | function p.getHost( frame ) | ||
return | return URLutil.getHost( frame.args[ 1 ] ) or "" | ||
end | end | ||
function p.getPort( frame ) | function p.getPort( frame ) | ||
return | return URLutil.getPort( frame.args[ 1 ] ) or "" | ||
end | end | ||
function p.getScheme( frame ) | function p.getScheme( frame ) | ||
return | return URLutil.getScheme( frame.args[ 1 ] ) or "" | ||
end | end | ||
function p.isAuthority( frame ) | function p.isAuthority( frame ) | ||
return | return URLutil.isAuthority( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isDomain( frame ) | function p.isDomain( frame ) | ||
return | return URLutil.isDomain( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isHost( frame ) | function p.isHost( frame ) | ||
return | return URLutil.isHost( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isIP( frame ) | function p.isIP( frame ) | ||
return | return URLutil.isIP( frame.args[ 1 ] ) or "" | ||
end | end | ||
function p.isIPv4( frame ) | function p.isIPv4( frame ) | ||
return | return URLutil.isIPv4( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isIPv6( frame ) | function p.isIPv6( frame ) | ||
return | return URLutil.isIPv6( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isMailAddress( frame ) | function p.isMailAddress( frame ) | ||
return | return URLutil.isMailAddress( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isMailLink( frame ) | function p.isMailLink( frame ) | ||
return | return URLutil.isMailLink( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isProtocolDialog( frame ) | function p.isProtocolDialog( frame ) | ||
return | return URLutil.isProtocolDialog( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isProtocolWiki( frame ) | function p.isProtocolWiki( frame ) | ||
return | return URLutil.isProtocolWiki( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p. | function p.isResourceURL( frame ) | ||
return | return URLutil.isResourceURL( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isSuspiciousURL( frame ) | function p.isSuspiciousURL( frame ) | ||
return | return URLutil.isSuspiciousURL( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isUnescapedURL( frame ) | function p.isUnescapedURL( frame ) | ||
return | return URLutil.isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or "" | ||
end | end | ||
function p.isWebURL( frame ) | function p.isWebURL( frame ) | ||
return | return URLutil.isWebURL( frame.args[ 1 ] ) and "1" or "" | ||
end | |||
function p.URLutil() | |||
return URLutil | |||
end | end | ||
return p | return p |
Version vom 24. April 2013, 19:53 Uhr
Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden
--[=[ URLutil 2013-04-24
Utilities for URL etc. on www.
* getAuthority()
* getHost()
* getPort()
* getScheme()
* isAuthority()
* isDomain()
* isHost()
* isIP()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog
* isProtocolWiki
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wiki syntax escaping anyway.

Other modules can require this module and call functions via an object.
We return an object with small stub functions to call the real ones
so that the functions can be called from templates also.
----
Based upon w:en:Special:Permalink/542839577?title=Module:IPAddress 2013-03-01
Unit tests at :en:Module:IPAddress/tests
]=]



-- table for export
local URLutil = {}



-- Extract the authority part ("host" or "host:port") of a URL.
--     url  -- string; optional scheme, then "//", then host and optional port
-- Returns the lowercased host, followed by ":port" if a port was given;
-- false if url is not a string, the host is not accepted by isHost(),
-- or a colon is not followed by a port starting with 1-9.
URLutil.getAuthority = function ( url )
    if type( url ) == "string" then
        -- A slash is appended so that a URL without path still terminates
        -- the authority with "/".
        local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" )
        if URLutil.isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                if port:find( "^[1-9]" ) then
                    return ( host .. ":" .. port )
                end
            elseif #port == 0 then
                return host
            end
        end
    end
    return false
end -- URLutil.getAuthority()



-- Extract the host of a URL (authority without port).
--     url  -- string, see getAuthority()
-- Returns the lowercased host, or false if no valid authority was found.
URLutil.getHost = function ( url )
    local auth = URLutil.getAuthority( url )
    if auth then
        return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
    end
    return false
end -- URLutil.getHost()



-- Extract the port number of a URL.
--     url  -- string, see getAuthority()
-- Returns the port as a number, or false if the URL has no valid
-- authority or no explicit port.
URLutil.getPort = function ( url )
    url = URLutil.getAuthority( url )
    if url then
        url = url:match( ":([1-9][0-9]*)$" )
        if type( url ) == "string" then
            return tonumber( url )
        end
    end
    return false
end -- URLutil.getPort()



-- Extract the scheme of a URL.
--     url  -- string
-- Returns the lowercased scheme including "://" (scheme names need at
-- least 3 letters), "//" for a protocol-relative URL, or false otherwise.
URLutil.getScheme = function ( url )
    if type( url ) == "string" then
        local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    return prot:lower() .. "://"
                end
            elseif #prot == 0 then
                return "//"
            end
        end
    end
    return false
end -- URLutil.getScheme()



-- Check whether a string is an authority: a host, optionally followed
-- by ":" and a port number without leading zero.
--     s  -- string; surrounding whitespace is tolerated
-- Returns the result of isHost() for the host part (truthy on success),
-- or false.
URLutil.isAuthority = function ( s )
    if type( s ) == "string" then
        local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
        if colon == ":" then
            port = port:match( "^[1-9][0-9]*$" )
            if type( port ) ~= "string" then
                return false
            end
        elseif port ~= "" then
            return false
        end
        return URLutil.isHost( host )
    end
    return false
end -- URLutil.isAuthority()



-- Check whether a string looks like a domain name.
-- The name has to end with a dot and at least two ASCII letters, has to
-- start with an alphanumeric character and must not contain "..".
--     s  -- string; surrounding whitespace is tolerated
-- Returns true or false.
URLutil.isDomain = function ( s )
    if type( s ) == "string" then
        s = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
        if type( s ) == "string" then
            if mw.ustring.find( s, "^%w" ) then
                -- plain search: two consecutive dots are not permitted
                if mw.ustring.find( s, "..", 1, true ) then
                    return false
                else
                    return true
                end
            end
        end
    end
    return false
end -- URLutil.isDomain()



-- Check whether a string is a domain name or an IP address.
--     s  -- string
-- Returns true for a domain, 4 or 6 for an IP address, or false.
URLutil.isHost = function ( s )
    return URLutil.isDomain( s ) or URLutil.isIP( s )
end -- URLutil.isHost()



-- Check whether a string is an IP address.
--     s  -- string
-- Returns 4 for IPv4, 6 for IPv6, or false.
URLutil.isIP = function ( s )
    return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()



-- Check whether a string is an IPv4 address in dotted decimal notation.
--     s  -- string; surrounding whitespace is tolerated
-- Returns true or false.
URLutil.isIPv4 = function ( s )
    local function legal( n )
              return ( tonumber( n ) < 256 )
          end
    if type( s ) == "string" then
        -- First octet: 1 to 3 digits, not starting with 0.
        -- Other octets: 1 or 2 digits, or 3 digits starting with 1 or 2.
        -- The upper bound 255 is enforced by legal().
        local p1, p2, p3, p4 = s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" )
        if p1 and p2 and p3 and p4 then
            return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
    end
    return false
end -- URLutil.isIPv4()



-- Check whether a string is an IPv6 address (colon separated hex groups,
-- at most one "::" abbreviation; no embedded IPv4, no brackets).
--     s  -- string; surrounding whitespace is tolerated
-- Returns true or false.
URLutil.isIPv6 = function ( s )
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- Trim first; otherwise surrounding whitespace would be rejected as
    -- illegal character by the check below.
    s = mw.text.trim( s )
    if s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then return false end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed, upper
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()



-- Check whether a string is an e-mail address: local part, "@", domain.
--     s  -- string; surrounding whitespace is tolerated
-- Returns true or false.
URLutil.isMailAddress = function ( s )
    if type( s ) == "string" then
        -- keep the domain part only; nil if the overall shape is wrong
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return URLutil.isDomain( s )
    end
    return false
end -- URLutil.isMailAddress()



-- Check whether a string is a mailto: link with a valid e-mail address.
--     s  -- string; surrounding whitespace is tolerated
-- Returns true or false.
URLutil.isMailLink = function ( s )
    if type( s ) == "string" then
        local addr
        -- Optional whitespace on both sides, anchored at both ends.
        s, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( s ) == "string" then
            if s:lower() == "mailto" then
                return URLutil.isMailAddress( addr )
            end
        end
    end
    return false
end -- URLutil.isMailLink()



-- Check a protocol specification against a list of scheme names.
--     prot      -- string; "scheme", "scheme:", "scheme://" or "//"
--     supplied  -- string; lowercase scheme names, each one surrounded
--                  by single spaces
-- Returns true if prot is "//" (protocol-relative) or its scheme is
-- listed in supplied; false otherwise.
local function isProtocolAccepted( prot, supplied )
    if type( prot ) == "string" then
        local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
        if slashes ~= "/" then -- single slash is never valid
            if scheme == "" then
                if colon ~= ":" and slashes == "//" then
                    return true
                end
            elseif colon == ":" or slashes == "" then
                local s = supplied:match( " " .. scheme:lower() .. " " )
                if type( s ) == "string" then
                    return true
                end
            end
        end
    end
    return false
end -- isProtocolAccepted()



-- Check whether prot is one of: mailto irc ircs telnet
-- (or protocol-relative "//"); see isProtocolAccepted().
URLutil.isProtocolDialog = function ( prot )
    return isProtocolAccepted( prot, " mailto irc ircs telnet " )
end -- URLutil.isProtocolDialog()



-- Check whether prot is one of: ftp git http https nntp svn worldwind
-- (or protocol-relative "//"); see isProtocolAccepted().
URLutil.isProtocolWiki = function ( prot )
    return isProtocolAccepted( prot,
                               " ftp git http https nntp svn worldwind " )
end -- URLutil.isProtocolWiki()



-- Check whether a URL addresses a resource via // http:// https:// ftp://
-- with a valid authority and without whitespace inside.
--     url  -- string
-- Returns true or false.
URLutil.isResourceURL = function ( url )
    local scheme = URLutil.getScheme( url )
    if scheme then
        local s = " // http:// https:// ftp:// "
        s = s:find( " " .. scheme .. " " )
        if s then
            if URLutil.getAuthority( url ) then
                if not url:match( "%S%s+%S" ) then
                    return true
                end
            end
        end
    end
    return false
end -- URLutil.isResourceURL()



-- Check whether a URL is not a resource URL or contains doubtful content:
-- "@" in the authority, two apostrophes, square brackets or pipe,
-- characters U+2009-U+200F, U+202A-U+202F, U+2060, or a trailing dot
-- or comma.
--     url  -- string
-- Returns true if suspicious, false if apparently fine.
URLutil.isSuspiciousURL = function ( url )
    if URLutil.isResourceURL( url ) then
        local s = URLutil.getAuthority( url )
        local pat = "[%[|%]" ..
                    mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 )
                    .. "]"
        -- pat holds code point ranges beyond ASCII, therefore it needs
        -- the Unicode-aware search; the byte-wise string.find would break
        -- the ranges into single bytes.
        if s:find( "@" )
           or url:find( "''" )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()



-- Check whether a web URL contains a square bracket or pipe symbol.
--     url       -- string
--     trailing  -- if this is a string, the result is always false
-- Returns true or false.
URLutil.isUnescapedURL = function ( url, trailing )
    if type( trailing ) ~= "string" then
        if URLutil.isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- URLutil.isUnescapedURL()



-- Check whether a string has a scheme accepted by getScheme() and a valid
-- authority, and no whitespace between non-whitespace characters.
--     url  -- string
-- Returns true or false.
URLutil.isWebURL = function ( url )
    if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then
        if not url:match( "%S%s+%S" ) then
            return true
        end
    end
    return false
end -- URLutil.isWebURL()



-- Provide template access and expose URLutil table to require
-- Each stub takes the frame of an #invoke call, passes the first unnamed
-- parameter to the function of the same name and returns a string for
-- wikitext: the result itself for get*() and isIP(), "1" for a positive
-- test; the empty string otherwise.
local p = {}

function p.getAuthority( frame )
    return URLutil.getAuthority( frame.args[ 1 ] ) or ""
end
function p.getHost( frame )
    return URLutil.getHost( frame.args[ 1 ] ) or ""
end
function p.getPort( frame )
    return URLutil.getPort( frame.args[ 1 ] ) or ""
end
function p.getScheme( frame )
    return URLutil.getScheme( frame.args[ 1 ] ) or ""
end
function p.isAuthority( frame )
    return URLutil.isAuthority( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomain( frame )
    return URLutil.isDomain( frame.args[ 1 ] ) and "1" or ""
end
function p.isHost( frame )
    return URLutil.isHost( frame.args[ 1 ] ) and "1" or ""
end
function p.isIP( frame )
    return URLutil.isIP( frame.args[ 1 ] ) or ""
end
function p.isIPv4( frame )
    return URLutil.isIPv4( frame.args[ 1 ] ) and "1" or ""
end
function p.isIPv6( frame )
    return URLutil.isIPv6( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailAddress( frame )
    return URLutil.isMailAddress( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailLink( frame )
    return URLutil.isMailLink( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolDialog( frame )
    return URLutil.isProtocolDialog( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolWiki( frame )
    return URLutil.isProtocolWiki( frame.args[ 1 ] ) and "1" or ""
end
function p.isResourceURL( frame )
    return URLutil.isResourceURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isSuspiciousURL( frame )
    return URLutil.isSuspiciousURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isUnescapedURL( frame )
    -- the second unnamed parameter is passed as "trailing"
    return URLutil.isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or ""
end
function p.isWebURL( frame )
    return URLutil.isWebURL( frame.args[ 1 ] ) and "1" or ""
end
-- For other modules: require( ... ).URLutil() yields the function table.
function p.URLutil()
    return URLutil
end

return p