Modul:URLutil
Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden
--[=[ URLutil 2013-04-15
Utilities for URL etc. on www.
* getAuthority()
* getHost()
* getScheme()
* isAuthority()
* isDomain()
* isHost()
* isIP()
* isIPv4()
* isIPv6()
* isMailAddress()
* isPort()
* isUnescapedURL()
* isWebURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wikisyntax escaping anyway.
Functions are deliberately not "local", so other modules can require
this module and call them directly.
We return an object with small stub functions to call the real ones
so that the functions can be called from templates also.
----
Based upon w:en:Special:Permalink/542839577?title=Module:IPAddress 2013-03-01
Unit tests at :en:Module:IPAddress/tests
]=]



-- Extract the authority part ("host" or "host:port") from a URL.
-- Parameter:
--     url  -- string; absolute or protocol-relative URL ("scheme://..." or "//...")
-- Returns the lowercased host, followed by ":port" if a port was given,
-- or false if url is not a string or no valid authority was found.
function _getAuthority( url )
    if type( url ) ~= "string" then
        return false
    end
    -- A "/" is appended so that a URL without any path still terminates
    -- the authority for the pattern below.
    local host, colon, port = mw.ustring.match( url .. "/",
                                                "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" )
    if not _isHost( host ) then
        return false
    end
    host = mw.ustring.lower( host )
    if colon == ":" then
        -- A colon requires a port number that does not begin with 0.
        if port:find( "^[1-9]" ) then
            return ( host .. ":" .. port )
        end
    elseif #port == 0 then
        return host
    end
    return false
end -- _getAuthority()



-- Extract the host (authority without port) from a URL.
-- Parameter:
--     url  -- string; see _getAuthority()
-- Returns the lowercased host, or false if no valid authority was found.
function _getHost( url )
    if type( url ) == "string" then
        local auth = _getAuthority( url )
        if auth then
            -- auth is a string here; strip an optional ":port" suffix
            return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
        end
    end
    return false
end -- _getHost()



-- Extract the scheme prefix from a URL.
-- Parameter:
--     url  -- string
-- Returns the lowercased scheme including "://" (the scheme needs at least
-- three letters), "//" for a protocol-relative URL, or false otherwise.
function _getScheme( url )
    if type( url ) == "string" then
        local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    return prot:lower() .. "://"
                end
            elseif #prot == 0 then
                return "//"
            end
        end
    end
    return false
end -- _getScheme()



-- Check whether a string is a valid authority: host with optional ":port".
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns a true value (see _isHost()) if valid, false otherwise.
function _isAuthority( s )
    if type( s ) ~= "string" then
        return false
    end
    local host, colon, port = mw.ustring.match( s,
                                                "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
    if colon == ":" then
        if not _isPort( ":" .. port ) then
            return false
        end
    elseif type( port ) == "string" then
        -- no colon: any digits captured separately still belong to the host
        host = host .. port
    end
    return _isHost( host )
end -- _isAuthority()



-- Check whether a string looks like a domain name.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s consists of labels followed by a top-level part of at
-- least two letters, begins with an alphanumeric character and contains
-- no empty label (".."); false otherwise.
function _isDomain( s )
    if type( s ) == "string" then
        -- keep everything before the final ".tld" for further inspection
        s = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
        if type( s ) == "string" and mw.ustring.find( s, "^%w" ) then
            -- plain search: two adjacent dots mean an empty label
            return not mw.ustring.find( s, "..", 1, true )
        end
    end
    return false
end -- _isDomain()



-- Check whether a string is a host: domain name or IP address.
-- Returns true for a domain, "4" or "6" for an IP address, false otherwise.
function _isHost( s )
    return _isDomain( s ) or _isIP( s )
end -- _isHost()



-- Check whether a string is an IP address.
-- Returns "4" for IPv4, "6" for IPv6, false otherwise.
function _isIP( s )
    return _isIPv4( s ) and "4" or _isIPv6( s ) and "6"
end -- _isIP()



-- Check whether a string is an IPv4 address in dotted decimal notation.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if all four parts are in 0..255 without leading zero.
function _isIPv4( s )
    local function legal( n )
        -- A failed match yields nil, which is mapped to the illegal 256.
        -- Note that 0 is a true value in Lua, hence the explicit comparison.
        return ( tonumber( n ) or 256 ) < 256 and not n:find( "^0%d" )
    end
    if type( s ) ~= "string" then
        return false
    end
    local p1, p2, p3, p4 = s:match( "^%s*(%d+)%.(%d+)%.(%d+)%.(%d+)%s*$" )
    return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
end -- _isIPv4()



-- Check whether a string is an IPv6 address (unbracketed, hex groups only).
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated, as in _isIPv4()
-- Returns true if valid, false otherwise.
function _isIPv6( s )
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- Trim BEFORE validating characters; trimming afterwards would have
    -- no effect because whitespace is rejected by the character check.
    s = mw.text.trim( s )
    if s:len() == 0
       or s:find( "[^:%x]" )    -- only colon and hex digits are legal chars
       or s:find( "^:[^:]" )    -- can begin or end with :: but not with single :
       or s:find( "[^:]:$" )
       or s:find( ":::" ) then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false            -- at most one ::
    end
    s = s:gsub( "^:?", ":" )    -- prepend : if needed
    s, groups = s:gsub( ":%x%x?%x?%x?", "" )    -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
           and ( s:len() == 0 or
                 ( dcolon == 1 and s == ":" ) )
           -- might be one dangling : if original ended with ::
end -- _isIPv6()



-- Check whether a string looks like an e-mail address.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s is "local@domain" with a valid domain and nothing else.
function _isMailAddress( s )
    if type( s ) == "string" then
        -- Anchored at the end, so that trailing text after the domain
        -- (e.g. "user@example.com/path") is rejected.
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return _isDomain( s )
    end
    return false
end -- _isMailAddress()



-- Check whether a string is a port specification with leading colon.
-- Parameter:
--     port  -- string such as ":8080"; surrounding whitespace is tolerated
-- Returns true if the digits after the colon do not begin with 0.
function _isPort( port )
    if type( port ) == "string" then
        return port:find( "^%s*:[1-9][0-9]*%s*$" ) ~= nil
    end
    return false
end -- _isPort()



-- Check whether a web URL contains characters that need escaping
-- in wikitext: "[", "|" or "]".
-- Parameters:
--     url       -- string
--     trailing  -- if this is a string, the result is always false
--                  NOTE(review): purpose of this parameter is not visible
--                  here; presumably reserved for future use -- confirm
-- Returns true if url is a web URL containing one of these characters.
function _isUnescapedURL( url, trailing )
    if type( trailing ) ~= "string" then
        if _isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- _isUnescapedURL()



-- Check whether a string is a web URL: scheme and authority present,
-- and no whitespace between non-whitespace characters.
-- Parameter:
--     url  -- string
-- Returns true or false.
function _isWebURL( url )
    if _getScheme( url ) and _getAuthority( url ) then
        if not url:match( "%S%s+%S" ) then
            return true
        end
    end
    return false
end -- _isWebURL()



-- Provide template access.
-- Each stub reads the unnamed template parameters from frame.args and
-- returns a string: the extracted value, or "1" for success on checks,
-- and the empty string on failure.
local p = {}

function p.getAuthority( frame )
    return _getAuthority( frame.args[ 1 ] ) or ""
end
function p.getHost( frame )
    return _getHost( frame.args[ 1 ] ) or ""
end
function p.getScheme( frame )
    return _getScheme( frame.args[ 1 ] ) or ""
end
function p.isAuthority( frame )
    return _isAuthority( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomain( frame )
    return _isDomain( frame.args[ 1 ] ) and "1" or ""
end
function p.isHost( frame )
    return _isHost( frame.args[ 1 ] ) and "1" or ""
end
function p.isIP( frame )
    -- yields "4" or "6" rather than "1"
    return _isIP( frame.args[ 1 ] ) or ""
end
function p.isIPv4( frame )
    return _isIPv4( frame.args[ 1 ] ) and "1" or ""
end
function p.isIPv6( frame )
    return _isIPv6( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailAddress( frame )
    return _isMailAddress( frame.args[ 1 ] ) and "1" or ""
end
function p.isPort( frame )
    return _isPort( frame.args[ 1 ] ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return _isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or ""
end
function p.isWebURL( frame )
    return _isWebURL( frame.args[ 1 ] ) and "1" or ""
end

return p