Modul:URLutil
Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden
--[=[ URLutil 2013-04-17
Utilities for URL etc. on www.
* getAuthority()
* getHost()
* getPort()
* getScheme()
* isAuthority()
* isDomain()
* isHost()
* isIP()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolWiki()
* isRessourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wiki syntax escaping anyway.

Functions are not "local", so other modules can require this module
and call them directly.
We return an object with small stub functions to call the real ones
so that the functions can be called from templates also.

Return conventions of the global functions:
* _get*() return a string (or a number for _getPort), or false on failure.
* _is*()  return true/false, except _isIP() which returns 4, 6 or false.
----
Based upon w:en:Special:Permalink/542839577?title=Module:IPAddress 2013-03-01
Unit tests at :en:Module:IPAddress/tests
]=]



-- Extract the authority ("host" or "host:port", host lowercased) of a URL.
-- The URL must contain "//" after an optional scheme; a "/" is appended
-- before matching so that URLs without a path are recognized as well.
-- Returns false if url is not a string, the host is neither a domain nor
-- an IP address, or a colon is not followed by a port starting with 1-9.
function _getAuthority( url )
    if type( url ) == "string" then
        local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" )
        if _isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                if port:find( "^[1-9]" ) then
                    return ( host .. ":" .. port )
                end
            elseif #port == 0 then
                return host
            end
        end
    end
    return false
end -- _getAuthority()



-- Extract the host (authority without port) of a URL, or false.
function _getHost( url )
    local auth = _getAuthority( url )
    if auth then
        return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
    end
    return false
end -- _getHost()



-- Extract the explicit port of a URL as a number, or false if the URL has
-- no valid authority or no port.
function _getPort( url )
    url = _getAuthority( url )
    if url then
        url = url:match( ":([1-9][0-9]*)$" )
        if type( url ) == "string" then
            return tonumber( url )
        end
    end
    return false
end -- _getPort()



-- Return the lowercased scheme including "://" (e.g. "https://"), or "//"
-- for a protocol-relative URL; false otherwise.
-- A scheme followed by a colon must have at least three letters.
function _getScheme( url )
    if type( url ) == "string" then
        local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    return prot:lower() .. "://"
                end
            elseif #prot == 0 then
                return "//"
            end
        end
    end
    return false
end -- _getScheme()



-- Is s a bare authority: a host, optionally followed by ":" and a port
-- that does not start with 0? Surrounding whitespace is tolerated.
function _isAuthority( s )
    if type( s ) == "string" then
        local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
        if colon == ":" then
            port = port:match( "^[1-9][0-9]*$" )
            if type( port ) ~= "string" then
                return false
            end
        elseif port ~= "" then
            return false
        end
        return _isHost( host )
    end
    return false
end -- _isAuthority()



-- Is s a domain name: one or more labels followed by a top-level label of
-- at least two ASCII letters? The part before the TLD must start and end
-- with an alphanumeric character and must not contain "..".
function _isDomain( s )
    if type( s ) == "string" then
        -- "*" rather than "+": a single-character label ("t.co") is valid.
        s = mw.ustring.match( s, "^%s*([%w%.%%-]*%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
        if type( s ) == "string" then
            if mw.ustring.find( s, "^%w" ) then
                -- plain search: ".." is two literal dots here
                if mw.ustring.find( s, "..", 1, true ) then
                    return false
                else
                    return true
                end
            end
        end
    end
    return false
end -- _isDomain()



-- Is s a domain name or an IP address?
-- Returns true, 4 or 6 when it is, false otherwise.
function _isHost( s )
    return _isDomain( s ) or _isIP( s )
end -- _isHost()



-- Returns 4 for an IPv4 address, 6 for an IPv6 address, false otherwise.
function _isIP( s )
    return _isIPv4( s ) and 4 or _isIPv6( s ) and 6
end -- _isIP()



-- Is s an IPv4 address in dotted decimal notation?
-- Each octet has one to three digits and a value below 256; the first
-- octet must not start with 0. Surrounding whitespace is tolerated.
function _isIPv4( s )
    local function legal( n )
        return ( tonumber( n ) < 256 )
    end
    if type( s ) == "string" then
        -- Every leading digit must be permitted, otherwise addresses such as
        -- 8.8.8.8 or 192.168.1.99 would be rejected.
        local p1, p2, p3, p4 = s:match( "^%s*([1-9]%d?%d?)%.(%d%d?%d?)%.(%d%d?%d?)%.(%d%d?%d?)%s*$" )
        if p1 and p2 and p3 and p4 then
            return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
    end
    return false
end -- _isIPv4()



-- Is s an IPv6 address (hex groups separated by colons, at most one "::")?
-- Surrounding whitespace is tolerated.
function _isIPv6( s )
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- Trim first; otherwise the illegal-character test below would already
    -- have rejected any padded input.
    s = mw.text.trim( s )
    if s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false -- at most one ::
    end
    s = s:gsub( "^:?", ":" ) -- prepend : if needed
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
           and ( s:len() == 0 or
                 ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- _isIPv6()



-- Is s an e-mail address: local part, "@", and a valid domain?
function _isMailAddress( s )
    if type( s ) == "string" then
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return _isDomain( s )
    end
    return false
end -- _isMailAddress()



-- Is s a "mailto:" link (scheme case-insensitive) with a valid address?
-- Surrounding whitespace is tolerated.
function _isMailLink( s )
    if type( s ) == "string" then
        local addr
        -- Anchored with optional whitespace on both sides; "$" must be the
        -- very last pattern character to act as an anchor.
        s, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( s ) == "string" then
            if s:lower() == "mailto" then
                return _isMailAddress( addr )
            end
        end
    end
    return false
end -- _isMailLink()



-- OBSOLETED. Is port a colon followed by a number not starting with 0?
function _isPort( port )
    if type( port ) == "string" then
        if port:find( "^%s*:[1-9][0-9]*%s*$" ) then
            return true -- maybe numeric > 0
        end
    end
    return false
end -- _isPort()



-- Is prot one of the listed protocols, or the protocol-relative "//"?
-- Accepted forms are "scheme", "scheme:" and "scheme://" (a single slash
-- is rejected), matched case-insensitively against a fixed list.
function _isProtocolWiki( prot )
    if type( prot ) == "string" then
        local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
        if slashes ~= "/" then
            if scheme == "" then
                if colon ~= ":" and slashes == "//" then
                    return true
                end
            elseif colon == ":" or slashes == "" then
                local s = " ftp git http https irc ircs mms nntp svn telnet worldwind "
                s = s:match( " " .. scheme:lower() .. " " )
                if type( s ) == "string" then
                    return true
                end
            end
        end
    end
    return false
end -- _isProtocolWiki()



-- Is url a resource URL: scheme "//", "http://", "https://" or "ftp://",
-- a valid authority, and no whitespace between non-space characters?
function _isRessourceURL( url )
    local scheme = _getScheme( url )
    if scheme then
        local s = " // http:// https:// ftp:// "
        s = s:find( " " .. scheme .. " ", 1, true )
        if s then
            if _getAuthority( url ) then
                if not url:match( "%S%s+%S" ) then
                    return true
                end
            end
        end
    end
    return false
end -- _isRessourceURL()



-- Does url look suspicious? Anything that is not a resource URL is
-- suspicious; a resource URL is suspicious if it contains "@" in the
-- authority, "''", one of "[", "|", "]", one of the code points
-- U+2009-U+200F, U+202A-U+202F or U+2060, or ends with "." or ",".
function _isSuspiciousURL( url )
    if _isRessourceURL( url ) then
        local s = _getAuthority( url )
        local pat = "[%[|%]" ..
                    mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 ) ..
                    "]"
        -- The set contains multi-byte characters and code point ranges, so
        -- it has to be evaluated by the Unicode-aware mw.ustring.find;
        -- byte-oriented string.find would compare single UTF-8 bytes.
        if s:find( "@" )
            or url:find( "''" )
            or mw.ustring.find( url, pat )
            or url:find( "[%.,]$" )
        then
            return true
        end
        -- TODO zero width character
        return false
    end
    return true
end -- _isSuspiciousURL()



-- Is url a web URL containing an unescaped "[", "|" or "]"?
-- Always returns false when trailing is a string.
-- NOTE(review): the intended meaning of "trailing" is not visible in this
-- module - confirm against callers.
function _isUnescapedURL( url, trailing )
    if type( trailing ) ~= "string" then
        if _isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- _isUnescapedURL()



-- Is url a web URL: any valid scheme (or "//"), a valid authority, and no
-- whitespace between non-space characters?
function _isWebURL( url )
    if _getScheme( url ) and _getAuthority( url ) then
        if not url:match( "%S%s+%S" ) then
            return true
        end
    end
    return false
end -- _isWebURL()



-- Provide template access:
-- get*() yield the result or "" on failure, is*() yield "1" or ""
-- (isIP() yields 4, 6 or "").
local p = {}

function p.getAuthority( frame )
    return _getAuthority( frame.args[ 1 ] ) or ""
end
function p.getHost( frame )
    return _getHost( frame.args[ 1 ] ) or ""
end
function p.getPort( frame )
    return _getPort( frame.args[ 1 ] ) or ""
end
function p.getScheme( frame )
    return _getScheme( frame.args[ 1 ] ) or ""
end
function p.isAuthority( frame )
    return _isAuthority( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomain( frame )
    return _isDomain( frame.args[ 1 ] ) and "1" or ""
end
function p.isHost( frame )
    return _isHost( frame.args[ 1 ] ) and "1" or ""
end
function p.isIP( frame )
    return _isIP( frame.args[ 1 ] ) or ""
end
function p.isIPv4( frame )
    return _isIPv4( frame.args[ 1 ] ) and "1" or ""
end
function p.isIPv6( frame )
    return _isIPv6( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailAddress( frame )
    return _isMailAddress( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailLink( frame )
    return _isMailLink( frame.args[ 1 ] ) and "1" or ""
end
function p.isPort( frame )
    -- OBSOLETED
    return _isPort( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolWiki( frame )
    return _isProtocolWiki( frame.args[ 1 ] ) and "1" or ""
end
function p.isRessourceURL( frame )
    return _isRessourceURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isSuspiciousURL( frame )
    return _isSuspiciousURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return _isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or ""
end
function p.isWebURL( frame )
    return _isWebURL( frame.args[ 1 ] ) and "1" or ""
end

return p