Modul:URLutil: Unterschied zwischen den Versionen
w>PerfektesChaos (Komplettiert) |
w>PerfektesChaos (mehr Funktionen) |
||
Zeile 1: | Zeile 1: | ||
--[=[ | --[=[ URLutil 2013-04-15 | ||
Utilities for URL etc. on www. | |||
* getAuthority() | * getAuthority() | ||
* getHost() | * getHost() | ||
* getScheme() | * getScheme() | ||
* isAuthority() | |||
* isDomain() | |||
* isHost() | |||
* isIP() | * isIP() | ||
* isIPv4() | * isIPv4() | ||
* isIPv6() | * isIPv6() | ||
* isMailAddress() | * isMailAddress() | ||
* isPort() | |||
* isUnescapedURL() | |||
* isWebURL() | |||
Only [[dotted decimal]] notation for IPv4 supported. | Only [[dotted decimal]] notation for IPv4 supported. | ||
Does not support dotted hexadecimal, dotted octal, or single-number formats. | Does not support dotted hexadecimal, dotted octal, or single-number formats. | ||
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. | |||
Functions are not "local", | Functions are not "local", | ||
Zeile 25: | Zeile 32: | ||
function _getAuthority( url ) | function _getAuthority( url ) | ||
if type( url ) == "string" then | if type( url ) == "string" then | ||
local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w | local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" ) | ||
if | if _isHost( host ) then | ||
host = mw.ustring.lower( host ) | host = mw.ustring.lower( host ) | ||
if colon == ":" then | if colon == ":" then | ||
if port: | if port:find( "^[1-9]" ) then | ||
return ( host .. ":" .. port ) | return ( host .. ":" .. port ) | ||
end | end | ||
Zeile 45: | Zeile 52: | ||
if type( url ) == "string" then | if type( url ) == "string" then | ||
local auth = _getAuthority( url ) | local auth = _getAuthority( url ) | ||
if type( auth ) == "string" | if auth then -- type( auth ) == "string" | ||
return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" ) | return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" ) | ||
end | end | ||
Zeile 72: | Zeile 79: | ||
function | function _isAuthority( s ) | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" ) | |||
return | if colon == ":" then | ||
elseif type( mw.ustring.match( s, "^([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+$" ) ) == "string" then | if not _isPort( ":" .. port ) then | ||
if mw.ustring. | return false | ||
end | |||
elseif type( port ) == "string" then | |||
host = host .. port | |||
end | |||
return _isHost( host ) | |||
end | |||
return false | |||
end -- _isAuthority() | |||
function _isDomain( s ) | |||
if type( s ) == "string" then | |||
s = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" ) | |||
if type( s ) == "string" then | |||
if mw.ustring.find( s, "^%w" ) then | |||
if mw.ustring.find( s, "..", 1, true ) then | if mw.ustring.find( s, "..", 1, true ) then | ||
return false | return false | ||
else | else | ||
return | return true | ||
end | end | ||
end | end | ||
Zeile 88: | Zeile 110: | ||
end | end | ||
return false | return false | ||
end -- | end -- _isDomain() | ||
function _isHost( s ) | |||
return _isDomain( s ) or _isIP( s ) | |||
end -- _isHost() | |||
Zeile 101: | Zeile 129: | ||
local function legal( n ) | local function legal( n ) | ||
return ( tonumber( n ) or 256 ) < 256 | return ( tonumber( n ) or 256 ) < 256 | ||
and not n: | and not n:find( "^0%d" ) | ||
end -- in lua 0 is true! | end -- in lua 0 is true! | ||
if type( s ) ~= "string" then | if type( s ) ~= "string" then | ||
Zeile 140: | Zeile 168: | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*" ) | s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*" ) | ||
return | return _isDomain( s ) | ||
end | end | ||
return false | return false | ||
end -- _isMailAddress() | end -- _isMailAddress() | ||
function _isPort( port ) | |||
if type( port ) == "string" then | |||
if port:find( "^%s*:[1-9][0-9]*%s*$" ) then | |||
return true | |||
end | |||
end | |||
return false | |||
end -- _isPort() | |||
function _isUnescapedURL( url, trailing ) | |||
if type( trailing ) ~= "string" then | |||
if _isWebURL( url ) then | |||
if url:match( "[%[|%]]" ) then | |||
return true | |||
end | |||
end | |||
end | |||
return false | |||
end -- _isUnescapedURL() | |||
function _isWebURL( url ) | |||
if _getScheme( url ) and _getAuthority( url ) then | |||
if not url:match( "%S%s+%S" ) then | |||
return true | |||
end | |||
end | |||
return false | |||
end -- _isWebURL() | |||
Zeile 159: | Zeile 222: | ||
function p.getScheme( frame ) | function p.getScheme( frame ) | ||
return _getScheme( frame.args[ 1 ] ) or "" | return _getScheme( frame.args[ 1 ] ) or "" | ||
end | |||
function p.isAuthority( frame ) | |||
return _isAuthority( frame.args[ 1 ] ) and "1" or "" | |||
end | |||
function p.isDomain( frame ) | |||
return _isDomain( frame.args[ 1 ] ) and "1" or "" | |||
end | |||
function p.isHost( frame ) | |||
return _isHost( frame.args[ 1 ] ) and "1" or "" | |||
end | end | ||
function p.isIP( frame ) | function p.isIP( frame ) | ||
Zeile 164: | Zeile 236: | ||
end | end | ||
function p.isIPv4( frame ) | function p.isIPv4( frame ) | ||
return _isIPv4( frame.args[ 1 ] ) and "1" or " | return _isIPv4( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isIPv6( frame ) | function p.isIPv6( frame ) | ||
return _isIPv6( frame.args[ 1 ] ) and "1" or " | return _isIPv6( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isMailAddress( frame ) | function p.isMailAddress( frame ) | ||
return _isMailAddress( frame.args[ 1 ] ) and "1" or " | return _isMailAddress( frame.args[ 1 ] ) and "1" or "" | ||
end | |||
function p.isPort( frame ) | |||
return _isPort( frame.args[ 1 ] ) and "1" or "" | |||
end | |||
function p.isUnescapedURL( frame ) | |||
return _isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or "" | |||
end | |||
function p.isWebURL( frame ) | |||
return _isWebURL( frame.args[ 1 ] ) and "1" or "" | |||
end | end | ||
return p | return p |
Version vom 15. April 2013, 21:38 Uhr
Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden
--[=[ URLutil 2013-04-15
Utilities for URL etc. on www.
* getAuthority()
* getHost()
* getScheme()
* isAuthority()
* isDomain()
* isHost()
* isIP()
* isIPv4()
* isIPv6()
* isMailAddress()
* isPort()
* isUnescapedURL()
* isWebURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway.
Functions are not "local",
so other modules can require this module and call them directly.
We return an object with small stub functions to call the real ones
so that the functions can be called from templates also.
----
Based upon w:en:Special:Permalink/542839577?title=Module:IPAddress 2013-03-01
Unit tests at :en:Module:IPAddress/tests
]=]



-- Extract the authority part of a URL: the host in lower case,
-- followed by ":" and the port if a port was given.
--     url -- any value; only strings are examined
-- Returns the authority string, or false if none could be extracted.
function _getAuthority( url )
    if type( url ) == "string" then
        -- A "/" is appended so the pattern can always anchor on the slash
        -- that terminates the authority, even if the URL has no path.
        local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" )
        if _isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                -- A colon must be followed by a port not starting with 0.
                if port:find( "^[1-9]" ) then
                    return ( host .. ":" .. port )
                end
            elseif #port == 0 then
                return host
            end
        end
    end
    return false
end -- _getAuthority()



-- Extract the host of a URL, without any port.
--     url -- any value; only strings are examined
-- Returns the host as delivered by _getAuthority() with ":port" removed,
-- or false if no authority could be extracted.
function _getHost( url )
    if type( url ) == "string" then
        local auth = _getAuthority( url )
        if auth then    -- type( auth ) == "string"
            return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
        end
    end
    return false
end -- _getHost()



-- Extract the scheme of a URL.
--     url -- any value; only strings are examined
-- Returns the lower case scheme including "://" if the scheme has
-- at least three letters, "//" for a protocol-relative URL,
-- or false otherwise.
function _getScheme( url )
    if type( url ) == "string" then
        local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    return prot:lower() .. "://"
                end
            elseif #prot == 0 then
                return "//"
            end
        end
    end
    return false
end -- _getScheme()



-- Check whether a string is an authority: a host, optionally followed
-- by ":" and a port accepted by _isPort().
--     s -- any value; only strings are examined
-- Returns the (truthy) result of _isHost() for the host part,
-- or false.
function _isAuthority( s )
    if type( s ) == "string" then
        local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
        if colon == ":" then
            if not _isPort( ":" .. port ) then
                return false
            end
        elseif type( port ) == "string" then
            -- No colon: any digits captured separately belong to the host.
            host = host .. port
        end
        -- host is nil if the pattern did not match; _isHost() rejects that.
        return _isHost( host )
    end
    return false
end -- _isAuthority()



-- Check whether a string looks like a domain name: labels followed by
-- a top-level domain of at least two ASCII letters.
--     s -- any value; only strings are examined
-- Returns true or false.
function _isDomain( s )
    if type( s ) == "string" then
        -- Capture everything in front of the top-level domain.
        s = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
        if type( s ) == "string" then
            if mw.ustring.find( s, "^%w" ) then
                -- Plain search: empty labels ("..") are not permitted.
                if mw.ustring.find( s, "..", 1, true ) then
                    return false
                else
                    return true
                end
            end
        end
    end
    return false
end -- _isDomain()



-- Check whether a string is a host: a domain name or an IP address.
--     s -- any value
-- Returns true for a domain, "4" or "6" for an IP address, else false.
function _isHost( s )
    return _isDomain( s ) or _isIP( s )
end -- _isHost()



-- Check whether a string is an IP address.
--     s -- any value
-- Returns "4" for IPv4, "6" for IPv6, else false.
function _isIP( s )
    return _isIPv4( s ) and "4" or _isIPv6( s ) and "6"
end -- _isIP()



-- Check whether a string is an IPv4 address in dotted decimal notation.
--     s -- any value; only strings are examined
-- Returns true or false.
function _isIPv4( s )
    -- One part is legal if it is below 256 and has no leading zero.
    -- n is nil if the pattern below did not match.
    local function legal( n )
        return ( tonumber( n ) or 256 ) < 256
               and not n:find( "^0%d" )
    end -- in lua 0 is true!
    if type( s ) ~= "string" then
        return false
    end
    local p1, p2, p3, p4 = s:match( "^%s*(%d+)%.(%d+)%.(%d+)%.(%d+)%s*$" )
    return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
end -- _isIPv4()



-- Check whether a string is an IPv6 address (hexadecimal groups
-- separated by colons, at most one "::" abbreviation).
--     s -- any value; only strings are examined
-- Returns true or false.
function _isIPv6( s )
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- Trim before validating, so that surrounding whitespace is
    -- tolerated in the same way as by the other checks.
    s = mw.text.trim( s )
    if s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false -- at most one ::
    end
    s = s:gsub( "^:?", ":" ) -- ensure exactly one leading colon
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or ( dcolon == 0 and groups == 8 ) )
           and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- _isIPv6()



-- Check whether a string looks like a mail address: a local part,
-- "@", and a domain accepted by _isDomain().
--     s -- any value; only strings are examined
-- Returns true or false.
function _isMailAddress( s )
    if type( s ) == "string" then
        -- Anchored at both ends, so nothing but whitespace
        -- may follow the domain.
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return _isDomain( s )
    end
    return false
end -- _isMailAddress()



-- Check whether a string is a port specification: a colon followed by
-- a decimal number without leading zero.
--     port -- any value; only strings are examined
-- Returns true or false.
function _isPort( port )
    if type( port ) == "string" then
        if port:find( "^%s*:[1-9][0-9]*%s*$" ) then
            return true
        end
    end
    return false
end -- _isPort()



-- Check whether a web URL contains one of the characters [ | ]
--     url      -- any value; must pass _isWebURL()
--     trailing -- NOTE(review): any string here makes the result false;
--                 the intended meaning of this parameter is not evident
--                 from this module.
-- Returns true or false.
function _isUnescapedURL( url, trailing )
    if type( trailing ) ~= "string" then
        if _isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- _isUnescapedURL()



-- Check whether a string is a web URL: it has a scheme and an authority
-- and no whitespace between non-whitespace characters.
--     url -- any value; non-strings are rejected by _getScheme()
-- Returns true or false.
function _isWebURL( url )
    if _getScheme( url ) and _getAuthority( url ) then
        if not url:match( "%S%s+%S" ) then
            return true
        end
    end
    return false
end -- _isWebURL()



-- Provide template access
-- Each stub reads its arguments from frame.args and maps a false
-- result to the empty string.

local p = {}

function p.getAuthority( frame )
    return _getAuthority( frame.args[ 1 ] ) or ""
end
function p.getHost( frame )
    return _getHost( frame.args[ 1 ] ) or ""
end
function p.getScheme( frame )
    return _getScheme( frame.args[ 1 ] ) or ""
end
function p.isAuthority( frame )
    return _isAuthority( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomain( frame )
    return _isDomain( frame.args[ 1 ] ) and "1" or ""
end
function p.isHost( frame )
    return _isHost( frame.args[ 1 ] ) and "1" or ""
end
function p.isIP( frame )
    return _isIP( frame.args[ 1 ] ) or ""
end
function p.isIPv4( frame )
    return _isIPv4( frame.args[ 1 ] ) and "1" or ""
end
function p.isIPv6( frame )
    return _isIPv6( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailAddress( frame )
    return _isMailAddress( frame.args[ 1 ] ) and "1" or ""
end
function p.isPort( frame )
    return _isPort( frame.args[ 1 ] ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return _isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or ""
end
function p.isWebURL( frame )
    return _isWebURL( frame.args[ 1 ] ) and "1" or ""
end

return p