Modul:URLutil: Unterschied zwischen den Versionen
Mehr Funktionen
w>PerfektesChaos (mehr Funktionen) |
w>PerfektesChaos (Mehr Funktionen) |
||
Zeile 1: | Zeile 1: | ||
--[=[ URLutil 2013-04- | --[=[ URLutil 2013-04-17 | ||
Utilities for URL etc. on www. | Utilities for URL etc. on www. | ||
* getAuthority() | * getAuthority() | ||
* getHost() | * getHost() | ||
* getPort() | |||
* getScheme() | * getScheme() | ||
* isAuthority() | * isAuthority() | ||
Zeile 11: | Zeile 12: | ||
* isIPv6() | * isIPv6() | ||
* isMailAddress() | * isMailAddress() | ||
* | * isMailLink() | ||
* isProtocolWiki() | |||
* isRessourceURL() | |||
* isSuspiciousURL() | |||
* isUnescapedURL() | * isUnescapedURL() | ||
* isWebURL() | * isWebURL() | ||
Zeile 50: | Zeile 54: | ||
function _getHost( url ) | function _getHost( url ) | ||
local auth = _getAuthority( url ) | |||
if auth then | |||
return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" ) | |||
end | |||
return false | |||
end -- _getHost() | |||
function _getPort( url ) | |||
url = _getAuthority( url ) | |||
if url then | |||
url = url:match( ":([1-9][0-9]*)$" ) | |||
if type( url ) == "string" then | |||
return tonumber( url ) | |||
end | end | ||
end | end | ||
return false | return false | ||
end -- | end -- _getPort() | ||
Zeile 83: | Zeile 98: | ||
local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" ) | local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" ) | ||
if colon == ":" then | if colon == ":" then | ||
port = port:match( "^[1-9][0-9]*$" ) | |||
if type( port ) ~= "string" then | |||
return false | return false | ||
end | end | ||
elseif | elseif port ~= "" then | ||
return false | |||
end | end | ||
return | return _isHost( host ) | ||
end | end | ||
return false | return false | ||
Zeile 121: | Zeile 137: | ||
function _isIP( s ) | function _isIP( s ) | ||
return _isIPv4( s ) and | return _isIPv4( s ) and 4 or _isIPv6( s ) and 6 | ||
end -- _isIP() | end -- _isIP() | ||
Zeile 128: | Zeile 144: | ||
function _isIPv4( s ) | function _isIPv4( s ) | ||
local function legal( n ) | local function legal( n ) | ||
return ( tonumber( n | return ( tonumber( n ) < 256 ) | ||
end | |||
end | if type( s ) == "string" then | ||
if type( s ) | local p1, p2, p3, p4 = s:match( "^%s*([12][0-9]?[0-9]?)%.([012][0-9]?[0-9]?)%.([012][0-9]?[0-9]?)%.([012][0-9]?[0-9]?)%s*$" ) | ||
return | if p1 and p2 and p3 and p4 then | ||
return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 ) | |||
end | |||
end | end | ||
return false | |||
return | |||
end -- _isIPv4() | end -- _isIPv4() | ||
Zeile 167: | Zeile 184: | ||
function _isMailAddress( s ) | function _isMailAddress( s ) | ||
if type( s ) == "string" then | if type( s ) == "string" then | ||
s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*" ) | s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" ) | ||
return _isDomain( s ) | return _isDomain( s ) | ||
end | end | ||
return false | return false | ||
end -- _isMailAddress() | end -- _isMailAddress() | ||
-- Check whether a string is a complete mailto: link.
-- Parameter:
--     s  -- candidate; anything that is not a string yields false
-- Returns the result of _isMailAddress() for the address part when s
--         consists of the scheme "mailto" (any letter case), a colon and
--         an address-shaped remainder, optionally surrounded by
--         whitespace; false otherwise
function _isMailLink( s )
    if type( s ) == "string" then
        -- Leading and trailing whitespace are optional ("%s*") and the
        -- match is anchored at both ends. "$" only acts as an anchor when
        -- it is the very last pattern character; anywhere else it is a
        -- literal dollar sign.
        -- The character classes already restrict the scheme to "mailto"
        -- in any letter case, so no separate comparison is required.
        local addr = mw.ustring.match( s, "^%s*[Mm][Aa][Ii][Ll][Tt][Oo]:(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if addr then
            return _isMailAddress( addr )
        end
    end
    return false
end -- _isMailLink()
Zeile 178: | Zeile 210: | ||
if type( port ) == "string" then | if type( port ) == "string" then | ||
if port:find( "^%s*:[1-9][0-9]*%s*$" ) then | if port:find( "^%s*:[1-9][0-9]*%s*$" ) then | ||
return true | return true -- maybe numeric > 0 | ||
end | end | ||
end | end | ||
return false | return false | ||
end -- _isPort() | end -- _isPort() | ||
-- Check whether a string denotes one of the listed URL protocols.
-- Parameter:
--     prot  -- candidate such as "http", "http:", "http://" or "//";
--              anything that is not a string yields false
-- Returns true if
--         * prot is just "//" (no scheme, no colon), or
--         * prot is a listed scheme (any letter case) followed by ":",
--           by "://", or by nothing at all;
--         false otherwise
function _isProtocolWiki( prot )
    if type( prot ) ~= "string" then
        return false
    end
    -- All three captures are nil when the pattern does not match; each of
    -- the comparisons below is then false and the function returns false.
    local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
    if slashes == "/" then
        -- a single slash is never acceptable
        return false
    end
    if scheme == "" then
        -- no scheme: only the bare "//" form passes
        return colon ~= ":" and slashes == "//"
    end
    if colon == ":" or slashes == "" then
        -- scheme consists of ASCII letters only, so it is safe to splice
        -- it into the search pattern
        local known = " ftp git http https irc ircs mms nntp svn telnet worldwind "
        local hit = known:match( " " .. scheme:lower() .. " " )
        return type( hit ) == "string"
    end
    return false
end -- _isProtocolWiki()
-- Check whether a URL has the shape of a resource URL.
-- Parameter:
--     url  -- candidate, handed on to _getScheme() and _getAuthority()
-- Returns true if
--         * _getScheme() yields one of "//", "http://", "https://",
--           "ftp://", and
--         * _getAuthority() yields a value that is not false or nil, and
--         * url has no whitespace between two non-whitespace characters;
--         false otherwise
function _isRessourceURL( url )
    local scheme = _getScheme( url )
    if scheme then
        local accepted = " // http:// https:// ftp:// "
        -- Plain search (4th argument true): the scheme is compared
        -- literally, no character in it is read as a pattern item.
        if accepted:find( " " .. scheme .. " ", 1, true )
           and _getAuthority( url )
           and not url:match( "%S%s+%S" ) then
            return true
        end
    end
    return false
end -- _isRessourceURL()
-- Check whether a URL looks suspicious.
-- Parameter:
--     url  -- candidate, handed on to _isRessourceURL()
-- Returns true if url is not accepted by _isRessourceURL(), or if
--         * its authority contains "@", or
--         * it contains two consecutive apostrophes, or
--         * it contains "[", "|", "]" or one of the code points listed
--           below, or
--         * it ends with "." or ",";
--         false otherwise
function _isSuspiciousURL( url )
    if _isRessourceURL( url ) then
        local s = _getAuthority( url )
        -- Character set: "[", "|", "]", the code point ranges
        -- U+2009..U+200F and U+202A..U+202F, and U+2060.
        -- (45 is the hyphen that turns the neighbouring values into a
        -- range.)
        local pat = "[%[|%]" ..
                    mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 )
                    .. "]"
        -- The set holds multi-byte characters, so it has to be evaluated
        -- by the UTF-8 aware mw.ustring.find(). The byte-oriented
        -- string.find() would read it as a set of single bytes and byte
        -- ranges and hit on nearly every non-ASCII character.
        if s:find( "@" )
           or url:find( "''" )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO zero width character
        return false
    end
    return true
end -- _isSuspiciousURL()
Zeile 219: | Zeile 311: | ||
function p.getHost( frame ) | function p.getHost( frame ) | ||
return _getHost( frame.args[ 1 ] ) or "" | return _getHost( frame.args[ 1 ] ) or "" | ||
end | |||
function p.getPort( frame ) | |||
return _getPort( frame.args[ 1 ] ) or "" | |||
end | end | ||
function p.getScheme( frame ) | function p.getScheme( frame ) | ||
Zeile 244: | Zeile 339: | ||
return _isMailAddress( frame.args[ 1 ] ) and "1" or "" | return _isMailAddress( frame.args[ 1 ] ) and "1" or "" | ||
end | end | ||
function p.isPort( frame ) | function p.isMailLink( frame ) | ||
return _isMailLink( frame.args[ 1 ] ) and "1" or "" | |||
end | |||
function p.isPort( frame ) -- OBSOLETED | |||
return _isPort( frame.args[ 1 ] ) and "1" or "" | return _isPort( frame.args[ 1 ] ) and "1" or "" | ||
end | |||
-- Template access to _isProtocolWiki().
-- Reads the first unnamed argument of the frame.
-- Returns "1" if _isProtocolWiki() accepts it, else the empty string
function p.isProtocolWiki( frame )
    if _isProtocolWiki( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end
-- Template access to _isRessourceURL().
-- Reads the first unnamed argument of the frame.
-- Returns "1" if _isRessourceURL() accepts it, else the empty string
function p.isRessourceURL( frame )
    if _isRessourceURL( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end
function p.isSuspiciousURL( frame ) | |||
return _isSuspiciousURL( frame.args[ 1 ] ) and "1" or "" | |||
end | end | ||
function p.isUnescapedURL( frame ) | function p.isUnescapedURL( frame ) |