Modul:URLutil: Unterschied zwischen den Versionen

Aus skandinavien-wiki.net
w>PerfektesChaos
(mehr Funktionen)
w>PerfektesChaos
(Mehr Funktionen)
Zeile 1: Zeile 1:
--[=[ URLutil 2013-04-15
--[=[ URLutil 2013-04-17
Utilities for URL etc. on www.
Utilities for URL etc. on www.
* getAuthority()
* getAuthority()
* getHost()
* getHost()
* getPort()
* getScheme()
* getScheme()
* isAuthority()
* isAuthority()
Zeile 11: Zeile 12:
* isIPv6()
* isIPv6()
* isMailAddress()
* isMailAddress()
* isPort()
* isMailLink()
* isProtocolWiki
* isRessourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isUnescapedURL()
* isWebURL()
* isWebURL()
Zeile 50: Zeile 54:


function _getHost( url )
function _getHost( url )
     if type( url ) == "string" then
     local auth = _getAuthority( url )
        local auth = _getAuthority( url )
    if auth then
        if auth then                         --  type( auth ) == "string"
        return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
            return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
    end
    return false
end -- _getHost()
 
 
 
function _getPort( url )
    url = _getAuthority( url )
    if url then
        url = url:match( ":([1-9][0-9]*)$" )
        if type( url ) == "string" then
            return tonumber( url )
         end
         end
     end
     end
     return false
     return false
end -- _getHost()
end -- _getPort()




Zeile 83: Zeile 98:
         local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
         local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
         if colon == ":" then
         if colon == ":" then
             if not _isPort( ":" .. port ) then
             port = port:match( "^[1-9][0-9]*$" )
            if type( port ) ~= "string" then
                 return false
                 return false
             end
             end
         elseif type( port ) == "string" then
         elseif port ~= "" then
             host = host .. port
             return false
         end
         end
         return _isHost( host )
         return _isHost( host )
     end
     end
     return false
     return false
Zeile 121: Zeile 137:


function _isIP( s )
function _isIP( s )
     return _isIPv4( s ) and "4" or _isIPv6( s ) and "6"
     return _isIPv4( s ) and 4 or _isIPv6( s ) and 6
end -- _isIP()
end -- _isIP()


Zeile 128: Zeile 144:
function _isIPv4( s )
function _isIPv4( s )
     local function legal( n )
     local function legal( n )
               return ( tonumber( n ) or 256 ) < 256
               return ( tonumber( n ) < 256 )
                    and not n:find( "^0%d" )
           end
           end -- in lua 0 is true!
     if type( s ) == "string" then
     if type( s ) ~= "string" then
         local p1, p2, p3, p4 = s:match( "^%s*([12][0-9]?[0-9]?)%.([012][0-9]?[0-9]?)%.([012][0-9]?[0-9]?)%.([012][0-9]?[0-9]?)%s*$" )
         return false
        if p1 and p2 and p3 and p4 then
            return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
     end
     end
    local p1, p2, p3, p4 = s:match( "^%s*(%d+)%.(%d+)%.(%d+)%.(%d+)%s*$" )
     return false
     return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
end -- _isIPv4()
end -- _isIPv4()


Zeile 167: Zeile 184:
function _isMailAddress( s )
function _isMailAddress( s )
     if type( s ) == "string" then
     if type( s ) == "string" then
         s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*" )
         s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
         return _isDomain( s )
         return _isDomain( s )
     end
     end
     return false
     return false
end -- _isMailAddress()
end -- _isMailAddress()
function _isMailLink( s )
    if type( s ) == "string" then
        local addr
        s, addr = mw.ustring.match( s, "^%s([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s$*" )
        if type( s ) == "string" then
            if s:lower() == "mailto" then
                return _isMailAddress( addr )
            end
        end
    end
    return false
end -- _isMailLink()




Zeile 178: Zeile 210:
     if type( port ) == "string" then
     if type( port ) == "string" then
         if port:find( "^%s*:[1-9][0-9]*%s*$" ) then
         if port:find( "^%s*:[1-9][0-9]*%s*$" ) then
             return true
             return true   -- maybe numeric > 0
         end
         end
     end
     end
     return false
     return false
end -- _isPort()
end -- _isPort()
function _isProtocolWiki( prot )
    if type( prot ) == "string" then
        local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
        if slashes ~= "/" then
            if scheme == "" then
                if colon ~= ":" and slashes == "//" then
                    return true
                end
            elseif colon == ":" or slashes == "" then
                local s = " ftp git http https irc ircs mms nntp svn telnet worldwind "
                s = s:match( " " .. scheme:lower() .. " " )
                if type( s ) == "string" then
                  return true
                end
            end
        end
    end
    return false
end -- _isProtocolWiki()
function _isRessourceURL( url )
    local scheme = _getScheme( url )
    if scheme then
        local s = " // http:// https:// ftp:// "
        s = s:find( " " .. scheme .. " " )
        if s then
            if _getAuthority( url ) then
                if not url:match( "%S%s+%S" ) then
                    return true
                end
            end
        end
    end
    return false
end -- _isRessourceURL()
function _isSuspiciousURL( url )
    if _isRessourceURL( url ) then
        local s = _getAuthority( url )
        local pat = "[%[|%]" ..
                    mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 )
                    .. "]"
        if s:find( "@" )
          or url:find( "''" )
          or url:find( pat )
          or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character
        return false
    end
    return true
end -- _isSuspiciousURL()




Zeile 219: Zeile 311:
function p.getHost( frame )
function p.getHost( frame )
     return _getHost( frame.args[ 1 ] ) or ""
     return _getHost( frame.args[ 1 ] ) or ""
end
function p.getPort( frame )
    return _getPort( frame.args[ 1 ] ) or ""
end
end
function p.getScheme( frame )
function p.getScheme( frame )
Zeile 244: Zeile 339:
     return _isMailAddress( frame.args[ 1 ] ) and "1" or ""
     return _isMailAddress( frame.args[ 1 ] ) and "1" or ""
end
end
function p.isPort( frame )
function p.isMailLink( frame )
    return _isMailLink( frame.args[ 1 ] ) and "1" or ""
end
function p.isPort( frame ) -- OBSOLETED
     return _isPort( frame.args[ 1 ] ) and "1" or ""
     return _isPort( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolWiki( frame )
    return _isProtocolWiki( frame.args[ 1 ] ) and "1" or ""
end
function p.isRessourceURL( frame )
    return _isRessourceURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isSuspiciousURL( frame )
    return _isSuspiciousURL( frame.args[ 1 ] ) and "1" or ""
end
end
function p.isUnescapedURL( frame )
function p.isUnescapedURL( frame )

Version vom 18. April 2013, 19:48 Uhr

Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden

--[=[ URLutil 2013-04-17
Utilities for URL etc. on www.
* getAuthority()
* getHost()
* getPort()
* getScheme()
* isAuthority()
* isDomain()
* isHost()
* isIP()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolWiki()
* isRessourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wiki syntax escaping anyway.

Functions are not "local",
so other modules can require this module and call them directly.
We return an object with small stub functions to call the real ones
so that the functions can be called from templates also.
----
Based upon   w:en:Special:Permalink/542839577?title=Module:IPAddress   2013-03-01
Unit tests at :en:Module:IPAddress/tests
]=]




-- Extract the authority (host, optionally followed by ":port") from a URL.
-- The host must satisfy _isHost() and is returned in lower case.
-- Parameter:
--     url  -- any value; only strings are evaluated
-- Returns:
--     string "host" or "host:port", or false if nothing valid was found
function _getAuthority( url )
    if type( url ) ~= "string" then
        return false
    end
    -- A slash is appended so the pattern can always demand one
    -- right behind the authority, even when the URL has no path.
    local pattern = "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/"
    local host, colon, port = mw.ustring.match( url .. "/", pattern )
    if not _isHost( host ) then
        return false
    end
    host = mw.ustring.lower( host )
    if colon == ":" then
        -- With a colon, the port must begin with a non-zero digit.
        if port:find( "^[1-9]" ) then
            return host .. ":" .. port
        end
    elseif #port == 0 then
        return host
    end
    return false
end -- _getAuthority()



-- Retrieve the host portion of a URL, without any port.
-- Parameter:
--     url  -- any value; passed on to _getAuthority()
-- Returns:
--     host string as captured from the authority,
--     or false if _getAuthority() found no authority
function _getHost( url )
    local authority = _getAuthority( url )
    if not authority then
        return false
    end
    -- Strip an optional ":port" suffix from the authority.
    return mw.ustring.match( authority, "^([%w%.%%-]+):?[%d]*$" )
end -- _getHost()



-- Retrieve the port number of a URL.
-- Parameter:
--     url  -- any value; passed on to _getAuthority()
-- Returns:
--     number (port, first digit non-zero), or false if no port is present
function _getPort( url )
    local authority = _getAuthority( url )
    -- Stays false if there is no authority, nil if there is no port suffix.
    local digits = authority and authority:match( ":([1-9][0-9]*)$" )
    if type( digits ) == "string" then
        return tonumber( digits )
    end
    return false
end -- _getPort()



-- Retrieve the scheme prefix of a URL, including the double slash.
-- Parameter:
--     url  -- any value; only strings are evaluated
-- Returns:
--     "//" for a protocol-relative URL,
--     lower-case "scheme://" if the scheme has at least 3 letters,
--     false otherwise
function _getScheme( url )
    if type( url ) ~= "string" then
        return false
    end
    local name, separator, lead = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if lead ~= "//" then
        -- No match at all: every capture is nil.
        return false
    end
    if separator == ":" then
        -- Explicit scheme; names shorter than 3 letters are refused.
        return #name > 2 and ( name:lower() .. "://" ) or false
    end
    if #name == 0 then
        -- Neither scheme nor colon: protocol-relative.
        return "//"
    end
    return false
end -- _getScheme()



-- Check whether a string is a valid authority: host with optional ":port".
-- Surrounding whitespace is tolerated.
-- Parameter:
--     s  -- any value; only strings are evaluated
-- Returns:
--     result of _isHost() for the host part if the port part is acceptable,
--     false otherwise
function _isAuthority( s )
    if type( s ) ~= "string" then
        return false
    end
    local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
    if colon == ":" then
        -- A colon must be followed by a port not starting with zero.
        if not port:match( "^[1-9][0-9]*$" ) then
            return false
        end
    elseif port ~= "" then
        -- Also covers the no-match case, where port is nil.
        return false
    end
    return _isHost( host )
end -- _isAuthority()



-- Check whether a string looks like a domain name:
-- labels followed by a top-level domain of at least two ASCII letters.
-- Surrounding whitespace is tolerated.
-- Parameter:
--     s  -- any value; only strings are evaluated
-- Returns:
--     true or false
function _isDomain( s )
    if type( s ) ~= "string" then
        return false
    end
    -- Capture everything in front of the final ".tld".
    local labels = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
    if type( labels ) == "string" and mw.ustring.find( labels, "^%w" ) then
        -- Two consecutive dots would mean an empty label.
        return not mw.ustring.find( labels, "..", 1, true )
    end
    return false
end -- _isDomain()



-- Check whether a value is a host: either a domain or an IP address.
-- Parameter:
--     s  -- any value; passed on to _isDomain() and _isIP()
-- Returns:
--     the truthy result of _isDomain(), else the result of _isIP()
function _isHost( s )
    local domain = _isDomain( s )
    if domain then
        return domain
    end
    return _isIP( s )
end -- _isHost()



-- Determine the IP version of an address.
-- Parameter:
--     s  -- any value; passed on to _isIPv4() and _isIPv6()
-- Returns:
--     4 if _isIPv4() accepts s, 6 if _isIPv6() accepts s,
--     otherwise the falsy result of _isIPv6()
function _isIP( s )
    if _isIPv4( s ) then
        return 4
    end
    return _isIPv6( s ) and 6
end -- _isIP()



-- Check whether a string is an IPv4 address in dotted decimal notation.
-- Surrounding whitespace is tolerated.
-- Each of the four parts has 1 to 3 digits and a value below 256;
-- the first part must not begin with zero.
-- Parameter:
--     s  -- any value; only strings are evaluated
-- Returns:
--     true or false
function _isIPv4( s )
    local function legal( n )
              return ( tonumber( n ) < 256 )
          end
    if type( s ) == "string" then
        -- Every decimal digit is allowed as leading digit of a part;
        -- the former classes [12] and [012] rejected valid addresses
        -- such as 8.8.8.8 or 192.168.3.1
        local p1, p2, p3, p4 = s:match( "^%s*([1-9]%d?%d?)%.(%d%d?%d?)%.(%d%d?%d?)%.(%d%d?%d?)%s*$" )
        if p1 then
            -- The pattern matches as a whole, so all four parts are set.
            return legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
    end
    return false
end -- _isIPv4()



-- Check whether a string is an IPv6 address (unbracketed, hex groups only).
-- Surrounding whitespace is tolerated, as in the other checks of this
-- module; it is stripped before validation. Formerly trimming happened
-- only after the character check and therefore never had any effect.
-- Parameter:
--     s  -- any value; only strings are evaluated
-- Returns:
--     true or false
function _isIPv6( s )
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    s = s:match( "^%s*(.-)%s*$" )    -- trim first
    if s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- _isIPv6()



-- Check whether a string is an e-mail address: local part, "@", domain.
-- Surrounding whitespace is tolerated.
-- Parameter:
--     s  -- any value; only strings are evaluated
-- Returns:
--     result of _isDomain() for the part behind "@", false for non-strings
function _isMailAddress( s )
    if type( s ) ~= "string" then
        return false
    end
    -- nil if the overall shape does not fit; _isDomain() copes with that.
    local domain = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
    return _isDomain( domain )
end -- _isMailAddress()



-- Check whether a string is a mailto: link with a valid e-mail address.
-- The scheme is matched case-insensitively; surrounding whitespace
-- is tolerated.
-- Parameter:
--     s  -- any value; only strings are evaluated
-- Returns:
--     result of _isMailAddress() for the address behind "mailto:",
--     false if the string is not shaped like a mailto: link
function _isMailLink( s )
    if type( s ) == "string" then
        -- Leading and trailing whitespace are optional (%s*) and the
        -- pattern is anchored at the end by a final $.
        -- The former pattern "^%s ... %s$*" demanded exactly one
        -- whitespace on either side, so "mailto:a@b.org" never matched.
        local scheme, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( scheme ) == "string" then
            if scheme:lower() == "mailto" then
                return _isMailAddress( addr )
            end
        end
    end
    return false
end -- _isMailLink()



-- Check whether a string is a port specification: a colon followed by
-- a decimal number not starting with zero.
-- Surrounding whitespace is tolerated.
-- Parameter:
--     port  -- any value; only strings are evaluated
-- Returns:
--     true or false
function _isPort( port )
    if type( port ) ~= "string" then
        return false
    end
    return port:find( "^%s*:[1-9][0-9]*%s*$" ) ~= nil
end -- _isPort()



-- Check whether a string denotes a protocol from the list kept in this
-- function, or the protocol-relative prefix "//".
-- Accepted forms: "//", "scheme", "scheme:", "scheme://";
-- a single slash is never accepted. Surrounding whitespace is tolerated.
-- Parameter:
--     prot  -- any value; only strings are evaluated
-- Returns:
--     true or false
function _isProtocolWiki( prot )
    if type( prot ) ~= "string" then
        return false
    end
    local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
    if slashes == "/" then
        return false
    end
    if scheme == "" then
        -- Without a scheme name only the bare "//" is acceptable.
        return colon ~= ":" and slashes == "//"
    end
    if colon == ":" or slashes == "" then
        -- Space-delimited list; padding makes whole-word lookup possible.
        local known = " ftp git http https irc ircs mms nntp svn telnet worldwind "
        if known:match( " " .. scheme:lower() .. " " ) then
            return true
        end
    end
    return false
end -- _isProtocolWiki()



-- Check whether a value is a resource URL: scheme is one of
-- "//", "http://", "https://", "ftp://", an authority is present,
-- and there is no whitespace between non-whitespace characters.
-- Parameter:
--     url  -- any value; passed on to _getScheme() and _getAuthority()
-- Returns:
--     true or false
function _isRessourceURL( url )
    local scheme = _getScheme( url )
    if not scheme then
        return false
    end
    -- Space-delimited list; padding makes whole-entry lookup possible.
    local accepted = " // http:// https:// ftp:// "
    if not accepted:find( " " .. scheme .. " " ) then
        return false
    end
    if not _getAuthority( url ) then
        return false
    end
    -- Inner whitespace disqualifies the URL.
    return not url:match( "%S%s+%S" )
end -- _isRessourceURL()



-- Check whether a value is not a resource URL at all, or is a resource URL
-- containing something suspicious:
--   * "@" in the authority
--   * two consecutive apostrophes
--   * "[", "|", "]" or a character in the code point ranges
--     8201-8207, 8234-8239, or code point 8288
--   * trailing "." or ","
-- Parameter:
--     url  -- any value; passed on to _isRessourceURL()
-- Returns:
--     true if suspicious (or no resource URL), false otherwise
function _isSuspiciousURL( url )
    if _isRessourceURL( url ) then
        local s = _getAuthority( url )
        local pat = "[%[|%]" ..
                    mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 )
                    .. "]"
        -- pat contains multi-byte UTF-8 characters and code point ranges,
        -- so it needs the Unicode-aware mw.ustring.find();
        -- the byte-oriented string.find() would read the set as single
        -- bytes and byte ranges, and would hit ordinary non-ASCII URLs.
        if s:find( "@" )
           or url:find( "''" )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character
        return false
    end
    return true
end -- _isSuspiciousURL()



-- Check whether a web URL contains an unescaped "[", "|" or "]".
-- Parameters:
--     url       -- any value; passed on to _isWebURL()
--     trailing  -- any value; if this is a string, the result is false
-- Returns:
--     true or false
function _isUnescapedURL( url, trailing )
    if type( trailing ) == "string" then
        return false
    end
    if not _isWebURL( url ) then
        return false
    end
    if url:match( "[%[|%]]" ) then
        return true
    end
    return false
end -- _isUnescapedURL()



-- Check whether a value is a URL with scheme and authority
-- and without whitespace between non-whitespace characters.
-- Parameter:
--     url  -- any value; passed on to _getScheme() and _getAuthority()
-- Returns:
--     true or false
function _isWebURL( url )
    if not ( _getScheme( url ) and _getAuthority( url ) ) then
        return false
    end
    -- Inner whitespace disqualifies the URL.
    return not url:match( "%S%s+%S" )
end -- _isWebURL()



-- Provide template access

-- Export table. Each entry is a small stub that takes a frame object,
-- hands frame.args[ 1 ] (for isUnescapedURL also frame.args[ 2 ])
-- to the real function and converts the result for template use.
local p = {}

-- Retrieval stubs: a falsy result is mapped to the empty string.

p.getAuthority = function ( frame )
    local found = _getAuthority( frame.args[ 1 ] )
    return found or ""
end

p.getHost = function ( frame )
    local found = _getHost( frame.args[ 1 ] )
    return found or ""
end

p.getPort = function ( frame )
    local found = _getPort( frame.args[ 1 ] )
    return found or ""
end

p.getScheme = function ( frame )
    local found = _getScheme( frame.args[ 1 ] )
    return found or ""
end

-- Check stubs: "1" for a truthy result, the empty string otherwise.

p.isAuthority = function ( frame )
    if _isAuthority( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isDomain = function ( frame )
    if _isDomain( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isHost = function ( frame )
    if _isHost( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

-- Unlike the other checks, this one passes the result of _isIP() through.
p.isIP = function ( frame )
    local version = _isIP( frame.args[ 1 ] )
    return version or ""
end

p.isIPv4 = function ( frame )
    if _isIPv4( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isIPv6 = function ( frame )
    if _isIPv6( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isMailAddress = function ( frame )
    if _isMailAddress( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isMailLink = function ( frame )
    if _isMailLink( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isPort = function ( frame )  -- OBSOLETED
    if _isPort( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isProtocolWiki = function ( frame )
    if _isProtocolWiki( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isRessourceURL = function ( frame )
    if _isRessourceURL( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isSuspiciousURL = function ( frame )
    if _isSuspiciousURL( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

p.isUnescapedURL = function ( frame )
    if _isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) then
        return "1"
    end
    return ""
end

p.isWebURL = function ( frame )
    if _isWebURL( frame.args[ 1 ] ) then
        return "1"
    end
    return ""
end

return p