Modul:URLutil: Unterschied zwischen den Versionen

Aus skandinavien-wiki.net
w>PerfektesChaos
(Komplettiert)
w>PerfektesChaos
(mehr Funktionen)
Zeile 1: Zeile 1:
--[=[
--[=[ URLutil 2013-04-15
URLutil: Utilities for URL etc.
Utilities for URL etc. on www.
* getAuthority()
* getAuthority()
* getHost()
* getHost()
* getScheme()
* getScheme()
* isAuthority()
* isDomain()
* isHost()
* isIP()
* isIP()
* isIPv4()
* isIPv4()
* isIPv6()
* isIPv6()
* isMailAddress()
* isMailAddress()
* isPort()
* isUnescapedURL()
* isWebURL()
Only [[dotted decimal]] notation for IPv4 supported.
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway.


Functions are not "local",
Functions are not "local",
Zeile 25: Zeile 32:
function _getAuthority( url )
function _getAuthority( url )
     if type( url ) == "string" then
     if type( url ) == "string" then
         local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w%.%%-]+)(:?)([%d]*)/" )
         local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" )
         if isHost( host ) then
         if _isHost( host ) then
             host = mw.ustring.lower( host )
             host = mw.ustring.lower( host )
             if colon == ":" then
             if colon == ":" then
                 if port:match( "^[1-9]" ) then
                 if port:find( "^[1-9]" ) then
                     return ( host .. ":" .. port )
                     return ( host .. ":" .. port )
                 end
                 end
Zeile 45: Zeile 52:
     if type( url ) == "string" then
     if type( url ) == "string" then
         local auth = _getAuthority( url )
         local auth = _getAuthority( url )
         if type( auth ) == "string" then
         if auth then                        --  type( auth ) == "string"
             return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
             return mw.ustring.match( auth, "^([%w%.%%-]+):?[%d]*$" )
         end
         end
Zeile 72: Zeile 79:




function isHost( s )
function _isAuthority( s )
    -- internal only
     if type( s ) == "string" then
     if type( s ) == "string" then
         if _isIP( s ) then
         local host, colon, port = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
             return s
        if colon == ":" then
         elseif type( mw.ustring.match( s, "^([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+$" ) ) == "string" then
             if not _isPort( ":" .. port ) then
             if mw.ustring.match( s, "^%w" ) then
                return false
            end
         elseif type( port ) == "string" then
            host = host .. port
        end
        return  _isHost( host )
    end
    return false
end -- _isAuthority()
 
 
 
function _isDomain( s )
    if type( s ) == "string" then
        s = mw.ustring.match( s, "^%s*([%w%.%%-]+%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
        if type( s ) == "string" then
             if mw.ustring.find( s, "^%w" ) then
                 if mw.ustring.find( s, "..", 1, true ) then
                 if mw.ustring.find( s, "..", 1, true ) then
                     return false
                     return false
                 else
                 else
                     return s
                     return true
                 end
                 end
             end
             end
Zeile 88: Zeile 110:
     end
     end
     return false
     return false
end -- isHost()
end -- _isDomain()
 
 
 
function _isHost( s )
    return _isDomain( s ) or _isIP( s )
end -- _isHost()




Zeile 101: Zeile 129:
     local function legal( n )
     local function legal( n )
               return ( tonumber( n ) or 256 ) < 256
               return ( tonumber( n ) or 256 ) < 256
                     and not n:match( "^0%d" )
                     and not n:find( "^0%d" )
           end -- in lua 0 is true!
           end -- in lua 0 is true!
     if type( s ) ~= "string" then
     if type( s ) ~= "string" then
Zeile 140: Zeile 168:
     if type( s ) == "string" then
     if type( s ) == "string" then
         s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*" )
         s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*" )
         return isHost( s )
         return _isDomain( s )
     end
     end
     return false
     return false
end -- _isMailAddress()
end -- _isMailAddress()
function _isPort( port )
    if type( port ) == "string" then
        if port:find( "^%s*:[1-9][0-9]*%s*$" ) then
            return true
        end
    end
    return false
end -- _isPort()
function _isUnescapedURL( url, trailing )
    if type( trailing ) ~= "string" then
        if _isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- _isUnescapedURL()
function _isWebURL( url )
    if _getScheme( url ) and _getAuthority( url ) then
        if not url:match( "%S%s+%S" ) then
            return true
        end
    end
    return false
end -- _isWebURL()




Zeile 159: Zeile 222:
function p.getScheme( frame )
function p.getScheme( frame )
     return _getScheme( frame.args[ 1 ] ) or ""
     return _getScheme( frame.args[ 1 ] ) or ""
end
function p.isAuthority( frame )
    return _isAuthority( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomain( frame )
    return _isDomain( frame.args[ 1 ] ) and "1" or ""
end
function p.isHost( frame )
    return _isHost( frame.args[ 1 ] ) and "1" or ""
end
end
function p.isIP( frame )
function p.isIP( frame )
Zeile 164: Zeile 236:
end
end
function p.isIPv4( frame )
function p.isIPv4( frame )
     return _isIPv4( frame.args[ 1 ] ) and "1" or "0"
     return _isIPv4( frame.args[ 1 ] ) and "1" or ""
end
end
function p.isIPv6( frame )
function p.isIPv6( frame )
     return _isIPv6( frame.args[ 1 ] ) and "1" or "0"
     return _isIPv6( frame.args[ 1 ] ) and "1" or ""
end
end
function p.isMailAddress( frame )
function p.isMailAddress( frame )
     return _isMailAddress( frame.args[ 1 ] ) and "1" or "0"
     return _isMailAddress( frame.args[ 1 ] ) and "1" or ""
end
function p.isPort( frame )
    return _isPort( frame.args[ 1 ] ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return _isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or ""
end
function p.isWebURL( frame )
    return _isWebURL( frame.args[ 1 ] ) and "1" or ""
end
end


return p
return p

Version vom 15. April 2013, 21:38 Uhr

Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden

--[=[ URLutil 2013-04-15
Utilities for URL etc. on www.
* getAuthority()
* getHost()
* getScheme()
* isAuthority()
* isDomain()
* isHost()
* isIP()
* isIPv4()
* isIPv6()
* isMailAddress()
* isPort()
* isUnescapedURL()
* isWebURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wiki syntax escaping anyway.

Functions are not "local",
so other modules can require this module and call them directly.
We return an object with small stub functions to call the real ones
so that the functions can be called from templates also.
----
Based upon   w:en:Special:Permalink/542839577?title=Module:IPAddress   2013-03-01
Unit tests at :en:Module:IPAddress/tests
]=]




-- Extract the authority part (host, optionally followed by ":port") of a URL.
-- Parameter:
--     url  -- string; URL starting with "scheme://" or with "//"
-- Returns the lowercased host, with ":port" appended when a port is present;
-- false if url is not a string, does not match the expected shape,
-- the host is rejected by _isHost(), or the port is rejected by _isPort().
function _getAuthority( url )
    if type( url ) == "string" then
        -- A slash is appended so the pattern can always anchor on the "/"
        -- that terminates the authority, even when the URL has no path.
        local host, colon, port = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w.%%-]+)(:?)([%d]*)/" )
        if _isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                -- Validate the port with the shared helper, exactly as
                -- _isAuthority() does, instead of a separate weaker check
                -- that looked at the first character only.
                if _isPort( ":" .. port ) then
                    return ( host .. ":" .. port )
                end
            elseif #port == 0 then
                return host
            end
        end
    end
    return false
end -- _getAuthority()



-- Extract the host name (authority without port) from a URL.
-- Parameter:
--     url  -- string; URL as accepted by _getAuthority()
-- Returns the host portion of the authority;
-- false if url is not a string or _getAuthority() yields nothing.
function _getHost( url )
    if type( url ) ~= "string" then
        return false
    end
    local authority = _getAuthority( url )
    if not authority then
        return false
    end
    -- Strip an optional ":port" suffix and keep only the host capture.
    return mw.ustring.match( authority, "^([%w%.%%-]+):?[%d]*$" )
end -- _getHost()



-- Determine the scheme prefix of a URL.
-- Parameter:
--     url  -- string; URL, leading whitespace is tolerated
-- Returns the lowercased scheme followed by "://" when the scheme has
-- more than two letters, "//" for a protocol-relative URL,
-- and false in every other case (including non-string input).
function _getScheme( url )
    if type( url ) ~= "string" then
        return false
    end
    local scheme, separator, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if slashes ~= "//" then
        return false
    end
    if separator == ":" then
        if #scheme > 2 then
            return scheme:lower() .. "://"
        end
        return false
    end
    -- No colon: only a bare "//" (no letters in front) is acceptable.
    if #scheme == 0 then
        return "//"
    end
    return false
end -- _getScheme()



-- Check whether a string is a valid authority: host, optionally with ":port".
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns false if s is not a string or the port is rejected by _isPort();
-- otherwise whatever _isHost() returns for the host part.
function _isAuthority( s )
    if type( s ) ~= "string" then
        return false
    end
    local name, separator, digits = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
    local hasPort = ( separator == ":" )
    if hasPort and not _isPort( ":" .. digits ) then
        return false
    end
    if not hasPort and type( digits ) == "string" then
        -- Without a colon any captured digits belong to the host itself.
        name = name .. digits
    end
    return  _isHost( name )
end -- _isAuthority()



-- Check whether a string looks like a domain name with a top-level domain.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if the part before the last dot starts and ends with an
-- alphanumeric character, contains no "..", and is followed by a
-- top-level domain of at least two ASCII letters; false otherwise.
function _isDomain( s )
    if type( s ) == "string" then
        -- "[...]*%w" (not "[...]+%w") so that a single-character label
        -- in front of the TLD, such as "t.co", is accepted as well.
        s = mw.ustring.match( s, "^%s*([%w%.%%-]*%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
        if type( s ) == "string" then
            -- The captured part must not begin with ".", "%" or "-".
            if mw.ustring.find( s, "^%w" ) then
                -- Plain search: two consecutive dots mean an empty label.
                if mw.ustring.find( s, "..", 1, true ) then
                    return false
                else
                    return true
                end
            end
        end
    end
    return false
end -- _isDomain()



-- Check whether a string is a host: a domain name or an IP address.
-- Parameter:
--     s  -- value to test
-- Returns the result of _isDomain() when that is truthy,
-- otherwise the result of _isIP().
function _isHost( s )
    local domain = _isDomain( s )
    if domain then
        return domain
    end
    return _isIP( s )
end -- _isHost()



-- Classify a string as an IP address.
-- Parameter:
--     s  -- value to test
-- Returns "4" if _isIPv4() accepts s, "6" if _isIPv6() accepts s,
-- otherwise the falsy result of _isIPv6().
function _isIP( s )
    if _isIPv4( s ) then
        return "4"
    end
    local v6 = _isIPv6( s )
    if v6 then
        return "6"
    end
    return v6
end -- _isIP()



-- Check whether a string is an IPv4 address in dotted decimal notation.
-- Parameter:
--     s  -- value to test; surrounding whitespace is tolerated
-- Returns true if s consists of four dot-separated decimal numbers below
-- 256, none of them written with a leading zero; false otherwise.
function _isIPv4( s )
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^%s*(%d+)%.(%d+)%.(%d+)%.(%d+)%s*$" ) }
    if #octets ~= 4 then
        -- Pattern did not match at all.
        return false
    end
    for i = 1, 4 do
        local octet = octets[ i ]
        local value = tonumber( octet ) or 256
        if value >= 256 or octet:find( "^0%d" ) then
            return false
        end
    end
    return true
end -- _isIPv4()



-- Check whether a string is an IPv6 address (hex groups separated by colons,
-- with at most one "::" abbreviation).
-- Parameter:
--     s  -- value to test; surrounding whitespace is tolerated
-- Returns true or false.
function _isIPv6( s )
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- Trim before validating; otherwise padding whitespace would already be
    -- rejected as an illegal character and trimming would have no effect.
    -- Uses %s like _isIPv4() does for its own whitespace tolerance.
    s = s:match( "^%s*(.-)%s*$" )
    if s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed, so every group is preceded by a colon
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    -- With "::" fewer than 8 groups are required, without it exactly 8.
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- _isIPv6()



-- Check whether a string looks like an e-mail address.
-- Parameter:
--     s  -- value to test; surrounding whitespace is tolerated
-- Returns the result of _isDomain() for the part after "@" when the whole
-- string has the shape local-part@domain; false otherwise.
function _isMailAddress( s )
    if type( s ) == "string" then
         -- Anchored with "$" so that nothing but whitespace may follow the
         -- domain; without the anchor trailing junk would be ignored.
         s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
         -- s is nil when the pattern did not match; _isDomain() rejects that.
         return _isDomain( s )
    end
    return false
end -- _isMailAddress()



-- Check whether a string is a port specification of the form ":number".
-- Parameter:
--     port  -- value to test; must include the leading colon,
--              surrounding whitespace is tolerated
-- Returns true if the number has no leading zero and lies in 1..65535;
-- false otherwise.
function _isPort( port )
    if type( port ) == "string" then
        local digits = port:match( "^%s*:([1-9][0-9]*)%s*$" )
        -- Port numbers are 16 bit wide; larger values cannot be valid.
        if digits and tonumber( digits ) <= 65535 then
            return true
        end
    end
    return false
end -- _isPort()



-- Check whether a web URL contains characters "[", "|" or "]".
-- Parameters:
--     url       -- value to test
--     trailing  -- optional; if this is a string the result is always false
-- Returns true if trailing is not a string, _isWebURL() accepts url,
-- and url contains at least one of the three characters; false otherwise.
function _isUnescapedURL( url, trailing )
    if type( trailing ) == "string" then
        return false
    end
    if not _isWebURL( url ) then
        return false
    end
    if url:match( "[%[|%]]" ) then
        return true
    end
    return false
end -- _isUnescapedURL()



-- Check whether a string is a web URL without embedded whitespace.
-- Parameter:
--     url  -- value to test
-- Returns true if both _getScheme() and _getAuthority() accept url and
-- url has no whitespace between two non-whitespace characters;
-- false otherwise.
function _isWebURL( url )
    local wellFormed = _getScheme( url ) and _getAuthority( url )
    if not wellFormed then
        return false
    end
    -- Leading or trailing whitespace is fine, whitespace in the middle is not.
    if url:match( "%S%s+%S" ) then
        return false
    end
    return true
end -- _isWebURL()



-- Provide template access

-- Export table: one small stub per global function, so that templates can
-- reach them via #invoke. Each stub reads its input from frame.args and
-- returns a string. The get*() stubs return the value found or "";
-- isIP() returns "4", "6" or ""; the other is*() stubs return "1" or "".
local p = {

    getAuthority = function ( frame )
        local url = frame.args[ 1 ]
        return _getAuthority( url ) or ""
    end,

    getHost = function ( frame )
        local url = frame.args[ 1 ]
        return _getHost( url ) or ""
    end,

    getScheme = function ( frame )
        local url = frame.args[ 1 ]
        return _getScheme( url ) or ""
    end,

    isAuthority = function ( frame )
        if _isAuthority( frame.args[ 1 ] ) then
            return "1"
        end
        return ""
    end,

    isDomain = function ( frame )
        if _isDomain( frame.args[ 1 ] ) then
            return "1"
        end
        return ""
    end,

    isHost = function ( frame )
        if _isHost( frame.args[ 1 ] ) then
            return "1"
        end
        return ""
    end,

    isIP = function ( frame )
        -- Passes the "4" / "6" classification through unchanged.
        local s = frame.args[ 1 ]
        return _isIP( s ) or ""
    end,

    isIPv4 = function ( frame )
        if _isIPv4( frame.args[ 1 ] ) then
            return "1"
        end
        return ""
    end,

    isIPv6 = function ( frame )
        if _isIPv6( frame.args[ 1 ] ) then
            return "1"
        end
        return ""
    end,

    isMailAddress = function ( frame )
        if _isMailAddress( frame.args[ 1 ] ) then
            return "1"
        end
        return ""
    end,

    isPort = function ( frame )
        if _isPort( frame.args[ 1 ] ) then
            return "1"
        end
        return ""
    end,

    isUnescapedURL = function ( frame )
        if _isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) then
            return "1"
        end
        return ""
    end,

    isWebURL = function ( frame )
        if _isWebURL( frame.args[ 1 ] ) then
            return "1"
        end
        return ""
    end

}

return p