Modul:URLutil: Unterschied zwischen den Versionen

Aus skandinavien-wiki.net
w>PerfektesChaos
(Anpassung)
K (51 Versionen von wikivoyage:Modul:URLutil importiert)
 
(41 dazwischenliegende Versionen von 12 Benutzern werden nicht angezeigt)
Zeile 1: Zeile 1:
-- Module table: carries the library functions plus versioning metadata.
-- `serial` and `item` are read by failsafe() through the Failsafe alias.
local URLutil = { suite  = "URLutil",
                  serial = "2022-04-05",
                  item  = 10859193 }
--[=[
URLutil: Utilities for URL etc. on www.
* decode()
* encode()
* getAuthority()
* getFragment()
* getHost()
* getLocation()
* getNormalized()
* getPath()
* getPort()
* getQuery()
* getQueryTable()
* getRelativePath()
* getScheme()
* getDomain()
* getSortkey()
* getTLD()
* getTop2domain()
* getTop3domain()
* isAuthority()
* isDomain()
* isDomainExample()
* isDomainInt()
* isHost()
* isHostPathResource()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
* failsafe()
Only [[dotted decimal]] notation for IPv4 expected.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway.
]=]
-- Alias of the module table; failsafe() is attached to it further below.
local Failsafe  = URLutil
-- Sets of ASCII characters, selected by a one-letter key, that
-- decodeComponentEscape() searches. Keys used in this file:
--     "F", "P", "Q"  -- passed by getNormalized() for fragment, path, query
--     "X"            -- passed by decodeComponentML()
local decodeComponentProtect = { F = "\"#%<>[\]^`{|}",
                                 P = "\"#%<>[\]^`{|}/?",
                                 Q = "\"#%<>[\]^`{|}&=+;,",
                                 X = "\"#%<>[\]^`{|}&=+;,/?" }
local decodeComponentEscape = function ( averse, adapt )
    -- Tell whether a character code has to stay escaped
    -- Precondition:
    --     averse  -- string, key into decodeComponentProtect
    --     adapt   -- number, character code
    -- Postcondition:
    --     Returns  true    -- for the codes 20 and 127
    --              number  -- position in the protected set, if listed
    --              nil     -- if the character may be shown literally
    -- NOTE(review): 20 is decimal here; hexadecimal 0x20 (space) may have
    --               been intended -- confirm before changing.
    if adapt == 20  or  adapt == 127 then
        return true
    end
    local protected = decodeComponentProtect[ averse ]
    return ( protected:find( string.char( adapt ),  1,  true ) )
end -- decodeComponentEscape()
local decodeComponentML = function ( ask )
    -- Resolve hexadecimal character references (&#xHH;) in a string
    -- Precondition:
    --     ask  -- string
    -- Postcondition:
    --     Returns string; every matched reference is replaced by
    --         &#<decimal>;   if the code is 128 or above
    --         %HH            if decodeComponentEscape( "X", code ) holds
    --         the character  otherwise
    local scan  = "&#[xX]%x%x+;"
    local start = ask:find( scan, 1 )
    while start do
        local stop  = ask:find( ";",  start + 3,  true )
        local token = ask:sub( start + 2,  stop - 1 ):upper()
        local code  = false
        if token:byte( 1, 1 ) == 88 then    -- "X"
            code = tonumber( token:sub( 2 ),  16 )
        elseif token:match( "^%d+$" ) then
            -- NOTE(review): not reachable as long as `scan` requires
            --               the x/X marker.
            code = tonumber( token )
        end
        if code then
            local shift
            if code >= 128 then
                shift = string.format( "&#%d;", code )
            elseif decodeComponentEscape( "X", code ) then
                shift = string.format( "%%%02X", code )
            else
                shift = string.format( "%c", code )
            end
            local rest = ask:sub( stop + 1 )
            if start == 1 then
                ask = shift .. rest
            else
                ask = string.format( "%s%s%s",
                                     ask:sub( 1,  start - 1 ),
                                     shift,
                                     rest )
            end
        end
        -- Resume behind the '&' (or its replacement).
        start = ask:find( scan,  start + 1 )
    end -- while start
    return ask
end -- decodeComponentML()
local decodeComponentPercent = function ( ask, averse )
    -- Decode percent escapes %20...%7F within one URL component
    -- Precondition:
    --     ask     -- string, URL component
    --     averse  -- string, key for decodeComponentEscape()
    -- Postcondition:
    --     Returns string, with escapes replaced by their character unless
    --     decodeComponentEscape() asks to keep them; kept escapes get
    --     their second hex digit changed to upper case if it was lower case
    local i = 1
    local j, k, m, n
    while ( i ) do
        i = ask:find( "%%[2-7]%x", i )
        if i then
            -- j: first hex digit;  k: second hex digit, then the rest
            j = i + 1
            k = j + 1
            n = ask:byte( k, k )
            k = k + 1
            -- m: false, or the upper case code of a lower case hex digit
            m = ( n > 96 )
            if m then
                n = n - 32
                m = n
            end
            -- Convert the second digit character into its value 0...15
            if n > 57 then
                n = n - 55
            else
                n = n - 48
            end
            n = ( ask:byte( j, j ) - 48 )  *  16  +  n
            if n == 39  and
              ask:sub( i + 3,  i + 5 ) == "%27" then
              -- Two or more consecutive %27 are left as they are; skip them
              j = i + 6
              while ( ask:sub( j,  j + 2 )  ==  "%27" ) do
                  j = j + 3
              end -- while "%27"
            elseif decodeComponentEscape( averse, n ) then
                -- Keep escaped; only rewrite a lower case second digit
                if m then
                    ask = string.format( "%s%c%s",
                                        ask:sub( 1, j ),
                                        m,
                                        ask:sub( k ) )
                end
            elseif i == 1 then
                ask = string.format( "%c%s",  n,  ask:sub( k ) )
            else
                ask = string.format( "%s%c%s",
                                    ask:sub( 1,  i - 1 ),
                                    n,
                                    ask:sub( k ) )
            end
            -- Continue scanning behind the current '%' (or skipped run)
            i = j
        end
    end -- while i
    return ask
end -- decodeComponentPercent()
local getTopDomain = function ( url, mode )
    -- Retrieve the last two or three labels of the host name
    -- Precondition:
    --     url   -- passed on to URLutil.getHost()
    --     mode  -- number, 3 for three labels, anything else for two
    -- Postcondition:
    --     Returns string, or false if no host or no match
    local host = URLutil.getHost( url )
    if not host then
        return false
    end
    local pattern = "[%w%%%-]+%.%a[%w%-]*%a)$"
    if mode == 3 then
        pattern = "[%w%%%-]+%." .. pattern
    end
    return mw.ustring.match( "." .. host,  "%.(" .. pattern )  or  false
end -- getTopDomain()
local getHash = function ( url )
    -- Locate the '#' which starts the fragment
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns  number  -- index of the first '#' which is not part of
    --                         a numeric character reference like &#91;
    --                         or &#x5B;
    --              nil     -- if there is no such '#'
    -- Every '#' is inspected in order, so a genuine '#' is reported even
    -- when a character reference occurs later in the string.
    local r = url:find( "#", 1, true )
    while r do
        local entity = false
        if r > 1  and  url:byte( r - 1 ) == 38 then    -- '&'
            entity = url:match( "^%d+;",  r + 1 )  or
                     url:match( "^x%x+;",  r + 1 )
        end
        if not entity then
            break
        end
        r = url:find( "#",  r + 1,  true )
    end -- while r
    return r
end -- getHash()
URLutil.decode = function ( url, enctype )
    -- Decode a percent-encoded string and make it safe for wikitext
    -- Precondition:
    --     url      -- string
    --     enctype  -- string, encoding type for mw.uri.decode(), optional
    -- Postcondition:
    --     Returns string, passed through mw.text.encode(), with
    --     [ | ] replaced by numeric character references
    local mode
    if type( enctype ) == "string" then
        mode = mw.text.trim( enctype )
        if mode == "" then
            mode = false
        else
            mode = mode:upper()
        end
    end
    local r = mw.text.encode( mw.uri.decode( url, mode ) )
    r = r:gsub( "[%[|%]]",
                { [ "[" ] = "&#91;",
                  [ "|" ] = "&#124;",
                  [ "]" ] = "&#93;" } )
    return r
end -- URLutil.decode()
URLutil.encode = function ( url, enctype )
    -- Percent-encode a string
    -- Precondition:
    --     url      -- string
    --     enctype  -- string, encoding type for mw.uri.encode(), optional
    -- Postcondition:
    --     Returns string; a leading * : ; is percent-encoded as well,
    --     and any remaining [ | ] become %5B %7C %5D
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    -- A leading '#' (35) is not escaped; that test was already disabled.
    if k == 42  or      -- *
       k == 58  or      -- :
       k == 59 then     -- ;
        r = string.format( "%%%X%s", k, r:sub( 2 ) )
    end
    -- A table replacement is taken literally. The former replacement
    -- strings "%5B", "%7C", "%5D" were read by gsub as capture references
    -- and would have raised "invalid capture index".
    r = r:gsub( "[%[|%]]",
                { [ "[" ] = "%5B",
                  [ "|" ] = "%7C",
                  [ "]" ] = "%5D" } )
    return r
end -- URLutil.encode()
URLutil.getAuthority = function ( url )
    -- Retrieve host, with port if given, in lower case
    -- Precondition:
    --     url  -- any
    -- Postcondition:
    --     Returns  string  -- "host" or "host:port"
    --              nil     -- url is a string without valid authority
    --              false   -- url is not a string
    if type( url ) ~= "string" then
        return false
    end
    local text = mw.text.decode( url )
    local hash = text:find( "#", 6, true )
    if hash then
        text = text:sub( 1,  hash - 1 )
    end
    -- A trailing slash guarantees that the pattern can terminate.
    local host, colon, port =
        mw.ustring.match( text .. "/",
                          "^%s*%w*:?//([%w%.%%_-]+)(:?)([%d]*)/" )
    if not URLutil.isHost( host ) then
        return nil
    end
    host = mw.ustring.lower( host )
    if colon == ":" then
        if port:find( "^[1-9]" ) then
            return host .. ":" .. port
        end
    elseif #port == 0 then
        return host
    end
    return nil
end -- URLutil.getAuthority()
URLutil.getFragment = function ( url, decode )
    -- Retrieve the fragment, without leading '#'
    -- Precondition:
    --     url     -- any
    --     decode  -- string, "%" or "WIKI" to decode the fragment, optional
    -- Postcondition:
    --     Returns  string  -- fragment
    --              false   -- url is a string without fragment
    --              nil     -- url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local at = getHash( url )
    if not at then
        return false
    end
    local r = mw.text.trim( url:sub( at ) ):sub( 2 )
    if type( decode ) == "string" then
        local encoding = mw.text.trim( decode )
        if encoding == "WIKI" then
            -- Turn .HH into %HH and underscores into spaces first.
            r = r:gsub( "%.(%x%x)", "%%%1" )
                 :gsub( "_", " " )
        end
        if encoding == "%"  or  encoding == "WIKI" then
            r = mw.uri.decode( r, "PATH" )
        end
    end
    return r
end -- URLutil.getFragment()
URLutil.getHost = function ( url )
    -- Retrieve the host name, without port
    -- Postcondition:
    --     Returns string, or the falsy result of URLutil.getAuthority()
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    return ( mw.ustring.match( authority, "^([%w%.%%_%-]+):?[%d]*$" ) )
end -- URLutil.getHost()
URLutil.getLocation = function ( url )
    -- Retrieve the URL without fragment
    -- Precondition:
    --     url  -- any
    -- Postcondition:
    --     Returns  string  -- trimmed url up to, excluding, the fragment
    --              false   -- url is blank or starts with the fragment
    --              nil     -- url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local r = mw.text.trim( url )
    if r == "" then
        return false
    end
    local hash = getHash( r )
    if hash == 1 then
        return false
    elseif hash then
        r = r:sub( 1,  hash - 1 )
    end
    return r
end -- URLutil.getLocation()
URLutil.getNormalized = function ( url )
    -- Bring a URL into a uniform shape
    -- Precondition:
    --     url  -- any
    -- Postcondition:
    --     Returns  string  -- trimmed; character references resolved by
    --                         decodeComponentML(); if "//" is present,
    --                         the part before the first following "/" in
    --                         lower case and percent escapes decoded per
    --                         component; finally space [ | ] < > encoded
    --              false   -- url is blank or not a string
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            r = decodeComponentML( r )
        end
    else
        r = false
    end
    if r then
        local k = r:find( "//", 1, true )
        if k then
            -- j: first slash behind "//", located before r gets shortened
            local j = r:find( "/",  k + 2,  true )
            -- sF, sP, sQ: fragment, path, query (query keeps its '?')
            local sF, sP, sQ
            if r:find( "%%[2-7]%x" ) then
                -- Percent escapes present: split off and decode each part
                local i = getHash( r )
                if i then
                    sF = r:sub( i + 1 )
                    r  = r:sub( 1,  i - 1 )
                    if sF == "" then
                        sF = false
                    else
                        sF = decodeComponentPercent( sF, "F" )
                    end
                end
                i = r:find( "?", 1, true )
                if i then
                    sQ = r:sub( i )
                    r  = r:sub( 1,  i - 1 )
                    sQ = decodeComponentPercent( sQ, "Q" )
                end
                if j then
                    if #r > j then
                        sP = r:sub( j + 1 )
                        sP = decodeComponentPercent( sP, "P" )
                    end
                    r = r:sub( 1,  j - 1 )
                end
            elseif j then
                -- No percent escapes: only drop a trailing '#' and split
                -- at the first slash; query and fragment stay within sP
                local n = #r
                if r:byte( n, n ) == 35 then    -- '#'
                    n = n - 1
                    r = r:sub( 1, n )
                end
                if n > j then
                    sP = r:sub( j + 1 )
                end
                r = r:sub( 1,  j - 1 )
            end
            -- Whatever is left in r is lowercased and gets a closing slash
            r = mw.ustring.lower( r ) .. "/"
            if sP then
                r = r .. sP
            end
            if sQ then
                r = r .. sQ
            end
            if sF then
                r = string.format( "%s#%s", r, sF )
            end
        end
        -- Applied to every non-blank string, also those without "//"
        r = r:gsub( " ",  "%%20" )
            :gsub( "%[", "%%5B" )
            :gsub( "|",  "%%7C" )
            :gsub( "%]", "%%5D" )
            :gsub( "%<", "%%3C" )
            :gsub( "%>", "%%3E" )
    end
    return r
end -- URLutil.getNormalized()
URLutil.getPath = function ( url )
    -- Retrieve the path, without query and fragment
    -- Postcondition:
    --     Returns string, or the falsy result of
    --     URLutil.getRelativePath()
    local r = URLutil.getRelativePath( url )
    if r then
        -- Cut at the first '?', then at the first '#'.
        r = r:match( "^([^%?]*)%?" )  or  r
        r = r:match( "^([^#]*)#" )  or  r
    end
    return r
end -- URLutil.getPath()
URLutil.getPort = function ( url )
    -- Retrieve the port number
    -- Postcondition:
    --     Returns  number  -- port
    --              false   -- authority without port
    --              or the falsy result of URLutil.getAuthority()
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    local digits = authority:match( ":([1-9][0-9]*)$" )
    if digits then
        return tonumber( digits )
    end
    return false
end -- URLutil.getPort()
URLutil.getQuery = function ( url, key, separator )
    -- Retrieve the query string, or the value of a single parameter
    -- Precondition:
    --     url        -- any, passed on to URLutil.getLocation()
    --     key        -- string, parameter name, optional
    --     separator  -- string, one of & ; , /  (default: &), optional
    -- Postcondition:
    --     Returns  string  -- entire query without '?' if no key given,
    --                         else the value assigned to key
    --              false   -- no query, or key not present
    --              or the falsy result of URLutil.getLocation()
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r  and  type( key ) == "string" then
            local single = mw.text.trim( key )
            local sep = "&"
            if type( separator ) == "string" then
                local s = mw.text.trim( separator )
                if s:match( "^[&;,/]$" ) then
                    sep = s
                end
            end
            -- Escape punctuation so that names like "a.b" or "a-b" are
            -- matched literally and not read as pattern items.
            local seek = single:gsub( "%p", "%%%0" )
            local scan = string.format( "%s%s=([^%s]*)%s",
                                        sep, seek, sep, sep )
            -- Enclose in separators so that first and last parameter
            -- match as well.
            r = ( sep .. r .. sep ):match( scan )
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()
URLutil.getQueryTable = function ( url, separator )
    -- Retrieve all query parameters as a table
    -- Precondition:
    --     url        -- any, passed on to URLutil.getQuery()
    --     separator  -- string, one of & ; , /  (default: &), optional
    -- Postcondition:
    --     Returns  table  -- name -> value string, or name -> false for
    --                        an item without '=' behind its first
    --                        character
    --              or the falsy result of URLutil.getQuery()
    local query = URLutil.getQuery( url )
    if not query then
        return query
    end
    local sep = "&"
    if type( separator ) == "string" then
        local given = mw.text.trim( separator )
        if given:match( "^[&;,/]$" ) then
            sep = given
        end
    end
    local r = { }
    for _, item in ipairs( mw.text.split( query, sep, true ) ) do
        if item:find( "=", 2, true ) then
            local name, value = item:match( "^([^=]+)=(.*)$" )
            if name then
                r[ name ] = value
            end
        else
            r[ item ] = false
        end
    end -- for item
    return r
end -- URLutil.getQueryTable()
URLutil.getRelativePath = function ( url )
    -- Retrieve everything from the first slash behind the authority
    -- Precondition:
    --     url  -- any
    -- Postcondition:
    --     Returns  string  -- trimmed path with query and fragment;
    --                         "/" for a resource URL without path
    --              false   -- nothing found
    --              nil     -- url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local path
    local rest = url:match( "^%s*[a-zA-Z]*://(.*)$" )
    if rest then
        path = rest:match( "[^/]+(/.*)$" )
    else
        local lead
        lead, path = url:match( "^%s*(/?)(/.*)$" )
        if lead == "/" then
            -- Protocol-relative: skip the authority.
            path = path:match( "/[^/]+(/.*)$" )
        end
    end
    if path then
        local r = mw.text.trim( path )
        return r
    end
    if URLutil.isResourceURL( url ) then
        return "/"
    end
    return false
end -- URLutil.getRelativePath()
URLutil.getScheme = function ( url )
    -- Retrieve the scheme, in lower case, with "://"
    -- Precondition:
    --     url  -- any
    -- Postcondition:
    --     Returns  string  -- like "https://", or "//" if protocol-relative
    --              false   -- no "//", scheme shorter than three letters,
    --                         or letters without colon
    --              nil     -- url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if slashes ~= "//" then
        return false
    end
    if colon == ":" then
        if #prot > 2 then
            return prot:lower() .. "://"
        end
    elseif #prot == 0 then
        return "//"
    end
    return false
end -- URLutil.getScheme()
URLutil.getSortkey = function ( url )
    -- Build a sort key with the domain labels in reverse order
    -- Precondition:
    --     url  -- any
    -- Postcondition:
    --     Returns  string  -- "///tld.domain.host PORT scheme: rest"
    --                         where PORT is 0 if none is given and '#' in
    --                         the rest is replaced by a space
    --              url unchanged, if not a string or without scheme + "//"
    local r = url
    if type( url ) == "string" then
        local i = url:find( "//", 1, true )
        if i then
            local scheme
            if i == 1 then
                -- Protocol-relative URL. string.find() never yields 0,
                -- so the former test `i == 0` could not succeed.
                scheme = ""
            else
                scheme = url:match( "^%s*([a-zA-Z]*)://" )
            end
            if scheme then
                local s = url:sub( i + 2 )
                local comps, site, port, suffix
                scheme = scheme:lower()
                i      = s:find( "/", 1, true )
                if i  and  i > 1 then
                    suffix = s:sub( i + 1 )
                    s      = s:sub( 1,  i - 1 )
                    suffix = suffix:gsub( "#", " " )
                else
                    suffix = ""
                end
                -- Capture digits only; the colon formerly captured with
                -- them made string.format( "%d" ) raise an error.
                site, port = s:match( "^(.+):(%d+)$" )
                if port then
                    port = tonumber( port )
                else
                    site = s
                    port = 0
                end
                comps = mw.text.split( site:lower(), ".", true )
                r = "///"
                for k = #comps, 2, -1 do
                    r = string.format( "%s%s.", r, comps[ k ] )
                end -- for --k
                r = string.format( "%s%s %d %s: %s",
                                   r, comps[ 1 ], port, scheme, suffix )
            end
        end
    end
    return r
end -- URLutil.getSortkey()
URLutil.getTLD = function ( url )
    -- Retrieve the top level domain
    -- Postcondition:
    --     Returns  string  -- TLD
    --              false   -- host without matching TLD
    --              or the falsy result of URLutil.getHost()
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    return mw.ustring.match( host, "%w+%.(%a[%w%-]*%a)$" )  or  false
end -- URLutil.getTLD()
URLutil.getTop2domain = function ( url )
    -- Retrieve the last two labels of the host; string or false
    local levels = 2
    return getTopDomain( url, levels )
end -- URLutil.getTop2domain()
URLutil.getTop3domain = function ( url )
    -- Retrieve the last three labels of the host; string or false
    local levels = 3
    return getTopDomain( url, levels )
end -- URLutil.getTop3domain()
URLutil.isAuthority = function ( s )
    -- Is this a host, optionally followed by a port?
    -- Precondition:
    --     s  -- any
    -- Postcondition:
    --     Returns  result of URLutil.isHost() for the host part
    --              false  -- malformed, or a colon not followed by a
    --                        port number without leading zero
    --              nil    -- s is not a string
    if type( s ) ~= "string" then
        return nil
    end
    local host, colon, port =
        mw.ustring.match( s, "^%s*([%w%.%%_-]+)(:?)(%d*)%s*$" )
    -- The port verdict is returned at once; formerly it was overwritten
    -- by the host check, so "example.org:" passed.
    if colon == ":" then
        if not port:match( "^[1-9][0-9]*$" ) then
            return false
        end
    elseif port ~= "" then
        -- Also covers a failed match, where port is nil.
        return false
    end
    return URLutil.isHost( host )
end -- URLutil.isAuthority()
URLutil.isDomain = function ( s )
    -- Is this a syntactically plausible domain name?
    -- Precondition:
    --     s  -- any
    -- Postcondition:
    --     Returns  true   -- labels plus TLD, no ".." inside
    --              false  -- matched, but contains ".."
    --              nil    -- not a string, or no match
    if type( s ) ~= "string" then
        return nil
    end
    local name = mw.ustring.match( s,
                                   "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$" )
    if type( name ) == "string"  and  mw.ustring.find( name, "^%w" ) then
        return not mw.ustring.find( name, "..", 1, true )
    end
    return nil
end -- URLutil.isDomain()
URLutil.isDomainExample = function ( url )
    -- Is the host below example.com / .edu / .net / .org?
    -- RFC 2606: example.com example.net example.org example.edu
    -- Postcondition:
    --     Returns boolean
    local r = getTopDomain( url, 2 )
    if r then
        local tld = r:lower():match( "^example%.([a-z][a-z][a-z])$" )
        r = ( tld == "com"  or
              tld == "edu"  or
              tld == "net"  or
              tld == "org" )
    end
    return r
end -- URLutil.isDomainExample()
URLutil.isDomainInt = function ( url )
    -- Internationalized Domain Name (Punycode)?
    -- Postcondition:
    --     Returns  true   -- host has characters outside "!"..."~",
    --                        or a label starting with "xn--"
    --              false  -- otherwise
    --              or the falsy result of URLutil.getHost()
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    if not host:match( "^[!-~]+$" ) then
        return true
    end
    return ( ( "." .. host ):find( ".xn--", 1, true )  ~=  nil )
end -- URLutil.isDomainInt()
URLutil.isHost = function ( s )
    -- Is this a domain or an IP address?
    -- Returns the truthy result of URLutil.isDomain(), otherwise
    -- whatever URLutil.isIP() returns.
    local r = URLutil.isDomain( s )
    if not r then
        r = URLutil.isIP( s )
    end
    return r
end -- URLutil.isHost()
URLutil.isHostPathResource = function ( s )
    -- Is this a resource URL, with or without leading scheme / "//"?
    -- Returns the result of URLutil.isResourceURL(), retried with "//"
    -- prepended to the trimmed s if the first attempt failed.
    local r = URLutil.isResourceURL( s )
    if r  or  not s then
        return r
    end
    return URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
end -- URLutil.isHostPathResource()
URLutil.isIP = function ( s )
    -- Which IP version is this address?
    -- Returns 4, 6, or the falsy result of URLutil.isIPv6().
    if URLutil.isIPv4( s ) then
        return 4
    end
    return URLutil.isIPv6( s )  and  6
end -- URLutil.isIP()
URLutil.isIPlocal = function ( s )
    -- Is this a local / private IPv4 address?
    -- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735)
    -- Precondition:
    --     s  -- any
    -- Postcondition:
    --     Returns  true   -- 0.*.*.*, 10.*.*.*, 127.*.*.*,
    --                        172.16-31.*.* or 192.168.*.*
    --              false  -- otherwise, also if s is not a string
    local r = false
    if type( s ) ~= "string" then
        return false
    end
    local num = s:match( "^ *([01][0-9]*)%." )
    if num then
        num = tonumber( num )
        if num == 0 then
            r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
        elseif num == 10  or  num == 127 then
            -- loopback; private/local host: 127.0.0.1
            r = URLutil.isIPv4( s )
        elseif num == 172 then
            -- 172.(16...31).*.*
            num = s:match( "^ *0*172%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num >= 16  and  num <= 31 then
                    r = URLutil.isIPv4( s )
                end
            end
        elseif num == 192 then
            -- 192.168.*.*
            -- Formerly tested the undefined name `beg`, so this branch
            -- was never entered.
            num = s:match( "^ *0*192%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num == 168 then
                    r = URLutil.isIPv4( s )
                end
            end
        end
        -- 169.254.*.* is still not reported as local; the original had
        -- an empty branch for it.
    end
    return r and true or false
end -- URLutil.isIPlocal()
URLutil.isIPv4 = function ( s )
    -- Is this an IPv4 address in dotted decimal notation?
    -- Precondition:
    --     s  -- any
    -- Postcondition:
    --     Returns boolean; true if four numbers below 256 are found and
    --     the first one does not start with 0
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" ) }
    if #octets ~= 4 then
        return false
    end
    for k = 1, 4 do
        if tonumber( octets[ k ] ) >= 256 then
            return false
        end
    end -- for k
    return true
end -- URLutil.isIPv4()


--[=[
Functions are not "local", so other modules can require this module and call them directly.
We return an object with small stub functions to call the real ones so that the functions
can be called from templates also.
----
Based upon  w:en:Special:Permalink/542839577?title=Module:IPAddress  2013-03-01
Unit tests at :en:Module:IPAddress/tests
]=]


-- The lines of two revisions were interleaved here (two function headers,
-- duplicated statements, unbalanced if/end); this is the single
-- consolidated definition.
URLutil.isIPv6 = function ( s )
    -- Is this an IPv6 address?
    -- Precondition:
    --     s  -- any
    -- Postcondition:
    --     Returns boolean; true for eight groups of one to four hex
    --     digits, or fewer groups with exactly one "::"
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    s = mw.text.trim( s )
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()

-- Global name of the older revision, kept because _isIP() further below
-- still calls it.
function _isIPv6( s )
    return URLutil.isIPv6( s )
end
 
 
 
URLutil.isMailAddress = function ( s )
    -- Is this a plausible e-mail address (local part @ domain)?
    -- Returns false if s is not a string, otherwise the result of
    -- URLutil.isDomain() for the part behind '@'.
    if type( s ) ~= "string" then
        return false
    end
    local domain = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
    return URLutil.isDomain( domain )
end -- URLutil.isMailAddress()
 
 
 
URLutil.isMailLink = function ( s )
    -- Is this a mailto: link with a plausible address?
    -- Returns false if s is not a string or does not match, otherwise
    -- the result of URLutil.isMailAddress() for the address.
    if type( s ) == "string" then
        local scheme, addr =
            mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( scheme ) == "string"  and  scheme:lower() == "mailto" then
            return URLutil.isMailAddress( addr )
        end
    end
    return false
end -- URLutil.isMailLink()
 
 
 
local function isProtocolAccepted( prot, supplied )
    -- Is the scheme one of a given list?
    -- Precondition:
    --     prot      -- any; scheme, optionally followed by ":" and "//"
    --     supplied  -- string, lower case schemes, each enclosed in spaces
    -- Postcondition:
    --     Returns boolean; a bare "//" is accepted, too
    if type( prot ) ~= "string" then
        return false
    end
    local scheme, colon, slashes =
        mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
    if slashes == "/" then
        return false
    end
    if scheme == "" then
        return ( colon ~= ":"  and  slashes == "//" )
    end
    if colon == ":"  or  slashes == "" then
        local found = supplied:match( " " .. scheme:lower() .. " " )
        return type( found ) == "string"
    end
    return false
end -- isProtocolAccepted()
 
 
 
URLutil.isProtocolDialog = function ( prot )
    -- Is the scheme one of mailto, irc, ircs, ssh, telnet?
    local schemes = " mailto irc ircs ssh telnet "
    return isProtocolAccepted( prot, schemes )
end -- URLutil.isProtocolDialog()
 
 
 
URLutil.isProtocolWiki = function ( prot )
    -- Is the scheme one of the list below?
    local schemes = " ftp ftps git http https nntp sftp svn worldwind "
    return isProtocolAccepted( prot, schemes )
end -- URLutil.isProtocolWiki()
 
 
 
URLutil.isResourceURL = function ( url )
    -- Is this a URL of a resource (//, http, https, ftp, sftp)?
    -- Postcondition:
    --     Returns boolean; requires a valid authority, no whitespace
    --     between non-space characters, and a slash behind the authority
    --     if '#' is present
    local scheme = URLutil.getScheme( url )
    if not scheme then
        return false
    end
    local known = " // http:// https:// ftp:// sftp:// "
    if not known:find( string.format( " %s ", scheme ) ) then
        return false
    end
    if not URLutil.getAuthority( url ) then
        return false
    end
    if url:match( "%S%s+%S" ) then
        return false
    end
    local _, hashed = url:match( "^([^#]+)(#.*)$" )
    if hashed then
        return url:match( "^%s*[a-zA-Z]*:?//(.+)/" )  ~=  nil
    end
    return true
end -- URLutil.isResourceURL()
 
 
 
URLutil.isSuspiciousURL = function ( url )
    -- Does this URL look suspicious?
    -- Postcondition:
    --     Returns boolean; true if not a resource URL, or if it contains
    --     '@' in the authority, two apostrophes, one of [ | ] ",
    --     a character in U+2009...U+200F or U+202A...U+202F, U+2060,
    --     or ends with '.' or ','
    if not URLutil.isResourceURL( url ) then
        return true
    end
    local s   = URLutil.getAuthority( url )
    local pat = "[%[|%]" ..
                mw.ustring.char( 34,
                                 8201, 45, 8207,
                                 8234, 45, 8239,
                                 8288 )
                .. "]"
    -- The set holds ranges of multi-byte characters, so it needs the
    -- Unicode-aware search. The byte-oriented string.find() read it as
    -- byte ranges and matched almost any non-ASCII character.
    if s:find( "@", 1, true )
       or url:find( "''", 1, true )
       or mw.ustring.find( url, pat )
       or url:find( "[%.,]$" ) then
        return true
    end
    -- TODO  zero width character ??
    return false
end -- URLutil.isSuspiciousURL()
 
 
 
URLutil.isUnescapedURL = function ( url, trailing )
    -- Is this a web URL containing [ | ] ?
    -- Precondition:
    --     url       -- any
    --     trailing  -- any; if a string, the result is always false
    -- Postcondition:
    --     Returns boolean
    if type( trailing ) == "string" then
        return false
    end
    if not URLutil.isWebURL( url ) then
        return false
    end
    return url:match( "[%[|%]]" )  ~=  nil
end -- URLutil.isUnescapedURL()
 
 
 
URLutil.isWebURL = function ( url )
    -- Is this a URL with scheme (or "//") and valid authority, free of
    -- inner whitespace and of two consecutive apostrophes?
    -- Postcondition:
    --     Returns boolean
    if not ( URLutil.getScheme( url )  and  URLutil.getAuthority( url ) ) then
        return false
    end
    local gap    = url:find( "%S%s+%S" )
    local quotes = url:find( "''", 1, true )
    return not ( gap  or  quotes )
end -- URLutil.isWebURL()
 
 
 
URLutil.wikiEscapeURL = function ( url )
    -- Replace [ | ] by numeric character references
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns string
    local escaped = url:gsub( "[%[|%]]",
                              { [ "[" ] = "&#91;",
                                [ "|" ] = "&#124;",
                                [ "]" ] = "&#93;" } )
    return escaped
end -- URLutil.wikiEscapeURL()
 
 
 
Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --    atleast  -- string, with required version
    --                        or wikidata|item|~|@ or false
    -- Postcondition:
    --    Returns  string  -- with queried version/item, also if problem
    --              false  -- if appropriate
    -- 2020-08-17
    local since = atleast
    -- Flags for the special request keywords
    local last    = ( since == "~" )
    local linked  = ( since == "@" )
    local link    = ( since == "item" )
    local r
    if last  or  link  or  linked  or  since == "wikidata" then
        local item = Failsafe.item
        -- A keyword is not a version; skip the comparison at the end
        since = false
        if type( item ) == "number"  and  item > 0 then
            local suited = string.format( "Q%d", item )
            if link then
                -- "item": the entity ID itself is the answer
                r = suited
            else
                local entity = mw.wikibase.getEntity( suited )
                if type( entity ) == "table" then
                    local seek = Failsafe.serialProperty or "P348"
                    local vsn  = entity:formatPropertyValues( seek )
                    if type( vsn ) == "table"  and
                      type( vsn.value ) == "string"  and
                      vsn.value ~= "" then
                        if last  and  vsn.value == Failsafe.serial then
                            -- "~": local serial equals the registered one
                            r = false
                        elseif linked then
                            -- "@": false if the current page is the
                            -- sitelink of the item, else the entity ID
                            if mw.title.getCurrentTitle().prefixedText
                              ==  mw.wikibase.getSitelink( suited ) then
                                r = false
                            else
                                r = suited
                            end
                        else
                            r = vsn.value
                        end
                    end
                end
            end
        end
    end
    -- Nothing decided yet: answer with the local serial, provided that a
    -- required version is not greater (plain string comparison)
    if type( r ) == "nil" then
        if not since  or  since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()
 
 
 
local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --    frame   -- object
    --    action  -- string, with function name
    --    amount  -- number, of args if > 1
    -- Postcondition:
    --    Return string or not
    local v = { }
    for i = 1,  amount or 1 do
        local s = frame.args[ i ]
        if s then
            s = mw.text.trim( s )
            if s ~= "" then
                v[ i ] = s
            end
        end
    end -- for i
    if not v[ 1 ] then
        -- Nothing to do without a first argument.
        return nil
    end
    return ( URLutil[ action ]( v[ 1 ], v[ 2 ], v[ 3 ] ) )
end -- Template()


function _isIPv4( s )
    -- Is this an IPv4 address in dotted decimal notation?
    -- Returns boolean; true for four numbers below 256, none of them
    -- written with a leading zero. No surrounding whitespace permitted.
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^(%d+)%.(%d+)%.(%d+)%.(%d+)$" ) }
    if #octets ~= 4 then
        return false
    end
    for k = 1, 4 do
        local part = octets[ k ]
        if tonumber( part ) >= 256  or  part:match( "^0%d" ) then
            return false
        end
    end -- for k
    return true
end


function _isIP( s )
    -- Which IP version is this address?
    -- Returns "4", "6", or the falsy result of _isIPv6().
    if _isIPv4( s ) then
        return "4"
    end
    return _isIPv6( s )  and  "6"
end


-- Export table with the entry points for template transclusion.
local p = {}

-- One-line p.isIPv6 / p.isIPv4 / p.isIP stubs of the older revision were
-- interleaved into p.decode() here, which left a statement behind `return`.
-- They are dropped: the definitions of the same names further below in
-- this file replace them anyway.
function p.decode( frame )
    -- Template entry: URLutil.decode() with up to two arguments;
    -- empty string if there is no result.
    return Template( frame, "decode", 2 ) or ""
end
function p.encode( frame )
    -- Template entry: URLutil.encode() with up to two arguments;
    -- empty string if there is no result.
    local r = Template( frame, "encode", 2 )
    if not r then
        r = ""
    end
    return r
end
function p.getAuthority( frame )
    -- Template entry: URLutil.getAuthority(); empty string if no result.
    local r = Template( frame, "getAuthority" )
    if not r then
        r = ""
    end
    return r
end
function p.getFragment( frame )
    -- Template entry: URLutil.getFragment() with up to two arguments;
    -- the result gets '#' prepended, empty string if no result.
    local r = Template( frame, "getFragment", 2 )
    if not r then
        return ""
    end
    return "#" .. r
end
function p.getHost( frame )
    -- Template entry: URLutil.getHost(); empty string if no result.
    local r = Template( frame, "getHost" )
    if not r then
        r = ""
    end
    return r
end
function p.getLocation( frame )
    -- Template entry: URLutil.getLocation(); empty string if no result.
    local r = Template( frame, "getLocation" )
    if not r then
        r = ""
    end
    return r
end
function p.getNormalized( frame )
    -- Template entry: URLutil.getNormalized(); empty string if no result.
    local r = Template( frame, "getNormalized" )
    if not r then
        r = ""
    end
    return r
end
function p.getPath( frame )
    -- Template entry: URLutil.getPath(); empty string if no result.
    local r = Template( frame, "getPath" )
    if not r then
        r = ""
    end
    return r
end
function p.getPort( frame )
    -- Template entry: URLutil.getPort(); empty string if no result.
    local r = Template( frame, "getPort" )
    if not r then
        r = ""
    end
    return r
end
function p.getQuery( frame )
    -- Template entry: URLutil.getQuery() with up to three arguments.
    -- Without a non-blank second argument (key) the result gets '?'
    -- prepended; empty string if there is no result.
    local r = Template( frame, "getQuery", 3 )
    if not r then
        return ""
    end
    local key = frame.args[ 2 ]
    if not key  or  mw.text.trim( key ) == "" then
        r = "?" .. r
    end
    return r
end
function p.getRelativePath( frame )
    -- Template entry: URLutil.getRelativePath(); empty string if no result.
    local r = Template( frame, "getRelativePath" )
    if not r then
        r = ""
    end
    return r
end
function p.getScheme( frame )
    -- Template entry: URLutil.getScheme(); empty string if no result.
    local r = Template( frame, "getScheme" )
    if not r then
        r = ""
    end
    return r
end
function p.getSortkey( frame )
    -- Template entry: URLutil.getSortkey(); empty string if no result.
    local r = Template( frame, "getSortkey" )
    if not r then
        r = ""
    end
    return r
end
function p.getTLD( frame )
    -- Template entry: URLutil.getTLD(); empty string if no result.
    local r = Template( frame, "getTLD" )
    if not r then
        r = ""
    end
    return r
end
function p.getTop2domain( frame )
    -- Template entry: URLutil.getTop2domain(); empty string if no result.
    local r = Template( frame, "getTop2domain" )
    if not r then
        r = ""
    end
    return r
end
function p.getTop3domain( frame )
    -- Template entry: URLutil.getTop3domain(); empty string if no result.
    local r = Template( frame, "getTop3domain" )
    if not r then
        r = ""
    end
    return r
end
function p.isAuthority( frame )
    -- Template entry: "1" if URLutil.isAuthority() succeeds, else "".
    if Template( frame, "isAuthority" ) then
        return "1"
    end
    return ""
end
function p.isDomain( frame )
    -- Template entry: "1" if URLutil.isDomain() succeeds, else "".
    if Template( frame, "isDomain" ) then
        return "1"
    end
    return ""
end
function p.isDomainExample( frame )
    -- Template entry: "1" if URLutil.isDomainExample() succeeds, else "".
    if Template( frame, "isDomainExample" ) then
        return "1"
    end
    return ""
end
function p.isDomainInt( frame )
    return Template( frame, "isDomainInt" ) and "1" or ""
end
function p.isHost( frame )
    return Template( frame, "isHost" ) and "1" or ""
end
function p.isHostPathResource( frame )
    return Template( frame, "isHostPathResource" ) and "1" or ""
end
function p.isIP( frame )
    return Template( frame, "isIP" ) or ""
end
function p.isIPlocal( frame )
    return Template( frame, "isIPlocal" ) and "1" or ""
end
function p.isIPv4( frame )
    return Template( frame, "isIPv4" ) and "1" or ""
end
function p.isIPv6( frame )
    return Template( frame, "isIPv6" ) and "1" or ""
end
function p.isMailAddress( frame )
    return Template( frame, "isMailAddress" ) and "1" or ""
end
function p.isMailLink( frame )
    return Template( frame, "isMailLink" ) and "1" or ""
end
function p.isProtocolDialog( frame )
    return Template( frame, "isProtocolDialog" ) and "1" or ""
end
function p.isProtocolWiki( frame )
    return Template( frame, "isProtocolWiki" ) and "1" or ""
end
function p.isResourceURL( frame )
    return Template( frame, "isResourceURL" ) and "1" or ""
end
function p.isSuspiciousURL( frame )
    return Template( frame, "isSuspiciousURL" ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return Template( frame, "isUnescapedURL", 2 ) and "1" or ""
end
function p.isWebURL( frame )
    return Template( frame, "isWebURL" ) and "1" or ""
end
function p.wikiEscapeURL( frame )
    return Template( frame, "wikiEscapeURL" )
end
-- Versioning entry point; callable from a template (frame object)
-- or from Lua with a plain string.
--     frame  -- table with args[ 1 ], or string, with the request
--               that is forwarded to Failsafe.failsafe()
-- Returns the result of Failsafe.failsafe(), or "" if that is false.
p.failsafe = function ( frame )
    local s = type( frame )
    local since
    if s == "table" then
        since = frame.args[ 1 ]
    elseif s == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            -- blank request: treated like no request
            since = false
        end
    end
    return Failsafe.failsafe( since ) or ""
end
-- Provide the URLutil function table itself, for use by other Lua code.
function p.URLutil()
    return URLutil
end


return p
return p

Aktuelle Version vom 11. Februar 2023, 21:01 Uhr

Die Dokumentation für dieses Modul kann unter Modul:URLutil/doc erstellt werden

-- Function table of this module; also carries the versioning data
-- read by failsafe():
--     suite   -- name of the module
--     serial  -- version identifier (date string)
--     item    -- number of the item; failsafe() formats it as "Q<item>"
local URLutil = { suite  = "URLutil",
                  serial = "2022-04-05",
                  item   = 10859193 }
--[=[
Utilities for URL etc. on www.
* decode()
* encode()
* getAuthority()
* getFragment()
* getHost()
* getLocation()
* getNormalized()
* getPath()
* getPort()
* getQuery()
* getQueryTable()
* getRelativePath()
* getScheme()
* getSortkey()
* getTLD()
* getTop2domain()
* getTop3domain()
* isAuthority()
* isDomain()
* isDomainExample()
* isDomainInt()
* isHost()
* isHostPathResource()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
* failsafe()
Only [[dotted decimal]] notation for IPv4 expected.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wikitext syntax escaping anyway.
]=]
-- Alias: Failsafe.failsafe() reads serial and item from this same table.
local Failsafe  = URLutil



-- Characters which need to remain percent-encoded, by kind of component:
--     F -- fragment
--     P -- path
--     Q -- query
--     X -- any; used for resolved character references
-- "]" is written without backslash: "\]" is the same character in Lua 5.1,
-- but an invalid escape sequence in later Lua versions.
local decodeComponentProtect = { F = "\"#%<>[]^`{|}",
                                 P = "\"#%<>[]^`{|}/?",
                                 Q = "\"#%<>[]^`{|}&=+;,",
                                 X = "\"#%<>[]^`{|}&=+;,/?" }



local decodeComponentEscape = function ( averse, adapt )
    -- Does this character need to remain percent-encoded?
    -- Precondition:
    --     averse  -- string, key in decodeComponentProtect: F, P, Q, X
    --     adapt   -- number, character code; less than 256
    -- Postcondition:
    --     Returns truthy (true, or a position), if encoding to be kept
    --             nil, if the plain character is permitted
    -- Control characters, space (32 = 0x20) and DEL are never permitted.
    -- The previous comparison with decimal 20 missed the space.
    return  adapt <= 32  or  adapt == 127  or
            decodeComponentProtect[ averse ]:find( string.char( adapt ),
                                                   1,
                                                   true )
end -- decodeComponentEscape()



local decodeComponentML = function ( ask )
    -- Resolve hexadecimal character references  &#xHH;  within a URL
    -- Precondition:
    --     ask  -- string, with URL
    -- Postcondition:
    --     Returns string; every reference has been replaced by
    --             * decimal reference &#N; for codes from 128 upwards
    --             * %HH, if decodeComponentEscape() asks for protection
    --             * the plain character otherwise
    -- Only hexadecimal references with two or more digits are matched.
    local resolve = function ( hex )
        local code = tonumber( hex, 16 )
        local shift
        if code >= 128 then
            shift = string.format( "&#%d;", code )
        elseif decodeComponentEscape( "X", code ) then
            shift = string.format( "%%%02X", code )
        else
            shift = string.format( "%c", code )
        end
        return shift
    end -- resolve()
    -- single pass; replacement text is never scanned again
    local r = ask:gsub( "&#[xX](%x%x+);", resolve )
    return r
end -- decodeComponentML()



local decodeComponentPercent = function ( ask, averse )
    -- Decode percent escapes %20...%7F unless the character is protected
    -- Precondition:
    --     ask     -- string, with URL component
    --     averse  -- string, key for decodeComponentEscape(): F, P, Q, X
    -- Postcondition:
    --     Returns string; unprotected escapes are replaced by their
    --             character, and a lowercase second hex digit of a
    --             retained escape is converted into uppercase
    local i = 1
    local j, k, m, n
    while ( i ) do
        i = ask:find( "%%[2-7]%x", i )
        if i then
            -- j: first hex digit;  k: second hex digit
            j = i + 1
            k = j + 1
            n = ask:byte( k, k )
            -- k: first position behind the escape
            k = k + 1
            -- m: truthy, if second hex digit is a lowercase letter
            m = ( n > 96 )
            if m then
                -- uppercase letter; m keeps its character code
                n = n - 32
                m = n
            end
            -- n: numerical value of second hex digit
            if n > 57 then
                n = n - 55
            else
                n = n - 48
            end
            -- n: character code of the entire escape
            n = ( ask:byte( j, j ) - 48 )  *  16   +   n
            if n == 39  and
               ask:sub( i + 3,  i + 5 ) == "%27" then
               -- run of two or more %27 (apostrophe): leave all of them
               -- encoded and continue behind the run
               -- presumably since '' would start wiki markup -- TODO confirm
               j = i + 6
               while ( ask:sub( j,  j + 2 )  ==  "%27" ) do
                  j = j + 3
               end -- while "%27"
            elseif decodeComponentEscape( averse, n ) then
                -- protected: keep escape, but uppercase second hex digit
                if m then
                    ask = string.format( "%s%c%s",
                                         ask:sub( 1, j ),
                                         m,
                                         ask:sub( k ) )
                end
            elseif i == 1 then
                -- decode at begin of string
                ask = string.format( "%c%s",  n,  ask:sub( k ) )
            else
                -- decode within string
                ask = string.format( "%s%c%s",
                                     ask:sub( 1,  i - 1 ),
                                     n,
                                     ask:sub( k ) )
            end
            -- continue behind the "%" or the decoded character,
            -- or behind the run of %27
            i = j
        end
    end -- while i
    return ask
end -- decodeComponentPercent()



local getTopDomain = function ( url, mode )
    -- Retrieve the last two or three labels of the host name
    -- Precondition:
    --     url   -- string, with URL
    --     mode  -- number; 3 for three labels, anything else for two
    -- Postcondition:
    --     Returns string, with domain
    --             false, if no host or no suitable domain
    local host = URLutil.getHost( url )
    if not host then
        return false
    end
    local seek = "[%w%%%-]+%.%a[%w%-]*%a)$"
    if mode == 3 then
        seek = "[%w%%%-]+%." .. seek
    end
    -- leading "." permits a match on the very first label
    return mw.ustring.match( "." .. host,  "%.(" .. seek )  or  false
end -- getTopDomain()



local getHash = function ( url )
    -- Find the "#" which starts the fragment
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns number, with position of "#"
    --             nil, if there is no fragment
    -- A "#" belonging to a numerical character reference (&#123; &#x7B;)
    -- does not start a fragment and is skipped.
    -- The first other "#" wins, even if character references follow later;
    -- previously such a "#" was dropped as soon as a reference was found
    -- anywhere in the URL.
    local r = url:find( "#", 1, true )
    while r  and  r > 1  and  url:byte( r - 1 ) == 38 do    -- "&#"
        local s = url:sub( r + 1 )
        if s:match( "^%d+;" )  or  s:match( "^[xX]%x+;" ) then
            -- character reference: look for next "#"
            r = url:find( "#",  r + 1,  true )
        else
            -- "&#" without valid reference: this "#" starts the fragment
            break
        end
    end -- while "&#"
    return r
end -- getHash()



URLutil.decode = function ( url, enctype )
    -- Percent-decode a string and make the result safe for wikitext
    -- Precondition:
    --     url      -- string, encoded
    --     enctype  -- string or nil, type for mw.uri.decode();
    --                 trimmed and uppercased, blank counts as false
    -- Postcondition:
    --     Returns string; passed through mw.text.encode(),
    --             and [ | ] replaced by numerical character references
    local scheme
    if type( enctype ) == "string" then
        scheme = mw.text.trim( enctype )
        scheme = ( scheme ~= "" )  and  scheme:upper()
    end
    local r = mw.text.encode( mw.uri.decode( url, scheme ) )
    if r:find( "[%[|%]]" ) then
        r = r:gsub( "%[", "&#91;" )
        r = r:gsub( "|", "&#124;" )
        r = r:gsub( "%]", "&#93;" )
    end
    return r
end -- URLutil.decode()



URLutil.encode = function ( url, enctype )
    -- Percent-encode a string
    -- Precondition:
    --     url      -- string, plain
    --     enctype  -- string or nil, type for mw.uri.encode();
    --                 trimmed and uppercased, blank counts as false
    -- Postcondition:
    --     Returns string; additionally a leading * : ; is encoded,
    --             and any remaining [ | ] as well
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    -- presumably since these would act as wiki markup at line begin
    if -- k == 35  or      -- #
          k == 42  or      -- *
          k == 58  or      -- :
          k == 59 then     -- ;
        r = string.format( "%%%X%s", k, r:sub( 2 ) )
    end
    if r:find( "[%[|%]]" ) then
        -- "%" needs to be doubled in a gsub replacement string;
        -- a single "%5" would be an invalid capture reference
        r = r:gsub( "%[", "%%5B" )
             :gsub( "|",  "%%7C" )
             :gsub( "%]", "%%5D" )
    end
    return r
end -- URLutil.encode()



URLutil.getAuthority = function ( url )
    -- Retrieve host, with port if given, from a URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, with lowercased host or host:port
    --             nil, if URL lacks a valid authority
    --             false, if url is not a string
    if type( url ) ~= "string" then
        return false
    end
    local seek  = "^%s*%w*:?//([%w%.%%_-]+)(:?)([%d]*)/"
    local story = mw.text.decode( url )
    local cut   = story:find( "#", 6, true )
    if cut then
        -- drop fragment
        story = story:sub( 1,  cut - 1 )
    end
    -- terminating slash permits one pattern for URL with and without path
    local host, colon, port = mw.ustring.match( story .. "/",  seek )
    local r
    if URLutil.isHost( host ) then
        host = mw.ustring.lower( host )
        if colon == ":" then
            -- port must not begin with zero
            if port:find( "^[1-9]" ) then
                r = ( host .. ":" .. port )
            end
        elseif #port == 0 then
            r = host
        end
    end
    return r
end -- URLutil.getAuthority()



URLutil.getFragment = function ( url, decode )
    -- Retrieve the fragment of a URL, without leading "#"
    -- Precondition:
    --     url     -- string, with URL
    --     decode  -- string or nil, with decoding request
    --                "%"     -- percent-decode
    --                "WIKI"  -- .HH and _ converted first, then percent-decode
    -- Postcondition:
    --     Returns string, with fragment; might be empty
    --             false, if no fragment
    --             nil, if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local start = getHash( url )
    if not start then
        return false
    end
    local r = mw.text.trim( url:sub( start ) ):sub( 2 )
    if type( decode ) == "string" then
        local encoding = mw.text.trim( decode )
        if encoding == "WIKI" then
            r = r:gsub( "%.(%x%x)", "%%%1" )
            r = r:gsub( "_", " " )
        end
        if encoding == "%"  or  encoding == "WIKI" then
            r = mw.uri.decode( r, "PATH" )
        end
    end
    return r
end -- URLutil.getFragment()



URLutil.getHost = function ( url )
    -- Retrieve host from a URL, without port
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, with host
    --             falsy result of URLutil.getAuthority(), if no authority
    local authority = URLutil.getAuthority( url )
    return authority  and
           mw.ustring.match( authority, "^([%w%.%%_%-]+):?[%d]*$" )
end -- URLutil.getHost()



URLutil.getLocation = function ( url )
    -- Retrieve URL without fragment
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, trimmed, up to but excluding the fragment
    --             false, if blank or fragment only
    --             nil, if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local r = mw.text.trim( url )
    if r == "" then
        return false
    end
    local cut = getHash( r )
    if cut == 1 then
        r = false
    elseif cut then
        r = r:sub( 1,  cut - 1 )
    end
    return r
end -- URLutil.getLocation()



URLutil.getNormalized = function ( url )
    -- Normalize a URL: resolve character references, decode needless
    -- percent escapes, lowercase the leading part, escape wiki syntax
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, normalized
    --             false, if url is blank or not a string
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            -- resolve &#xHH;
            r = decodeComponentML( r )
        end
    else
        r = false
    end
    if r then
        -- k: begin of "//" in front of the authority
        local k = r:find( "//", 1, true )
        if k then
            -- j: first "/" behind the "//"; nil if there is none
            local j = r:find( "/",  k + 2,  true )
            -- sF: fragment, sP: path (without leading "/"),
            -- sQ: query (with leading "?")
            local sF, sP, sQ
            if r:find( "%%[2-7]%x" ) then
                -- percent escapes present: split into components
                -- and decode each one with its own protected characters
                local i = getHash( r )
                if i then
                    sF = r:sub( i + 1 )
                    r  = r:sub( 1,  i - 1 )
                    if sF == "" then
                        -- bare "#" is dropped
                        sF = false
                    else
                        sF = decodeComponentPercent( sF, "F" )
                    end
                end
                i = r:find( "?", 1, true )
                if i then
                    sQ = r:sub( i )
                    r  = r:sub( 1,  i - 1 )
                    sQ = decodeComponentPercent( sQ, "Q" )
                end
                if j then
                    -- j has been determined before fragment and query
                    -- were cut off; it might point behind the remainder
                    if #r > j then
                        sP = r:sub( j + 1 )
                        sP = decodeComponentPercent( sP, "P" )
                    end
                    r = r:sub( 1,  j - 1 )
                end
            elseif j then
                -- no percent escapes: everything behind the slash is kept
                -- as it is, including query and fragment
                local n = #r
                if r:byte( n, n ) == 35 then    -- '#'
                    -- trailing bare "#" is dropped
                    n = n - 1
                    r = r:sub( 1, n )
                end
                if n > j then
                    sP = r:sub( j + 1 )
                end
                r = r:sub( 1,  j - 1 )
            end
            -- NOTE(review): if there is no "/" behind the authority and
            -- no percent escape, r still contains query and fragment here;
            -- they get lowercased and the "/" is appended behind them
            -- -- confirm whether this is intended
            r = mw.ustring.lower( r ) .. "/"
            if sP then
                r = r .. sP
            end
            if sQ then
                r = r .. sQ
            end
            if sF then
                r = string.format( "%s#%s", r, sF )
            end
        end
        -- escape space and characters with a meaning in wikitext
        r = r:gsub( " ",  "%%20" )
             :gsub( "%[", "%%5B" )
             :gsub( "|",  "%%7C" )
             :gsub( "%]", "%%5D" )
             :gsub( "%<", "%%3C" )
             :gsub( "%>", "%%3E" )
    end
    return r
end -- URLutil.getNormalized()



URLutil.getPath = function ( url )
    -- Retrieve path of a URL, without query and fragment
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, with path
    --             falsy result of URLutil.getRelativePath() otherwise
    local r = URLutil.getRelativePath( url )
    if r then
        -- cut off query, then fragment
        r = r:match( "^([^%?]*)%?" )  or  r
        r = r:match( "^([^#]*)#" )  or  r
    end
    return r
end -- URLutil.getPath()



URLutil.getPort = function ( url )
    -- Retrieve port number from a URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns number, with port
    --             false, if authority without port
    --             falsy result of URLutil.getAuthority(), if no authority
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    local digits = authority:match( ":([1-9][0-9]*)$" )
    return digits  and  tonumber( digits )  or  false
end -- URLutil.getPort()



URLutil.getQuery = function ( url, key, separator )
    -- Retrieve query of a URL, or the value of a single parameter
    -- Precondition:
    --     url        -- string, with URL
    --     key        -- string or nil, with name of parameter
    --     separator  -- string or nil; one of & ; , /   default: &
    -- Postcondition:
    --     Returns string, with entire query (without "?") if no key,
    --                     or with the value assigned to key
    --             false, if no query, or key not present
    --             falsy result of URLutil.getLocation() otherwise
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r  and  type( key ) == "string" then
            local sep = "&"
            -- The trimmed key is searched for; it is used literally,
            -- therefore pattern magic characters need to be escaped.
            local single = mw.text.trim( key ):gsub( "%p", "%%%0" )
            local s, scan
            if type( separator ) == "string" then
                s = mw.text.trim( separator )
                if s:match( "^[&;,/]$" ) then
                    sep = s
                end
            end
            -- enclosing separators: every pair is delimited on both sides
            s = string.format( "%s%s%s", sep, r, sep )
            scan = string.format( "%s%s=([^%s]*)%s",
                                  sep, single, sep, sep )
            r = s:match( scan )
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()



URLutil.getQueryTable = function ( url, separator )
    -- Retrieve all query parameters of a URL
    -- Precondition:
    --     url        -- string, with URL
    --     separator  -- string or nil; one of & ; , /   default: &
    -- Postcondition:
    --     Returns table; name -> value string,
    --                    or name -> false for a component without "="
    --             falsy result of URLutil.getQuery(), if no query
    local query = URLutil.getQuery( url )
    if not query then
        return query
    end
    local sep = "&"
    if type( separator ) == "string" then
        local single = mw.text.trim( separator )
        if single:match( "^[&;,/]$" ) then
            sep = single
        end
    end
    local r = { }
    for _, item in ipairs( mw.text.split( query, sep, true ) ) do
        -- "=" is expected behind at least one character
        if item:find( "=", 2, true ) then
            local name, value = item:match( "^([^=]+)=(.*)$" )
            if name then
                r[ name ] = value
            end
        else
            r[ item ] = false
        end
    end -- for item
    return r
end -- URLutil.getQueryTable()



URLutil.getRelativePath = function ( url )
    -- Retrieve everything behind the authority: path, query, fragment
    -- Precondition:
    --     url  -- string, with URL; might be protocol-relative
    --             or start with a single slash
    -- Postcondition:
    --     Returns string, trimmed, with leading "/"
    --             "/", if resource URL without path
    --             false, if not available
    --             nil, if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local path
    local rest = url:match( "^%s*[a-zA-Z]*://(.*)$" )
    if rest then
        -- skip authority
        path = rest:match( "[^/]+(/.*)$" )
    else
        local extra
        extra, path = url:match( "^%s*(/?)(/.*)$" )
        if extra == "/" then
            -- protocol-relative: skip authority
            path = path:match( "/[^/]+(/.*)$" )
        end
    end
    local r
    if path then
        r = mw.text.trim( path )
    else
        r = URLutil.isResourceURL( url )  and  "/"
    end
    return r
end -- URLutil.getRelativePath()



URLutil.getScheme = function ( url )
    -- Retrieve protocol of a URL, including the slashes
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, lowercased scheme followed by "://",
    --                     or "//" if protocol-relative
    --             false, if not available
    --             nil, if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if slashes ~= "//" then
        return false
    end
    if colon == ":" then
        -- at least three letters are required
        return #prot > 2  and  ( prot:lower() .. "://" )
    end
    return #prot == 0  and  "//"
end -- URLutil.getScheme()



URLutil.getSortkey = function ( url )
    -- Create a sort key from a URL; domain labels in reversed order
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, "///" followed by reversed host labels,
    --                     then port (0 if none), scheme and remainder
    --             url unchanged, if not a string or no suitable URL
    local r = url
    if type( url ) == "string" then
        local i = url:find( "//", 1, true )
        if i then
            local scheme
            -- string.find never returns 0; the previous test  i == 0
            -- left protocol-relative URLs without a sort key
            if url:find( "^%s*//" ) then
                scheme = ""
            else
                scheme = url:match( "^%s*([a-zA-Z]*)://" )
            end
            if scheme then
                local s = url:sub( i + 2 )
                local comps, site, m, suffix
                scheme = scheme:lower()
                i      = s:find( "/", 1, true )
                if i  and  i > 1 then
                    suffix = s:sub( i + 1 )            -- mw.uri.encode()
                    s      = s:sub( 1,  i - 1 )
                    suffix = suffix:gsub( "#", " " )
                else
                    suffix = ""
                end
                -- Capture the digits only; the colon would make the
                -- numerical %d format below fail.
                site, m = s:match( "^(.+):(%d+)$" )
                if m then
                    m = tonumber( m )
                else
                    site = s
                    m    = 0
                end
                comps = mw.text.split( site:lower(), ".", true )
                r = "///"
                for k = #comps, 2, -1 do
                    r =  string.format( "%s%s.", r, comps[ k ] )
                end -- for --k
                r = string.format( "%s%s %d %s: %s",
                                   r, comps[ 1 ], m, scheme, suffix )
            end
        end
    end
    return r
end -- URLutil.getSortkey()



URLutil.getTLD = function ( url )
    -- Retrieve top level domain of the host in a URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, with TLD
    --             false, if host without suitable TLD
    --             falsy result of URLutil.getHost(), if no host
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    return mw.ustring.match( host, "%w+%.(%a[%w%-]*%a)$" )  or  false
end -- URLutil.getTLD()



URLutil.getTop2domain = function ( url )
    -- Retrieve the last two labels of the host in a URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, or false if not available
    return getTopDomain( url, 2 )
end -- URLutil.getTop2domain()



URLutil.getTop3domain = function ( url )
    -- Retrieve the last three labels of the host in a URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, or false if not available
    return getTopDomain( url, 3 )
end -- URLutil.getTop3domain()



URLutil.isAuthority = function ( s )
    -- Is this string a valid authority: host, optionally with port?
    -- Precondition:
    --     s  -- string, with host or host:port
    -- Postcondition:
    --     Returns result of URLutil.isHost() for the host (true, 4, 6),
    --                   if the port is fine or absent
    --             false, if host or port invalid
    --             nil, if s is not a string
    local r
    if type( s ) == "string" then
        local pattern = "^%s*([%w%.%%_-]+)(:?)(%d*)%s*$"
        local host, colon, port = mw.ustring.match( s, pattern )
        if colon == ":" then
            -- colon requires a port number not beginning with zero
            r = ( port:match( "^[1-9][0-9]*$" ) ~= nil )
        else
            -- also false if pattern did not match at all (port is nil)
            r = ( port == "" )
        end
        -- Check the host only if the port did not fail already;
        -- previously the port verdict was always overwritten.
        if r then
            r = URLutil.isHost( host )
        end
    else
        r = nil
    end
    return r
end -- URLutil.isAuthority()



URLutil.isDomain = function ( s )
    -- Is this string a domain name?
    -- Precondition:
    --     s  -- string, with domain
    -- Postcondition:
    --     Returns true, if domain
    --             false, if adjacent dots within the labels before the TLD
    --             nil, if s does not look like a domain or is not a string
    if type( s ) ~= "string" then
        return nil
    end
    local scan = "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$"
    -- sub: everything in front of the top level domain
    local sub = mw.ustring.match( s, scan )
    if type( sub ) == "string"  and  mw.ustring.find( sub, "^%w" ) then
        return not mw.ustring.find( sub, "..", 1, true )
    end
    return nil
end -- URLutil.isDomain()



URLutil.isDomainExample = function ( url )
    -- Is the host of this URL one of the reserved example domains?
    -- RFC 2606: example.com example.net example.org example.edu
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true, if example domain; else false
    local domain = getTopDomain( url, 2 )
    if not domain then
        return domain
    end
    local tld = domain:lower():match( "^example%.([a-z][a-z][a-z])$" )
    return ( tld == "com"  or
             tld == "edu"  or
             tld == "net"  or
             tld == "org" )
end -- URLutil.isDomainExample()



URLutil.isDomainInt = function ( url )
    -- Is the host of this URL an
    -- Internationalized Domain Name (Punycode)?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true, if host contains non-ASCII characters
    --                   or a label starting with xn--
    --             false, if plain ASCII host
    --             falsy result of URLutil.getHost(), if no host
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    if not host:match( "^[!-~]+$" ) then
        return true
    end
    -- leading "." lets the first label be detected, too
    return ( ( "." .. host ):find( ".xn--", 1, true )  ~=  nil )
end -- URLutil.isDomainInt()



URLutil.isHost = function ( s )
    -- Is this string a host: domain name or IP address?
    -- Precondition:
    --     s  -- string, with host
    -- Postcondition:
    --     Returns true, if domain
    --             4 or 6, if IP address of that version
    --             false otherwise
    return URLutil.isDomain( s ) or URLutil.isIP( s )
end -- URLutil.isHost()



URLutil.isHostPathResource = function ( s )
    -- Is this a resource URL, even if the protocol has been omitted?
    -- Precondition:
    --     s  -- string, with URL or host and path
    -- Postcondition:
    --     Returns true, if resource URL, as it is or with "//" prepended
    --             false otherwise
    local r = URLutil.isResourceURL( s )
    if r  or  not s then
        return r
    end
    return URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
end -- URLutil.isHostPathResource()



URLutil.isIP = function ( s )
    -- Is this string an IP address?
    -- Precondition:
    --     s  -- string, with IP address
    -- Postcondition:
    --     Returns 4, if IPv4
    --             6, if IPv6
    --             false otherwise
    return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()



URLutil.isIPlocal = function ( s )
    -- Is this an IPv4 address for local or private usage?
    -- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735)
    -- Precondition:
    --     s  -- string, with IPv4 address in dotted decimal notation
    -- Postcondition:
    --     Returns true, if 0.*.*.*  10.*.*.*  127.*.*.*
    --                      172.16.*.* ... 172.31.*.*  192.168.*.*
    --             false otherwise, also if s is not a string
    local r = false
    if type( s ) == "string" then
        -- all ranges of interest begin with digit 0 or 1
        local num = s:match( "^ *([01][0-9]*)%." )
        if num then
            num = tonumber( num )
            if num == 0 then
                -- not accepted by isIPv4(), therefore matched here
                r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
            elseif num == 10  or  num == 127 then
                -- loopback; private/local host: 127.0.0.1
                r = URLutil.isIPv4( s )
            elseif num == 169 then
                -- 169.254.*.* is not reported as local
            elseif num == 172 then
                -- 172.(16...31).*.*
                num = s:match( "^ *0*172%.([0-9]+)%." )
                if num then
                    num = tonumber( num )
                    if num >= 16  and  num <= 31 then
                        r = URLutil.isIPv4( s )
                    end
                end
            elseif num == 192 then
                -- 192.168.*.*
                -- The previous test read an undefined variable,
                -- therefore this branch has never been entered.
                num = s:match( "^ *0*192%.([0-9]+)%." )
                if num then
                    num = tonumber( num )
                    if num == 168 then
                        r = URLutil.isIPv4( s )
                    end
                end
            end
        end
    end
    -- always boolean
    return r and true or false
end -- URLutil.isIPlocal()



URLutil.isIPv4 = function ( s )
    -- Is this string an IPv4 address in dotted decimal notation?
    -- Precondition:
    --     s  -- string, with IP address; surrounding whitespace permitted
    -- Postcondition:
    --     Returns true, if four numbers below 256,
    --                   the first one not beginning with zero
    --             false otherwise
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" ) }
    if #octets ~= 4 then
        -- pattern did not match
        return false
    end
    for i = 1, 4 do
        if tonumber( octets[ i ] ) >= 256 then
            return false
        end
    end -- for i
    return true
end -- URLutil.isIPv4()



URLutil.isIPv6 = function ( s )
    -- Is this string an IPv6 address?
    -- Precondition:
    --     s  -- string, with IP address; surrounding whitespace permitted
    -- Postcondition:
    --     Returns true, if up to eight groups of hex digits,
    --                   with at most one :: abbreviation
    --             false otherwise
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- Trim before validation, as isIPv4() tolerates whitespace as well;
    -- previously whitespace was rejected before the trim could apply.
    s = mw.text.trim( s )
    if s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed, upper
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()



URLutil.isMailAddress = function ( s )
    -- Is this string an e-mail address?
    -- Precondition:
    --     s  -- string, with address
    -- Postcondition:
    --     Returns result of URLutil.isDomain() for the part behind @
    --             false, if s is not a string
    if type( s ) ~= "string" then
        return false
    end
    local domain = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
    return URLutil.isDomain( domain )
end -- URLutil.isMailAddress()



URLutil.isMailLink = function ( s )
    -- Is this string a mailto: link with e-mail address?
    -- Precondition:
    --     s  -- string, with link
    -- Postcondition:
    --     Returns result of URLutil.isMailAddress() for the address
    --             false, if no mailto: link
    if type( s ) == "string" then
        local prot, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( prot ) == "string"  and  prot:lower() == "mailto" then
            return URLutil.isMailAddress( addr )
        end
    end
    return false
end -- URLutil.isMailLink()



local function isProtocolAccepted( prot, supplied )
    -- Is this protocol one of the listed ones?
    -- Precondition:
    --     prot      -- string, with scheme; might be followed by : or ://
    --                  or plain // for protocol-relative
    --     supplied  -- string, with space separated and space enclosed
    --                  lowercase schemes
    -- Postcondition:
    --     Returns true, if protocol-relative, or scheme listed in supplied
    --             false otherwise
    if type( prot ) ~= "string" then
        return false
    end
    local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
    if slashes == "/" then
        -- single slash is never valid
        return false
    end
    if scheme == "" then
        return ( colon ~= ":"  and  slashes == "//" )
    end
    if colon == ":"  or  slashes == "" then
        local s = supplied:match( " " .. scheme:lower() .. " " )
        return ( type( s ) == "string" )
    end
    return false
end -- isProtocolAccepted()



URLutil.isProtocolDialog = function ( prot )
    -- Is this one of the protocols mailto irc ircs ssh telnet,
    -- or protocol-relative?
    -- Precondition:
    --     prot  -- string, with scheme; see isProtocolAccepted()
    -- Postcondition:
    --     Returns true or false
    return isProtocolAccepted( prot, " mailto irc ircs ssh telnet " )
end -- URLutil.isProtocolDialog()



URLutil.isProtocolWiki = function ( prot )
    -- Is this one of the protocols
    -- ftp ftps git http https nntp sftp svn worldwind,
    -- or protocol-relative?
    -- Precondition:
    --     prot  -- string, with scheme; see isProtocolAccepted()
    -- Postcondition:
    --     Returns true or false
    return isProtocolAccepted( prot,
                               " ftp ftps git http https nntp sftp svn worldwind " )
end -- URLutil.isProtocolWiki()



URLutil.isResourceURL = function ( url )
    -- Is this the URL of a resource: http https ftp sftp
    -- or protocol-relative, with valid authority?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true, if resource URL without inner whitespace;
    --                   a fragment is permitted behind a path only
    --             false otherwise
    local scheme = URLutil.getScheme( url )
    if not scheme then
        return false
    end
    local known = " // http:// https:// ftp:// sftp:// "
    if not known:find( string.format( " %s ", scheme ) ) then
        return false
    end
    if not URLutil.getAuthority( url ) then
        return false
    end
    if url:match( "%S%s+%S" ) then
        return false
    end
    local _, hash = url:match( "^([^#]+)(#.*)$" )
    if hash then
        -- fragment requires a slash behind the authority
        return ( url:match( "^%s*[a-zA-Z]*:?//(.+)/" )  ~=  nil )
    end
    return true
end -- URLutil.isResourceURL()



URLutil.isSuspiciousURL = function ( url )
    -- Is this URL no resource URL, or does it contain doubtful characters?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true, if no resource URL, or containing any of
    --                   @ in authority, '', [ | ] ", trailing . or ,
    --                   or a character U+2009-U+200F, U+202A-U+202F, U+2060
    --             false otherwise
    if URLutil.isResourceURL( url ) then
        local s = URLutil.getAuthority( url )
        -- character set with ranges of Unicode code points
        local pat = "[%[|%]" ..
                    mw.ustring.char( 34,
                                     8201, 45, 8207,
                                     8234, 45, 8239,
                                     8288 )
                    .. "]"
        -- The set needs to be evaluated by the Unicode aware library;
        -- byte-wise string.find would read the ranges as ranges of
        -- single UTF-8 bytes and match nearly any non-ASCII character.
        if s:find( "@", 1, true )
           or url:find( "''", 1, true )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character ??
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()



URLutil.isUnescapedURL = function ( url, trailing )
    -- Is this a web URL containing unescaped [ | ] ?
    -- Precondition:
    --     url       -- string, with URL
    --     trailing  -- any; if a string, the result is always false
    -- Postcondition:
    --     Returns true, if web URL with any of [ | ]
    --             false otherwise
    if type( trailing ) == "string" then
        return false
    end
    if not URLutil.isWebURL( url ) then
        return false
    end
    return ( url:match( "[%[|%]]" )  ~=  nil )
end -- URLutil.isUnescapedURL()



URLutil.isWebURL = function ( url )
    -- Is this a URL with scheme and valid authority,
    -- without inner whitespace and without '' ?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true or false
    if not ( URLutil.getScheme( url )  and  URLutil.getAuthority( url ) ) then
        return false
    end
    if url:find( "%S%s+%S" )  or  url:find( "''", 1, true ) then
        return false
    end
    return true
end -- URLutil.isWebURL()



URLutil.wikiEscapeURL = function ( url )
    -- Replace [ | ] by numerical character references
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, with escaped URL
    local r = url
    if r:find( "[%[|%]]" ) then
        r = r:gsub( "%[", "&#91;" )
        r = r:gsub( "|", "&#124;" )
        r = r:gsub( "%]", "&#93;" )
    end
    return r
end -- URLutil.wikiEscapeURL()



Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or wikidata|item|~|@ or false
    -- Postcondition:
    --     Returns  string  -- with queried version/item, also if problem
    --              false   -- if appropriate
    -- 2020-08-17
    local since = atleast
    -- last:   "~"     compare local serial with version at the item
    -- linked: "@"     compare current page with sitelink of the item
    -- link:   "item"  return the item identifier only
    local last    = ( since == "~" )
    local linked  = ( since == "@" )
    local link    = ( since == "item" )
    local r
    if last  or  link  or  linked  or  since == "wikidata" then
        local item = Failsafe.item
        -- keyword has been consumed; no version comparison below
        since = false
        if type( item ) == "number"  and  item > 0 then
            local suited = string.format( "Q%d", item )
            if link then
                r = suited
            else
                local entity = mw.wikibase.getEntity( suited )
                if type( entity ) == "table" then
                    -- property holding the version; default: P348
                    local seek = Failsafe.serialProperty or "P348"
                    local vsn  = entity:formatPropertyValues( seek )
                    if type( vsn ) == "table"  and
                       type( vsn.value ) == "string"  and
                       vsn.value ~= "" then
                        if last  and  vsn.value == Failsafe.serial then
                            -- local version equals the registered one
                            r = false
                        elseif linked then
                            if mw.title.getCurrentTitle().prefixedText
                               ==  mw.wikibase.getSitelink( suited ) then
                                -- current page is the linked page
                                r = false
                            else
                                r = suited
                            end
                        else
                            r = vsn.value
                        end
                    end
                end
            end
        end
    end
    -- no result yet: answer from local serial;
    -- a required version is compared as string
    if type( r ) == "nil" then
        if not since  or  since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()



local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --     frame   -- object
    --     action  -- string, with function name
    --     amount  -- number, of args if > 1
    -- Postcondition:
    --     Return string or not
    -- Arguments are trimmed; blank ones are passed as nil.
    -- Nothing is executed, if the first argument is missing or blank.
    local v = { }
    for i = 1,  amount or 1 do
        local s = frame.args[ i ]
        if s then
            s = mw.text.trim( s )
            if s ~= "" then
                v[ i ] = s
            end
        end
    end -- for i
    if not v[ 1 ] then
        return nil
    end
    -- parentheses: first result only
    return ( URLutil[ action ](  v[ 1 ], v[ 2 ], v[ 3 ] ) )
end -- Template()



-- Export table: functions which take a frame object from template transclusion.
local p = {}

-- Template entry point for URLutil.decode(); uses args 1 and 2, yields "" if no result.
function p.decode( frame )
    return Template( frame, "decode", 2 ) or ""
end
-- Template entry point for URLutil.encode(); uses args 1 and 2, yields "" if no result.
function p.encode( frame )
    return Template( frame, "encode", 2 ) or ""
end
-- Template entry point for URLutil.getAuthority(); yields "" if no result.
function p.getAuthority( frame )
    return Template( frame, "getAuthority" ) or ""
end
-- Template entry point for URLutil.getFragment(); uses args 1 and 2.
-- A fragment is returned with leading "#"; yields "" if there is none.
function p.getFragment( frame )
    local fragment = Template( frame, "getFragment", 2 )
    return fragment  and  ( "#" .. fragment )  or  ""
end
-- Template entry point for URLutil.getHost(); yields "" if no result.
function p.getHost( frame )
    return Template( frame, "getHost" ) or ""
end
-- Template entry point for URLutil.getLocation(); yields "" if no result.
function p.getLocation( frame )
    return Template( frame, "getLocation" ) or ""
end
-- Template entry point for URLutil.getNormalized(); yields "" if no result.
function p.getNormalized( frame )
    return Template( frame, "getNormalized" ) or ""
end
-- Template entry point for URLutil.getPath(); yields "" if no result.
function p.getPath( frame )
    return Template( frame, "getPath" ) or ""
end
-- Template entry point for URLutil.getPort(); yields "" if no result.
function p.getPort( frame )
    return Template( frame, "getPort" ) or ""
end
-- Template entry point for URLutil.getQuery(); uses args 1 to 3.
-- Without a key (arg 2 missing or blank) the query is returned with leading "?";
-- with a key the bare value is returned; yields "" if no result.
function p.getQuery( frame )
    local r = Template( frame, "getQuery", 3 )
    if not r then
        return ""
    end
    local key = frame.args[ 2 ]
    if not key  or  mw.text.trim( key ) == "" then
        r = "?" .. r
    end
    return r
end
-- Template entry point for URLutil.getRelativePath(); yields "" if no result.
function p.getRelativePath( frame )
    return Template( frame, "getRelativePath" ) or ""
end
-- Template entry point for URLutil.getScheme(); yields "" if no result.
function p.getScheme( frame )
    return Template( frame, "getScheme" ) or ""
end
-- Template entry point for URLutil.getSortkey(); yields "" if no result.
function p.getSortkey( frame )
    return Template( frame, "getSortkey" ) or ""
end
-- Template entry point for URLutil.getTLD(); yields "" if no result.
function p.getTLD( frame )
    return Template( frame, "getTLD" ) or ""
end
-- Template entry point for URLutil.getTop2domain(); yields "" if no result.
function p.getTop2domain( frame )
    return Template( frame, "getTop2domain" ) or ""
end
-- Template entry point for URLutil.getTop3domain(); yields "" if no result.
function p.getTop3domain( frame )
    return Template( frame, "getTop3domain" ) or ""
end
function p.isAuthority( frame )
    -- Flag wrapper around Template() for action "isAuthority"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isAuthority" ) then
        return "1"
    end
    return ""
end
function p.isDomain( frame )
    -- Flag wrapper around Template() for action "isDomain"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isDomain" ) then
        return "1"
    end
    return ""
end
function p.isDomainExample( frame )
    -- Flag wrapper around Template() for action "isDomainExample"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isDomainExample" ) then
        return "1"
    end
    return ""
end
function p.isDomainInt( frame )
    -- Flag wrapper around Template() for action "isDomainInt"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isDomainInt" ) then
        return "1"
    end
    return ""
end
function p.isHost( frame )
    -- Flag wrapper around Template() for action "isHost"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isHost" ) then
        return "1"
    end
    return ""
end
function p.isHostPathResource( frame )
    -- Flag wrapper around Template() for action "isHostPathResource"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isHostPathResource" ) then
        return "1"
    end
    return ""
end
function p.isIP( frame )
    -- Wrapper around Template() for action "isIP"
    --     frame  -- object handed over to Template()
    -- Unlike the other is...() wrappers, the result of Template() is
    -- passed through as it is rather than being reduced to "1";
    -- "" is returned if that result is nil or false
    local r = Template( frame, "isIP" )
    if not r then
        r = ""
    end
    return r
end
function p.isIPlocal( frame )
    -- Flag wrapper around Template() for action "isIPlocal"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isIPlocal" ) then
        return "1"
    end
    return ""
end
function p.isIPv4( frame )
    -- Flag wrapper around Template() for action "isIPv4"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isIPv4" ) then
        return "1"
    end
    return ""
end
function p.isIPv6( frame )
    -- Flag wrapper around Template() for action "isIPv6"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isIPv6" ) then
        return "1"
    end
    return ""
end
function p.isMailAddress( frame )
    -- Flag wrapper around Template() for action "isMailAddress"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isMailAddress" ) then
        return "1"
    end
    return ""
end
function p.isMailLink( frame )
    -- Flag wrapper around Template() for action "isMailLink"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isMailLink" ) then
        return "1"
    end
    return ""
end
function p.isProtocolDialog( frame )
    -- Flag wrapper around Template() for action "isProtocolDialog"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isProtocolDialog" ) then
        return "1"
    end
    return ""
end
function p.isProtocolWiki( frame )
    -- Flag wrapper around Template() for action "isProtocolWiki"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isProtocolWiki" ) then
        return "1"
    end
    return ""
end
function p.isResourceURL( frame )
    -- Flag wrapper around Template() for action "isResourceURL"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isResourceURL" ) then
        return "1"
    end
    return ""
end
function p.isSuspiciousURL( frame )
    -- Flag wrapper around Template() for action "isSuspiciousURL"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isSuspiciousURL" ) then
        return "1"
    end
    return ""
end
function p.isUnescapedURL( frame )
    -- Flag wrapper around Template() for action "isUnescapedURL"
    --     frame  -- object handed over to Template()
    -- The 2 is passed as third argument of Template()
    -- (presumably the number of parameters to evaluate).
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isUnescapedURL", 2 ) then
        return "1"
    end
    return ""
end
function p.isWebURL( frame )
    -- Flag wrapper around Template() for action "isWebURL"
    --     frame  -- object handed over to Template()
    -- Returns "1" if Template() yields a truthy value, else ""
    if Template( frame, "isWebURL" ) then
        return "1"
    end
    return ""
end
function p.wikiEscapeURL( frame )
    -- Wrapper around Template() for action "wikiEscapeURL"
    --     frame  -- object handed over to Template()
    -- Returns the result of Template(), or "" if that is nil or false.
    -- The "" fallback keeps this entry in line with the other
    -- value-returning wrappers of this table, which never return nil.
    return Template( frame, "wikiEscapeURL" )  or  ""
end
p.failsafe = function ( frame )
    -- Wrapper around Failsafe.failsafe()
    --     frame  -- table: its args[ 1 ] is used as request, if present
    --            -- string: used as request directly
    --            -- anything else: no request
    -- A request consisting of whitespace only is handed on as false.
    -- Returns the result of Failsafe.failsafe(), or "" if that is
    -- nil or false
    local s = type( frame )
    local since
    if s == "table" then
        -- Guard: a table without an args table yields no request
        --        instead of raising an error on indexing nil
        if type( frame.args ) == "table" then
            since = frame.args[ 1 ]
        end
    elseif s == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            since = false
        end
    end
    return Failsafe.failsafe( since ) or ""
end
function p.URLutil()
    -- Hands out the URLutil table itself; takes no parameters
    local library = URLutil
    return library
end

-- Module result: the export table filled above
return p