Modul:UrlCheck: Unterschied zwischen den Versionen

Wartung
KKeine Bearbeitungszusammenfassung
(Wartung)
Zeile 2: Zeile 2:


uc.msg = {
uc.msg = {
[0] = 'No error detected',
unknown = 'Unknown error',
[1] = 'Host with non-ASCII characters',
[ 0 ] = 'No error detected',
[2] = 'Host is IP address',
[ 1 ] = 'Host with non-ASCII characters',
[3] = 'URL is empty',
[ 2 ] = 'Host is IP address',
[4] = 'URL has more then 2048 characters',
[ 3 ] = 'URL is empty',
[5] = 'URL contains control marks or spaces',
[ 4 ] = 'URL has more then 2048 characters',
[6] = 'Missing or wrong protocol',
[ 5 ] = 'URL contains control marks or spaces',
[7] = 'Host starts with slash / character',
[ 6 ] = 'Missing or wrong protocol',
[8] = 'Host contains invalid character combinations',
[ 7 ] = 'Host starts with slash / character',
[9] = 'More than one commercial ats @ in host detected',
[ 8 ] = 'Host contains invalid character combinations',
[10] = 'Commercial at @ detected but user or host are missing',
[ 9 ] = 'More than one commercial ats @ in host detected',
[11] = 'More than one colon : in user detected',
[ 10 ] = 'Commercial at @ detected but user or host are missing',
[12] = 'Colon : detected but user or password are missing',
[ 11 ] = 'More than one colon : in user detected',
[13] = 'User has more than 64 characters',
[ 12 ] = 'Colon : detected but user or password are missing',
[14] = 'Host ist empty',
[ 13 ] = 'User has more than 64 characters',
[15] = 'More than one colon : in host detected',
[ 14 ] = 'Host ist empty',
[16] = 'Colon : detected but host or port are missing',
[ 15 ] = 'More than one colon : in host detected',
[17] = 'Port out of range of 0 ... 65535',
[ 16 ] = 'Colon : detected but host or port are missing',
[18] = 'Host has more than 253 characters',
[ 17 ] = 'Port out of range of 0 65535',
[19] = 'Host with missing dot as domain separator',
[ 18 ] = 'Host has more than 253 characters',
[20] = 'Host has dot but no top-level domain',
[ 19 ] = 'Host with missing dot as domain separator',
[21] = 'Unknown top-level domain',
[ 20 ] = 'Host has dot but no top-level domain',
[22] = 'Invalid host structure',
[ 21 ] = 'Unknown top-level domain',
[ 22 ] = 'Invalid host structure',
}
}


function uc.ip4( address )
function uc.ip4( address )
local value
local parts = { address:match( '(%d+)%.(%d+)%.(%d+)%.(%d+)' ) }, value
local parts = { address:match("(%d+)%.(%d+)%.(%d+)%.(%d+)") }
if #parts == 4 then
if #parts == 4 then
for _,value in pairs( parts ) do
for _, value in pairs( parts ) do
if tonumber( value ) < 0 or tonumber( value ) > 255
if tonumber( value ) < 0 or tonumber( value ) > 255 then
then return 0 end
return 0
end
end
end
return 1 -- ok
return 1 -- ok
else
return 0
end
end
return 0
end
end


function uc.isUrl( url )
function uc.isUrl( url )
-- return codes 0 through 2 reserved
-- return codes 0 through 2 reserved
if ( url == nil ) or ( type( url ) ~= 'string' ) then return 3 end
if not url or type( url ) ~= 'string' then
return 3
end


local s = mw.text.trim( url )
local s = mw.text.trim( url ), count
if s == '' then return 3 end
if s == '' then
if #s > 2048 then return 4 end
return 3
-- limitation because of search engines or IE
elseif #s > 2048 then -- limitation because of search engines or IE
if s:find( '%s' ) or s:find( '%c' ) or s:match( '^%.' ) then return 5 end
return 4
elseif s:find( '%s' ) or s:find( '%c' ) or s:match( '^%.' ) then
return 5
end
-- https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource
-- https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource


-- protocol
-- protocol
local count
s, count = s:gsub( '^https?://', '' )
s,count = s:gsub( '^http://', '' )
if count == 0 then -- missing or wrong protocol
if count == 0 then
return 6
s,count = s:gsub( '^https://', '' )
-- if count == 0 then
-- s,count = s:gsub( '^//', '' )
-- end
end
end
if count == 0 then return 6 end -- missing or wrong protocol


local user = ''
local user = '', at
local password = ''
local password = ''
local host = ''
local host = ''
local port = ''
local port = ''
local path = ''
local aPath = ''
local topLevel = ''
local topLevel = ''
local at


-- split path from host
-- split path from host
at = s:find( '/' )
at = s:find( '/' )
if at ~= nil then
if at then
path = s:sub( at + 1, #s )
aPath = s:sub( at + 1, #s )
s = s:sub( 1, at - 1 )
s = s:sub( 1, at - 1 )
if not s then return 7 end
if not s then
return 7
end
end
end


Zeile 83: Zeile 84:
-- split at '/', last part: t == mw.uri.encode( t ) ?
-- split at '/', last part: t == mw.uri.encode( t ) ?


if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' )
if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' ) or s:find( '%-%.' )
or s:find( '%-%.' ) or s:find( '%.%-' ) or s:find( '%./' ) or s:find( '/%.' )
or s:find( '%.%-' ) or s:find( '%./' ) or s:find( '/%.' ) then
then return 8 end
return 8
end


-- user and password
-- user and password
_,count = s:gsub( '@', '@' )
_, count = s:gsub( '@', '@' )
if count > 1 then return 9 end
if count > 1 then
if count == 1 then
return 9
elseif count == 1 then
at = s:find( '@' )
at = s:find( '@' )
user = s:sub( 1, at - 1 )
user = s:sub( 1, at - 1 )
host = s:sub( at + 1, #s )
host = s:sub( at + 1, #s )
if not user or not s then return 10 end
if not user or not s then
return 10
end


_,count = user:gsub( ':', ':' )
_,count = user:gsub( ':', ':' )
if count > 1 then return 11 end
if count > 1 then
if count == 1 then
return 11
elseif count == 1 then
at = user:find( ':' )
at = user:find( ':' )
password = user:sub( at + 1, #user )
password = user:sub( at + 1, #user )
user = user:sub( 1, at - 1 )
user = user:sub( 1, at - 1 )
if not user or not password then return 12 end
if not user or not password then
if #user > 64 then return 13 end
return 12
elseif #user > 64 then
return 13
end
end
end
else
else
host = s
host = s
end
end
if host == '' then return 14 end
if host == '' then
return 14
end


-- host and port
-- host and port
_,count = host:gsub( ':', ':' )
_, count = host:gsub( ':', ':' )
if count > 1 then return 15 end
if count > 1 then
if count == 1 then
return 15
elseif count == 1 then
at = host:find( ':' )
at = host:find( ':' )
port = host:sub( at + 1, #host )
port = host:sub( at + 1, #host )
host = host:sub( 1, at - 1 )
host = host:sub( 1, at - 1 )
if not host or not port then return 16 end
if not host or not port then
if ( port:match( '^[1-9]%d*$' ) == nil ) or
return 16
( tonumber( port ) > 65535 ) then return 17 end
elseif not port:match( '^[1-9]%d*$' ) or tonumber( port ) > 65535 then
return 17
end
end
end


-- handle host part
-- handle host part
if #host > 253 then return 18 end
if #host > 253 then
return 18
end


-- getting top-level domain
-- getting top-level domain
at = host:match( '^.*()%.' ) -- find last dot
at = host:match( '^.*()%.' ) -- find last dot
if at == nil then return 19 end
if not at then
return 19
end
topLevel = host:sub( at + 1, #host )
topLevel = host:sub( at + 1, #host )
if not topLevel then return 20 end
if not topLevel then
return 20
end


-- future: check of top-level domain
-- future: check of top-level domain


if uc.ip4( host ) == 1 then return 2 end -- is ip4 address
if uc.ip4( host ) == 1 then -- is ip4 address
if not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then return 22 end
return 2
if not host:match( '^[%w%.%-]+%.%a%a+$' ) then
elseif not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then
return 22
elseif not host:match( '^[%w%.%-]+%.%a%a+$' ) then
return 1 -- matches domain only in UTF 8 mode
return 1 -- matches domain only in UTF 8 mode
end
end
Zeile 149: Zeile 171:
local result = uc.isUrl( args.url )
local result = uc.isUrl( args.url )
if args.show:lower() == 'msg' then
if args.show:lower() == 'msg' then
if uc.msg[result] == nil then
if uc.msg[ result ] then
return 'Unknown error'
return uc.msg[ result ]
else
else
return uc.msg[result]
return uc.msg.unknown
end
end
else
return result
end
end
return result
end
end


return uc
return uc
Anonymer Benutzer