Modul:UrlCheck: Unterschied zwischen den Versionen

K
30 Versionen von wikivoyage:Modul:UrlCheck importiert
(Ergebnis uc.ip4 angepasst)
K (30 Versionen von wikivoyage:Modul:UrlCheck importiert)
 
(23 dazwischenliegende Versionen von 3 Benutzern werden nicht angezeigt)
Zeile 1: Zeile 1:
local uc = {}
-- module variable and administration
 
local uc = {
uc.msg = {
moduleInterface = {
unknown = 'Unknown error',
suite = 'UrlCheck',
[ 0 ] = 'No error detected',
serial = '2023-01-17',
[ 1 ]  = 'Host with non-ASCII characters',
item  = 40849609
[ 2 ]  = 'Host is IP address',
}
[ 3 ]  = 'URL is empty',
[ 4 ]  = 'URL has more then 2048 characters',
[ 5 ]  = 'URL contains control marks or spaces',
[ 6 ]  = 'Missing or wrong protocol',
[ 7 ]  = 'Host starts with slash / character',
[ 8 ]  = 'Host contains invalid character combinations',
[ 9 ]  = 'More than one commercial ats @ in host detected',
[ 10 ]  = 'Commercial at @ detected but user or host are missing',
[ 11 ]  = 'More than one colon : in user detected',
[ 12 ]  = 'Colon : detected but user or password are missing',
[ 13 ]  = 'User has more than 64 characters',
[ 14 ]  = 'Host ist empty',
[ 15 ]  = 'More than one colon : in host detected',
[ 16 ]  = 'Colon : detected but host or port are missing',
[ 17 ]  = 'Port out of range of 0 … 65535',
[ 18 ]  = 'Host has more than 253 characters',
[ 19 ]  = 'Host with missing dot as domain separator',
[ 20 ]  = 'Host has dot but no top-level domain',
[ 21 ]  = 'Unknown top-level domain',
[ 22 ]  = 'Invalid host structure',
}
}


function uc.ip4( address )
function uc.ip4( address )
local parts = { address:match( '(%d+)%.(%d+)%.(%d+)%.(%d+)' ) }, value
local parts = { address:match( '(%d+)%.(%d+)%.(%d+)%.(%d+)' ) }
local value
if #parts == 4 then
if #parts == 4 then
for _, value in pairs( parts ) do
for _, value in pairs( parts ) do
Zeile 41: Zeile 22:
end
end


function uc.isUrl( url )
function uc.isUrl( url, skipPathCheck )
-- return codes 0 through 2 reserved
-- return codes 0 through 2 reserved
if not url or type( url ) ~= 'string' then
if not url or type( url ) ~= 'string' then
Zeile 47: Zeile 28:
end
end


local s = mw.text.trim( url ), count
local s = mw.text.trim( url )
if s == '' then
if s == '' then
return 3
return 3
Zeile 59: Zeile 40:


-- protocol
-- protocol
local count
s, count = s:gsub( '^https?://', '' )
s, count = s:gsub( '^https?://', '' )
if count == 0 then
s, count = s:gsub( '^//', '' )
end
if count == 0 then -- missing or wrong protocol
if count == 0 then -- missing or wrong protocol
return 6
return 6
end
end


local user = '', at
local user = ''
local password = ''
local password = ''
local host = ''
local host = ''
Zeile 72: Zeile 57:


-- split path from host
-- split path from host
at = s:find( '/' )
local at = s:find( '/' )
if at then
if at then
aPath = s:sub( at + 1, #s )
aPath = s:sub( at + 1, #s )
Zeile 81: Zeile 66:
end
end


-- future: add path check
-- path check
-- split at '/', last part: t == mw.uri.encode( t ) ?
if not skipPathCheck and aPath ~= '' then
if not aPath:match( '^[-A-Za-z0-9_.,~%%%+&:;#*?!=()@/\128-\255]*$' ) then
return 23
end
end


if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' ) or s:find( '%-%.' )
if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' ) or s:find( '%-%.' )
Zeile 90: Zeile 79:


-- user and password
-- user and password
_, count = s:gsub( '@', '@' )
s, count = s:gsub( '@', '@' )
if count > 1 then
if count > 1 then
return 9
return 9
Zeile 101: Zeile 90:
end
end


_,count = user:gsub( ':', ':' )
user, count = user:gsub( ':', ':' )
if count > 1 then
if count > 1 then
return 11
return 11
Zeile 122: Zeile 111:


-- host and port
-- host and port
_, count = host:gsub( ':', ':' )
host, count = host:gsub( ':', ':' )
if count > 1 then
if count > 1 then
return 15
return 15
Zeile 155: Zeile 144:
if uc.ip4( host ) then -- is ip4 address
if uc.ip4( host ) then -- is ip4 address
return 2
return 2
elseif not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then
elseif not mw.ustring.match( host, '^[ะ-๏%w%.%-]+%.%a%a+$' ) then
-- Thai diacritical marks ะ (0E30) - ๏ (0E4F)
return 22
return 22
elseif not host:match( '^[%w%.%-]+%.%a%a+$' ) then
elseif not host:match( '^[%w%.%-]+%.%a%a+$' ) then
Zeile 162: Zeile 152:


return 0
return 0
end
--- Percent-encodes the path portion of a URL, leaving the leading part as is.
-- The split point is the first '/' that has a non-slash character on both
-- sides, so a '//' sequence (such as the one after the protocol) is never
-- chosen. Everything up to and including that slash is kept verbatim; the
-- remainder is passed to mw.uri.encode with the 'PATH' encoding type.
-- NOTE(review): confirm how mw.uri.encode treats further '/' characters
-- inside the remainder.
-- @param url  URL string; non-string values are returned unchanged
-- @return the URL with its path part encoded, or the input unchanged when
--         no such slash is found
function uc.uriEncodePath( url )
	if type( url ) ~= 'string' then
		return url
	end

	local at = url:find( '[^/]/[^/]' )
	if at then
		-- at is the position of the character before the slash,
		-- so at + 1 is the slash itself and at + 2 starts the path
		local domain = url:sub( 1, at + 1 )
		local aPath = url:sub( at + 2 )
		url = domain .. mw.uri.encode( aPath, 'PATH' )
	end
	return url
end
end


function uc.checkUrl( frame )
function uc.checkUrl( frame )
local args = frame.args
local args = frame.args
args.url   = args.url or ''
local pArgs = frame:getParent().args
args.show = args.show or ''
args.url   = args.url or pArgs.url or ''
args.show   = args.show or pArgs.show or ''


local result = uc.isUrl( args.url )
local result = uc.isUrl( args.url, false )
if args.show:lower() == 'msg' then
if args.show:lower() == 'msg' then
if uc.msg[ result ] then
local ui = mw.loadData( 'Module:UrlCheck/i18n')
return uc.msg[ result ]
if ui[ result ] then
return ui[ result ]
else
else
return uc.msg.unknown
return ui.unknown
end
end
end
end
return result
return result
end
--- Entry point taking a frame object: encodes the path of the given URL.
-- The URL is the first available of frame.args.url, frame.args[ 1 ],
-- the parent frame's args.url and the parent frame's args[ 1 ];
-- it defaults to the empty string.
-- @param frame  frame object providing .args and :getParent()
-- @return result of uc.uriEncodePath for the selected URL
function uc.encodePath( frame )
	local args = frame.args
	-- getParent() may return nil when there is no calling frame;
	-- fall back to an empty table instead of indexing nil
	local parent = frame:getParent()
	local pArgs = parent and parent.args or {}
	local url = args.url or args[ 1 ] or pArgs.url or pArgs[ 1 ] or ''
	return uc.uriEncodePath( url )
end
end


return uc
return uc