Modul:UrlCheck: Unterschied zwischen den Versionen

Aus skandinavien-wiki.net
(Ergebnis uc.ip4 angepasst)
(Zeichenketten ausgelagert)
Zeile 1: Zeile 1:
-- module import
local ui = mw.loadData( 'Module:UrlCheck/i18n')
-- module variable
local uc = {}
local uc = {}
uc.msg = {
unknown = 'Unknown error',
[ 0 ]  = 'No error detected',
[ 1 ]  = 'Host with non-ASCII characters',
[ 2 ]  = 'Host is IP address',
[ 3 ]  = 'URL is empty',
[ 4 ]  = 'URL has more then 2048 characters',
[ 5 ]  = 'URL contains control marks or spaces',
[ 6 ]  = 'Missing or wrong protocol',
[ 7 ]  = 'Host starts with slash / character',
[ 8 ]  = 'Host contains invalid character combinations',
[ 9 ]  = 'More than one commercial ats @ in host detected',
[ 10 ]  = 'Commercial at @ detected but user or host are missing',
[ 11 ]  = 'More than one colon : in user detected',
[ 12 ]  = 'Colon : detected but user or password are missing',
[ 13 ]  = 'User has more than 64 characters',
[ 14 ]  = 'Host ist empty',
[ 15 ]  = 'More than one colon : in host detected',
[ 16 ]  = 'Colon : detected but host or port are missing',
[ 17 ]  = 'Port out of range of 0 … 65535',
[ 18 ]  = 'Host has more than 253 characters',
[ 19 ]  = 'Host with missing dot as domain separator',
[ 20 ]  = 'Host has dot but no top-level domain',
[ 21 ]  = 'Unknown top-level domain',
[ 22 ]  = 'Invalid host structure',
}


function uc.ip4( address )
function uc.ip4( address )
Zeile 171: Zeile 148:
local result = uc.isUrl( args.url )
local result = uc.isUrl( args.url )
if args.show:lower() == 'msg' then
if args.show:lower() == 'msg' then
if uc.msg[ result ] then
if ui[ result ] then
return uc.msg[ result ]
return ui[ result ]
else
else
return uc.msg.unknown
return ui.unknown
end
end
end
end

Version vom 21. Juli 2019, 07:24 Uhr

Die Dokumentation für dieses Modul kann unter Modul:UrlCheck/doc erstellt werden

-- module import
local ui = mw.loadData( 'Module:UrlCheck/i18n')

-- module variable
local uc = {}

--- Check whether a string is a dotted-decimal IPv4 address.
-- The complete string must consist of exactly four groups of decimal digits
-- separated by dots, and every group must have a value of at most 255.
-- @param address value to test; anything that is not a string yields false
-- @return true if address is an IPv4 address, false otherwise
function uc.ip4( address )
	if type( address ) ~= 'string' then
		return false
	end

	-- Anchored at both ends so that a string which merely contains four
	-- dotted groups (e.g. "1.2.3.4.example.com") is not taken for an address.
	local parts = { address:match( '^(%d+)%.(%d+)%.(%d+)%.(%d+)$' ) }
	if #parts ~= 4 then
		return false
	end

	for _, part in ipairs( parts ) do
		-- %d+ never captures a sign, so only the upper bound can be violated
		if tonumber( part ) > 255 then
			return false
		end
	end
	return true -- ok
end

--- Validate an http/https URL and report the first problem found.
-- Example of the accepted shape:
-- https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource
-- Only the part up to the first slash (user, password, host, port) is
-- inspected; the path is not checked.
-- @param url value to check; anything that is not a string counts as empty
-- @return numeric result code:
--   0  no error detected
--   1  host matches only in Unicode mode (contains non-ASCII characters)
--   2  host is an IPv4 address
--   3  url is not a string or is empty after trimming
--   4  url is longer than 2048 bytes
--   5  url contains whitespace or control characters, or starts with a dot
--   6  url does not start with http:// or https://
--   7  nothing stands between the protocol and the first slash
--   8  invalid character combination ('..', '.@', '@.', '@-', '-.', '.-')
--   9  more than one '@'
--   10 '@' present but user or host is empty
--   11 more than one ':' in the user part
--   12 ':' present in the user part but user or password is empty
--   13 user is longer than 64 bytes
--   14 host is empty
--   15 more than one ':' in the host part
--   16 ':' present in the host part but host or port is empty
--   17 port is not a positive decimal number of at most 65535
--   18 host is longer than 253 bytes
--   19 host contains no dot
--   20 nothing follows the last dot of the host
--   22 host does not have the form <labels>.<two or more letters>
function uc.isUrl( url )
	-- return codes 0 through 2 reserved
	if type( url ) ~= 'string' then
		return 3
	end

	local s = mw.text.trim( url )
	if s == '' then
		return 3
	elseif #s > 2048 then -- limitation because of search engines or IE
		return 4
	elseif s:find( '%s' ) or s:find( '%c' ) or s:match( '^%.' ) then
		return 5
	end

	-- protocol
	local count
	s, count = s:gsub( '^https?://', '' )
	if count == 0 then -- missing or wrong protocol
		return 6
	end

	-- split path from host; the path itself is not examined
	local at = s:find( '/', 1, true )
	if at then
		s = s:sub( 1, at - 1 )
		if s == '' then -- the slash directly follows the protocol
			return 7
		end
	end

	-- future: add path check
	-- split at '/', last part: t == mw.uri.encode( t ) ?

	-- s no longer contains a slash here, so only dot/at/hyphen combinations
	-- need to be looked for
	if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' )
		or s:find( '%-%.' ) or s:find( '%.%-' ) then
		return 8
	end

	-- user and password
	local user = ''
	local host = s
	count = select( 2, s:gsub( '@', '@' ) )
	if count > 1 then
		return 9
	elseif count == 1 then
		at = s:find( '@', 1, true )
		user = s:sub( 1, at - 1 )
		host = s:sub( at + 1 )
		-- string.sub never returns nil, so emptiness must be tested explicitly
		if user == '' or host == '' then
			return 10
		end

		count = select( 2, user:gsub( ':', ':' ) )
		if count > 1 then
			return 11
		elseif count == 1 then
			at = user:find( ':', 1, true )
			local password = user:sub( at + 1 )
			user = user:sub( 1, at - 1 )
			if user == '' or password == '' then
				return 12
			end
		end
		-- applies whether or not a password was given
		if #user > 64 then
			return 13
		end
	end
	if host == '' then
		return 14
	end

	-- host and port
	count = select( 2, host:gsub( ':', ':' ) )
	if count > 1 then
		return 15
	elseif count == 1 then
		at = host:find( ':', 1, true )
		local port = host:sub( at + 1 )
		host = host:sub( 1, at - 1 )
		if host == '' or port == '' then
			return 16
		elseif not port:match( '^[1-9]%d*$' ) or tonumber( port ) > 65535 then
			return 17
		end
	end

	-- handle host part
	if #host > 253 then
		return 18
	end

	-- getting top-level domain
	at = host:match( '^.*()%.' ) -- find last dot
	if not at then
		return 19
	end
	local topLevel = host:sub( at + 1 )
	if topLevel == '' then -- host ends with a dot
		return 20
	end

	-- future: check of top-level domain

	if uc.ip4( host ) then -- is ip4 address
		return 2
	elseif not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then
		return 22
	elseif not host:match( '^[%w%.%-]+%.%a%a+$' ) then
		return 1 -- matches domain only in UTF 8 mode
	end

	return 0
end

--- Template entry point: check the URL passed in frame.args.url.
-- Missing url/show arguments are replaced by empty strings in frame.args.
-- @param frame object whose args table may carry the fields url and show
-- @return the numeric result of uc.isUrl, or, when show equals 'msg'
--   (case-insensitive), the matching entry of the i18n table, falling
--   back to its 'unknown' entry
function uc.checkUrl( frame )
	local args = frame.args
	args.url  = args.url or ''
	args.show = args.show or ''

	local code = uc.isUrl( args.url )

	-- numeric result unless a message was explicitly requested
	if args.show:lower() ~= 'msg' then
		return code
	end
	return ui[ code ] or ui.unknown
end

return uc