Modul:UrlCheck: Unterschied zwischen den Versionen

Aus skandinavien-wiki.net
K (path früher abgetrennt)
K (+ip4)
Zeile 1: Zeile 1:
local uc = {}
local uc = {}
function uc.ip4( address )
local value
local parts = { address:match("(%d+)%.(%d+)%.(%d+)%.(%d+)") }
if #parts == 4 then
for _,value in pairs( parts ) do
if tonumber( value ) < 0 or tonumber( value ) > 255
then return 0 end
end
return 1 -- ok
else
return 0
end
end


function uc.isUrl( url )
function uc.isUrl( url )
Zeile 92: Zeile 106:
-- future: check of top-level domain
-- future: check of top-level domain


if uc.ip4( host ) == 1 then return 0 end -- is ip4 address
if not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then return 20 end
if not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then return 20 end
if not host:match( '^[%w%.%-]+%.%a%a+$' ) then
if not host:match( '^[%w%.%-]+%.%a%a+$' ) then

Version vom 29. August 2017, 18:59 Uhr

Die Dokumentation für dieses Modul kann unter Modul:UrlCheck/doc erstellt werden

local uc = {}

-- Checks whether a string is an IPv4 address in dotted-decimal notation.
--
-- The whole string must consist of exactly four groups of decimal digits
-- separated by dots, and every group must have a value of at most 255.
-- Leading zeros in a group are tolerated.
--
-- Parameter: address - value to test; anything but a string is rejected
-- Returns:   1 if address is an IPv4 address, 0 otherwise
function uc.ip4( address )
	if type( address ) ~= 'string' then return 0 end

	-- Anchored at both ends: a dotted quad that is only a substring of the
	-- input (e.g. '1.2.3.4.5' or 'x1.2.3.4') must not count as an address.
	local parts = { address:match( '^(%d+)%.(%d+)%.(%d+)%.(%d+)$' ) }
	if #parts ~= 4 then return 0 end

	-- The captures contain digits only, so just the upper bound needs a check.
	for _, part in ipairs( parts ) do
		if tonumber( part ) > 255 then return 0 end
	end
	return 1 -- ok
end

-- Counts the occurrences of ch in s. ch is used as a Lua pattern, so it must
-- be a single character without pattern magic (here: '@' and ':').
local function countChar( s, ch )
	local _, n = s:gsub( ch, '' )
	return n
end

-- Checks whether a string looks like a valid http/https URL.
--
-- Layout that is taken apart:
--   https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource
-- Only the authority (user, password, host, port) is validated; everything
-- from the first '/', '?' or '#' on is ignored.
--
-- Parameter: url - value to test
-- Returns a numeric code:
--    0  valid URL (ASCII domain name or IPv4 address as host)
--    1  valid, but the host matches the domain pattern only in UTF-8 mode
--    2  url is not a string
--    3  url is empty or longer than 2048 bytes
--    4  whitespace, control character, leading dot, or misplaced dot/hyphen
--    5  missing or wrong protocol (only 'http://' and 'https://' accepted)
--    6  empty host
--    7  more than one '@'
--    9  more than one ':' in the user part
--   11  user name longer than 64 bytes (checked only if a password is given)
--   12  more than one ':' in host and port
--   14  port is not a number in 1..65535
--   15  host longer than 253 bytes
--   16  host contains no dot
--   20  host does not match the domain pattern
-- Codes 8, 10, 13 and 17 are not returned.
function uc.isUrl( url )
	-- return codes 0 and 1 reserved
	if type( url ) ~= 'string' then return 2 end

	local s = mw.text.trim( url )
	if ( s == '' ) or ( #s > 2048 ) then return 3 end
		-- limitation because of search engines or IE
	if s:find( '%s' ) or s:find( '%c' ) or s:match( '^%.' ) then return 4 end

	-- protocol
	local count
	s, count = s:gsub( '^http://', '' )
	if count == 0 then
		s, count = s:gsub( '^https://', '' )
	end
	if count == 0 then return 5 end -- missing or wrong protocol

	-- cut off path, query and fragment: the authority ends at the first
	-- '/', '?' or '#', so 'http://example.com?x=1' has host 'example.com'
	local at = s:find( '[/?#]' )
	if at then
		s = s:sub( 1, at - 1 )
	end

	-- future: add path check
	-- split at '/', last part: t == mw.uri.encode( t ) ?

	-- misplaced dots and hyphens anywhere in the authority
	if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' )
		or s:find( '%-%.' ) or s:find( '%.%-' )
		then return 4 end

	-- user and password
	local host = s
	count = countChar( s, '@' )
	if count > 1 then return 7 end
	if count == 1 then
		at = s:find( '@', 1, true )
		local user = s:sub( 1, at - 1 )
		host = s:sub( at + 1 )

		count = countChar( user, ':' )
		if count > 1 then return 9 end
		if count == 1 then
			-- drop the password, keep the user name for the length check
			user = user:sub( 1, user:find( ':', 1, true ) - 1 )
			if #user > 64 then return 11 end
		end
	end
	if host == '' then return 6 end

	-- A host must not start with a dot or hyphen. With user info this is
	-- already caught by the '@[%.%-]' test above; this covers the case
	-- without user info.
	if host:find( '^[%.%-]' ) then return 4 end

	-- host and port
	count = countChar( host, ':' )
	if count > 1 then return 12 end
	if count == 1 then
		at = host:find( ':', 1, true )
		local port = host:sub( at + 1 )
		host = host:sub( 1, at - 1 )
		if ( port:match( '^[1-9]%d*$' ) == nil ) or
			( tonumber( port ) > 65535 ) then return 14 end
	end

	-- handle host part
	if #host > 253 then return 15 end

	-- a top-level domain requires at least one dot in the host
	if not host:find( '.', 1, true ) then return 16 end

	-- future: check of top-level domain

	if uc.ip4( host ) == 1 then return 0 end -- is ip4 address
	if not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then return 20 end
	if not host:match( '^[%w%.%-]+%.%a%a+$' ) then
		return 1 -- matches domain only in UTF 8 mode
	end

	return 0
end

-- Template entry point: validates the 'url' argument of the invocation.
-- A missing argument is treated as an empty string.
-- Returns the numeric result code of uc.isUrl.
function uc.checkUrl( frame )
	local url = frame.args.url
	if not url then
		url = ''
	end
	return uc.isUrl( url )
end

return uc