Modul:UrlCheck: Unterschied zwischen den Versionen

Aus skandinavien-wiki.net
K (Keine Bearbeitungszusammenfassung)
K (30 Versionen von wikivoyage:Modul:UrlCheck importiert)
 
(30 dazwischenliegende Versionen von 3 Benutzern werden nicht angezeigt)
Zeile 1: Zeile 1:
local uc = {}
-- module variable and administration
local uc = {
moduleInterface = {
suite  = 'UrlCheck',
serial = '2023-01-17',
item  = 40849609
}
}


function uc.isUrl( url )
function uc.ip4( address )
-- return codes 0 and 1 reserved
local parts = { address:match( '(%d+)%.(%d+)%.(%d+)%.(%d+)' ) }
if ( url == nil ) or ( type( url ) ~= 'string' ) then return 2 end
local value
if #parts == 4 then
for _, value in pairs( parts ) do
if tonumber( value ) < 0 or tonumber( value ) > 255 then
return false
end
end
return true -- ok
end
return false
end
 
function uc.isUrl( url, skipPathCheck )
-- return codes 0 through 2 reserved
if not url or type( url ) ~= 'string' then
return 3
end


local s = mw.text.trim( url )
local s = mw.text.trim( url )
if ( s == '' ) or ( #s > 2048 ) then return 3 end
if s == '' then
-- limitation because of search engines or IE
return 3
if s:find( '%s' ) or s:find( '%c' ) or s:find( '%.%.' ) or s:find( '%.@' )
elseif #s > 2048 then -- limitation because of search engines or IE
or s:find( '@[%.%-]' ) or s:find( '%-%.' ) or s:find( '%.%-' )
return 4
or s:find( '%./' ) or s:find( '/%.' ) or s:match( '^%.' )
elseif s:find( '%s' ) or s:find( '%c' ) or s:match( '^%.' ) then
then return 4 end
return 5
 
end
-- https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource
-- https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource


-- protocol
-- protocol
local count
local count
s,count = s:gsub( '^http://', '' )
s, count = s:gsub( '^https?://', '' )
if count == 0 then
if count == 0 then
s,count = s:gsub( '^https://', '' )
s, count = s:gsub( '^//', '' )
-- if count == 0 then
end
-- s,count = s:gsub( '^//', '' )
if count == 0 then -- missing or wrong protocol
-- end
return 6
end
end
if count == 0 then return 5 end -- missing or wrong protocol


local user = ''
local user = ''
Zeile 30: Zeile 53:
local host = ''
local host = ''
local port = ''
local port = ''
local path = ''
local aPath = ''
local topLevel = ''
local topLevel = ''
local at
 
-- split path from host
local at = s:find( '/' )
if at then
aPath = s:sub( at + 1, #s )
s = s:sub( 1, at - 1 )
if not s then
return 7
end
end
 
-- path check
if not skipPathCheck and aPath ~= '' then
if not aPath:match( '^[-A-Za-z0-9_.,~%%%+&:;#*?!=()@/\128-\255]*$' ) then
return 23
end
end
 
if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' ) or s:find( '%-%.' )
or s:find( '%.%-' ) or s:find( '%./' ) or s:find( '/%.' ) then
return 8
end


-- user and password
-- user and password
_,count = s:gsub( '@', '@' )
s, count = s:gsub( '@', '@' )
if count > 1 then return 6 end
if count > 1 then
if count == 1 then
return 9
elseif count == 1 then
at = s:find( '@' )
at = s:find( '@' )
user = s:sub( 1, at - 1 )
user = s:sub( 1, at - 1 )
s = s:sub( at + 1, #s )
host = s:sub( at + 1, #s )
if not user or not s then return 7 end
if not user or not s then
return 10
end


_,count = user:gsub( ':', ':' )
user, count = user:gsub( ':', ':' )
if count > 1 then return 8 end
if count > 1 then
if count == 1 then
return 11
elseif count == 1 then
at = user:find( ':' )
at = user:find( ':' )
password = user:sub( at + 1, #user )
password = user:sub( at + 1, #user )
user = user:sub( 1, at - 1 )
user = user:sub( 1, at - 1 )
if not user or not password then return 9 end
if not user or not password then
if #user > 64 then return 10 end
return 12
elseif #user > 64 then
return 13
end
end
end
end
-- host and path
at = s:find( '/' )
if at ~= nil then
host = s:sub( 1, at - 1 )
path = s:sub( at + 1, #s )
if not host then return 11 end
else
else
host = s
host = s
if host == '' then return 11 end
end
end
 
if host == '' then
-- future: add path check
return 14
-- split at '/', last part: t == mw.uri.encode( t ) ?
end


-- host and port
-- host and port
_,count = host:gsub( ':', ':' )
host, count = host:gsub( ':', ':' )
if count > 1 then return 13 end
if count > 1 then
if count == 1 then
return 15
elseif count == 1 then
at = host:find( ':' )
at = host:find( ':' )
port = host:sub( at + 1, #host )
port = host:sub( at + 1, #host )
host = host:sub( 1, at - 1 )
host = host:sub( 1, at - 1 )
if not host or not port then return 11 end
if not host or not port then
if ( port:match( '^[1-9]%d*$' ) == nil ) or
return 16
( tonumber( port ) > 65535 ) then return 14 end
elseif not port:match( '^[1-9]%d*$' ) or tonumber( port ) > 65535 then
return 17
end
end
end


-- handle host part
-- handle host part
if #host > 253 then return 15 end
if #host > 253 then
return 18
end


-- getting top-level domain
-- getting top-level domain
at = host:match( '^.*()%.' ) -- find last dot
at = host:match( '^.*()%.' ) -- find last dot
if at == nil then return 16 end
if not at then
return 19
end
topLevel = host:sub( at + 1, #host )
topLevel = host:sub( at + 1, #host )
if not topLevel then return 17 end
if not topLevel then
return 20
end


-- future: check of top-level domain
-- future: check of top-level domain


if not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then return 20 end
if uc.ip4( host ) then -- is ip4 address
if not host:match( '^[%w%.%-]+%.%a%a+$' ) then
return 2
elseif not mw.ustring.match( host, '^[ะ-๏%w%.%-]+%.%a%a+$' ) then
-- Thai diacritical marks ะ (0E30) - ๏ (0E4F)
return 22
elseif not host:match( '^[%w%.%-]+%.%a%a+$' ) then
return 1 -- matches domain only in UTF 8 mode
return 1 -- matches domain only in UTF 8 mode
end
end


return 0
return 0
end
function uc.uriEncodePath( url )
local at, to = url:find( '[^/]/[^/]' )
if at then
local domain = url:sub( 1, at + 1 )
local aPath = url:sub( at + 2, #url )
url = domain .. mw.uri.encode( aPath, 'PATH' )
end
return url
end
end


function uc.checkUrl( frame )
function uc.checkUrl( frame )
local args = frame.args
local args = frame.args
return uc.isUrl( args.url or '' )
local pArgs = frame:getParent().args
args.url    = args.url or pArgs.url or ''
args.show  = args.show or pArgs.show or ''
 
local result = uc.isUrl( args.url, false )
if args.show:lower() == 'msg' then
local ui = mw.loadData( 'Module:UrlCheck/i18n')
if ui[ result ] then
return ui[ result ]
else
return ui.unknown
end
end
return result
end
 
function uc.encodePath( frame )
local args  = frame.args
local pArgs = frame:getParent().args
args.url    = args.url or args[ 1 ] or pArgs.url or pArgs[ 1 ] or ''
return uc.uriEncodePath( args.url )
end
end


return uc
return uc

Aktuelle Version vom 11. Februar 2023, 21:58 Uhr

Die Dokumentation für dieses Modul kann unter Modul:UrlCheck/doc erstellt werden

-- Module table: collects every exported function of this module.
local uc = {}

-- Administrative data describing this module (suite name, version stamp
-- and a numeric item id).
uc.moduleInterface = {
	suite  = 'UrlCheck',
	serial = '2023-01-17',
	item   = 40849609
}

--- Tests whether a string is a dotted-decimal IPv4 address.
-- The whole string has to consist of exactly four groups of decimal digits
-- separated by dots, and no group may have a value above 255.
-- @param address value to test; anything that is not a string yields false
-- @return true if address is an IPv4 address, false otherwise
function uc.ip4( address )
	if type( address ) ~= 'string' then
		return false
	end

	-- Anchored at both ends: digits embedded in a longer host name such as
	-- 'v1.2.3.4.example.com' or '1.2.3.4.5' must not count as an address.
	local parts = { address:match( '^(%d+)%.(%d+)%.(%d+)%.(%d+)$' ) }
	if #parts ~= 4 then
		return false
	end

	for _, value in ipairs( parts ) do
		-- %d+ cannot capture a sign, so only the upper bound needs a check
		if tonumber( value ) > 255 then
			return false
		end
	end
	return true
end

--- Checks whether a string is a syntactically acceptable web address.
-- Accepted are http://, https:// and protocol-relative (//) addresses of the
-- form https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource
--
-- @param url           value to check
-- @param skipPathCheck if truthy, the part behind the first slash is not
--                      checked for disallowed characters
-- @return numeric result code
--   0  address accepted
--   1  address accepted, but the host only matches with Unicode-aware matching
--   2  address accepted, host is an IPv4 address
--   3  url is missing, not a string, or empty after trimming
--   4  longer than 2048 bytes
--   5  contains whitespace or control characters, or starts with a dot
--   6  missing or wrong protocol
--   8  invalid sequence of dots, hyphens or @ in front of the path
--   9  more than one @
--   11 more than one colon in the user part
--   13 user name longer than 64 bytes
--   14 empty host
--   15 more than one colon in the host/port part
--   17 invalid port
--   18 host longer than 253 bytes
--   19 host contains no dot
--   22 host contains disallowed characters or has no valid last label
--   23 path contains disallowed characters
--   Codes 7, 10, 12, 16 and 20 are no longer returned: they guarded against
--   string.sub returning nil, which cannot happen.
function uc.isUrl( url, skipPathCheck )
	-- return codes 0 through 2 reserved
	if not url or type( url ) ~= 'string' then
		return 3
	end

	local s = mw.text.trim( url )
	if s == '' then
		return 3
	elseif #s > 2048 then -- limitation because of search engines or IE
		return 4
	elseif s:find( '%s' ) or s:find( '%c' ) or s:match( '^%.' ) then
		return 5
	end

	-- protocol
	local count
	s, count = s:gsub( '^https?://', '' )
	if count == 0 then
		s, count = s:gsub( '^//', '' )
	end
	if count == 0 then -- missing or wrong protocol
		return 6
	end

	-- split path from host: everything behind the first slash is the path
	local aPath = ''
	local at = s:find( '/', 1, true )
	if at then
		aPath = s:sub( at + 1 )
		s = s:sub( 1, at - 1 )
	end

	-- path check
	if not skipPathCheck and aPath ~= '' then
		if not aPath:match( '^[-A-Za-z0-9_.,~%%%+&:;#*?!=()@/\128-\255]*$' ) then
			return 23
		end
	end

	-- s no longer contains a slash here, so only dot, hyphen and @
	-- combinations are left to check
	if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' )
		or s:find( '%-%.' ) or s:find( '%.%-' ) then
		return 8
	end

	-- user and password
	local user = ''
	local host
	count = select( 2, s:gsub( '@', '' ) )
	if count > 1 then
		return 9
	elseif count == 1 then
		at = s:find( '@', 1, true )
		user = s:sub( 1, at - 1 )
		host = s:sub( at + 1 )

		count = select( 2, user:gsub( ':', '' ) )
		if count > 1 then
			return 11
		elseif count == 1 then
			-- drop the password; it is not validated any further
			user = user:sub( 1, user:find( ':', 1, true ) - 1 )
		end
		-- the length limit applies whether or not a password was given
		if #user > 64 then
			return 13
		end
	else
		host = s
	end
	if host == '' then
		return 14
	end

	-- host and port
	count = select( 2, host:gsub( ':', '' ) )
	if count > 1 then
		return 15
	elseif count == 1 then
		at = host:find( ':', 1, true )
		local port = host:sub( at + 1 )
		host = host:sub( 1, at - 1 )
		-- a port is a number without leading zero and not above 65535
		if not port:match( '^[1-9]%d*$' ) or tonumber( port ) > 65535 then
			return 17
		end
	end

	-- handle host part
	if #host > 253 then
		return 18
	end

	-- a host needs at least one dot in front of its top-level domain
	if not host:find( '.', 1, true ) then
		return 19
	end

	-- future: check of top-level domain

	if uc.ip4( host ) then -- is ip4 address
		return 2
	elseif not mw.ustring.match( host, '^[ะ-๏%w%.%-]+%.%a%a+$' ) then
		-- Thai diacritical marks ะ (0E30) - ๏ (0E4F)
		return 22
	elseif not host:match( '^[%w%.%-]+%.%a%a+$' ) then
		return 1 -- matches domain only in UTF 8 mode
	end

	return 0
end

--- Percent-encodes the part of a URL that follows the first single slash.
-- The split point is the first slash that has a non-slash character on both
-- sides, which skips the double slash behind the protocol. A URL without such
-- a slash is returned unchanged.
-- NOTE(review): the complete remainder, including any further '/', '?' or
-- '#', is handed to mw.uri.encode - confirm that this is intended.
-- @param url string containing the URL
-- @return the URL with its remainder encoded by mw.uri.encode( ..., 'PATH' )
function uc.uriEncodePath( url )
	-- position of the character in front of the separating slash
	local before = url:find( '[^/]/[^/]' )
	if not before then
		return url
	end

	local slash = before + 1
	local head = url:sub( 1, slash )   -- up to and including the slash
	local tail = url:sub( slash + 1 )  -- everything behind it
	return head .. mw.uri.encode( tail, 'PATH' )
end

--- Template/#invoke entry point: checks the URL given in parameter "url".
-- Parameters are read from the frame itself first and from its parent frame
-- second. With show=msg (case-insensitive) the message stored for the result
-- code in Module:UrlCheck/i18n is returned, or its "unknown" entry if there is
-- no message for that code; otherwise the numeric code of uc.isUrl is returned.
-- @param frame frame object
-- @return message string or numeric result code
function uc.checkUrl( frame )
	local args   = frame.args or {}
	-- getParent() returns nil for a frame without parent; fall back to an
	-- empty argument table instead of raising an error
	local parent = frame:getParent()
	local pArgs  = parent and parent.args or {}

	-- kept in locals so that the caller's argument table is not modified
	local url  = args.url or pArgs.url or ''
	local show = args.show or pArgs.show or ''

	local result = uc.isUrl( url, false )
	if show:lower() == 'msg' then
		local ui = mw.loadData( 'Module:UrlCheck/i18n')
		return ui[ result ] or ui.unknown
	end
	return result
end

--- Template/#invoke entry point: encodes the path of a URL.
-- The URL is taken from the first of these that is set: parameter "url" or
-- the first unnamed parameter of the frame, then the same two of the parent
-- frame. Without any of them an empty string is processed.
-- @param frame frame object
-- @return result of uc.uriEncodePath for that URL
function uc.encodePath( frame )
	local args   = frame.args or {}
	-- getParent() returns nil for a frame without parent; fall back to an
	-- empty argument table instead of raising an error
	local parent = frame:getParent()
	local pArgs  = parent and parent.args or {}

	-- kept in a local so that the caller's argument table is not modified
	local url = args.url or args[ 1 ] or pArgs.url or pArgs[ 1 ] or ''
	return uc.uriEncodePath( url )
end

-- hand the module table to whoever loads this module
return uc