Modul:Citation/utilities: Unterschied zwischen den Versionen

Aus skandinavien-wiki.net
K (Interpunktionszeichen)
K (51 Versionen von wikivoyage:Modul:Citation/utilities importiert)
 
(31 dazwischenliegende Versionen von einem anderen Benutzer werden nicht angezeigt)
Zeile 1: Zeile 1:
-- documentation
-- documentation
local citation = {
local citationUtilities = {
suite  = 'Citation',
suite  = 'Citation',
sub    = 'utilities',  
sub    = 'utilities',  
serial = '2020-03-31',
serial = '2022-10-21'
}
}


-- module variable
-- module variable and administration
local cu = {}
local cu = {
moduleInterface = citationUtilities
}


-- module import
-- module import
-- require( 'strict' )
local ci = require( 'Module:Citation/i18n' )
local ci = require( 'Module:Citation/i18n' )


Zeile 24: Zeile 27:
   local mult = 10^( decimalPlaces or 0 )
   local mult = 10^( decimalPlaces or 0 )
   return math.floor( num * mult + 0.5 ) / mult
   return math.floor( num * mult + 0.5 ) / mult
end
-- converts string to number
function cu.getNumber( s )
if not cu.isSet( s ) then
return 0
end
if s:find( '[sic!]', 1, true ) then
return 0
end
local kb = s:gsub( ci.texts.decimalPoint, '%.' )
return tonumber( kb ) or 0
end
end


Zeile 33: Zeile 48:
-- make complete message from message array
-- make complete message from message array
function cu.getErrorMsgs()
function cu.getErrorMsgs()
local i, j, result
-- remove duplicates
-- remove duplicates
for i = #errorMsgs, 1, -1 do
for i = #errorMsgs, 1, -1 do
Zeile 44: Zeile 58:
end
end


result = table.concat( errorMsgs, ' ' )
local result = table.concat( errorMsgs, ' ' )
if result ~= '' then
if result ~= '' then
result = result .. ' '
result = result .. ' '
Zeile 66: Zeile 80:
end
end


local index, value
for index, value in ipairs( tab ) do
for index, value in ipairs( tab ) do
if value == val then
if value == val then
Zeile 78: Zeile 91:
-- convert values t from list if translated
-- convert values t from list if translated
function cu.getKey( t, list )
function cu.getKey( t, list )
     local result = '', key, tab
     local result = ''
     for key, tab in pairs( list ) do
     for key, tab in pairs( list ) do
         if cu.inArray( tab, t ) then
         if cu.inArray( tab, t ) then
Zeile 90: Zeile 103:
-- returns a single value from frame argument table
-- returns a single value from frame argument table
function cu.getArgValue( list, param, args )
function cu.getArgValue( list, param, args )
value = '', k, v
local value = ''
if list[ param ] then
if list[ param ] then
for k, v in ipairs( list[ param ] ) do
for k, v in ipairs( list[ param ] ) do
Zeile 105: Zeile 118:
function cu.parameterCleanup( s )
function cu.parameterCleanup( s )
if not cu.isSet( s ) then
if not cu.isSet( s ) then
return s
return s -- nil or '' should be kept
end
end


Zeile 114: Zeile 127:


-- remove tags
-- remove tags
s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> tg
s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> tag
s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag
s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag
s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag
s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag
Zeile 122: Zeile 135:
end
end


-- replace character references and entities
s = mw.text.decode( s, true )
-- replace characters
-- replace characters
s = mw.ustring.gsub( s, '%.%.%.', '…' )
s = mw.ustring.gsub( s, '%.%.%.', '…' )
Zeile 128: Zeile 143:
end
end


-- remove illegal chars from pages parameters
function cu.cleanupPageNumbers( pages )
if not cu.isSet( pages ) then
return ''
end
-- replace dashes with hyphens
return mw.ustring.gsub( '' .. pages, '[–‒—]', '-' );
end
-- string cleanup before COinS creation
function cu.coinsCleanup( s )
function cu.coinsCleanup( s )
if not cu.isSet( s ) then
if not cu.isSet( s ) then
return s
return ''
end
end
local i, value


-- replace characters
-- replace characters
s = s:gsub( '\226\128\138', ' ' ); -- hair space
s = s:gsub( '\226\128\138', ' ' ); -- hair space
s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' ); -- zero-width joiner, zero-width space, soft hyphen
s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' );
-- zero-width joiner, zero-width space, soft hyphen


-- replace entities
-- remove characters: soft hyphen, LTR mark, RTL mark
s = s:gsub( '&amp;', '&' )
for i, value in ipairs( { '­', '', '' } ) do
for i, value in ipairs( { '&shy;', '&zwj;', '­' } ) do
s = mw.ustring.gsub( s, value, '' )
s = mw.ustring.gsub( s, value, '' )
end
end
for i, value in ipairs( { '&ndash;', '&mdash;' } ) do
s = mw.ustring.gsub( s, value, '-' )
end
for i, value in ipairs( { '&nbsp;' } ) do
s = mw.ustring.gsub( s, value, ' ' )
end
s = mw.ustring.gsub( s, '&%w+;', '' ) -- remove entities


-- replace Wiki syntax
-- replace Wiki syntax
Zeile 165: Zeile 182:
end
end


-- remove illegal chars from page paramters
-- remove adjoining punctuation marks etc.
function cu.cleanupPageNumbers( pages )
function cu.finalCleanup( s )
if not cu.isSet( pages ) then
s = s:gsub( '%.+%.', '.' ):gsub( '%s%s+', ' ' ):gsub( '([,;:])(%s%.+)', '.' )
return ''
for _, replacement in ipairs( ci.replacements ) do
s = mw.ustring.gsub( s, replacement.s, replacement.r )
end
end
pages = ( '' .. pages ):gsub( '', '-' ); -- replace endashes with hyphens
s = s:gsub( '#b#', '.' ) -- restore bibcode
return pages:gsub( '&[%w+d%];', '-' ); -- replace html entities with hyphens
return s
end
end


function cu.makeLink( url, text )
function cu.makeLink( url, text )
return '[' .. url .. ' ' .. text .. ']'
return mw.ustring.format( '[%s %s]', url, text )
end
 
function cu.makeDoiLink( doi )
doi = mw.ustring.gsub( doi, 'https?://doi.org/', '' )
doi = doi:gsub( '%[', '%%5B' )
doi = doi:gsub( '%]', '%%5D' )
return cu.makeLink( 'https://doi.org/' .. mw.uri.encode( doi ), doi )
end
end


Zeile 188: Zeile 199:
local function formatDate( aDate, aFormat )
local function formatDate( aDate, aFormat )
return mw.getContentLanguage():formatDate( aFormat, aDate, true )
return mw.getContentLanguage():formatDate( aFormat, aDate, true )
end
if aFormat == 'R' then
return aDate
end
end


if aDate ~='' then
if aDate ~='' then
local success, t;
local success, t = pcall( formatDate, aDate, aFormat )
success, t = pcall( formatDate, aDate, aFormat )
if success then
if success then
return t
return t
Zeile 210: Zeile 224:
return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or
return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or
url:match( ext .. '&#035' ); -- &#035 is #
url:match( ext .. '&#035' ); -- &#035 is #
end
-- remove adjoining punctuation marks
function cu.finalCleanup( s )
s = mw.ustring.gsub( s, '%.+%.', '.' )
s = mw.ustring.gsub( s, '([!%?…‥])%s*%.+', '%1' )
return mw.ustring.gsub( s, '([,;:])(%s%.+)', '.' )
end
end


Zeile 266: Zeile 273:
return ''
return ''
end
end
end
function cu.templateStyles( frame )
return frame:extensionTag( 'templatestyles', '', { src = ci.styleSrc } );
end
-- Check digit estimation for countries at, ch, de, and fi
-- See: https://github.com/bohnelang/URN-Pruefziffer
-- Description of the algorithm: http://www.pruefziffernberechnung.de/U/URN.shtml
function cu.getNbnCheckDigit( urn )
-- two-digits codes for ascii characters starting from &#45; == '-'
local code='3947450102030405060708094117############1814191516212223242542262713282931123233113435363738########43'
local sum = 0
local pos = 1
local digit1, digit2, x
urn = urn:upper():sub( 1, -2 ) -- remove last character
for i = 1, urn:len() do
x = 2 * ( urn:byte( i ) - 45 ); -- &#45; == '-'
digit1 = tonumber( code:sub( x + 1, x + 1 ) );
digit2 = tonumber( code:sub( x + 2, x + 2 ) );
if digit1 == 0 then
sum = sum + digit2 * pos
pos = pos + 1
else
sum = sum + digit1 * pos + digit2 * ( pos + 1 )
pos = pos + 2
end
end
return tostring( math.floor( sum / digit2 ) % 10 );
end
function cu.check_UrnNbn( urn )
urn = urn:gsub( '/fragment/.+$', '' ) -- remove fragment
return urn:sub( -1 ) == cu.getNbnCheckDigit( urn )
end
end


return cu
return cu

Aktuelle Version vom 11. Februar 2023, 20:58 Uhr

Die Dokumentation für dieses Modul kann unter Modul:Citation/utilities/doc erstellt werden

-- documentation: module metadata, exposed to callers as cu.moduleInterface
-- (serial is presumably the revision date of this module -- TODO confirm)
local citationUtilities = {
	suite  = 'Citation',
	sub    = 'utilities', 
	serial = '2022-10-21'
}

-- module variable and administration
-- cu is the table returned at the end of this file; every public helper
-- below is attached to it
local cu = {
	moduleInterface = citationUtilities
}

-- module import
-- require( 'strict' )
-- ci supplies the localized settings read below
-- (ci.texts, ci.replacements, ci.styleSrc)
local ci = require( 'Module:Citation/i18n' )

-- file-local state (not a true global): list of error messages, appended to
-- by cu.addErrorMsg and joined into a single string by cu.getErrorMsgs
local errorMsgs = {}

-- helper functions
-- Tells whether a parameter carries a usable value.
-- A falsy input (nil or false) is handed back unchanged; for anything else the
-- result is a boolean that is false only for the empty string.
function cu.isSet( param )
	if not param then
		return param
	end
	return param ~= ''
end

-- Rounds num to the given number of decimal places (0 when omitted).
-- The value is scaled by a power of ten, half a unit is added, the result is
-- floored and scaled back.
function cu.round( num, decimalPlaces )
	local factor = 10 ^ ( decimalPlaces or 0 )
	local shifted = num * factor + 0.5
	return math.floor( shifted ) / factor
end

-- Converts a localized numeric string to a number.
-- Returns 0 when s is unset, when it contains the literal marker '[sic!]',
-- or when the text cannot be parsed by tonumber().
-- The decimal separator is taken from ci.texts.decimalPoint and replaced by '.'.
function cu.getNumber( s )
	if not cu.isSet( s ) then
		return 0
	end
	-- plain search: '[sic!]' marks a value that must not be interpreted
	if s:find( '[sic!]', 1, true ) then
		return 0
	end
	-- Escape punctuation so that the separator is matched literally; an
	-- unescaped '.' would be the "any character" pattern and wipe the string.
	local separator = ci.texts.decimalPoint:gsub( '%p', '%%%0' )
	-- gsub also returns a match count; the assignment keeps the string only
	local normalized = s:gsub( separator, '.' )
	return tonumber( normalized ) or 0
end

-- Appends one message to the file-local error message list.
function cu.addErrorMsg( msg )
	errorMsgs[ #errorMsgs + 1 ] = msg
end

-- Builds the complete message string from the collected error messages.
-- Duplicates are removed from the file-local list in place (the first
-- occurrence of each message is kept, order is preserved). The remaining
-- messages are joined with single spaces and a trailing space is appended.
-- Returns '' when no message was collected.
function cu.getErrorMsgs()
	-- Walk backwards so that table.remove does not shift unvisited entries.
	-- Each entry is compared only with the entries BEFORE it: comparing
	-- against the whole list would match the entry with itself and delete
	-- everything except the last message.
	for i = #errorMsgs, 2, -1 do
		for j = 1, i - 1 do
			if errorMsgs[ i ] == errorMsgs[ j ] then
				table.remove( errorMsgs, i )
				break
			end
		end
	end

	local result = table.concat( errorMsgs, ' ' )
	if result ~= '' then
		result = result .. ' '
	end
	return result
end

-- Returns the first item of a delimiter-separated list.
-- delimiter is handed to string.find as a Lua pattern. When it does not occur,
-- s is returned unchanged; otherwise the text before the first occurrence is
-- returned after trimming with mw.text.trim.
function cu.getFirst( s, delimiter )
	local cut = s:find( delimiter )
	if not cut then
		return s
	end
	local head = mw.text.trim( s:sub( 1, cut - 1 ) )
	return head
end

-- Checks whether val is contained in tab.
-- tab may be a plain string (compared directly with val) or a sequence that is
-- scanned with ipairs. Returns a boolean.
function cu.inArray( tab, val )
	if type( tab ) ~= 'string' then
		for _, item in ipairs( tab ) do
			if item == val then
				return true
			end
		end
		return false
	end

	return tab == val
end

-- Maps a (possibly translated) value t back to its key in list.
-- list is traversed with pairs; the key of the first entry for which
-- cu.inArray( entry, t ) holds is returned, or '' when nothing matches.
function cu.getKey( t, list )
	for key, candidates in pairs( list ) do
		if cu.inArray( candidates, t ) then
			return key
		end
	end
	return ''
end

-- Returns a single value from the argument table args.
-- list[ param ] is expected to be a sequence of argument names; they are tried
-- in order and the first one whose value in args is set (see cu.isSet) wins.
-- Returns '' when list has no entry for param or no name yields a value.
function cu.getArgValue( list, param, args )
	local aliases = list[ param ]
	if not aliases then
		return ''
	end

	for _, alias in ipairs( aliases ) do
		local candidate = args[ alias ]
		if cu.isSet( candidate ) then
			return candidate
		end
	end
	return ''
end

-- Cleans up a parameter string.
-- Unset input (nil, false or '') is returned unchanged. Otherwise control
-- characters and <br>, <p>, <div> tags are removed; if that changed anything,
-- ci.texts.wrongChars is recorded as an error message. Afterwards character
-- references/entities are decoded, '...' and '..' are replaced by the
-- corresponding ellipsis characters and runs of whitespace are collapsed.
-- The final gsub call is returned directly, so the result is followed by its
-- replacement count as a second return value.
function cu.parameterCleanup( s )
	if not cu.isSet( s ) then
		return s -- nil or '' should be kept
	end

	-- horizontal tab, line feed, carriage return become blanks
	local cleaned = s:gsub( '[\009\010\013]', ' ' )
	-- all remaining control characters are dropped
	cleaned = cleaned:gsub( '[%z%c]', '' )

	-- strip <br>, <p> and <div> tags, in this order
	local tagPatterns = { '</*br[^/>]*/*>', '</*p[^/>]*/*>', '</*div[^/>]*/*>' }
	for _, tagPattern in ipairs( tagPatterns ) do
		cleaned = mw.ustring.gsub( cleaned, tagPattern, '' )
	end

	-- anything removed so far counts as wrong input
	if cleaned ~= s then
		cu.addErrorMsg( ci.texts.wrongChars )
	end

	-- replace character references and entities
	cleaned = mw.text.decode( cleaned, true )
	-- three dots first, then the remaining pairs of dots
	cleaned = mw.ustring.gsub( cleaned, '%.%.%.', '…' )
	cleaned = mw.ustring.gsub( cleaned, '%.%.', '‥' )
	return cleaned:gsub( '%s%s+', ' ' ) -- multiple spaces
end

-- Normalizes a pages parameter: en dash, figure dash and em dash are replaced
-- by a plain hyphen. Unset input yields ''. The input is concatenated with ''
-- first, so a number is accepted as well. The gsub call is returned directly,
-- i.e. the string is followed by the replacement count.
function cu.cleanupPageNumbers( pages )
	if cu.isSet( pages ) then
		local text = '' .. pages
		return mw.ustring.gsub( text, '[–‒—]', '-' )
	end

	return ''
end

-- String cleanup before COinS creation.
-- Unset input yields ''. Otherwise hair spaces become blanks, invisible
-- formatting characters are removed, wiki markup (apostrophe runs, span tags,
-- internal and external links) is reduced to its visible text and runs of
-- whitespace are collapsed. The final gsub call is returned directly, so the
-- string is followed by its replacement count as a second return value.
function cu.coinsCleanup( s )
	if not cu.isSet( s ) then
		return ''
	end

	-- replace characters
	s = s:gsub( '\226\128\138', ' ' ) -- U+200A hair space

	-- Invisible characters are written as UTF-8 byte escapes on purpose:
	-- literal invisible characters in the source are easily lost when the
	-- module text is copied or re-saved. mw.ustring.gsub reads the class as
	-- code points, so all of them are removed in a single pass.
	local invisibleChars = '['
		.. '\226\128\141' -- U+200D zero-width joiner
		.. '\226\128\139' -- U+200B zero-width space
		.. '\194\173'     -- U+00AD soft hyphen
		.. '\226\128\142' -- U+200E left-to-right mark
		.. '\226\128\143' -- U+200F right-to-left mark
		.. ']'
	s = mw.ustring.gsub( s, invisibleChars, '' )

	-- replace Wiki syntax
	s = s:gsub( "''+", '' ) -- multiple apostrophes
	s = mw.ustring.gsub( s, '</*span[^/>]*/*>', '' ) -- span tags
	s = mw.ustring.gsub( s, '%[%[[^%[%]]*|([^%[%]]*)%]%]', '%1' ) -- MediaWiki links
	s = mw.ustring.gsub( s, '%[%[([^%[%]]*)%]%]', '%1' )
	s = mw.ustring.gsub( s, '%[%a*:?//[^ ]+%s+([^%]]+)%]', '%1' ) -- web links
	s = mw.ustring.gsub( s, '%[mailto:[^ ]+%s+([^%]]+)%]', '%1' )
	s = mw.ustring.gsub( s, '%[%a*:?//([^%]]+)%]', '%1' )
	s = mw.ustring.gsub( s, '%[mailto:([^%]]+)%]', '%1' )

	return s:gsub( '%s%s+', ' ' ) -- multiple spaces
end

-- Final cleanup of an assembled string: removes adjoining punctuation marks,
-- collapses whitespace, applies every pattern/replacement pair listed in
-- ci.replacements (fields s and r) and finally turns the '#b#' placeholder
-- back into a dot.
function cu.finalCleanup( s )
	local text = s:gsub( '%.+%.', '.' ) -- runs of dots become one dot
	text = text:gsub( '%s%s+', ' ' ) -- multiple spaces
	text = text:gsub( '([,;:])(%s%.+)', '.' ) -- comma etc. followed by dots

	for _, rule in ipairs( ci.replacements ) do
		text = mw.ustring.gsub( text, rule.s, rule.r )
	end

	-- restore bibcode; parentheses drop the replacement count
	return ( text:gsub( '#b#', '.' ) )
end

-- Builds external-link wiki markup of the form [url text].
function cu.makeLink( url, text )
	local linkFormat = '[%s %s]'
	return mw.ustring.format( linkFormat, url, text )
end

-- Formats aDate with the content language's formatDate using aFormat.
-- aFormat == 'R' returns aDate as it is; an empty aDate yields ''.
-- When formatting raises an error, errorMsg is recorded through
-- cu.addErrorMsg and '' is returned.
function cu.getDate( aDate, aFormat, errorMsg )
	if aFormat == 'R' then
		return aDate
	end
	if aDate == '' then
		return ''
	end

	-- formatDate may throw on unparsable input, hence the protected call
	local ok, formatted = pcall( function()
		return mw.getContentLanguage():formatDate( aFormat, aDate, true )
	end )
	if ok then
		return formatted
	end

	cu.addErrorMsg( errorMsg )
	return ''
end

-- Checks whether url contains the file extension ext (case-insensitive).
-- The extension must be followed by the end of the string, by '?' or '#', or
-- by the character reference '&#035' (which stands for '#'). ext is inserted
-- into the Lua pattern without escaping. Returns the matched text on success
-- and nil otherwise.
function cu.hasExtension( url, ext )
	local dotExt = '%.' .. ext:upper()
	local upperUrl = url:upper()

	for _, tail in ipairs( { '$', '[%?#]', '&#035' } ) do
		local hit = upperUrl:match( dotExt .. tail )
		if hit then
			return hit
		end
	end
	return nil
end

-- Appends s to tab when s is set (see cu.isSet); nothing happens otherwise.
-- With formatStr given, s is first passed through
-- mw.ustring.format( formatStr, s ).
function cu.insertItem( tab, s, formatStr )
	if not cu.isSet( s ) then
		return
	end

	local item = s
	if formatStr then
		item = mw.ustring.format( formatStr, s )
	end
	table.insert( tab, item )
end

-- Appends two values to tab, but only when both s1 and s2 are set.
-- With formatStr given, a single entry built by
-- mw.ustring.format( formatStr, s1, s2 ) is appended; without it s1 and s2 are
-- appended as two separate entries.
function cu.insertItem2( tab, s1, s2, formatStr )
	if not ( cu.isSet( s1 ) and cu.isSet( s2 ) ) then
		return
	end

	if not formatStr then
		table.insert( tab, s1 )
		table.insert( tab, s2 )
		return
	end

	table.insert( tab,
		mw.ustring.format( formatStr, s1, s2 )
	)
end

-- Returns s formatted with formatStr, s itself when no formatStr is given,
-- or '' when s is not set.
function cu.formatItem( s, formatStr )
	if not cu.isSet( s ) then
		return ''
	end
	if not formatStr then
		return s
	end
	return mw.ustring.format( formatStr, s )
end

-- Combines two values into one string; '' unless both s1 and s2 are set.
-- With formatStr the result is mw.ustring.format( formatStr, s1, s2 ),
-- without it the two values are joined by a single blank.
function cu.formatItem2( s1, s2, formatStr )
	if not ( cu.isSet( s1 ) and cu.isSet( s2 ) ) then
		return ''
	end
	if not formatStr then
		return s1 .. ' ' .. s2
	end
	return mw.ustring.format( formatStr, s1, s2 )
end

-- Emits a <templatestyles> extension tag with empty content whose src
-- attribute is taken from ci.styleSrc.
function cu.templateStyles( frame )
	local tagArgs = { src = ci.styleSrc }
	return frame:extensionTag( 'templatestyles', '', tagArgs )
end

-- Check digit estimation for countries at, ch, de, and fi
-- See: https://github.com/bohnelang/URN-Pruefziffer
-- Description of the algorithm: http://www.pruefziffernberechnung.de/U/URN.shtml
--
-- urn is the complete URN including its final (check digit) character; that
-- last character is ignored for the computation. Returns the computed check
-- digit as a one-character string. Returns nil when the digit cannot be
-- computed: the URN has no characters besides the last one, or it contains a
-- character that has no code in the table below.
function cu.getNbnCheckDigit( urn )
	-- two-digits codes for ascii characters starting from &#45; == '-'
	-- ('#' marks characters without a code)
	local code='3947450102030405060708094117############1814191516212223242542262713282931123233113435363738########43'
	local sum = 0
	local pos = 1
	local lastDigit
	urn = urn:upper():sub( 1, -2 ) -- remove last character
	for i = 1, urn:len() do
		local x = 2 * ( urn:byte( i ) - 45 ) -- &#45; == '-'
		local digit1, digit2
		-- a negative offset would make string.sub count from the end of the
		-- table and yield an unrelated code
		if x >= 0 then
			digit1 = tonumber( code:sub( x + 1, x + 1 ) )
			digit2 = tonumber( code:sub( x + 2, x + 2 ) )
		end
		if not digit1 or not digit2 then
			return nil -- character not allowed in this scheme
		end
		if digit1 == 0 then
			-- code with a leading zero contributes a single digit
			sum = sum + digit2 * pos
			pos = pos + 1
		else
			-- both digits contribute, at consecutive positions
			sum = sum + digit1 * pos + digit2 * ( pos + 1 )
			pos = pos + 2
		end
		lastDigit = digit2
	end
	if not lastDigit or lastDigit == 0 then
		return nil
	end
	-- the weighted sum is divided by the last digit processed
	return tostring( math.floor( sum / lastDigit ) % 10 )
end

-- Validates a urn:nbn identifier: a trailing '/fragment/...' part is cut off,
-- then the last remaining character is compared with the check digit computed
-- by cu.getNbnCheckDigit. Returns a boolean.
function cu.check_UrnNbn( urn )
	local base = urn:gsub( '/fragment/.+$', '' ) -- remove fragment
	local expected = cu.getNbnCheckDigit( base )
	return base:sub( -1 ) == expected
end

return cu