Modul:Citation/utilities: Unterschied zwischen den Versionen
KKeine Bearbeitungszusammenfassung |
(Umstellung) |
||
Zeile 105: | Zeile 105: | ||
function cu.parameterCleanup( s ) | function cu.parameterCleanup( s ) | ||
if not cu.isSet( s ) then | if not cu.isSet( s ) then | ||
return s | return s -- nil or '' should be kept | ||
end | end | ||
Zeile 114: | Zeile 114: | ||
-- remove tags | -- remove tags | ||
s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> | s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> tag | ||
s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag | s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag | ||
s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag | s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag | ||
Zeile 128: | Zeile 128: | ||
end | end | ||
-- remove illegal chars from pages parameters | |||
function cu.cleanupPageNumbers( pages ) | |||
if not cu.isSet( pages ) then | |||
return '' | |||
end | |||
local i, value | |||
pages = '' .. pages -- to string | |||
pages = mw.ustring.gsub( pages, '[–‒—]', '-' ); -- replace dashes with hyphens | |||
pages = mw.ustring.gsub( pages, ' ', ' ' ); | |||
-- replace html entities with hyphens | |||
for i, value in ipairs( { '–', '—', '&%w+;', '&#%d+;', '&#x%x+;' } ) do | |||
pages = mw.ustring.gsub( pages, value, '-' ) | |||
end | |||
return pages | |||
end | |||
-- string cleanup before COinS creation | |||
function cu.coinsCleanup( s ) | function cu.coinsCleanup( s ) | ||
if not cu.isSet( s ) then | if not cu.isSet( s ) then | ||
return | return '' | ||
end | end | ||
-- replace characters | -- replace characters | ||
s = s:gsub( '\226\128\138', ' ' ); -- hair space | s = s:gsub( '\226\128\138', ' ' ); -- hair space | ||
s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' ); -- zero-width joiner, zero-width space, soft hyphen | s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' ); | ||
-- zero-width joiner, zero-width space, soft hyphen | |||
-- replace entities | -- replace entities | ||
local i, value | |||
s = s:gsub( '&', '&' ) | s = s:gsub( '&', '&' ) | ||
for i, value in ipairs( { '–', '—' } ) do | for i, value in ipairs( { '–', '—' } ) do | ||
s = mw.ustring.gsub( s, value, '-' ) | s = mw.ustring.gsub( s, value, '-' ) | ||
Zeile 150: | Zeile 165: | ||
s = mw.ustring.gsub( s, value, ' ' ) | s = mw.ustring.gsub( s, value, ' ' ) | ||
end | end | ||
s = mw.ustring.gsub( s, | -- remove entities | ||
for i, value in ipairs( { '&%w+;', '&#%d+;', '&#x%x+;', '' } ) do | |||
s = mw.ustring.gsub( s, value, '' ) | |||
end | |||
-- replace Wiki syntax | -- replace Wiki syntax | ||
Zeile 165: | Zeile 183: | ||
end | end | ||
-- remove | -- remove adjoining punctuation marks | ||
function cu. | function cu.finalCleanup( s ) | ||
s = mw.ustring.gsub( s, '%.+%.', '.' ) | |||
s = mw.ustring.gsub( s, '([!%?…‥])%s*%.+', '%1' ) | |||
s = mw.ustring.gsub( s, "([!%?…‥]'')%s*%.+", '%1' ) | |||
return mw.ustring.gsub( s, '([,;:])(%s%.+)', '.' ) | |||
return | |||
end | end | ||
Zeile 210: | Zeile 227: | ||
return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or | return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or | ||
url:match( ext .. '#' ); -- # is # | url:match( ext .. '#' ); -- # is # | ||
end | end | ||
Version vom 31. März 2020, 16:29 Uhr
Die Dokumentation für dieses Modul kann unter Modul:Citation/utilities/doc erstellt werden
-- documentation
local citation = {
	suite  = 'Citation',
	sub    = 'utilities',
	serial = '2020-03-31',
}

-- module variable
local cu = {}

-- module import
local ci = require( 'Module:Citation/i18n' )

-- module-level list of collected error messages (shared by all functions below)
local errorMsgs = {}

-- helper functions

-- Tells whether a parameter carries a value.
-- Returns a falsy value for nil, false and the empty string, true otherwise.
function cu.isSet( param )
	return param and param ~= '';
end

-- Rounds num half-up to decimalPlaces decimal places (default 0).
function cu.round( num, decimalPlaces )
	local mult = 10^( decimalPlaces or 0 )
	return math.floor( num * mult + 0.5 ) / mult
end

-- Appends an error message to the module-level message list.
function cu.addErrorMsg( msg )
	table.insert( errorMsgs, msg )
end

-- Builds one string from the collected error messages.
-- Duplicates are removed from the module-level list in place (the first
-- occurrence is kept). The messages are joined with single spaces; a
-- non-empty result gets one trailing space.
function cu.getErrorMsgs()
	-- Walk backwards so that table.remove does not disturb the indices that
	-- are still to be visited. Each entry is compared only with the entries
	-- BEFORE it: comparing up to #errorMsgs - 1 would compare an entry with
	-- itself and wrongly delete unique messages.
	for i = #errorMsgs, 2, -1 do
		for j = 1, i - 1 do
			if errorMsgs[ i ] == errorMsgs[ j ] then
				table.remove( errorMsgs, i )
				break
			end
		end
	end

	local result = table.concat( errorMsgs, ' ' )
	if result ~= '' then
		result = result .. ' '
	end
	return result
end

-- Gets the first item of a delimiter-separated list.
-- The delimiter is handed to string.find unchanged, i.e. it is interpreted
-- as a Lua pattern. If the delimiter is not found, s is returned untouched
-- (and untrimmed).
function cu.getFirst( s, delimiter )
	local at = s:find( delimiter )
	if at then
		s = mw.text.trim( s:sub( 1, at - 1 ) )
	end
	return s
end

-- Checks whether tab contains the value val.
-- tab may also be a plain string, which is then compared directly.
function cu.inArray( tab, val )
	if type( tab ) == 'string' then
		return tab == val
	end

	for _, value in ipairs( tab ) do
		if value == val then
			return true
		end
	end
	return false
end

-- Converts a (possibly translated) value t to its key in list.
-- list maps keys to either a string or an array of strings. Returns the key
-- of the first entry found by pairs() that contains t, or '' if none does.
function cu.getKey( t, list )
	for key, tab in pairs( list ) do
		if cu.inArray( tab, t ) then
			return key
		end
	end
	return ''
end

-- Returns a single value from the frame argument table.
-- list[ param ] is an array of argument names; the value of the first of
-- them that is set in args is returned, otherwise ''.
function cu.getArgValue( list, param, args )
	-- must be local: a bare assignment here would create a global "value"
	local value = ''
	if list[ param ] then
		for _, name in ipairs( list[ param ] ) do
			if cu.isSet( args[ name ] ) then
				value = args[ name ]
				break
			end
		end
	end
	return value
end

-- String cleanup of a parameter value.
-- Replaces or removes control characters and strips <br>, <p> and <div>
-- tags; if any of this changed the string, the "wrongChars" error message
-- is recorded. Afterwards runs of dots are turned into ellipsis characters
-- and multiple whitespace is collapsed.
function cu.parameterCleanup( s )
	if not cu.isSet( s ) then
		return s -- nil or '' should be kept
	end

	local orig = s

	-- replace control characters
	s = s:gsub( '[\009\010\013]', ' ' ) -- horizontal tab, line feed, carriage return
	s = s:gsub( '[%z%c]', '' ) -- control characters

	-- remove tags
	s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> tag
	s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag
	s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag

	if orig ~= s then
		cu.addErrorMsg( ci.texts.wrongChars )
	end

	-- replace characters
	s = mw.ustring.gsub( s, '%.%.%.', '…' )
	s = mw.ustring.gsub( s, '%.%.', '‥' )

	-- multiple spaces; the parentheses drop gsub's second result (the
	-- match count) so that callers receive exactly one value
	return ( s:gsub( '%s%s+', ' ' ) )
end

-- Removes illegal characters from a pages parameter.
-- Returns '' for an unset value; otherwise the value converted to a string
-- with dashes and HTML entities replaced by hyphens.
function cu.cleanupPageNumbers( pages )
	if not cu.isSet( pages ) then
		return ''
	end

	pages = '' .. pages -- to string
	pages = mw.ustring.gsub( pages, '[–‒—]', '-' ); -- replace dashes with hyphens

	-- NOTE(review): the entity literals below were reconstructed. The copy
	-- under review showed them already decoded (a space replaced by a space,
	-- dashes in the entity list), which made these statements no-ops --
	-- confirm against the stored module text.
	pages = mw.ustring.gsub( pages, '&nbsp;', ' ' );

	-- replace html entities with hyphens
	for _, value in ipairs( { '&ndash;', '&mdash;', '&%w+;', '&#%d+;', '&#x%x+;' } ) do
		pages = mw.ustring.gsub( pages, value, '-' )
	end
	return pages
end

-- String cleanup before COinS creation.
-- Returns '' for an unset value. Otherwise invisible characters, HTML
-- entities and wiki markup (apostrophe runs, span tags, internal and
-- external links) are replaced or removed and multiple whitespace is
-- collapsed.
function cu.coinsCleanup( s )
	if not cu.isSet( s ) then
		return ''
	end

	-- replace characters
	s = s:gsub( '\226\128\138', ' ' ); -- hair space
	s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' );
		-- zero-width joiner, zero-width space, soft hyphen

	-- replace entities
	-- NOTE(review): the entity literals below were reconstructed. The copy
	-- under review showed them already decoded ('&' replaced by '&', etc.),
	-- which made these statements no-ops -- confirm against the stored
	-- module text.
	s = s:gsub( '&amp;', '&' )
	for _, value in ipairs( { '&ndash;', '&mdash;' } ) do
		s = mw.ustring.gsub( s, value, '-' )
	end
	for _, value in ipairs( { '&nbsp;' } ) do
		s = mw.ustring.gsub( s, value, ' ' )
	end

	-- remove entities
	-- NOTE(review): the list used to end with an empty pattern '', which is
	-- a no-op. It was possibly an invisible character lost in rendering --
	-- confirm against the stored module text.
	for _, value in ipairs( { '&%w+;', '&#%d+;', '&#x%x+;' } ) do
		s = mw.ustring.gsub( s, value, '' )
	end

	-- replace Wiki syntax
	s = s:gsub( "''+", '' ) -- multiple apostrophes
	s = mw.ustring.gsub( s, '</*span[^/>]*/*>', '' ) -- span tags
	s = mw.ustring.gsub( s, '%[%[[^%[%]]*|([^%[%]]*)%]%]', '%1' ) -- MediaWiki links
	s = mw.ustring.gsub( s, '%[%[([^%[%]]*)%]%]', '%1' )
	s = mw.ustring.gsub( s, '%[%a*:?//[^ ]+%s+([^%]]+)%]', '%1' ) -- web links
	s = mw.ustring.gsub( s, '%[mailto:[^ ]+%s+([^%]]+)%]', '%1' )
	s = mw.ustring.gsub( s, '%[%a*:?//([^%]]+)%]', '%1' )
	s = mw.ustring.gsub( s, '%[mailto:([^%]]+)%]', '%1' )

	-- multiple spaces; the parentheses drop gsub's match count
	return ( s:gsub( '%s%s+', ' ' ) )
end

-- Removes adjoining punctuation marks.
-- Collapses runs of full stops, drops full stops that follow '!', '?', '…'
-- or '‥' (optionally followed by ''), and turns ',', ';' or ':' followed by
-- one whitespace character and full stops into a single full stop.
function cu.finalCleanup( s )
	s = mw.ustring.gsub( s, '%.+%.', '.' )
	s = mw.ustring.gsub( s, '([!%?…‥])%s*%.+', '%1' )
	s = mw.ustring.gsub( s, "([!%?…‥]'')%s*%.+", '%1' )

	-- the parentheses drop gsub's match count
	return ( mw.ustring.gsub( s, '([,;:])(%s%.+)', '.' ) )
end

-- Builds an external wiki link: [url text].
function cu.makeLink( url, text )
	return '[' .. url .. ' ' .. text .. ']'
end

-- Builds an external link to doi.org for the given DOI.
-- A leading resolver URL is stripped and square brackets are
-- percent-encoded before the link is assembled.
function cu.makeDoiLink( doi )
	-- the dot is escaped so that it matches only a literal '.'
	doi = mw.ustring.gsub( doi, 'https?://doi%.org/', '' )
	doi = doi:gsub( '%[', '%%5B' )
	doi = doi:gsub( '%]', '%%5D' )
	return cu.makeLink( 'https://doi.org/' .. mw.uri.encode( doi ), doi )
end

-- Formats aDate with the content language's formatDate using aFormat.
-- Returns '' if aDate is the empty string. If formatting raises an error,
-- errorMsg is recorded and '' is returned.
function cu.getDate( aDate, aFormat, errorMsg )
	if aDate == '' then
		return ''
	end

	local function formatDate( aDate, aFormat )
		return mw.getContentLanguage():formatDate( aFormat, aDate, true )
	end

	-- formatDate throws on unparsable dates, hence the protected call
	local success, t = pcall( formatDate, aDate, aFormat )
	if success then
		return t
	end
	cu.addErrorMsg( errorMsg )
	return ''
end

-- Checks whether url contains the file extension ext (case-insensitive).
-- The extension has to be at the end of the url or be followed by '?', '#'
-- or the entity '&#35;'. Returns the matched text (truthy) or nil.
-- ext is inserted into a Lua pattern without escaping.
function cu.hasExtension( url, ext )
	ext = '%.' .. ext:upper()
	url = url:upper()

	-- NOTE(review): '&#35;' was reconstructed. The copy under review showed
	-- it decoded to '#', duplicating the '[%?#]' test -- confirm against the
	-- stored module text.
	return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or
		url:match( ext .. '&#35;' ); -- &#35; is #
end

-- Appends s to tab if s is set; with formatStr, the formatted string is
-- appended instead.
function cu.insertItem( tab, s, formatStr )
	if cu.isSet( s ) then
		if formatStr then
			table.insert( tab, mw.ustring.format( formatStr, s ) )
		else
			table.insert( tab, s )
		end
	end
end

-- Like cu.insertItem, but for two values which both have to be set.
-- With formatStr, one formatted string is appended; without it, s1 and s2
-- are appended as two separate items.
function cu.insertItem2( tab, s1, s2, formatStr )
	if cu.isSet( s1 ) and cu.isSet( s2 ) then
		if formatStr then
			table.insert( tab, mw.ustring.format( formatStr, s1, s2 ) )
		else
			table.insert( tab, s1 )
			table.insert( tab, s2 )
		end
	end
end

-- Returns s, formatted with formatStr if given, or '' if s is not set.
function cu.formatItem( s, formatStr )
	if cu.isSet( s ) then
		if formatStr then
			return mw.ustring.format( formatStr, s )
		else
			return s
		end
	else
		return ''
	end
end

-- Returns s1 and s2 formatted with formatStr (or joined by a space if no
-- formatStr is given), or '' unless both are set.
function cu.formatItem2( s1, s2, formatStr )
	if cu.isSet( s1 ) and cu.isSet( s2 ) then
		if formatStr then
			return mw.ustring.format( formatStr, s1, s2 )
		else
			return s1 .. ' ' .. s2
		end
	else
		return ''
	end
end

return cu