Modul:Citation/utilities: Unterschied zwischen den Versionen
+cu.coinsCleanup
(+cu.formatItem) |
(+cu.coinsCleanup) |
||
| Zeile 3: | Zeile 3: | ||
suite = 'Citation', | suite = 'Citation', | ||
sub = 'utilities', | sub = 'utilities', | ||
serial = '2020-03- | serial = '2020-03-31', | ||
} | } | ||
| Zeile 100: | Zeile 100: | ||
-- string cleanup | -- string cleanup | ||
function cu. | function cu.parameterCleanup( s ) | ||
if not cu.isSet( s ) then | if not cu.isSet( s ) then | ||
return s | return s | ||
end | end | ||
-- replace control characters | |||
s = s:gsub( '[\009\010\013]', ' ' ) -- horizontal tab, line feed, carriage return | s = s:gsub( '[\009\010\013]', ' ' ) -- horizontal tab, line feed, carriage return | ||
s = s:gsub( '[%z%c]', '' ) -- control characters | s = s:gsub( '[%z%c]', '' ) -- control characters | ||
s = s:gsub( ' | |||
-- remove tags | |||
s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> tag | |||
s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag | |||
s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag | |||
-- replace characters | |||
s = mw.ustring.gsub( s, '%.%.%.', '…' ) | |||
s = mw.ustring.gsub( s, '%.%.', '‥' ) | |||
return s:gsub( '%s%s+', ' ' ) -- multiple spaces | |||
end | |||
function cu.coinsCleanup( s ) | |||
if not cu.isSet( s ) then | |||
return s | |||
end | |||
local i, value | |||
-- replace characters | |||
s = s:gsub( '\226\128\138', ' ' ); -- hair space | s = s:gsub( '\226\128\138', ' ' ); -- hair space | ||
s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' ); -- zero-width joiner, zero-width space, soft hyphen | s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' ); -- zero-width joiner, zero-width space, soft hyphen | ||
s = | -- replace entities | ||
s = s:gsub( '&amp;', '&' ) | |||
for i, value in ipairs( { '&shy;', '&zwj;' } ) do | |||
s = mw.ustring.gsub( s, value, '' ) | |||
end | |||
for i, value in ipairs( { '&ndash;', '&mdash;' } ) do | |||
s = mw.ustring.gsub( s, value, '-' ) | |||
end | |||
s = mw.ustring.gsub( s, | for i, value in ipairs( { '&nbsp;' } ) do | ||
s = mw.ustring.gsub( s, value, ' ' ) | |||
s = mw.ustring.gsub( s, | |||
end | end | ||
args[ key ] = mw.ustring.gsub( args[ key ], '&%w+;', '' ) -- remove entities | |||
-- replace Wiki syntax | |||
s = s:gsub( "''+", '' ) -- multiple apostrophes | |||
s = mw.ustring.gsub( s, '</*span[^/>]*/*>', '' ) -- span tags | |||
s = mw.ustring.gsub( s, '%[%[[^%[%]]*|([^%[%]]*)%]%]', '%1' ) -- MediaWiki links | |||
s = mw.ustring.gsub( s, '%[%[([^%[%]]*)%]%]', '%1' ) | |||
s = mw.ustring.gsub( s, '%[%a*:?//[^ ]+%s+([^%]]+)%]', '%1' ) -- web links | |||
s = mw.ustring.gsub( s, '%[mailto:[^ ]+%s+([^%]]+)%]', '%1' ) | |||
s = mw.ustring.gsub( s, '%[%a*:?//([^%]]+)%]', '%1' ) | |||
s = mw.ustring.gsub( s, '%[mailto:([^%]]+)%]', '%1' ) | |||
return s:gsub( '%s%s+', ' ' ) -- multiple spaces | return s:gsub( '%s%s+', ' ' ) -- multiple spaces | ||
end | end | ||