Modul:Citation/utilities: Unterschied zwischen den Versionen
Umstellung
KKeine Bearbeitungszusammenfassung |
(Umstellung) |
||
| Zeile 105: | Zeile 105: | ||
function cu.parameterCleanup( s ) | function cu.parameterCleanup( s ) | ||
if not cu.isSet( s ) then | if not cu.isSet( s ) then | ||
return s | return s -- nil or '' should be kept | ||
end | end | ||
| Zeile 114: | Zeile 114: | ||
-- remove tags | -- remove tags | ||
s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> | s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> tag | ||
s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag | s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag | ||
s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag | s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag | ||
| Zeile 128: | Zeile 128: | ||
end | end | ||
-- Normalizes a page-number parameter: dashes and HTML entities are replaced
-- so that only plain hyphens and spaces remain.
-- @param pages  page specification (string or number); may be nil or ''
-- @return       cleaned string; '' if pages is not set according to cu.isSet
function cu.cleanupPageNumbers( pages )
	if not cu.isSet( pages ) then
		return ''
	end

	pages = '' .. pages -- to string
	-- extra parentheses are not needed: the assignment keeps only the first
	-- gsub result (the string) and drops the substitution count
	pages = mw.ustring.gsub( pages, '[–‒—]', '-' ) -- replace dashes with hyphens
	pages = mw.ustring.gsub( pages, '&nbsp;', ' ' ) -- non-breaking space entity

	-- replace html entities with hyphens: the named dash entities first,
	-- then any remaining named, decimal or hexadecimal entity
	for _, pattern in ipairs( { '&ndash;', '&mdash;', '&%w+;', '&#%d+;', '&#x%x+;' } ) do
		pages = mw.ustring.gsub( pages, pattern, '-' )
	end

	return pages
end
-- string cleanup before COinS creation | |||
function cu.coinsCleanup( s ) | function cu.coinsCleanup( s ) | ||
if not cu.isSet( s ) then | if not cu.isSet( s ) then | ||
return | return '' | ||
end | end | ||
-- replace characters | -- replace characters | ||
s = s:gsub( '\226\128\138', ' ' ); -- hair space | s = s:gsub( '\226\128\138', ' ' ); -- hair space | ||
s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' ); -- zero-width joiner, zero-width space, soft hyphen | s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' ); | ||
-- zero-width joiner, zero-width space, soft hyphen | |||
-- replace entities | -- replace entities | ||
local i, value | |||
s = s:gsub( '&', '&' ) | s = s:gsub( '&', '&' ) | ||
for i, value in ipairs( { '–', '—' } ) do | for i, value in ipairs( { '–', '—' } ) do | ||
s = mw.ustring.gsub( s, value, '-' ) | s = mw.ustring.gsub( s, value, '-' ) | ||
| Zeile 150: | Zeile 165: | ||
s = mw.ustring.gsub( s, value, ' ' ) | s = mw.ustring.gsub( s, value, ' ' ) | ||
end | end | ||
s = mw.ustring.gsub( s, | -- remove entities | ||
for i, value in ipairs( { '&%w+;', '&#%d+;', '&#x%x+;', '' } ) do | |||
s = mw.ustring.gsub( s, value, '' ) | |||
end | |||
-- replace Wiki syntax | -- replace Wiki syntax | ||
| Zeile 165: | Zeile 183: | ||
end | end | ||
-- Removes adjoining punctuation marks from a string.
-- @param s  string to clean up
-- @return   the cleaned string (a single value; the substitution count
--           produced by gsub is not passed on to the caller)
function cu.finalCleanup( s )
	-- collapse a run of two or more full stops into one
	s = mw.ustring.gsub( s, '%.+%.', '.' )
	-- drop full stops that follow !, ?, … or ‥ (optionally separated by whitespace)
	s = mw.ustring.gsub( s, '([!%?…‥])%s*%.+', '%1' )
	-- same, when the mark is followed by '' (two apostrophes)
	s = mw.ustring.gsub( s, "([!%?…‥]'')%s*%.+", '%1' )
	-- a comma, semicolon or colon followed by whitespace and full stops
	-- becomes a single full stop
	s = mw.ustring.gsub( s, '([,;:])(%s%.+)', '.' )
	return s
end
| Zeile 210: | Zeile 227: | ||
return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or | return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or | ||
url:match( ext .. '#' ); -- # is # | url:match( ext .. '#' ); -- # is # | ||
end | end | ||