Modul:Citation/utilities: Unterschied zwischen den Versionen

Umstellung
KKeine Bearbeitungszusammenfassung
(Umstellung)
Zeile 105: Zeile 105:
function cu.parameterCleanup( s )
function cu.parameterCleanup( s )
if not cu.isSet( s ) then
if not cu.isSet( s ) then
return s
return s -- nil or '' should be kept
end
end


Zeile 114: Zeile 114:


-- remove tags
-- remove tags
s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> tg
s = mw.ustring.gsub( s, '</*br[^/>]*/*>', '' ) -- <br> tag
s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag
s = mw.ustring.gsub( s, '</*p[^/>]*/*>', '' ) -- <p> tag
s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag
s = mw.ustring.gsub( s, '</*div[^/>]*/*>', '' ) -- <div> tag
Zeile 128: Zeile 128:
end
end


-- Remove illegal characters from a pages parameter.
-- Typographic dashes and HTML entities are turned into plain hyphens,
-- '&nbsp;' into a normal space.
-- @param pages  value of the pages parameter; it is concatenated to ''
--               to force a string, so presumably a string or a number
-- @return       the cleaned string, or '' when cu.isSet( pages ) is false
function cu.cleanupPageNumbers( pages )
    if not cu.isSet( pages ) then
        return ''
    end
    pages = '' .. pages -- to string
    -- Assigning the gsub result to a variable keeps only the string and
    -- drops the replacement count that gsub returns as a second value.
    pages = mw.ustring.gsub( pages, '[–‒—]', '-' ) -- replace dashes with hyphens
    pages = mw.ustring.gsub( pages, '&nbsp;', ' ' ) -- must run before the generic entity patterns below
    -- replace html entities (named, decimal, hexadecimal) with hyphens
    for _, pattern in ipairs( { '&ndash;', '&mdash;', '&%w+;', '&#%d+;', '&#x%x+;' } ) do
        pages = mw.ustring.gsub( pages, pattern, '-' )
    end
    return pages
end
-- string cleanup before COinS creation
function cu.coinsCleanup( s )
function cu.coinsCleanup( s )
if not cu.isSet( s ) then
if not cu.isSet( s ) then
return s
return ''
end
end
local i, value


-- replace characters
-- replace characters
s = s:gsub( '\226\128\138', ' ' ); -- hair space
s = s:gsub( '\226\128\138', ' ' ); -- hair space
s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' ); -- zero-width joiner, zero-width space, soft hyphen
s = mw.ustring.gsub( s, '[\226\128\141\226\128\139\194\173]', '' );
-- zero-width joiner, zero-width space, soft hyphen


-- replace entities
-- replace entities
local i, value
s = s:gsub( '&amp;', '&' )
s = s:gsub( '&amp;', '&' )
for i, value in ipairs( { '&shy;', '&zwj;', '­' } ) do
s = mw.ustring.gsub( s, value, '' )
end
for i, value in ipairs( { '&ndash;', '&mdash;' } ) do
for i, value in ipairs( { '&ndash;', '&mdash;' } ) do
s = mw.ustring.gsub( s, value, '-' )
s = mw.ustring.gsub( s, value, '-' )
Zeile 150: Zeile 165:
s = mw.ustring.gsub( s, value, ' ' )
s = mw.ustring.gsub( s, value, ' ' )
end
end
s = mw.ustring.gsub( s, '&%w+;', '' ) -- remove entities
-- remove entities
for i, value in ipairs( { '&%w+;', '&#%d+;', '&#x%x+;', '­' } ) do
s = mw.ustring.gsub( s, value, '' )
end


-- replace Wiki syntax
-- replace Wiki syntax
Zeile 165: Zeile 183:
end
end


-- remove illegal chars from page parameters
-- remove adjoining punctuation marks
function cu.cleanupPageNumbers( pages )
function cu.finalCleanup( s )
if not cu.isSet( pages ) then
s = mw.ustring.gsub( s, '%.+%.', '.' )
return ''
s = mw.ustring.gsub( s, '([!%?…‥])%s*%.+', '%1' )
end
s = mw.ustring.gsub( s, "([!%?…‥]'')%s*%.+", '%1' )
pages = ( '' .. pages ):gsub( '', '-' ); -- replace endashes with hyphens
return mw.ustring.gsub( s, '([,;:])(%s%.+)', '.' )
return pages:gsub( '&%w+;', '-' ); -- replace html entities with hyphens
end
end


Zeile 210: Zeile 227:
return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or
return url:match( ext .. '$' ) or url:match( ext .. '[%?#]' ) or
url:match( ext .. '&#035' ); -- &#035 is #
url:match( ext .. '&#035' ); -- &#035 is #
end
-- Remove adjoining punctuation marks.
-- @param s  string to clean up; passed straight to mw.ustring.gsub
-- @return   the cleaned string (exactly one value)
function cu.finalCleanup( s )
    -- collapse a run of two or more full stops into a single one
    s = mw.ustring.gsub( s, '%.+%.', '.' )
    -- drop full stops (and the whitespace before them) that follow ! ? … ‥
    s = mw.ustring.gsub( s, '([!%?…‥])%s*%.+', '%1' )
    -- same, when the mark is directly followed by two apostrophes
    s = mw.ustring.gsub( s, "([!%?…‥]'')%s*%.+", '%1' )
    -- replace , ; : followed by one whitespace character and full stops
    -- with a single full stop
    s = mw.ustring.gsub( s, '([,;:])(%s%.+)', '.' )
    -- Return the variable rather than the gsub call itself: a tail
    -- "return gsub(...)" would also hand the replacement count to the
    -- caller, which breaks uses such as table.insert( t, cu.finalCleanup( s ) ).
    return s
end
end


Anonymer Benutzer