Modul:Text: Unterschied zwischen den Versionen
w>Mps K Keine Bearbeitungszusammenfassung |
w>Mps K Keine Bearbeitungszusammenfassung |
||
Zeile 337: | Zeile 337: | ||
function p.removeDiacritics(frame) | function p.removeDiacritics(frame) | ||
local combiningDiacriticalMarks = "[" .. mw.ustring.char(0x0300) .. "-" .. mw.ustring.char(0x036F) .. "]" | local combiningDiacriticalMarks = "[" .. mw.ustring.char(0x0300) .. "-" .. mw.ustring.char(0x036F) .. "]" | ||
return mw.ustring.toNFC(mw.ustring.gsub(mw.ustring.toNFD(frame.args[1] or ""), | return mw.ustring.toNFC(mw.ustring.gsub(mw.ustring.toNFD(frame.args[1] or ""), combiningDiacriticalMarks, "")) | ||
end | end | ||
Version vom 14. März 2015, 12:47 Uhr
Die Dokumentation für dieses Modul kann unter Modul:Text/doc erstellt werden
--[=[ 2014-09-27
Text utilities
]=]



local Text = { }
-- Ustring patterns, built lazily on first use
local patternCJK        = false
local patternLatin      = false
local patternTerminated = false

-- Named entities and the decimal references that stand in for them while
-- Text.ucfirstAll() is capitalizing. The pattern "%W%l" used there would
-- otherwise upper-case the first letter of an entity name ("&nbsp;" ->
-- "&Nbsp;"); decimal references contain no lower-case letter after "&#".
local entityCodes = {
    { "&amp;",    "&#38;"   },
    { "&lt;",     "&#60;"   },
    { "&gt;",     "&#62;"   },
    { "&nbsp;",   "&#160;"  },
    { "&thinsp;", "&#8201;" },
    { "&zwnj;",   "&#8204;" },
    { "&zwj;",    "&#8205;" },
    { "&lrm;",    "&#8206;" },
    { "&rlm;",    "&#8207;" }
}



local function collectItems( args, adapt )
    -- Gather the trimmed, non-empty values stored under numeric keys
    -- Parameter:
    --     args   -- table with numKey=string; other keys are ignored
    --     adapt  -- string (optional); format including "%s"
    -- Returns: sequence of strings, ordered by ascending key
    local keys  = { }
    local found = { }
    for k, v in pairs( args ) do
        if type( k ) == "number" then
            keys[ #keys + 1 ] = k
            found[ k ] = v
        end
    end
    -- pairs() gives no ordering guarantee; sort to keep the list order
    table.sort( keys )
    local collect = { }
    for _, k in ipairs( keys ) do
        local v = mw.text.trim( found[ k ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end
    return collect
end -- collectItems()



local function selectArgs( frame )
    -- Choose the argument table for list functions
    -- Parameter:
    --     frame  -- frame object; frame.args.template is evaluated
    -- Returns: arguments of the parent frame if template is "1" after
    --          trimming (or a non-string true value),
    --          else the arguments of frame itself
    local template = frame.args.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    if template then
        return frame:getParent().args
    end
    return frame.args
end -- selectArgs()



Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    return table.concat( collectItems( args, adapt ), apply or "|" )
end -- Text.concatParams()



Text.containsCJK = function ( analyse )
    -- Is any CJK code within?
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if CJK detected
    if not patternCJK then
        -- "[" U+3400 "-" U+9FFF U+20000 "-" U+2B81F "]"
        patternCJK = mw.ustring.char( 91,
                                      13312, 45,  40959,
                                      131072, 45, 178207,
                                      93 )
    end
    return mw.ustring.find( analyse, patternCJK ) ~= nil
end -- Text.containsCJK()



Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    return mw.text.listToText( collectItems( args, adapt ) )
end -- Text.listToText()



Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated
    if not patternTerminated then
        -- One terminating mark (including the code points 12290, 65281,
        -- 65294, 65311), then any run of closing quotes or brackets,
        -- anchored at the end of the string
        patternTerminated = mw.ustring.char( 91,
                                             12290,
                                             65281,
                                             65294,
                                             65311 )
                            .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    return mw.ustring.find( analyse, patternTerminated ) ~= nil
end -- Text.sentenceTerminated()



Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with all first letters in upper case
    -- A leading space lets "%W%l" match the very first word as well
    local r = " " .. adjust
    local i = 1
    local c, j, m
    if adjust:find( "&" ) then
        -- Shield named entities by their decimal references
        for _, pair in ipairs( entityCodes ) do
            r = r:gsub( pair[ 1 ], pair[ 2 ] )
        end
        m = true
    end
    while i do
        i = mw.ustring.find( r, "%W%l", i )
        if i then
            j = i + 1
            c = mw.ustring.upper( mw.ustring.sub( r, j, j ) )
            r = string.format( "%s%s%s",
                               mw.ustring.sub( r, 1, i ),
                               c,
                               mw.ustring.sub( r, i + 2 ) )
            i = j
        end
    end -- while i
    -- Drop the leading space again
    r = r:sub( 2 )
    if m then
        -- Bring the named entities back
        for _, pair in ipairs( entityCodes ) do
            r = r:gsub( pair[ 2 ], pair[ 1 ] )
        end
        -- The "x" of a hexadecimal reference has been capitalized above
        r = r:gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll()



Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    if not patternLatin then
        -- "^[" U+0007 "-" U+024F U+2002 "-" U+203A "]*$"
        patternLatin = mw.ustring.char( 94, 91,
                                        7, 45, 591,
                                        8194, 45, 8250,
                                        93, 42, 36 )
    end
    if mw.ustring.match( adjust, patternLatin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c
        local j    = false    -- start index of the current non-latin run
        local k    = 1        -- start index of text not yet copied into r
        local m    = false    -- index tracked for the single greek letter case
        local n    = mw.ustring.len( adjust )
        local span = "%s%s<span style='font-style:normal'>%s</span>"
        local flat = function ( a )
                  -- isLatin
                  return  a <= 591  or  ( a >= 8194  and  a <= 8250 )
              end -- flat()
        local form = function ( a )
                -- Append adjust[k..j-1] as is and adjust[j..a] in a <span>
                return string.format( span,
                                      r,
                                      mw.ustring.sub( adjust, k, j - 1 ),
                                      mw.ustring.sub( adjust, j, a ) )
            end -- form()
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if c > 64 or c == 38 or c == 60 then    -- '&' '<'
                if flat( c ) then
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Close the run; trailing spaces and opening
                            -- parentheses are moved behind the <span>
                            local nx = i - 1
                            local s  = ""
                            for ix = nx, 1, -1 do
                                c = mw.ustring.sub( adjust, ix, ix )
                                if c == " "  or  c == "(" then
                                    nx = nx - 1
                                    s  = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                m = m + 1
            end
        end -- for i
        if j  and  ( not m  or  m < n ) then
            r = form( n )
        else
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()



-- Export
local p = { }

function p.concatParams( frame )
    -- Template access to Text.concatParams()
    --     template=1 takes the items from the calling template;
    --     separator= and format= are read from the #invoke itself
    return Text.concatParams( selectArgs( frame ),
                              frame.args.separator,
                              frame.args.format )
end



function p.containsCJK( frame )
    -- Returns: "1" if the first unnamed parameter contains CJK, else ""
    return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or ""
end



function p.listToText( frame )
    -- Template access to Text.listToText()
    --     template=1 takes the items from the calling template;
    --     format= is read from the #invoke itself
    return Text.listToText( selectArgs( frame ), frame.args.format )
end



function p.sentenceTerminated( frame )
    -- Returns: "1" if the first unnamed parameter is terminated, else ""
    return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
end



function p.ucfirstAll( frame )
    -- Template access to Text.ucfirstAll() for the first unnamed parameter
    return Text.ucfirstAll( frame.args[ 1 ] or "" )
end



function p.uprightNonlatin( frame )
    -- Template access to Text.uprightNonlatin() for the first unnamed parameter
    return Text.uprightNonlatin( frame.args[ 1 ] or "" )
end



function p.zip( frame )
    -- Interleave the items of several separated lists
    -- Parameter (all from frame.args):
    --     1, 2, ...        -- the lists, one string each
    --     sep              -- default separator of all lists (default: "")
    --     sep1, sep2, ...  -- separator of one particular list
    --     isep             -- inserted between the items of one output row
    --     osep             -- inserted between output rows
    -- Separators are handed to mw.text.split() without the plain flag.
    -- Returns: string; row i consists of item i of every list,
    --          a list that is too short contributes ""
    local lists = { }
    local seps = { }
    local defaultsep = frame.args[ "sep" ] or ""
    local innersep = frame.args[ "isep" ] or ""
    local outersep = frame.args[ "osep" ] or ""

    -- parse parameters
    for k, v in pairs( frame.args ) do
        local knum = tonumber( k )
        if knum then
            lists[ knum ] = v
        elseif string.sub( k, 1, 3 ) == "sep" then
            local sepnum = tonumber( string.sub( k, 4 ) )
            if sepnum then
                seps[ sepnum ] = v
            end
        end
    end

    -- Lists are consecutive from 1; "#" is not reliable here because
    -- the table has been filled at arbitrary keys
    local count = 0
    while lists[ count + 1 ] ~= nil do
        count = count + 1
    end

    -- split the lists; without an explicit separator use the default one
    local maxListLen = 0
    for i = 1, count do
        local sep = seps[ i ]
        if not sep then
            sep = defaultsep
        end
        lists[ i ] = mw.text.split( lists[ i ], sep )
        if #lists[ i ] > maxListLen then
            maxListLen = #lists[ i ]
        end
    end

    local rows = { }
    for i = 1, maxListLen do
        local row = { }
        for j = 1, count do
            row[ j ] = lists[ j ][ i ] or ""
        end
        rows[ i ] = table.concat( row, innersep )
    end
    return table.concat( rows, outersep )
end



-- removes all diacritics from the input string, by decomposing the characters,
-- removing the combining diacritical marks and recomposing the remaining characters
function p.removeDiacritics( frame )
    -- character class covering U+0300 to U+036F
    local combiningDiacriticalMarks = "[" .. mw.ustring.char( 0x0300 )
                                      .. "-" .. mw.ustring.char( 0x036F ) .. "]"
    local decomposed = mw.ustring.toNFD( frame.args[ 1 ] or "" )
    -- keep the string only; gsub() also returns the number of replacements
    local stripped = mw.ustring.gsub( decomposed, combiningDiacriticalMarks, "" )
    return mw.ustring.toNFC( stripped )
end



p.Text = function ()
    -- Access to the library table for other Lua modules
    return Text
end -- p.Text

return p