Modul:Text: Unterschied zwischen den Versionen
w>PerfektesChaos (+ ucfirstAll, update uprightNonlatin) |
K (32 Versionen von wikivoyage:Modul:Text importiert) |
||
(29 dazwischenliegende Versionen von 9 Benutzern werden nicht angezeigt) | |||
Zeile 1: | Zeile 1: | ||
--[=[ | local yesNo = require("Module:Yesno") | ||
local Text = { serial = "2022-07-21", | |||
suite = "Text" } | |||
--[=[ | |||
Text utilities | Text utilities | ||
]=] | ]=] | ||
Zeile 5: | Zeile 8: | ||
local | -- local globals | ||
local | local PatternCJK = false | ||
local | local PatternCombined = false | ||
local | local PatternLatin = false | ||
local PatternTerminated = false | |||
local QuoteLang = false | |||
local QuoteType = false | |||
local RangesLatin = false | |||
local SeekQuote = false | |||
local function initLatinData()
    -- Prepare the shared data about latin code points on first use:
    --     RangesLatin   -- sequence of { first, last } code point pairs
    --     PatternLatin  -- anchored ustring pattern; matches a string
    --                      consisting only of characters in those ranges
    -- Values that exist already are left untouched.
    RangesLatin = RangesLatin or { { 7, 687 },
                                   { 7531, 7578 },
                                   { 7680, 7935 },
                                   { 8194, 8250 } }
    if PatternLatin then
        return
    end
    local parts = { "^[" }
    for _, span in ipairs( RangesLatin ) do
        -- 45 is "-": yields a "first-last" range inside the set
        parts[ #parts + 1 ] = mw.ustring.char( span[ 1 ], 45, span[ 2 ] )
    end
    parts[ #parts + 1 ] = "]*$"
    PatternLatin = table.concat( parts )
end -- initLatinData()
local function initQuoteData()
    -- Create the quote definitions on first use; later calls are no-ops
    --     QuoteLang  -- language code (lowercase) -> key in QuoteType
    --     QuoteType  -- key -> { { open, close },    level 1 code points
    --                            { open, close },    level 2 code points
    --                            true }              optional: pad inside
    QuoteLang = QuoteLang or {
        af  = "bd",
        ar  = "la",
        be  = "labd",
        bg  = "bd",
        ca  = "la",
        cs  = "bd",
        da  = "bd",
        de  = "bd",
        dsb = "bd",
        et  = "bd",
        el  = "lald",
        en  = "ld",
        es  = "la",
        eu  = "la",
        -- fa  = "la",
        fi  = "rd",
        fr  = "laSPC",
        ga  = "ld",
        he  = "ldla",
        hr  = "bd",
        hsb = "bd",
        hu  = "bd",
        hy  = "labd",
        id  = "rd",
        is  = "bd",
        it  = "ld",
        ja  = "x300C",
        ka  = "bd",
        ko  = "ld",
        lt  = "bd",
        lv  = "bd",
        nl  = "ld",
        nn  = "la",
        no  = "la",
        pl  = "bdla",
        pt  = "lald",
        ro  = "bdla",
        ru  = "labd",
        sk  = "bd",
        sl  = "bd",
        sq  = "la",
        sr  = "bx",
        sv  = "rd",
        th  = "ld",
        tr  = "ld",
        uk  = "la",
        zh  = "ld",
        -- regional variants override the primary language
        ["de-ch"] = "la",
        ["en-gb"] = "lsld",
        ["en-us"] = "ld",
        ["fr-ch"] = "la",
        ["it-ch"] = "la",
        ["pt-br"] = "ldla",
        ["zh-tw"] = "x300C",
        ["zh-cn"] = "ld"
    }
    QuoteType = QuoteType or {
        bd    = { { 8222, 8220 }, { 8218, 8217 } },
        bdla  = { { 8222, 8220 }, {  171,  187 } },
        bx    = { { 8222, 8221 }, { 8218, 8217 } },
        la    = { {  171,  187 }, { 8249, 8250 } },
        laSPC = { {  171,  187 }, { 8249, 8250 }, true },
        labd  = { {  171,  187 }, { 8222, 8220 } },
        lald  = { {  171,  187 }, { 8220, 8221 } },
        ld    = { { 8220, 8221 }, { 8216, 8217 } },
        ldla  = { { 8220, 8221 }, {  171,  187 } },
        lsld  = { { 8216, 8217 }, { 8220, 8221 } },
        rd    = { { 8221, 8221 }, { 8217, 8217 } },
        x300C = { { 0x300C, 0x300D }, { 0x300E, 0x300F } }
    }
end -- initQuoteData()
local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text; nil or false becomes "",
    --                 any other value is converted by tostring()
    --     alien    -- string, with language code (lowercase);
    --                 "en" is used if this is not a string
    --     advance  -- number, with level 1 or 2; anything but 2 means 1
    -- Returns: string; text enclosed in the quotation marks defined for
    --          the language, or the plain text if the quote type
    --          assigned to the language is not defined
    local r = apply and tostring( apply ) or ""
    -- same rule as Text.quote(): only an explicit 2 selects inner quotes
    local level = ( tonumber( advance ) == 2 ) and 2 or 1
    local slang, suite
    if type( alien ) ~= "string" then
        alien = "en"
    end
    initQuoteData()
    -- primary subtag serves as fallback for unknown variants: "de-at" -> "de"
    slang = alien:match( "^(%l+)-" )
    suite = QuoteLang[ alien ]
            or slang and QuoteLang[ slang ]
            or QuoteLang[ "en" ]
    if suite then
        local quotes = QuoteType[ suite ]
        if quotes then
            -- third entry requests padding between marks and text
            -- NOTE(review): French typography usually wants a no-break
            --               space here; confirm the literal is as intended
            local space = quotes[ 3 ] and " " or ""
            quotes = quotes[ level ]
            if quotes then
                -- use the sanitised text r, never the raw argument
                r = mw.ustring.char( quotes[ 1 ] )
                    .. space .. r .. space
                    .. mw.ustring.char( quotes[ 2 ] )
            end
        else
            mw.log( "fiatQuote() " .. suite )
        end
    end
    return r
end -- fiatQuote()
Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; empty if nothing to do, or an error <span>
    --          (empty string if accept) if any item is not a usable
    --          codepoint: not numeric, a control character other than
    --          TAB and LF, or beyond U+10FFFF
    local r = ""
    local source = type( apply ) == "table" and apply or { }
    local times = math.floor( tonumber( again ) or 1 )
    if not ( times >= 1 ) then
        -- also catches NaN, which fails every comparison
        return ""
    end
    local bad = { }
    local codes = { }
    for _, v in ipairs( source ) do
        local n = tonumber( v )
        if not n  or  n ~= n  or  n >= 0x110000  or
           ( n < 32 and n ~= 9 and n ~= 10 ) then
            -- mw.ustring.char() would raise a script error for values
            -- out of range; report them like other bad items
            table.insert( bad, tostring( v ) )
        else
            table.insert( codes, math.floor( n ) )
        end
    end
    if #bad > 0 then
        if not accept then
            r = tostring( mw.html.create( "span" )
                                 :addClass( "error" )
                                 :wikitext( "bad codepoints: "
                                            .. table.concat( bad, " " ) ) )
        end
        return r
    end
    if #codes > 0 then
        r = mw.ustring.char( unpack( codes ) )
        if times > 1 then
            r = r:rep( times )
        end
    end
    return r
end -- Text.char()
local function trimAndFormat(args, fmt)
    -- Collect the non-blank items of a list
    -- Parameter:
    --     args  -- table (sequence) of items; any other value is
    --              treated as a list with that single item
    --     fmt   -- string (optional); format including "%s",
    --              applied to every item that is kept
    -- Returns: table (sequence) of trimmed, optionally formatted strings
    local items = args
    if type( items ) ~= 'table' then
        items = { items }
    end
    local kept = { }
    for _, item in ipairs( items ) do
        local text = mw.text.trim( tostring( item ) )
        if text ~= "" then
            if fmt then
                text = mw.ustring.format( fmt, text )
            end
            table.insert( kept, text )
        end
    end
    return kept
end -- trimAndFormat()
Text.concatParams = function ( args, apply, adapt ) | Text.concatParams = function ( args, apply, adapt ) | ||
Zeile 20: | Zeile 219: | ||
-- Returns: string | -- Returns: string | ||
local collect = { } | local collect = { } | ||
return table.concat(trimAndFormat(args,adapt), apply or "|") | |||
end -- Text.concatParams() | end -- Text.concatParams() | ||
Text.containsCJK = function ( s )
    -- Is any CJK code within?
    -- Parameter:
    --     s  -- string; nil or false is treated as empty,
    --           any other value is converted by tostring()
    -- Returns: true, if CJK detected
    s = s and tostring( s ) or ""
    if not PatternCJK then
        -- Build the character set once and keep it in the module-level
        -- local PatternCJK (not in an accidental global);
        -- 91 = "[", 45 = "-", 93 = "]"
        PatternCJK = mw.ustring.char( 91,
                                      4352, 45, 4607,
                                      11904, 45, 42191,
                                      43072, 45, 43135,
                                      44032, 45, 55215,
                                      63744, 45, 64255,
                                      65072, 45, 65103,
                                      65381, 45, 65500,
                                      131072, 45, 196607,
                                      93 )
    end
    return mw.ustring.find( s, PatternCJK ) ~= nil
end -- Text.containsCJK()
Text.removeDelimited = function (s, prefix, suffix)
    -- Remove all text in s delimited by prefix and suffix (inclusive)
    -- Arguments:
    --    s = string to process
    --    prefix = initial delimiter
    --    suffix = ending delimiter
    -- Returns: stripped string; if a prefix has no matching suffix,
    --          everything from that prefix to the end is removed;
    --          s unchanged if either delimiter is empty
    s = s and tostring(s) or ""
    prefix = prefix and tostring(prefix) or ""
    suffix = suffix and tostring(suffix) or ""
    -- Byte lengths: the offsets below feed the byte-based string library
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find(prefix, 1, true)
    while i do
        -- plain search: delimiters such as "-->" contain pattern magic
        local j = r:find(suffix, i + prefixLen, true)
        if j then
            r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
        else
            r = r:sub(1, i - 1)
        end
        -- restart from the beginning; joining the remainders
        -- may have formed a new prefix
        i = r:find(prefix, 1, true)
    end
    return r
end -- Text.removeDelimited()
Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; without HTML comments, without tags starting
    --          with a lowercase letter, without bold and italic
    --          apostrophes, and with no-break spaces turned into
    --          ordinary spaces
    local r = Text.removeDelimited( adjust, "<!--", "-->" )
    r = r:gsub( "(</?%l[^>]*>)", "" )
    r = r:gsub( "'''", "" )
    r = r:gsub( "''", "" )
    -- NOTE(review): the last substitution as displayed replaces a space
    --               by a space; presumably the no-break space was lost
    --               in rendering, so both spellings are covered here
    r = r:gsub( "&nbsp;", " " )
    r = r:gsub( "\194\160", " " )    -- U+00A0 in UTF-8
    return r
end -- Text.getPlain()
Text.isLatinRange = function (s)
    -- Are characters expected to be latin or symbols within latin texts?
    -- Arguments:
    --    s = string to analyze; nil or false counts as empty string
    -- Returns: true, if every character is within the latin ranges
    local text = ""
    if s then
        text = tostring( s )
    end
    initLatinData()
    if mw.ustring.match( text, PatternLatin ) then
        return true
    end
    return false
end -- Text.isLatinRange()
Text.isQuote = function ( s )
    -- Is this character any quotation mark?
    -- Parameter:
    --     s = single character to analyze
    -- Returns: true, if s is quotation mark
    local mark = s and tostring( s ) or ""
    if mark == "" then
        return false
    end
    if not SeekQuote then
        -- all known marks joined into one string, created once
        local marks = { 34,        -- "
                        39,        -- '
                        171,       -- laquo
                        187,       -- raquo
                        8216,      -- lsquo
                        8217,      -- rsquo
                        8218,      -- sbquo
                        8220,      -- ldquo
                        8221,      -- rdquo
                        8222,      -- bdquo
                        8249,      -- lsaquo
                        8250,      -- rsaquo
                        0x300C,    -- CJK
                        0x300D,    -- CJK
                        0x300E,    -- CJK
                        0x300F }   -- CJK
        SeekQuote = mw.ustring.char( unpack( marks ) )
    end
    -- plain substring search within the list of marks
    if mw.ustring.find( SeekQuote, mark, 1, true ) then
        return true
    end
    return false
end -- Text.isQuote()
Zeile 64: | Zeile 338: | ||
-- adapt -- string (optional); format including "%s" | -- adapt -- string (optional); format including "%s" | ||
-- Returns: string | -- Returns: string | ||
return mw.text.listToText(trimAndFormat(args, adapt)) | |||
end -- Text.listToText() | |||
Text.quote = function ( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local text = apply and tostring( apply ) or ""
    local level = 1
    local slang
    if advance == 2 then
        level = 2
    end
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- no language given: page language, else content language
        -- TODO FIXME: Introduction expected 2017-04
        slang = mw.title.getCurrentTitle().pageLanguage
                or mw.language.getContentLanguage():getCode()
    end
    return fiatQuote( mw.text.trim( text ), slang, level )
end -- Text.quote()
Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    --          The trimmed text is returned unchanged if it is empty
    --          or if it starts or ends with any quotation mark.
    local r = mw.text.trim( apply and tostring( apply ) or "" )
    if r ~= "" then
        local first = mw.ustring.sub( r, 1, 1 )
        -- from the last character to the end; an end index of 1 would
        -- give an empty string for any text longer than one character
        local last = mw.ustring.sub( r, -1 )
        if not Text.isQuote( first ) and not Text.isQuote( last ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()
Text.removeDiacritics = function ( adjust )
    -- Remove all diacritics
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    -- Character set of combining marks, created once;
    -- 91 = "[", 45 = "-", 93 = "]"
    PatternCombined = PatternCombined
                      or mw.ustring.char( 91,
                                          0x0300, 45, 0x036F,
                                          0x1AB0, 45, 0x1AFF,
                                          0x1DC0, 45, 0x1DFF,
                                          0xFE20, 45, 0xFE2F,
                                          93 )
    local text = adjust and tostring( adjust ) or ""
    -- decompose, drop the combining marks, compose again
    local stripped = mw.ustring.gsub( mw.ustring.toNFD( text ),
                                      PatternCombined,
                                      "" )
    return mw.ustring.toNFC( stripped )
end -- Text.removeDiacritics()
Zeile 88: | Zeile 420: | ||
-- Returns: true, if sentence terminated | -- Returns: true, if sentence terminated | ||
local r | local r | ||
if not | if not PatternTerminated then | ||
PatternTerminated = mw.ustring.char( 91, | |||
12290, | 12290, | ||
65281, | 65281, | ||
Zeile 96: | Zeile 428: | ||
.. "!%.%?…][\"'%]‹›«»‘’“”]*$" | .. "!%.%?…][\"'%]‹›«»‘’“”]*$" | ||
end | end | ||
if mw.ustring.find( analyse, | if mw.ustring.find( analyse, PatternTerminated ) then | ||
r = true | r = true | ||
else | else | ||
Zeile 106: | Zeile 438: | ||
Text.ucfirstAll = function ( adjust ) | Text.ucfirstAll = function ( adjust) | ||
-- Capitalize all words | -- Capitalize all words | ||
-- | -- Arguments: | ||
-- adjust | -- adjust = string to adjust | ||
-- Returns: string with all first letters in upper case | -- Returns: string with all first letters in upper case | ||
local r = | adjust = adjust and tostring(adjust) or "" | ||
local r = mw.text.decode(adjust,true) | |||
local i = 1 | local i = 1 | ||
local c, j, m | local c, j, m | ||
m = (r ~= adjust) | |||
r = " "..r | |||
while i do | while i do | ||
i = mw.ustring.find( r, "%W%l", i ) | i = mw.ustring.find( r, "%W%l", i ) | ||
Zeile 140: | Zeile 463: | ||
r = r:sub( 2 ) | r = r:sub( 2 ) | ||
if m then | if m then | ||
r = mw.text.encode(r) | |||
end | end | ||
return r | return r | ||
end -- Text.ucfirstAll() | end -- Text.ucfirstAll() | ||
Zeile 163: | Zeile 476: | ||
-- Returns: string with non-latin parts enclosed in <span> | -- Returns: string with non-latin parts enclosed in <span> | ||
local r | local r | ||
initLatinData() | |||
if mw.ustring.match( adjust, PatternLatin ) then | |||
if mw.ustring.match( adjust, | |||
-- latin only, horizontal dashes, quotes | -- latin only, horizontal dashes, quotes | ||
r = adjust | r = adjust | ||
Zeile 178: | Zeile 486: | ||
local m = false | local m = false | ||
local n = mw.ustring.len( adjust ) | local n = mw.ustring.len( adjust ) | ||
local span = "%s%s<span style='font-style:normal'>%s</span>" | local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>" | ||
local flat = function ( a ) | local flat = function ( a ) | ||
-- isLatin | |||
local range | |||
for i = 1, #RangesLatin do | |||
range = RangesLatin[ i ] | |||
if a >= range[ 1 ] and a <= range[ 2 ] then | |||
return true | |||
end | |||
end -- for i | |||
end -- flat() | end -- flat() | ||
local focus = function ( a ) | |||
-- char is not ambivalent | |||
local r = ( a > 64 ) | |||
if r then | |||
r = ( a < 8192 or a > 8212 ) | |||
else | |||
r = ( a == 38 or a == 60 ) -- '&' '<' | |||
end | |||
return r | |||
end -- focus() | |||
local form = function ( a ) | local form = function ( a ) | ||
return string.format( span, | return string.format( span, | ||
Zeile 192: | Zeile 516: | ||
for i = 1, n do | for i = 1, n do | ||
c = mw.ustring.codepoint( adjust, i, i ) | c = mw.ustring.codepoint( adjust, i, i ) | ||
if c | if focus( c ) then | ||
if flat( c ) then | if flat( c ) then | ||
if j then | if j then | ||
Zeile 231: | Zeile 555: | ||
m = m + 1 | m = m + 1 | ||
end | end | ||
end -- for i | end -- for i | ||
if j and ( not m or m < n ) then | if j and ( not m or m < n ) then | ||
r = form( n ) | r = form( n ) | ||
Zeile 240: | Zeile 564: | ||
return r | return r | ||
end -- Text.uprightNonlatin() | end -- Text.uprightNonlatin() | ||
Text.test = function ( about )
    -- Expose internal data for inspection
    -- Parameter:
    --     about  -- string; "quote" is the only topic supported
    -- Returns: table with QuoteLang and QuoteType for "quote", else nil
    if about ~= "quote" then
        return nil
    end
    initQuoteData()
    return { QuoteLang = QuoteLang,
             QuoteType = QuoteType }
end -- Text.test()
Zeile 245: | Zeile 581: | ||
-- Export | -- Export | ||
local p = { } | local p = { } | ||
do
    -- Yes/no tests: exported for wikitext as "1" (true) or "" (false)
    local flagNames = { 'containsCJK',
                        'isLatinRange',
                        'isQuote',
                        'sentenceTerminated' }
    for i = 1, #flagNames do
        local name = flagNames[ i ]
        p[ name ] = function ( frame )
            local text = frame.args[ 1 ] or ""
            if Text[ name ]( text ) then
                return "1"
            end
            return ""
        end
    end

    -- Conversions: first unnamed parameter in, result passed through
    local textNames = { 'getPlain',
                        'removeDiacritics',
                        'ucfirstAll',
                        'uprightNonlatin' }
    for i = 1, #textNames do
        local name = textNames[ i ]
        p[ name ] = function ( frame )
            local text = frame.args[ 1 ] or ""
            return Text[ name ]( text )
        end
    end
end
function p.char( frame )
    -- Template/#invoke access to Text.char()
    --     1       -- codepoints, separated by whitespace;
    --                items starting with "x" are read as hexadecimal
    --     *       -- number of repetitions
    --     errors  -- evaluated by yesNo; a "no" suppresses error messages
    -- Parameters of the calling template take precedence;
    -- those of #invoke are used if the template has no parameter 1.
    local params = frame:getParent().args
    local story = params[ 1 ]
    if not story then
        params = frame.args
        story = params[ 1 ]
    end
    local codes, lenient, multiple
    if story then
        local items = mw.text.split( mw.text.trim( story ), "%s+" )
        if #items > 0 then
            lenient = ( yesNo( params.errors ) == false )
            multiple = tonumber( params[ "*" ] )
            codes = { }
            for i = 1, #items do
                local item = items[ i ]
                local lead = ""
                if item:sub( 1, 1 ) == "x" then
                    -- "x41" -> "0x41", understood by tonumber()
                    lead = "0"
                end
                -- items which are not numeric are kept as they are,
                -- to be reported by Text.char()
                codes[ i ] = tonumber( lead .. item ) or item
            end
        end
    end
    return Text.char( codes, multiple, lenient )
end -- p.char()
function p.concatParams( frame ) | function p.concatParams( frame ) | ||
Zeile 263: | Zeile 635: | ||
end | end | ||
function p. | |||
function p.listToFormat(frame)
    -- Fill a format string once per list position
    --     1, 2, ...  -- lists; items separated by sep
    --     format     -- string including one "%s" per list
    --     sep        -- separator between list items (default: ";");
    --                   interpreted as pattern by mw.text.split()
    -- Returns: string; concatenation of the format filled with the
    --          i-th item of every list, for each position i;
    --          missing items are inserted as empty string;
    --          empty string if no format has been provided
    local lists = {}
    local pformat = frame.args["format"]
    local sep = frame.args["sep"] or ";"
    if type(pformat) ~= "string" then
        return ""
    end

    -- parse parameters: lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end

    -- split lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end

    -- generate result string
    local lines = {}
    for i = 1, maxListLen do
        local line = pformat
        for j = 1, #lists do
            -- shorter lists contribute nothing at this position
            local item = lists[j][i] or ""
            -- "%" is magic within a gsub() replacement string
            item = item:gsub("%%", "%%%%")
            line = mw.ustring.gsub(line, "%%s", item, 1)
        end
        lines[i] = line
    end

    return table.concat(lines)
end -- p.listToFormat()
function p.listToText( frame ) | function p.listToText( frame ) | ||
Zeile 282: | Zeile 685: | ||
end | end | ||
function p. | |||
return Text. | |||
function p.quote( frame )
    -- #invoke access to Text.quote()
    --     1  -- text
    --     2  -- language code; blank means not provided
    --     3  -- level 1 or 2
    local args = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        local code = mw.text.trim( slang )
        -- false for a blank code
        slang = ( code ~= "" ) and code
    end
    local level = tonumber( args[ 3 ] )
    return Text.quote( args[ 1 ] or "", slang, level )
end -- p.quote()
function p.quoteUnquoted( frame )
    -- #invoke access to Text.quoteUnquoted()
    --     1  -- text
    --     2  -- language code; blank means not provided
    --     3  -- level 1 or 2
    local args = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        local code = mw.text.trim( slang )
        -- false for a blank code
        slang = ( code ~= "" ) and code
    end
    local level = tonumber( args[ 3 ] )
    return Text.quoteUnquoted( args[ 1 ] or "", slang, level )
end -- p.quoteUnquoted()
function p. | |||
function p.zip(frame)
    -- Interleave the items of several lists
    --     1, 2, ...        -- lists
    --     sep1, sep2, ...  -- separator within the list of that number
    --     sep              -- separator for lists without their own
    --     isep             -- inserted between items of one position
    --     osep             -- inserted between positions
    -- Returns: string
    local args = frame.args
    local defaultsep = args["sep"] or ""
    local innersep = args["isep"] or ""
    local outersep = args["osep"] or ""
    local lists = {}
    local seps = {}

    -- parse parameters
    for k, v in pairs(args) do
        local knum = tonumber(k)
        if knum then
            lists[knum] = v
        elseif string.sub(k, 1, 3) == "sep" then
            local sepnum = tonumber(string.sub(k, 4))
            if sepnum then
                seps[sepnum] = v
            end
        end
    end

    -- use the default separator where no explicit separator is given
    for i = 1, math.max(#seps, #lists) do
        seps[i] = seps[i] or defaultsep
    end

    -- split lists
    local maxListLen = 0
    for i = 1, #lists do
        local items = mw.text.split(lists[i], seps[i])
        lists[i] = items
        maxListLen = math.max(maxListLen, #items)
    end

    -- collect the pieces in output order
    local pieces = {}
    for i = 1, maxListLen do
        if i > 1 then
            pieces[#pieces + 1] = outersep
        end
        for j = 1, #lists do
            if j > 1 then
                pieces[#pieces + 1] = innersep
            end
            pieces[#pieces + 1] = lists[j][i] or ""
        end
    end
    return table.concat(pieces)
end -- p.zip()
function p. | |||
return Text. | |||
function p.failsafe()
    -- Returns: string; version identification (Text.serial)
    local version = Text.serial
    return version
end -- p.failsafe()
p.Text = function () | p.Text = function () |
Aktuelle Version vom 27. Januar 2023, 13:51 Uhr
Die Dokumentation für dieses Modul kann unter Modul:Text/doc erstellt werden
local yesNo = require("Module:Yesno") local Text = { serial = "2022-07-21", suite = "Text" } --[=[ Text utilities ]=] -- local globals local PatternCJK = false local PatternCombined = false local PatternLatin = false local PatternTerminated = false local QuoteLang = false local QuoteType = false local RangesLatin = false local SeekQuote = false local function initLatinData() if not RangesLatin then RangesLatin = { { 7, 687 }, { 7531, 7578 }, { 7680, 7935 }, { 8194, 8250 } } end if not PatternLatin then local range PatternLatin = "^[" for i = 1, #RangesLatin do range = RangesLatin[ i ] PatternLatin = PatternLatin .. mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) end -- for i PatternLatin = PatternLatin .. "]*$" end end local function initQuoteData() -- Create quote definitions if not QuoteLang then QuoteLang = { af = "bd", ar = "la", be = "labd", bg = "bd", ca = "la", cs = "bd", da = "bd", de = "bd", dsb = "bd", et = "bd", el = "lald", en = "ld", es = "la", eu = "la", -- fa = "la", fi = "rd", fr = "laSPC", ga = "ld", he = "ldla", hr = "bd", hsb = "bd", hu = "bd", hy = "labd", id = "rd", is = "bd", it = "ld", ja = "x300C", ka = "bd", ko = "ld", lt = "bd", lv = "bd", nl = "ld", nn = "la", no = "la", pl = "bdla", pt = "lald", ro = "bdla", ru = "labd", sk = "bd", sl = "bd", sq = "la", sr = "bx", sv = "rd", th = "ld", tr = "ld", uk = "la", zh = "ld", ["de-ch"] = "la", ["en-gb"] = "lsld", ["en-us"] = "ld", ["fr-ch"] = "la", ["it-ch"] = "la", ["pt-br"] = "ldla", ["zh-tw"] = "x300C", ["zh-cn"] = "ld" } end if not QuoteType then QuoteType = { bd = { { 8222, 8220 }, { 8218, 8217 } }, bdla = { { 8222, 8220 }, { 171, 187 } }, bx = { { 8222, 8221 }, { 8218, 8217 } }, la = { { 171, 187 }, { 8249, 8250 } }, laSPC = { { 171, 187 }, { 8249, 8250 }, true }, labd = { { 171, 187 }, { 8222, 8220 } }, lald = { { 171, 187 }, { 8220, 8221 } }, ld = { { 8220, 8221 }, { 8216, 8217 } }, ldla = { { 8220, 8221 }, { 171, 187 } }, lsld = { { 8216, 8217 }, { 8220, 8221 } }, rd = { { 8221, 8221 }, { 
8217, 8217 } }, x300C = { { 0x300C, 0x300D }, { 0x300E, 0x300F } } } end end -- initQuoteData() local function fiatQuote( apply, alien, advance ) -- Quote text -- Parameter: -- apply -- string, with text -- alien -- string, with language code -- advance -- number, with level 1 or 2 local r = apply and tostring(apply) or "" alien = alien or "en" advance = tonumber(advance) or 0 local suite initQuoteData() local slang = alien:match( "^(%l+)-" ) suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"] if suite then local quotes = QuoteType[ suite ] if quotes then local space if quotes[ 3 ] then space = " " else space = "" end quotes = quotes[ advance ] if quotes then r = mw.ustring.format( "%s%s%s%s%s", mw.ustring.char( quotes[ 1 ] ), space, apply, space, mw.ustring.char( quotes[ 2 ] ) ) end else mw.log( "fiatQuote() " .. suite ) end end return r end -- fiatQuote() Text.char = function ( apply, again, accept ) -- Create string from codepoints -- Parameter: -- apply -- table (sequence) with numerical codepoints, or nil -- again -- number of repetitions, or nil -- accept -- true, if no error messages to be appended -- Returns: string local r = "" apply = type(apply) == "table" and apply or {} again = math.floor(tonumber(again) or 1) if again < 1 then return "" end local bad = { } local codes = { } for _, v in ipairs( apply ) do local n = tonumber(v) if not n or (n < 32 and n ~= 9 and n ~= 10) then table.insert(bad, tostring(v)) else table.insert(codes, math.floor(n)) end end if #bad > 0 then if not accept then r = tostring( mw.html.create( "span" ) :addClass( "error" ) :wikitext( "bad codepoints: " .. 
table.concat( bad, " " )) ) end return r end if #codes > 0 then r = mw.ustring.char( unpack( codes ) ) if again > 1 then r = r:rep(again) end end return r end -- Text.char() local function trimAndFormat(args, fmt) local result = {} if type(args) ~= 'table' then args = {args} end for _, v in ipairs(args) do v = mw.text.trim(tostring(v)) if v ~= "" then table.insert(result,fmt and mw.ustring.format(fmt, v) or v) end end return result end Text.concatParams = function ( args, apply, adapt ) -- Concat list items into one string -- Parameter: -- args -- table (sequence) with numKey=string -- apply -- string (optional); separator (default: "|") -- adapt -- string (optional); format including "%s" -- Returns: string local collect = { } return table.concat(trimAndFormat(args,adapt), apply or "|") end -- Text.concatParams() Text.containsCJK = function ( s ) -- Is any CJK code within? -- Parameter: -- s -- string -- Returns: true, if CJK detected s = s and tostring(s) or "" if not patternCJK then patternCJK = mw.ustring.char( 91, 4352, 45, 4607, 11904, 45, 42191, 43072, 45, 43135, 44032, 45, 55215, 63744, 45, 64255, 65072, 45, 65103, 65381, 45, 65500, 131072, 45, 196607, 93 ) end return mw.ustring.find( s, patternCJK ) ~= nil end -- Text.containsCJK() Text.removeDelimited = function (s, prefix, suffix) -- Remove all text in s delimited by prefix and suffix (inclusive) -- Arguments: -- s = string to process -- prefix = initial delimiter -- suffix = ending delimiter -- Returns: stripped string s = s and tostring(s) or "" prefix = prefix and tostring(prefix) or "" suffix = suffix and tostring(suffix) or "" local prefixLen = mw.ustring.len(prefix) local suffixLen = mw.ustring.len(suffix) if prefixLen == 0 or suffixLen == 0 then return s end local i = s:find(prefix, 1, true) local r = s local j while i do j = r:find(suffix, i + prefixLen) if j then r = r:sub(1, i - 1)..r:sub(j+suffixLen) else r = r:sub(1, i - 1) end i = r:find(prefix, 1, true) end return r end Text.getPlain = 
function ( adjust ) -- Remove wikisyntax from string, except templates -- Parameter: -- adjust -- string -- Returns: string local r = Text.removeDelimited(adjust,"<!--","-->") r = r:gsub( "(</?%l[^>]*>)", "" ) :gsub( "'''", "" ) :gsub( "''", "" ) :gsub( " ", " " ) return r end -- Text.getPlain() Text.isLatinRange = function (s) -- Are characters expected to be latin or symbols within latin texts? -- Arguments: -- s = string to analyze -- Returns: true, if valid for latin only s = s and tostring(s) or "" --- ensure input is always string initLatinData() return mw.ustring.match(s, PatternLatin) ~= nil end -- Text.isLatinRange() Text.isQuote = function ( s ) -- Is this character any quotation mark? -- Parameter: -- s = single character to analyze -- Returns: true, if s is quotation mark s = s and tostring(s) or "" if s == "" then return false end if not SeekQuote then SeekQuote = mw.ustring.char( 34, -- " 39, -- ' 171, -- laquo 187, -- raquo 8216, -- lsquo 8217, -- rsquo 8218, -- sbquo 8220, -- ldquo 8221, -- rdquo 8222, -- bdquo 8249, -- lsaquo 8250, -- rsaquo 0x300C, -- CJK 0x300D, -- CJK 0x300E, -- CJK 0x300F ) -- CJK end return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil end -- Text.isQuote() Text.listToText = function ( args, adapt ) -- Format list items similar to mw.text.listToText() -- Parameter: -- args -- table (sequence) with numKey=string -- adapt -- string (optional); format including "%s" -- Returns: string return mw.text.listToText(trimAndFormat(args, adapt)) end -- Text.listToText() Text.quote = function ( apply, alien, advance ) -- Quote text -- Parameter: -- apply -- string, with text -- alien -- string, with language code, or nil -- advance -- number, with level 1 or 2, or nil -- Returns: quoted string apply = apply and tostring(apply) or "" local mode, slang if type( alien ) == "string" then slang = mw.text.trim( alien ):lower() else slang = mw.title.getCurrentTitle().pageLanguage if not slang then -- TODO FIXME: Introduction expected 2017-04 
slang = mw.language.getContentLanguage():getCode() end end if advance == 2 then mode = 2 else mode = 1 end return fiatQuote( mw.text.trim( apply ), slang, mode ) end -- Text.quote() Text.quoteUnquoted = function ( apply, alien, advance ) -- Quote text, if not yet quoted and not empty -- Parameter: -- apply -- string, with text -- alien -- string, with language code, or nil -- advance -- number, with level 1 or 2, or nil -- Returns: string; possibly quoted local r = mw.text.trim( apply and tostring(apply) or "" ) local s = mw.ustring.sub( r, 1, 1 ) if s ~= "" and not Text.isQuote( s, advance ) then s = mw.ustring.sub( r, -1, 1 ) if not Text.isQuote( s ) then r = Text.quote( r, alien, advance ) end end return r end -- Text.quoteUnquoted() Text.removeDiacritics = function ( adjust ) -- Remove all diacritics -- Parameter: -- adjust -- string -- Returns: string; all latin letters should be ASCII -- or basic greek or cyrillic or symbols etc. local cleanup, decomposed if not PatternCombined then PatternCombined = mw.ustring.char( 91, 0x0300, 45, 0x036F, 0x1AB0, 45, 0x1AFF, 0x1DC0, 45, 0x1DFF, 0xFE20, 45, 0xFE2F, 93 ) end decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" ) cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" ) return mw.ustring.toNFC( cleanup ) end -- Text.removeDiacritics() Text.sentenceTerminated = function ( analyse ) -- Is string terminated by dot, question or exclamation mark? -- Quotation, link termination and so on granted -- Parameter: -- analyse -- string -- Returns: true, if sentence terminated local r if not PatternTerminated then PatternTerminated = mw.ustring.char( 91, 12290, 65281, 65294, 65311 ) .. 
"!%.%?…][\"'%]‹›«»‘’“”]*$" end if mw.ustring.find( analyse, PatternTerminated ) then r = true else r = false end return r end -- Text.sentenceTerminated() Text.ucfirstAll = function ( adjust) -- Capitalize all words -- Arguments: -- adjust = string to adjust -- Returns: string with all first letters in upper case adjust = adjust and tostring(adjust) or "" local r = mw.text.decode(adjust,true) local i = 1 local c, j, m m = (r ~= adjust) r = " "..r while i do i = mw.ustring.find( r, "%W%l", i ) if i then j = i + 1 c = mw.ustring.upper( mw.ustring.sub( r, j, j ) ) r = string.format( "%s%s%s", mw.ustring.sub( r, 1, i ), c, mw.ustring.sub( r, i + 2 ) ) i = j end end -- while i r = r:sub( 2 ) if m then r = mw.text.encode(r) end return r end -- Text.ucfirstAll() Text.uprightNonlatin = function ( adjust ) -- Ensure non-italics for non-latin text parts -- One single greek letter might be granted -- Precondition: -- adjust -- string -- Returns: string with non-latin parts enclosed in <span> local r initLatinData() if mw.ustring.match( adjust, PatternLatin ) then -- latin only, horizontal dashes, quotes r = adjust else local c local j = false local k = 1 local m = false local n = mw.ustring.len( adjust ) local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>" local flat = function ( a ) -- isLatin local range for i = 1, #RangesLatin do range = RangesLatin[ i ] if a >= range[ 1 ] and a <= range[ 2 ] then return true end end -- for i end -- flat() local focus = function ( a ) -- char is not ambivalent local r = ( a > 64 ) if r then r = ( a < 8192 or a > 8212 ) else r = ( a == 38 or a == 60 ) -- '&' '<' end return r end -- focus() local form = function ( a ) return string.format( span, r, mw.ustring.sub( adjust, k, j - 1 ), mw.ustring.sub( adjust, j, a ) ) end -- form() r = "" for i = 1, n do c = mw.ustring.codepoint( adjust, i, i ) if focus( c ) then if flat( c ) then if j then if m then if i == m then -- single greek letter. 
j = false end m = false end if j then local nx = i - 1 local s = "" for ix = nx, 1, -1 do c = mw.ustring.sub( adjust, ix, ix ) if c == " " or c == "(" then nx = nx - 1 s = c .. s else break -- for ix end end -- for ix r = form( nx ) .. s j = false k = i end end elseif not j then j = i if c >= 880 and c <= 1023 then -- single greek letter? m = i + 1 else m = false end end elseif m then m = m + 1 end end -- for i if j and ( not m or m < n ) then r = form( n ) else r = r .. mw.ustring.sub( adjust, k ) end end return r end -- Text.uprightNonlatin() Text.test = function ( about ) local r if about == "quote" then initQuoteData() r = { } r.QuoteLang = QuoteLang r.QuoteType = QuoteType end return r end -- Text.test() -- Export local p = { } for _, func in ipairs({'containsCJK','isLatinRange','isQuote','sentenceTerminated'}) do p[func] = function (frame) return Text[func]( frame.args[ 1 ] or "" ) and "1" or "" end end for _, func in ipairs({'getPlain','removeDiacritics','ucfirstAll','uprightNonlatin'}) do p[func] = function (frame) return Text[func]( frame.args[ 1 ] or "" ) end end function p.char( frame ) local params = frame:getParent().args local story = params[ 1 ] local codes, lenient, multiple if not story then params = frame.args story = params[ 1 ] end if story then local items = mw.text.split( mw.text.trim(story), "%s+" ) if #items > 0 then local j lenient = (yesNo(params.errors) == false) codes = { } multiple = tonumber( params[ "*" ] ) for _, v in ipairs( items ) do j = tonumber((v:sub( 1, 1 ) == "x" and "0" or "") .. 
v) table.insert( codes, j or v ) end end end return Text.char( codes, multiple, lenient ) end function p.concatParams( frame ) local args local template = frame.args.template if type( template ) == "string" then template = mw.text.trim( template ) template = ( template == "1" ) end if template then args = frame:getParent().args else args = frame.args end return Text.concatParams( args, frame.args.separator, frame.args.format ) end function p.listToFormat(frame) local lists = {} local pformat = frame.args["format"] local sep = frame.args["sep"] or ";" -- Parameter parsen: Listen for k, v in pairs(frame.args) do local knum = tonumber(k) if knum then lists[knum] = v end end -- Listen splitten local maxListLen = 0 for i = 1, #lists do lists[i] = mw.text.split(lists[i], sep) if #lists[i] > maxListLen then maxListLen = #lists[i] end end -- Ergebnisstring generieren local result = "" local result_line = "" for i = 1, maxListLen do result_line = pformat for j = 1, #lists do result_line = mw.ustring.gsub(result_line, "%%s", lists[j][i], 1) end result = result .. 
result_line end return result end function p.listToText( frame ) local args local template = frame.args.template if type( template ) == "string" then template = mw.text.trim( template ) template = ( template == "1" ) end if template then args = frame:getParent().args else args = frame.args end return Text.listToText( args, frame.args.format ) end function p.quote( frame ) local slang = frame.args[2] if type( slang ) == "string" then slang = mw.text.trim( slang ) if slang == "" then slang = false end end return Text.quote( frame.args[ 1 ] or "", slang, tonumber( frame.args[3] ) ) end function p.quoteUnquoted( frame ) local slang = frame.args[2] if type( slang ) == "string" then slang = mw.text.trim( slang ) if slang == "" then slang = false end end return Text.quoteUnquoted( frame.args[ 1 ] or "", slang, tonumber( frame.args[3] ) ) end function p.zip(frame) local lists = {} local seps = {} local defaultsep = frame.args["sep"] or "" local innersep = frame.args["isep"] or "" local outersep = frame.args["osep"] or "" -- Parameter parsen for k, v in pairs(frame.args) do local knum = tonumber(k) if knum then lists[knum] = v else if string.sub(k, 1, 3) == "sep" then local sepnum = tonumber(string.sub(k, 4)) if sepnum then seps[sepnum] = v end end end end -- sofern keine expliziten Separatoren angegeben sind, den Standardseparator verwenden for i = 1, math.max(#seps, #lists) do if not seps[i] then seps[i] = defaultsep end end -- Listen splitten local maxListLen = 0 for i = 1, #lists do lists[i] = mw.text.split(lists[i], seps[i]) if #lists[i] > maxListLen then maxListLen = #lists[i] end end local result = "" for i = 1, maxListLen do if i ~= 1 then result = result .. outersep end for j = 1, #lists do if j ~= 1 then result = result .. innersep end result = result .. (lists[j][i] or "") end end return result end function p.failsafe() return Text.serial end p.Text = function () return Text end -- p.Text return p