Modul:Text: Unterschied zwischen den Versionen

Aus skandinavien-wiki.net
w>Mps
K Keine Bearbeitungszusammenfassung
K (32 Versionen von wikivoyage:Modul:Text importiert)
 
(24 dazwischenliegende Versionen von 9 Benutzern werden nicht angezeigt)
Zeile 1: Zeile 1:
--[=[ 2014-09-27
local yesNo = require("Module:Yesno")
local Text = { serial = "2022-07-21",
              suite  = "Text" }
--[=[
Text utilities
Text utilities
]=]
]=]
Zeile 5: Zeile 8:




local Text = { }
-- local globals
local patternCJK        = false
local PatternCJK        = false
local patternLatin     = false
local PatternCombined  = false
local patternTerminated = false
local PatternLatin     = false
local PatternTerminated = false
local QuoteLang        = false
local QuoteType        = false
local RangesLatin      = false
local SeekQuote        = false
 
local function initLatinData()
    -- Lazily fill the shared Latin lookup data.
    -- RangesLatin holds inclusive codepoint ranges; PatternLatin is the
    -- anchored character-class pattern "^[...]*$" built from those ranges.
    -- Each of the two is created only if it has not been set before.
    RangesLatin = RangesLatin or { {    7,  687 },
                                   { 7531, 7578 },
                                   { 7680, 7935 },
                                   { 8194, 8250 } }
    if PatternLatin then
        return
    end
    local pieces = { "^[" }
    for _, bounds in ipairs( RangesLatin ) do
        -- 45 is "-", giving one "lo-hi" range item per entry
        pieces[ #pieces + 1 ] = mw.ustring.char( bounds[ 1 ],
                                                 45,
                                                 bounds[ 2 ] )
    end
    pieces[ #pieces + 1 ] = "]*$"
    PatternLatin = table.concat( pieces )
end
 
local function initQuoteData()
    -- Lazily create the quotation mark definitions.
    -- QuoteLang maps a language code to the name of a quote set;
    -- QuoteType maps a quote set name to
    --     { { open, close },    -- level 1, as codepoints
    --       { open, close },    -- level 2, as codepoints
    --       true }              -- optional third entry (only on laSPC)
    -- Each table is created only if it has not been set before.
    QuoteLang = QuoteLang or {
        af  = "bd",
        ar  = "la",
        be  = "labd",
        bg  = "bd",
        ca  = "la",
        cs  = "bd",
        da  = "bd",
        de  = "bd",
        dsb = "bd",
        et  = "bd",
        el  = "lald",
        en  = "ld",
        es  = "la",
        eu  = "la",
        -- fa  = "la",
        fi  = "rd",
        fr  = "laSPC",
        ga  = "ld",
        he  = "ldla",
        hr  = "bd",
        hsb = "bd",
        hu  = "bd",
        hy  = "labd",
        id  = "rd",
        is  = "bd",
        it  = "ld",
        ja  = "x300C",
        ka  = "bd",
        ko  = "ld",
        lt  = "bd",
        lv  = "bd",
        nl  = "ld",
        nn  = "la",
        no  = "la",
        pl  = "bdla",
        pt  = "lald",
        ro  = "bdla",
        ru  = "labd",
        sk  = "bd",
        sl  = "bd",
        sq  = "la",
        sr  = "bx",
        sv  = "rd",
        th  = "ld",
        tr  = "ld",
        uk  = "la",
        zh  = "ld",
        ["de-ch"] = "la",
        ["en-gb"] = "lsld",
        ["en-us"] = "ld",
        ["fr-ch"] = "la",
        ["it-ch"] = "la",
        ["pt-br"] = "ldla",
        ["zh-tw"] = "x300C",
        ["zh-cn"] = "ld",
    }
    QuoteType = QuoteType or {
        bd    = { { 8222, 8220 }, { 8218, 8217 } },
        bdla  = { { 8222, 8220 }, {  171,  187 } },
        bx    = { { 8222, 8221 }, { 8218, 8217 } },
        la    = { {  171,  187 }, { 8249, 8250 } },
        laSPC = { {  171,  187 }, { 8249, 8250 }, true },
        labd  = { {  171,  187 }, { 8222, 8220 } },
        lald  = { {  171,  187 }, { 8220, 8221 } },
        ld    = { { 8220, 8221 }, { 8216, 8217 } },
        ldla  = { { 8220, 8221 }, {  171,  187 } },
        lsld  = { { 8216, 8217 }, { 8220, 8221 } },
        rd    = { { 8221, 8221 }, { 8217, 8217 } },
        x300C = { { 0x300C, 0x300D }, { 0x300E, 0x300F } },
    }
end -- initQuoteData()
 
 
 
local function fiatQuote( apply, alien, advance )
    -- Wrap text in the quotation marks registered for a language
    -- Parameter:
    --    apply    -- string, with text; other values are stringified,
    --                nil/false yield the empty string
    --    alien    -- string, with language code; anything unusable
    --                falls back to "en"
    --    advance  -- number, with level 1 or 2
    -- Returns: string; quoted text, or the bare text if no quote pair
    --          exists for the requested level
    local r = apply and tostring( apply ) or ""
    local level = tonumber( advance ) or 0
    if type( alien ) == "table"  and
       type( alien.getCode ) == "function" then
        -- NOTE(review): Text.quote may pass on
        -- mw.title.getCurrentTitle().pageLanguage, which looks like a
        -- language object rather than a code string -- confirm.
        alien = alien:getCode()
    end
    if type( alien ) ~= "string" then
        alien = "en"
    end
    initQuoteData()
    -- "de-at" -> "de": base language as second choice
    local slang = alien:match( "^(%l+)-" )
    local suite = QuoteLang[ alien ]
                  or  ( slang and QuoteLang[ slang ] )
                  or  QuoteLang[ "en" ]
    if suite then
        local quotes = QuoteType[ suite ]
        if quotes then
            local space = quotes[ 3 ] and " " or ""
            local pair = quotes[ level ]
            -- quotes[ 3 ] is a boolean flag, not a pair; the type check
            -- keeps a level of 3 from indexing it
            if type( pair ) == "table" then
                -- format the stringified text (r), not the raw argument,
                -- so that nil or non-string input cannot raise here
                r = mw.ustring.format( "%s%s%s%s%s",
                                       mw.ustring.char( pair[ 1 ] ),
                                       space,
                                       r,
                                       space,
                                       mw.ustring.char( pair[ 2 ] ) )
            end
        else
            mw.log( "fiatQuote() " .. suite )
        end
    end
    return r
end -- fiatQuote()
 




Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --    apply   -- table (sequence) with numerical codepoints, or nil
    --    again   -- number of repetitions, or nil
    --    accept  -- true, if no error messages to be appended
    -- Returns: string
    --          * the characters, repeated `again` times
    --          * "" if there is nothing to create or again < 1
    --          * if any item is rejected: an error <span> listing the
    --            rejected items, or "" when accept is set
    local items = type( apply ) == "table" and apply or { }
    local times = math.floor( tonumber( again ) or 1 )
    if times < 1 then
        return ""
    end
    local bad   = { }
    local codes = { }
    for _, v in ipairs( items ) do
        local n = tonumber( v )
        -- Rejected: non-numbers, NaN, control characters other than
        -- TAB (9) and LF (10), and values beyond the Unicode range,
        -- for which mw.ustring.char() would raise an error instead of
        -- letting this function report the problem.
        if not n
           or  n ~= n
           or  n > 0x10FFFF
           or  ( n < 32  and  n ~= 9  and  n ~= 10 ) then
            table.insert( bad, tostring( v ) )
        else
            table.insert( codes, math.floor( n ) )
        end
    end
    if #bad > 0 then
        if accept then
            return ""
        end
        return tostring( mw.html.create( "span" )
                                :addClass( "error" )
                                :wikitext( "bad codepoints: "
                                           .. table.concat( bad, " " ) ) )
    end
    if #codes == 0 then
        return ""
    end
    local r = mw.ustring.char( unpack( codes ) )
    if times > 1 then
        r = r:rep( times )
    end
    return r
end -- Text.char()
local function trimAndFormat(args, fmt)
    -- Trim every item of a sequence, drop the empty ones and optionally
    -- run each survivor through a format string.
    -- Parameter:
    --    args  -- table (sequence) of values; a non-table is treated
    --             as a one-item sequence
    --    fmt   -- string (optional); format including "%s"
    -- Returns: table (sequence) of strings
    local items = args
    if type( items ) ~= 'table' then
        items = { items }
    end
    local kept = { }
    for _, item in ipairs( items ) do
        local text = mw.text.trim( tostring( item ) )
        if text ~= "" then
            if fmt then
                text = mw.ustring.format( fmt, text )
            end
            kept[ #kept + 1 ] = text
        end
    end
    return kept
end


Text.concatParams = function ( args, apply, adapt )
Text.concatParams = function ( args, apply, adapt )
Zeile 20: Zeile 219:
     -- Returns: string
     -- Returns: string
     local collect = { }
     local collect = { }
     for k, v in pairs( args ) do
     return table.concat(trimAndFormat(args,adapt), apply or "|")
        if type( k ) == "number" then
            v = mw.text.trim( v )
            if v ~= "" then
                if adapt then
                    v = mw.ustring.format( adapt, v )
                end
                table.insert( collect, v )
            end
        end
    end
    return table.concat( collect, apply or "|" )
end -- Text.concatParams()
end -- Text.concatParams()






Text.containsCJK = function ( analyse )
Text.containsCJK = function ( s )
     -- Is any CJK code within?
     -- Is any CJK code within?
     -- Parameter:
     -- Parameter:
     --    analyse -- string
     --    s -- string
     -- Returns: true, if CJK detected
     -- Returns: true, if CJK detected
     local r
     s = s and tostring(s) or ""
     if not patternCJK then
     if not patternCJK then
         patternCJK = mw.ustring.char( 91,
         patternCJK = mw.ustring.char( 91,
                                      13312, 45,  40959,
                                    4352, 45,  4607,
                                       131072, 45, 178207,
                                  11904, 45,  42191,
                                  43072, 45,  43135,
                                  44032, 45,  55215,
                                  63744, 45,  64255,
                                  65072, 45,  65103,
                                  65381, 45,  65500,
                                       131072, 45, 196607,
                                       93 )
                                       93 )
     end
     end
     if mw.ustring.find( analyse, patternCJK ) then
     return mw.ustring.find( s, patternCJK ) ~= nil
        r = true
end -- Text.containsCJK()
     else
 
        r = false
Text.removeDelimited = function (s, prefix, suffix)
    -- Remove all text in s delimited by prefix and suffix (inclusive)
    -- Arguments:
    --    s = string to process
    --    prefix = initial delimiter (literal text, not a pattern)
    --    suffix = ending delimiter (literal text, not a pattern)
    -- Returns: stripped string; if a prefix has no matching suffix,
    --          everything from that prefix onwards is removed;
    --          s unchanged if either delimiter is empty
    s = s and tostring(s) or ""
    prefix = prefix and tostring(prefix) or ""
    suffix = suffix and tostring(suffix) or ""
    -- Byte lengths: the offsets below feed the byte-indexed
    -- string.find / string.sub, so code-point counts would be wrong
    -- for non-ASCII delimiters.
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find(prefix, 1, true)
    local j
    while i do
        -- Plain search: "-->" read as a pattern means "any run of
        -- dashes, then >" and would stop at a lone ">" in the text.
        j = r:find(suffix, i + prefixLen, true)
        if j then
            r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
        else
            r = r:sub(1, i - 1)
        end
        -- Restart from the beginning: joining the two remainders may
        -- have produced a new prefix.
        i = r:find(prefix, 1, true)
    end
    return r
end
 
Text.getPlain = function ( adjust )
    -- Strip wiki markup from a string; templates are left alone
    -- Parameter:
    --    adjust  -- string
    -- Returns: string, without HTML comments, without tags, without
    --          bold/italic apostrophes, and with &nbsp; turned into
    --          a plain space
    local plain = Text.removeDelimited( adjust, "<!--", "-->" )
    -- Applied in this order: ''' has to go before ''
    local steps = { { "(</?%l[^>]*>)", ""  },
                    { "'''",           ""  },
                    { "''",            ""  },
                    { "&nbsp;",        " " } }
    for _, step in ipairs( steps ) do
        plain = plain:gsub( step[ 1 ], step[ 2 ] )
    end
    return plain
end -- Text.getPlain()
 
Text.isLatinRange = function (s)
    -- Does the text consist only of characters from the latin ranges
    -- (letters plus the symbols covered by RangesLatin)?
    -- Arguments:
    --    s = string to analyze; nil/false count as empty string
    -- Returns: true, if valid for latin only
    local subject = ""
    if s then
        subject = tostring( s )
    end
    initLatinData()
    if mw.ustring.match( subject, PatternLatin ) then
        return true
    end
    return false
end -- Text.isLatinRange()
 
 
 
Text.isQuote = function ( s )
    -- Is this character any quotation mark?
    -- Parameter:
    --    s = single character to analyze
    -- Returns: true, if s is quotation mark
    s = s and tostring(s) or ""
    if s == "" then
    return false
    end
    if not SeekQuote then
        SeekQuote = mw.ustring.char(  34,      -- "
                                      39,      -- '
                                      171,      -- laquo
                                      187,      -- raquo
                                    8216,      -- lsquo
                                    8217,      -- rsquo
                                    8218,      -- sbquo
                                    8220,      -- ldquo
                                    8221,      -- rdquo
                                    8222,      -- bdquo
                                    8249,      -- lsaquo
                                    8250,      -- rsaquo
                                    0x300C,    -- CJK
                                    0x300D,    -- CJK
                                    0x300E,    -- CJK
                                    0x300F )    -- CJK
     end
     end
     return r
     return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil
end -- Text.containsCJK()
end -- Text.isQuote()




Zeile 64: Zeile 338:
     --    adapt  -- string (optional); format including "%s"
     --    adapt  -- string (optional); format including "%s"
     -- Returns: string
     -- Returns: string
     local collect = { }
     return mw.text.listToText(trimAndFormat(args, adapt))
     for k, v in pairs( args ) do
end -- Text.listToText()
        if type( k ) == "number" then
 
             v = mw.text.trim( v )
 
            if v ~= "" then
 
                if adapt then
Text.quote = function ( apply, alien, advance )
                    v = mw.ustring.format( adapt, v )
    -- Quote text
                end
    -- Parameter:
                table.insert( collect, v )
    --    apply    -- string, with text
            end
     --    alien    -- string, with language code, or nil
    --    advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    apply = apply and tostring(apply) or ""
    local mode, slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        slang = mw.title.getCurrentTitle().pageLanguage
        if not slang then
            -- TODO FIXME: Introduction expected 2017-04
             slang = mw.language.getContentLanguage():getCode()
        end
    end
    if advance == 2 then
        mode = 2
    else
        mode = 1
    end
    return fiatQuote( mw.text.trim( apply ), slang, mode )
end -- Text.quote()
 
 
 
Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --    apply    -- string, with text
    --    alien    -- string, with language code, or nil
    --    advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    --          The trimmed text is returned as it is when it is empty
    --          or when its first or its last character is a quotation
    --          mark.
    local r = mw.text.trim( apply and tostring( apply ) or "" )
    if r ~= "" then
        local first = mw.ustring.sub( r, 1, 1 )
        -- sub( r, -1 ) is the last character; the former
        -- sub( r, -1, 1 ) was empty for any text longer than one
        -- character, so the trailing check never took effect
        local last  = mw.ustring.sub( r, -1 )
        if not Text.isQuote( first )  and  not Text.isQuote( last ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()
 
 
 
Text.removeDiacritics = function ( adjust )
    -- Strip combining marks from a text
    -- Parameter:
    --    adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    if not PatternCombined then
        -- Character class "[a-b...]" over the combining mark blocks;
        -- 91, 45, 93 are the codepoints of "[", "-", "]"
        local blocks = { { 0x0300, 0x036F },
                         { 0x1AB0, 0x1AFF },
                         { 0x1DC0, 0x1DFF },
                         { 0xFE20, 0xFE2F } }
        local codes = { 91 }
        for _, block in ipairs( blocks ) do
            codes[ #codes + 1 ] = block[ 1 ]
            codes[ #codes + 1 ] = 45
            codes[ #codes + 1 ] = block[ 2 ]
        end
        codes[ #codes + 1 ] = 93
        PatternCombined = mw.ustring.char( unpack( codes ) )
    end
    local source = adjust and tostring( adjust ) or ""
    -- decompose, drop the marks, recompose
    local stripped = mw.ustring.gsub( mw.ustring.toNFD( source ),
                                      PatternCombined,
                                      "" )
    return mw.ustring.toNFC( stripped )
end -- Text.removeDiacritics()




Zeile 88: Zeile 420:
     -- Returns: true, if sentence terminated
     -- Returns: true, if sentence terminated
     local r
     local r
     if not patternTerminated then
     if not PatternTerminated then
         patternTerminated = mw.ustring.char( 91,
         PatternTerminated = mw.ustring.char( 91,
                                             12290,
                                             12290,
                                             65281,
                                             65281,
Zeile 96: Zeile 428:
                             .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
                             .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
     end
     end
     if mw.ustring.find( analyse, patternTerminated ) then
     if mw.ustring.find( analyse, PatternTerminated ) then
         r = true
         r = true
     else
     else
Zeile 106: Zeile 438:




Text.ucfirstAll = function ( adjust )
Text.ucfirstAll = function ( adjust)
     -- Capitalize all words
     -- Capitalize all words
     -- Precondition:
     -- Arguments:
     --    adjust -- string
     --    adjust = string to adjust
     -- Returns: string with all first letters in upper case
     -- Returns: string with all first letters in upper case
     local r = " " .. adjust
    adjust = adjust and tostring(adjust) or ""
     local r = mw.text.decode(adjust,true)
     local i = 1
     local i = 1
     local c, j, m
     local c, j, m
     if adjust:find( "&" ) then
     m = (r ~= adjust)
        r = r:gsub( "&amp;",      "&#38;" )
    r = " "..r
            :gsub( "&lt;",      "&#60;" )
            :gsub( "&gt;",      "&#62;" )
            :gsub( "&nbsp;",    "&#160;" )
            :gsub( "&thinsp;", "&#8201;" )
            :gsub( "&zwnj;",  "&#8204;" )
            :gsub( "&zwj;",    "&#8205;" )
            :gsub( "&lrm;",    "&#8206;" )
            :gsub( "&rlm;",    "&#8207;" )
        m = true
    end
     while i do
     while i do
         i = mw.ustring.find( r, "%W%l", i )
         i = mw.ustring.find( r, "%W%l", i )
Zeile 140: Zeile 463:
     r = r:sub( 2 )
     r = r:sub( 2 )
     if m then
     if m then
        r = r:gsub(    "&#38;", "&amp;" )
    r = mw.text.encode(r)
            :gsub(    "&#60;", "&lt;" )
            :gsub(    "&#62;", "&gt;" )
            :gsub(    "&#160;", "&nbsp;" )
            :gsub(  "&#8201;", "&thinsp;" )
            :gsub(  "&#8204;", "&zwnj;" )
            :gsub(  "&#8205;", "&zwj;" )
            :gsub(  "&#8206;", "&lrm;" )
            :gsub(  "&#8207;", "&rlm;" )
            :gsub( "&#X(%x+);", "&#x%1;" )
     end
     end
     return r
     return r
end -- Text.ucfirstAll()
end -- Text.ucfirstAll()




Zeile 163: Zeile 476:
     -- Returns: string with non-latin parts enclosed in <span>
     -- Returns: string with non-latin parts enclosed in <span>
     local r
     local r
     if not patternLatin then
     initLatinData()
        patternLatin = mw.ustring.char(   94, 91,
     if mw.ustring.match( adjust, PatternLatin ) then
                                          7, 45,  591,
                                        8194, 45, 8250,
                                          93, 42, 36 )
    end
     if mw.ustring.match( adjust, patternLatin ) then
         -- latin only, horizontal dashes, quotes
         -- latin only, horizontal dashes, quotes
         r = adjust
         r = adjust
Zeile 178: Zeile 486:
         local m    = false
         local m    = false
         local n    = mw.ustring.len( adjust )
         local n    = mw.ustring.len( adjust )
         local span = "%s%s<span style='font-style:normal'>%s</span>"
         local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
         local flat = function ( a )
         local flat = function ( a )
                -- isLatin
                  -- isLatin
                return  a <= 591  or  ( a >= 8194 and  a <= 8250 )
                  local range
                  for i = 1, #RangesLatin do
                      range = RangesLatin[ i ]
                      if a >= range[ 1 ] and  a <= range[ 2 ] then
                          return true
                      end
                  end    -- for i
               end -- flat()
               end -- flat()
        local focus = function ( a )
                  -- char is not ambivalent
                  local r = ( a > 64 )
                  if r then
                      r = ( a < 8192  or  a > 8212 )
                  else
                      r = ( a == 38  or  a == 60 )    -- '&' '<'
                  end
                  return r
              end -- focus()
         local form = function ( a )
         local form = function ( a )
                 return string.format( span,
                 return string.format( span,
Zeile 192: Zeile 516:
         for i = 1, n do
         for i = 1, n do
             c = mw.ustring.codepoint( adjust, i, i )
             c = mw.ustring.codepoint( adjust, i, i )
             if c > 64  or  c == 38  or  c == 60 then   -- '&' '<'
             if focus( c ) then
                 if flat( c ) then
                 if flat( c ) then
                     if j then
                     if j then
Zeile 231: Zeile 555:
                 m = m + 1
                 m = m + 1
             end
             end
         end -- for i
         end   -- for i
         if j  and  ( not m  or  m < n ) then
         if j  and  ( not m  or  m < n ) then
             r = form( n )
             r = form( n )
Zeile 240: Zeile 564:
     return r
     return r
end -- Text.uprightNonlatin()
end -- Text.uprightNonlatin()
Text.test = function ( about )
    -- Expose internal data for inspection
    -- Parameter:
    --    about  -- string; only "quote" is known
    -- Returns: for "quote", table with the QuoteLang and QuoteType
    --          definitions; nil otherwise
    if about ~= "quote" then
        return nil
    end
    initQuoteData()
    return { QuoteLang = QuoteLang,
             QuoteType = QuoteType }
end -- Text.test()




Zeile 245: Zeile 581:
-- Export
-- Export
local p = { }
local p = { }
do
    -- Template interface for the one-argument functions of Text.
    -- The Text function is looked up by name on every call.

    -- Tests: deliver "1" for true and "" for false
    local tests = { 'containsCJK',
                    'isLatinRange',
                    'isQuote',
                    'sentenceTerminated' }
    for i = 1, #tests do
        local name = tests[ i ]
        p[ name ] = function ( frame )
            if Text[ name ]( frame.args[ 1 ] or "" ) then
                return "1"
            end
            return ""
        end
    end

    -- Conversions: hand the result through unchanged
    local conversions = { 'getPlain',
                          'removeDiacritics',
                          'ucfirstAll',
                          'uprightNonlatin' }
    for i = 1, #conversions do
        local name = conversions[ i ]
        p[ name ] = function ( frame )
            return Text[ name ]( frame.args[ 1 ] or "" )
        end
    end
end
function p.char( frame )
    -- Template interface of Text.char()
    -- Parameters are taken from the calling template; if that has no
    -- parameter 1, from the #invoke itself:
    --    1       -- codepoints, separated by whitespace; a leading "x"
    --               marks a hexadecimal number
    --    *       -- number of repetitions
    --    errors  -- if it evaluates to false via Module:Yesno,
    --               error messages are suppressed
    local params = frame:getParent().args
    local story  = params[ 1 ]
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    local codes, lenient, multiple
    if story then
        local items = mw.text.split( mw.text.trim( story ), "%s+" )
        if #items > 0 then
            lenient  = ( yesNo( params.errors ) == false )
            codes    = { }
            multiple = tonumber( params[ "*" ] )
            for i = 1, #items do
                local word    = items[ i ]
                local literal = word
                if word:sub( 1, 1 ) == "x" then
                    -- "x41" -> "0x41", which tonumber() understands
                    literal = "0" .. word
                end
                -- keep unparsable items; Text.char() reports them
                codes[ #codes + 1 ] = tonumber( literal ) or word
            end
        end
    end
    return Text.char( codes, multiple, lenient )
end


function p.concatParams( frame )
function p.concatParams( frame )
Zeile 263: Zeile 635:
end
end


function p.containsCJK( frame )
 
     return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or ""
function p.listToFormat(frame)
    -- Apply a format string row by row to parallel lists
    -- Parameter (frame.args):
    --    1, 2, ...  -- lists; items separated by sep
    --    format     -- format string; in every row the first "%s" is
    --                  replaced by the item of list 1, the next "%s"
    --                  by the item of list 2, and so on
    --    sep        -- item separator, ";" by default; handed to
    --                  mw.text.split() as a pattern
    -- Returns: string; all formatted rows concatenated;
    --          "" if no format is given
    local pformat = frame.args["format"]
    if not pformat then
        -- nothing to fill in; gsub() on nil would raise
        return ""
    end
    local sep = frame.args["sep"] or ";"

    -- Collect the numbered parameters: the lists
    local lists = {}
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end

    -- Split the lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end

    -- Build the result, one row per item index
    local rows = {}
    for i = 1, maxListLen do
        local line = pformat
        for j = 1, #lists do
            -- shorter lists contribute an empty item
            local item = lists[j][i] or ""
            -- A function as replacement inserts the item verbatim;
            -- given as a string, a "%" inside the item would be read
            -- as a capture reference.
            line = mw.ustring.gsub(line, "%%s", function () return item end, 1)
        end
        rows[#rows + 1] = line
    end

    return table.concat(rows)
end
end


function p.listToText( frame )
function p.listToText( frame )
Zeile 282: Zeile 685:
end
end


function p.sentenceTerminated( frame )
 
     return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
 
function p.quote( frame )
    -- Template interface of Text.quote()
    --    1  -- text
    --    2  -- language code; blank is handed on as false
    --    3  -- level, converted by tonumber()
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        local code = mw.text.trim( slang )
        slang = ( code ~= "" ) and code or false
    end
    local text  = args[ 1 ] or ""
    local level = tonumber( args[ 3 ] )
    return Text.quote( text, slang, level )
end
end


function p.ucfirstAll( frame )
 
     return Text.ucfirstAll( frame.args[ 1 ] or "" )
 
function p.quoteUnquoted( frame )
    -- Template interface of Text.quoteUnquoted()
    --    1  -- text
    --    2  -- language code; blank is handed on as false
    --    3  -- level, converted by tonumber()
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        local code = mw.text.trim( slang )
        slang = ( code ~= "" ) and code or false
    end
    local text  = args[ 1 ] or ""
    local level = tonumber( args[ 3 ] )
    return Text.quoteUnquoted( text, slang, level )
end
end


function p.uprightNonlatin( frame )
    return Text.uprightNonlatin( frame.args[ 1 ] or "" )
end


function p.zip(frame)
function p.zip(frame)
local lists = {}
    local lists = {}
local seps = {}
    local seps = {}
local defaultsep = frame.args["sep"] or ""
    local defaultsep = frame.args["sep"] or ""
local innersep = frame.args["isep"] or ""
    local innersep = frame.args["isep"] or ""
local outersep = frame.args["osep"] or ""
    local outersep = frame.args["osep"] or ""
 
-- Parameter parsen
    -- Parameter parsen
for k, v in pairs(frame.args) do
    for k, v in pairs(frame.args) do
local knum = tonumber(k)
        local knum = tonumber(k)
if knum then lists[knum] = v else
        if knum then lists[knum] = v else
if string.sub(k, 1, 3) == "sep" then
            if string.sub(k, 1, 3) == "sep" then
local sepnum = tonumber(string.sub(k, 4))
                local sepnum = tonumber(string.sub(k, 4))
if sepnum then seps[sepnum] = v end
                if sepnum then seps[sepnum] = v end
end
            end
end
        end
end
    end
-- sofern keine expliziten Separatoren angegeben sind, den Standardseparator verwenden
    -- sofern keine expliziten Separatoren angegeben sind, den Standardseparator verwenden
for i = 1, math.max(#seps, #lists) do
    for i = 1, math.max(#seps, #lists) do
if not seps[i] then seps[i] = defaultsep end
        if not seps[i] then seps[i] = defaultsep end
end
    end


-- Listen splitten
    -- Listen splitten
local maxListLen = 0
    local maxListLen = 0
for i = 1, #lists do
    for i = 1, #lists do
lists[i] = mw.text.split(lists[i], seps[i])
        lists[i] = mw.text.split(lists[i], seps[i])
if #lists[i] > maxListLen then maxListLen = #lists[i] end
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
end
    end


local result = ""
    local result = ""
for i = 1, maxListLen do
    for i = 1, maxListLen do
if i ~= 1 then result = result .. outersep end
        if i ~= 1 then result = result .. outersep end
for j = 1, #lists do
        for j = 1, #lists do
if j ~= 1 then result = result .. innersep end
            if j ~= 1 then result = result .. innersep end
result = result .. (lists[j][i] or "")
            result = result .. (lists[j][i] or "")
end
        end
end
    end
return result
    return result
end
end


function p.removeDiacritics(frame)
 
local combiningDiacriticalMarks = "[" .. mw.ustring.char(0x0300) .. "-" .. mw.ustring.char(0x036F) .. "]"
 
return mw.ustring.toNFC(mw.ustring.gsub(mw.ustring.toNFD(frame.args[1] or ""), combiningDiacritics, ""))
function p.failsafe()
    return Text.serial
end
end


p.Text = function ()
p.Text = function ()

Aktuelle Version vom 27. Januar 2023, 13:51 Uhr

Die Dokumentation für dieses Modul kann unter Modul:Text/doc erstellt werden

local yesNo = require("Module:Yesno")
-- Library table that collects all Text.* functions defined below.
-- `serial` is the version stamp handed out by p.failsafe().
local Text = { serial = "2022-07-21",
               suite  = "Text" }
--[=[
Text utilities
]=]



-- local globals
-- File-level caches; each one starts as `false` and is filled on first use.
local PatternCJK        = false    -- character class for CJK detection
local PatternCombined   = false    -- combining-mark class, built in Text.removeDiacritics()
local PatternLatin      = false    -- anchored latin-only pattern, built in initLatinData()
local PatternTerminated = false    -- end-of-sentence pattern, built in Text.sentenceTerminated()
local QuoteLang         = false    -- language code -> quote style key, built in initQuoteData()
local QuoteType         = false    -- quote style key -> code point pairs, built in initQuoteData()
local RangesLatin       = false    -- list of { first, last } code point ranges, built in initLatinData()
local SeekQuote         = false    -- string of all known quotation marks, built in Text.isQuote()

local function initLatinData()
    -- Lazily create the latin code point ranges and the anchored pattern
    -- "^[<range><range>...]*$" derived from them.
    -- Both results are cached in file-level locals, so repeated calls are cheap.
    RangesLatin = RangesLatin or { {    7,  687 },
                                   { 7531, 7578 },
                                   { 7680, 7935 },
                                   { 8194, 8250 } }
    if PatternLatin then
        return
    end
    local pieces = { "^[" }
    for _, span in ipairs( RangesLatin ) do
        -- 45 is "-", which turns the two code points into a class range
        pieces[ #pieces + 1 ] = mw.ustring.char( span[ 1 ], 45, span[ 2 ] )
    end
    pieces[ #pieces + 1 ] = "]*$"
    PatternLatin = table.concat( pieces )
end

local function initQuoteData()
    -- Lazily create the quotation tables (cached in file-level locals).
    --     QuoteLang  -- language code -> key into QuoteType
    --     QuoteType  -- key -> { level-1 pair, level-2 pair [, true] }
    --                   Each pair holds opening and closing code point;
    --                   a third element of true makes fiatQuote() put
    --                   a no-break space inside the marks.
    QuoteLang = QuoteLang or {
        af        = "bd",
        ar        = "la",
        be        = "labd",
        bg        = "bd",
        ca        = "la",
        cs        = "bd",
        da        = "bd",
        de        = "bd",
        dsb       = "bd",
        et        = "bd",
        el        = "lald",
        en        = "ld",
        es        = "la",
        eu        = "la",
    --  fa        = "la",
        fi        = "rd",
        fr        = "laSPC",
        ga        = "ld",
        he        = "ldla",
        hr        = "bd",
        hsb       = "bd",
        hu        = "bd",
        hy        = "labd",
        id        = "rd",
        is        = "bd",
        it        = "ld",
        ja        = "x300C",
        ka        = "bd",
        ko        = "ld",
        lt        = "bd",
        lv        = "bd",
        nl        = "ld",
        nn        = "la",
        no        = "la",
        pl        = "bdla",
        pt        = "lald",
        ro        = "bdla",
        ru        = "labd",
        sk        = "bd",
        sl        = "bd",
        sq        = "la",
        sr        = "bx",
        sv        = "rd",
        th        = "ld",
        tr        = "ld",
        uk        = "la",
        zh        = "ld",
        ["de-ch"] = "la",
        ["en-gb"] = "lsld",
        ["en-us"] = "ld",
        ["fr-ch"] = "la",
        ["it-ch"] = "la",
        ["pt-br"] = "ldla",
        ["zh-tw"] = "x300C",
        ["zh-cn"] = "ld",
    }
    -- Code points: 171/187 laquo/raquo, 8216/8217 lsquo/rsquo, 8218 sbquo,
    --              8220/8221 ldquo/rdquo, 8222 bdquo, 8249/8250 lsaquo/rsaquo
    QuoteType = QuoteType or {
        bd    = { { 8222, 8220 },  { 8218, 8217 } },
        bdla  = { { 8222, 8220 },  {  171,  187 } },
        bx    = { { 8222, 8221 },  { 8218, 8217 } },
        la    = { {  171,  187 },  { 8249, 8250 } },
        laSPC = { {  171,  187 },  { 8249, 8250 },  true },
        labd  = { {  171,  187 },  { 8222, 8220 } },
        lald  = { {  171,  187 },  { 8220, 8221 } },
        ld    = { { 8220, 8221 },  { 8216, 8217 } },
        ldla  = { { 8220, 8221 },  {  171,  187 } },
        lsld  = { { 8216, 8217 },  { 8220, 8221 } },
        rd    = { { 8221, 8221 },  { 8217, 8217 } },
        x300C = { { 0x300C, 0x300D },
                  { 0x300E, 0x300F } },
    }
end -- initQuoteData()



local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code (default: "en")
    --     advance  -- number, with level 1 or 2
    -- Returns: string; the text enclosed in the quotation marks of the
    --          language, or the plain text if the level is neither 1 nor 2
    --          or no quote definition is available
    local r = apply and tostring( apply ) or ""
    -- Normalise to a string so that :match() below cannot fail
    alien   = alien and tostring( alien ) or "en"
    advance = tonumber( advance ) or 0
    initQuoteData()
    -- Primary subtag of codes like "de-ch", used as fallback lookup
    local slang = alien:match( "^(%l+)-" )
    local suite = QuoteLang[ alien ]
                  or slang and QuoteLang[ slang ]
                  or QuoteLang[ "en" ]
    if suite then
        local quotes = QuoteType[ suite ]
        if quotes then
            local space = quotes[ 3 ] and "&#160;" or ""
            -- Only slots 1 and 2 hold code point pairs; slot 3 is the
            -- spacing flag and must never be used as a pair.
            local pair = ( advance == 1  or  advance == 2 )
                         and quotes[ advance ]
            if pair then
                -- Use the normalised text r, not the raw argument
                r = mw.ustring.char( pair[ 1 ] )
                    .. space .. r .. space
                    .. mw.ustring.char( pair[ 2 ] )
            end
        else
            mw.log( "fiatQuote() " .. suite )
        end
    end
    return r
end -- fiatQuote()



Text.char = function ( apply, again, accept )
    -- Build a string from a list of code points
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; empty if nothing to create, or an error <span>
    --          listing the rejected items unless accept is set
    local times = math.floor( tonumber( again ) or 1 )
    if times < 1 then
        return ""
    end
    local source   = type( apply ) == "table" and apply or { }
    local rejected = { }
    local accepted = { }
    for _, item in ipairs( source ) do
        local code = tonumber( item )
        -- control characters other than TAB (9) and LF (10) are refused
        if code  and  not ( code < 32 and code ~= 9 and code ~= 10 ) then
            accepted[ #accepted + 1 ] = math.floor( code )
        else
            rejected[ #rejected + 1 ] = tostring( item )
        end
    end
    if #rejected > 0 then
        if accept then
            return ""
        end
        local complaint = "bad codepoints: " .. table.concat( rejected, " " )
        return tostring( mw.html.create( "span" )
                                :addClass( "error" )
                                :wikitext( complaint ) )
    end
    if #accepted == 0 then
        return ""
    end
    local glyphs = mw.ustring.char( unpack( accepted ) )
    if times > 1 then
        glyphs = glyphs:rep( times )
    end
    return glyphs
end -- Text.char()

local function trimAndFormat(args, fmt)
	-- Trim every sequence item, drop the empty ones and optionally
	-- apply a format string to each remaining item.
	-- Parameter:
	--     args  -- table (sequence), or a single value
	--     fmt   -- string (optional); format including "%s"
	-- Returns: table (sequence) of strings
	local items = args
	if type(items) ~= 'table' then
		items = {items}
	end
	local kept = {}
	for _, item in ipairs(items) do
		local text = mw.text.trim(tostring(item))
		if text ~= "" then
			if fmt then
				text = mw.ustring.format(fmt, text)
			end
			table.insert(kept, text)
		end
	end
	return kept
end

Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    --     Items are trimmed and empty items are skipped
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local items = trimAndFormat( args, adapt )
    return table.concat( items, apply or "|" )
end -- Text.concatParams()



Text.containsCJK = function ( s )
    -- Is any CJK code within?
    -- Parameter:
    --     s  -- string
    -- Returns: true, if CJK detected
    s = s and tostring(s) or ""
    -- Cache the character class in the file-level local PatternCJK
    -- rather than leaking an accidental global variable.
    if not PatternCJK then
        -- 91 = "[", 45 = "-", 93 = "]": a class built from code point ranges
        PatternCJK = mw.ustring.char( 91,
                                        4352, 45,   4607,
                                       11904, 45,  42191,
                                       43072, 45,  43135,
                                       44032, 45,  55215,
                                       63744, 45,  64255,
                                       65072, 45,  65103,
                                       65381, 45,  65500,
                                      131072, 45, 196607,
                                      93 )
    end
    return mw.ustring.find( s, PatternCJK ) ~= nil
end -- Text.containsCJK()

Text.removeDelimited = function (s, prefix, suffix)
	-- Remove all text in s delimited by prefix and suffix (inclusive)
	-- An opening prefix without a matching suffix removes everything
	-- up to the end of the string.
	-- Arguments:
	--    s = string to process
	--    prefix = initial delimiter (literal text, not a pattern)
	--    suffix = ending delimiter (literal text, not a pattern)
	-- Returns: stripped string
	s = s and tostring(s) or ""
	prefix = prefix and tostring(prefix) or ""
	suffix = suffix and tostring(suffix) or ""
	-- Byte lengths: string.find and string.sub below use byte offsets,
	-- so code point counts would be wrong for non-ASCII delimiters.
	local prefixLen = #prefix
	local suffixLen = #suffix
	if prefixLen == 0 or suffixLen == 0 then
		return s
	end
	local r = s
	local i = r:find(prefix, 1, true)
	while i do
		-- Plain search: a suffix such as "-->" contains pattern magic
		-- characters and must not be interpreted as a Lua pattern.
		local j = r:find(suffix, i + prefixLen, true)
		if j then
			r = r:sub(1, i - 1)..r:sub(j + suffixLen)
		else
			r = r:sub(1, i - 1)
		end
		-- Restart at 1: joining the two remainders may form a new prefix
		i = r:find(prefix, 1, true)
	end
	return r
end

Text.getPlain = function ( adjust )
    -- Strip wikisyntax from a string, leaving templates untouched
    --     HTML comments, tags, bold/italic apostrophes are removed;
    --     &nbsp; becomes a plain space
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local substitutions = { { "(</?%l[^>]*>)", ""  },
                            { "'''",           ""  },
                            { "''",            ""  },
                            { "&nbsp;",        " " } }
    local plain = Text.removeDelimited( adjust, "<!--", "-->" )
    for _, rule in ipairs( substitutions ) do
        -- single assignment keeps the string and drops the match count
        plain = plain:gsub( rule[ 1 ], rule[ 2 ] )
    end
    return plain
end -- Text.getPlain()

Text.isLatinRange = function (s)
    -- Does the string consist only of characters that are latin
    -- or symbols expected within latin texts?
    -- Arguments:
    --  s = string to analyze
    -- Returns: true, if valid for latin only
    local subject = ""
    if s then
        subject = tostring(s)
    end
    initLatinData()
    if mw.ustring.match(subject, PatternLatin) then
        return true
    end
    return false
end -- Text.isLatinRange()



Text.isQuote = function ( s )
    -- Is this character any quotation mark?
    -- Parameter:
    --     s = single character to analyze
    -- Returns: true, if s is quotation mark
    local probe = s and tostring(s) or ""
    if probe == "" then
        return false
    end
    if not SeekQuote then
        -- All known quotation marks, collected once into one string
        local marks = {   34,       -- "
                          39,       -- '
                         171,       -- laquo
                         187,       -- raquo
                        8216,       -- lsquo
                        8217,       -- rsquo
                        8218,       -- sbquo
                        8220,       -- ldquo
                        8221,       -- rdquo
                        8222,       -- bdquo
                        8249,       -- lsaquo
                        8250,       -- rsaquo
                        0x300C,     -- CJK
                        0x300D,     -- CJK
                        0x300E,     -- CJK
                        0x300F }    -- CJK
        SeekQuote = mw.ustring.char( unpack( marks ) )
    end
    -- plain search of the probe within the list of marks
    local found = mw.ustring.find( SeekQuote, probe, 1, true )
    return found ~= nil
end -- Text.isQuote()



Text.listToText = function ( args, adapt )
    -- Join list items the way mw.text.listToText() does
    --     Items are trimmed and empty items are skipped beforehand
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local items = trimAndFormat( args, adapt )
    return mw.text.listToText( items )
end -- Text.listToText()



Text.quote = function ( apply, alien, advance )
    -- Enclose text in the quotation marks of a language
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local text = mw.text.trim( apply and tostring(apply) or "" )
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- No language given: take the page language if the title object
        -- provides one, else the content language code of the wiki.
        -- TODO FIXME: Introduction expected 2017-04
        slang = mw.title.getCurrentTitle().pageLanguage
                or mw.language.getContentLanguage():getCode()
    end
    -- Anything other than exactly 2 means first level
    local mode = 1
    if advance == 2 then
        mode = 2
    end
    return fiatQuote( text, slang, mode )
end -- Text.quote()



Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    --     The text counts as already quoted if its first or its last
    --     character is any quotation mark.
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim( apply and tostring(apply) or "" )
    if r ~= "" then
        local first = mw.ustring.sub( r, 1, 1 )
        -- Last character: start at -1 and run to the end of the string.
        -- (An end index of 1 would give an empty string for any text
        -- longer than one character.)
        local last  = mw.ustring.sub( r, -1 )
        if not Text.isQuote( first )  and  not Text.isQuote( last ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()



Text.removeDiacritics = function ( adjust )
    -- Strip all diacritics
    --     The text is decomposed (NFD), combining marks are deleted,
    --     and the remainder is composed again (NFC).
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    if not PatternCombined then
        -- 91 = "[", 45 = "-", 93 = "]": class of combining mark ranges
        local class = { 91,
                        0x0300, 45, 0x036F,
                        0x1AB0, 45, 0x1AFF,
                        0x1DC0, 45, 0x1DFF,
                        0xFE20, 45, 0xFE2F,
                        93 }
        PatternCombined = mw.ustring.char( unpack( class ) )
    end
    local source = adjust and tostring(adjust) or ""
    local bare   = mw.ustring.gsub( mw.ustring.toNFD( source ),
                                    PatternCombined,
                                    "" )
    return mw.ustring.toNFC( bare )
end -- Text.removeDiacritics()



Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string; nil is treated as empty string
    -- Returns: true, if sentence terminated
    -- Normalise like the other Text functions, so that nil or a
    -- non-string value cannot break the search below.
    analyse = analyse and tostring( analyse ) or ""
    if not PatternTerminated then
        -- 91 opens a class; 12290 is the ideographic full stop,
        -- 65281, 65294, 65311 are fullwidth "!", "." and "?".
        -- The second class permits trailing quotes and "]" before the end.
        PatternTerminated = mw.ustring.char( 91,
                                             12290,
                                             65281,
                                             65294,
                                             65311 )
                            .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    return mw.ustring.find( analyse, PatternTerminated ) ~= nil
end -- Text.sentenceTerminated()



Text.ucfirstAll = function ( adjust)
    -- Capitalize the first letter of every word
    --     HTML entities are decoded first; if that changed the text,
    --     the result is encoded again before it is returned.
    -- Arguments:
    --     adjust = string to adjust
    -- Returns: string with all first letters in upper case
    adjust = adjust and tostring(adjust) or ""
    local decoded   = mw.text.decode( adjust, true )
    local reencode  = ( decoded ~= adjust )
    -- Leading space lets the first word match "%W%l" as well
    local r         = " " .. decoded
    local at        = mw.ustring.find( r, "%W%l", 1 )
    while at do
        local next   = at + 1
        local letter = mw.ustring.sub( r, next, next )
        r = string.format( "%s%s%s",
                           mw.ustring.sub( r, 1, at ),
                           mw.ustring.upper( letter ),
                           mw.ustring.sub( r, at + 2 ) )
        at = mw.ustring.find( r, "%W%l", next )
    end -- while at
    -- drop the helper space again
    r = r:sub( 2 )
    if reencode then
        r = mw.text.encode( r )
    end
    return r
end -- Text.ucfirstAll()


Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    initLatinData()
    if mw.ustring.match( adjust, PatternLatin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c
        -- j: start index of the currently open non-latin run, or false
        local j    = false
        -- k: index of the first character not yet copied into r
        local k    = 1
        -- m: index bookkeeping for the single-greek-letter exception,
        --    or false if that exception is not in play
        local m    = false
        -- n: length of adjust in code points
        local n    = mw.ustring.len( adjust )
        -- format: text so far, pending latin text, wrapped non-latin run
        local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
        local flat = function ( a )
                  -- isLatin
                  -- true if code point a lies in any RangesLatin range;
                  -- falls off the end (nil) otherwise
                  local range
                  for i = 1, #RangesLatin do
                      range = RangesLatin[ i ]
                      if a >= range[ 1 ]  and  a <= range[ 2 ] then
                          return true
                      end
                  end    -- for i
              end -- flat()
        local focus = function ( a )
                  -- char is not ambivalent
                  -- Above 64 everything counts except 8192..8212;
                  -- up to 64 only '&' and '<' count.
                  local r = ( a > 64 )
                  if r then
                      r = ( a < 8192  or  a > 8212 )
                  else
                      r = ( a == 38  or  a == 60 )    -- '&' '<'
                  end
                  return r
              end -- focus()
        local form = function ( a )
                -- Result so far, plus the text from k up to the run start,
                -- plus the run j..a wrapped in the upright <span>
                return string.format( span,
                                      r,
                                      mw.ustring.sub( adjust, k, j - 1 ),
                                      mw.ustring.sub( adjust, j, a ) )
              end -- form()
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if focus( c ) then
                if flat( c ) then
                    -- latin character: closes an open non-latin run, if any
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                -- Run is dropped; since k is unchanged the
                                -- letter stays in the plain text.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Walk back over spaces and "(" directly before
                            -- this latin character, so that they are
                            -- appended after the span rather than inside.
                            local nx = i - 1
                            local s  = ""
                            for ix = nx, 1, -1 do
                                -- c is reused here as a one-character string
                                c = mw.ustring.sub( adjust, ix, ix )
                                if c == " "  or  c == "(" then
                                    nx = nx - 1
                                    s  = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    -- first non-latin character: open a new run here
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                -- ambivalent character: shift the expected index by one
                m = m + 1
            end
        end    -- for i
        if j  and  ( not m  or  m < n ) then
            -- a run is still open at the end of the text: wrap it up to n
            r = form( n )
        else
            -- otherwise append the remaining text unchanged
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()


Text.test = function ( about )
    -- Expose internal data for inspection
    -- Parameter:
    --     about  -- string; only "quote" is supported
    -- Returns: table with QuoteLang and QuoteType for "quote", else nil
    if about ~= "quote" then
        return nil
    end
    initQuoteData()
    return { QuoteLang = QuoteLang,
             QuoteType = QuoteType }
end -- Text.test()



-- Export
local p = { }

-- Predicates: the template receives "1" for true and "" for false.
local predicates = {'containsCJK','isLatinRange','isQuote','sentenceTerminated'}
for _, name in ipairs(predicates) do
	p[name] = function (frame)
		if Text[name]( frame.args[ 1 ] or "" ) then
			return "1"
		end
		return ""
	end
end

-- Converters: the string result of the Text function is passed through.
local converters = {'getPlain','removeDiacritics','ucfirstAll','uprightNonlatin'}
for _, name in ipairs(converters) do
	p[name] = function (frame)
		local input = frame.args[ 1 ] or ""
		return Text[name]( input )
	end
end

function p.char( frame )
    -- Template interface of Text.char()
    --     1       -- whitespace separated codepoints; a leading "x"
    --                marks a hexadecimal number
    --     *       -- number of repetitions
    --     errors  -- a "no" value (per Module:Yesno) suppresses the
    --                error message
    -- Parameters are taken from the parent frame, or from the #invoke
    -- itself if the parent frame has no first parameter.
    local params = frame:getParent().args
    local story  = params[ 1 ]
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    if not story then
        return Text.char()
    end
    local items = mw.text.split( mw.text.trim(story), "%s+" )
    if #items == 0 then
        return Text.char()
    end
    local lenient  = (yesNo(params.errors) == false)
    local multiple = tonumber( params[ "*" ] )
    local codes    = { }
    for _, item in ipairs( items ) do
        local numeral = item
        if item:sub( 1, 1 ) == "x" then
            -- "x41" -> "0x41", which tonumber() reads as hexadecimal
            numeral = "0" .. item
        end
        -- unparsable items are kept as text, to be reported by Text.char()
        codes[ #codes + 1 ] = tonumber( numeral ) or item
    end
    return Text.char( codes, multiple, lenient )
end

function p.concatParams( frame )
    -- Template interface of Text.concatParams()
    --     template   -- "1": take the list from the parent frame
    --     separator  -- passed on as separator
    --     format     -- passed on as format
    local own      = frame.args
    local template = own.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    local list = own
    if template then
        list = frame:getParent().args
    end
    return Text.concatParams( list, own.separator, own.format )
end


function p.listToFormat(frame)
    -- Apply a format string row by row to several parallel lists
    --     1, 2, ...  -- lists; items separated by sep
    --     format     -- string; each "%s" is replaced by the item of the
    --                   next list (first "%s" by list 1, and so on)
    --     sep        -- item separator (default: ";")
    -- Returns: string; concatenation of all formatted rows
    local lists = {}
    -- A missing format yields an empty result instead of a script error
    local pformat = frame.args["format"] or ""
    local sep = frame.args["sep"] or ";"

    -- Parse parameters: collect the numbered lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end

    -- Count the lists 1..n explicitly; the length operator is not
    -- reliable if the numbering has gaps
    local listCount = 0
    while lists[listCount + 1] do
        listCount = listCount + 1
    end

    -- Split the lists
    local maxListLen = 0
    for i = 1, listCount do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end

    -- Generate the result string
    local rows = {}
    for i = 1, maxListLen do
        local line = pformat
        for j = 1, listCount do
            -- Shorter lists contribute an empty item
            local item = lists[j][i] or ""
            -- A function as replacement inserts the item literally,
            -- so "%" within an item is not read as an escape.
            line = mw.ustring.gsub(line, "%%s", function () return item end, 1)
        end
        rows[#rows + 1] = line
    end

    return table.concat(rows)
end



function p.listToText( frame )
    -- Template interface of Text.listToText()
    --     template  -- "1": take the list from the parent frame
    --     format    -- passed on as format
    local own      = frame.args
    local template = own.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    local list = own
    if template then
        list = frame:getParent().args
    end
    return Text.listToText( list, own.format )
end



function p.quote( frame )
    -- Template interface of Text.quote()
    --     1  -- text
    --     2  -- language code; empty means not specified
    --     3  -- level
    local args = frame.args
    local lang = args[ 2 ]
    if type( lang ) == "string" then
        local trimmed = mw.text.trim( lang )
        -- false for an empty code, else the trimmed code
        lang = ( trimmed ~= "" )  and  trimmed
    end
    local level = tonumber( args[ 3 ] )
    return Text.quote( args[ 1 ] or "", lang, level )
end



function p.quoteUnquoted( frame )
    -- Template interface of Text.quoteUnquoted()
    --     1  -- text
    --     2  -- language code; empty means not specified
    --     3  -- level
    local args = frame.args
    local lang = args[ 2 ]
    if type( lang ) == "string" then
        local trimmed = mw.text.trim( lang )
        -- false for an empty code, else the trimmed code
        lang = ( trimmed ~= "" )  and  trimmed
    end
    local level = tonumber( args[ 3 ] )
    return Text.quoteUnquoted( args[ 1 ] or "", lang, level )
end


function p.zip(frame)
    -- Interleave the items of several lists
    --     1, 2, ...       -- lists
    --     sep             -- default item separator of the lists
    --     sep1, sep2, ... -- item separator of an individual list
    --     isep            -- text between the items of one row
    --     osep            -- text between rows
    -- Returns: string
    local args       = frame.args
    local defaultsep = args["sep"] or ""
    local innersep   = args["isep"] or ""
    local outersep   = args["osep"] or ""
    local lists      = {}
    local seps       = {}

    -- Parse parameters: numbered lists and numbered separators
    for key, value in pairs(args) do
        local index = tonumber(key)
        if index then
            lists[index] = value
        elseif string.sub(key, 1, 3) == "sep" then
            local sepIndex = tonumber(string.sub(key, 4))
            if sepIndex then
                seps[sepIndex] = value
            end
        end
    end

    -- Use the default separator wherever no explicit one was given
    for i = 1, math.max(#seps, #lists) do
        seps[i] = seps[i] or defaultsep
    end

    -- Split the lists
    local listCount  = #lists
    local maxListLen = 0
    for i = 1, listCount do
        local items = mw.text.split(lists[i], seps[i])
        lists[i] = items
        maxListLen = math.max(maxListLen, #items)
    end

    -- Row i consists of item i of every list
    local pieces = {}
    for i = 1, maxListLen do
        if i > 1 then
            pieces[#pieces + 1] = outersep
        end
        for j = 1, listCount do
            if j > 1 then
                pieces[#pieces + 1] = innersep
            end
            pieces[#pieces + 1] = lists[j][i] or ""
        end
    end
    return table.concat(pieces)
end



-- Version identification
-- Returns: string; the serial stamp stored in the Text table
p.failsafe = function ()
    local version = Text.serial
    return version
end



-- Library access for other Lua modules
-- Returns: table; the Text table with all Text.* functions
function p.Text()
    return Text
end -- p.Text

return p