Modul:Text: Unterschied zwischen den Versionen
2015-03-17
w>Mps |
w>PerfektesChaos (2015-03-17) |
||
| Zeile 1: | Zeile 1: | ||
--[=[ 2015- | --[=[ 2015-03-17 | ||
Text utilities | Text utilities | ||
]=] | ]=] | ||
| Zeile 6: | Zeile 6: | ||
local Text = { } | local Text = { } | ||
local | local PatternCJK = false | ||
local | local PatternCombined = false | ||
local | local PatternLatin = false | ||
local PatternTerminated = false | |||
local RangesLatin = false | |||
| Zeile 55: | Zeile 57: | ||
return r | return r | ||
end -- Text.containsCJK() | end -- Text.containsCJK() | ||
Text.isLatinRange = function ( adjust )
    -- Are characters expected to be latin or symbols within latin texts?
    -- Also serves as initializer: a call without argument only ensures
    -- that RangesLatin and PatternLatin have been built, so that other
    -- functions of this module may use them afterwards.
    -- Precondition:
    --     adjust  -- string, or nil for initialization
    -- Returns: true,  if all characters are within the latin ranges
    --                 (the empty string matches, too)
    --          false, if any character is outside of the ranges
    --          nil,   if adjust is nil
    local r
    if not RangesLatin then
        -- Inclusive codepoint ranges { first, last }
        RangesLatin = { {    7,  687 },
                        { 7531, 7578 },
                        { 7680, 7935 },
                        { 8194, 8250 } }
    end
    if not PatternLatin then
        -- Build "^[a-bc-d...]*$"; 45 is the hyphen between the bounds.
        local parts = { "^[" }
        local range
        for i = 1, #RangesLatin do
            range = RangesLatin[ i ]
            parts[ #parts + 1 ] = mw.ustring.char( range[ 1 ],
                                                   45,
                                                   range[ 2 ] )
        end -- for i
        parts[ #parts + 1 ] = "]*$"
        PatternLatin = table.concat( parts )
    end
    if adjust then
        -- match() yields the matched string (possibly "") or nil
        r = mw.ustring.match( adjust, PatternLatin ) and true or false
    end
    return r
end -- Text.isLatinRange()
| Zeile 78: | Zeile 115: | ||
return mw.text.listToText( collect ) | return mw.text.listToText( collect ) | ||
end -- Text.listToText() | end -- Text.listToText() | ||
Text.removeDiacritics = function ( adjust )
    -- Strip combining marks from a string
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; decomposed (NFD), characters of the combining
    --          ranges below removed, then recomposed (NFC).
    --          All latin letters should be ASCII
    --          or basic greek or cyrillic or symbols etc.
    if not PatternCombined then
        -- Character set "[...]" of four codepoint ranges; 45 is the hyphen
        PatternCombined = "["
                          .. mw.ustring.char( 0x0300, 45, 0x036F,
                                              0x1AB0, 45, 0x1AFF,
                                              0x1DC0, 45, 0x1DFF,
                                              0xFE20, 45, 0xFE2F )
                          .. "]"
    end
    -- Only the first result of gsub (the string) is kept.
    local stripped = mw.ustring.gsub( mw.ustring.toNFD( adjust ),
                                      PatternCombined,
                                      "" )
    return mw.ustring.toNFC( stripped )
end -- Text.removeDiacritics()
| Zeile 88: | Zeile 147: | ||
-- Returns: true, if sentence terminated | -- Returns: true, if sentence terminated | ||
local r | local r | ||
if not | if not PatternTerminated then | ||
PatternTerminated = mw.ustring.char( 91, | |||
12290, | 12290, | ||
65281, | 65281, | ||
| Zeile 96: | Zeile 155: | ||
.. "!%.%?…][\"'%]‹›«»‘’“”]*$" | .. "!%.%?…][\"'%]‹›«»‘’“”]*$" | ||
end | end | ||
if mw.ustring.find( analyse, | if mw.ustring.find( analyse, PatternTerminated ) then | ||
r = true | r = true | ||
else | else | ||
| Zeile 163: | Zeile 222: | ||
-- Returns: string with non-latin parts enclosed in <span> | -- Returns: string with non-latin parts enclosed in <span> | ||
local r | local r | ||
Text.isLatinRange() | |||
if mw.ustring.match( adjust, PatternLatin ) then | |||
if mw.ustring.match( adjust, | |||
-- latin only, horizontal dashes, quotes | -- latin only, horizontal dashes, quotes | ||
r = adjust | r = adjust | ||
| Zeile 178: | Zeile 232: | ||
local m = false | local m = false | ||
local n = mw.ustring.len( adjust ) | local n = mw.ustring.len( adjust ) | ||
local span = "%s%s<span style='font-style:normal'>%s</span>" | local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>" | ||
local flat = function ( a ) | local flat = function ( a ) | ||
-- isLatin | |||
local range | |||
for i = 1, #RangesLatin do | |||
range = RangesLatin[ i ] | |||
if a >= range[ 1 ] and a <= range[ 2 ] then | |||
return true | |||
end | |||
end -- for i | |||
end -- flat() | end -- flat() | ||
local form = function ( a ) | local form = function ( a ) | ||
| Zeile 231: | Zeile 291: | ||
m = m + 1 | m = m + 1 | ||
end | end | ||
end -- for i | end -- for i | ||
if j and ( not m or m < n ) then | if j and ( not m or m < n ) then | ||
r = form( n ) | r = form( n ) | ||
| Zeile 265: | Zeile 325: | ||
function p.containsCJK( frame )
    -- Template interface for Text.containsCJK()
    --     frame.args[ 1 ]  -- text to be examined; "" if missing
    -- Returns: "1" if Text.containsCJK() yields a true value, else ""
    local story = frame.args[ 1 ] or ""
    if Text.containsCJK( story ) then
        return "1"
    end
    return ""
end
function p.isLatinRange( frame )
    -- Template interface for Text.isLatinRange()
    --     frame.args[ 1 ]  -- text to be examined; "" if missing
    -- Returns: "1" if Text.isLatinRange() yields a true value, else ""
    local story = frame.args[ 1 ] or ""
    if Text.isLatinRange( story ) then
        return "1"
    end
    return ""
end
| Zeile 280: | Zeile 344: | ||
end | end | ||
return Text.listToText( args, frame.args.format ) | return Text.listToText( args, frame.args.format ) | ||
end | |||
function p.removeDiacritics( frame )
    -- Template interface for Text.removeDiacritics()
    --     frame.args[ 1 ]  -- text to be processed; "" if missing
    -- Returns: whatever Text.removeDiacritics() returns for that text
    local story = frame.args[ 1 ] or ""
    return Text.removeDiacritics( story )
end
| Zeile 300: | Zeile 368: | ||
local innersep = frame.args["isep"] or "" | local innersep = frame.args["isep"] or "" | ||
local outersep = frame.args["osep"] or "" | local outersep = frame.args["osep"] or "" | ||
-- Parameter parsen | -- Parameter parsen | ||
for k, v in pairs(frame.args) do | for k, v in pairs(frame.args) do | ||
| Zeile 332: | Zeile 400: | ||
end | end | ||
return result | return result | ||
end | end | ||