Moduł:Brudnopis/Paweł Ziemian/xlat
Dokumentacja dla tego modułu może zostać utworzona pod nazwą Moduł:Brudnopis/Paweł Ziemian/xlat/opis
-- Export table of the module; the m.* entry points defined further down are attached to it.
local m = {}

-- Romanisation data for Hangul conjoining jamo.
-- Keys are single jamo characters written as UTF-8 decimal byte escapes.
-- Each value carries the Unicode character name in `comment` and, at index 1,
-- a single unconditional rule `{ "<Latin letters>" }` (the same rule shape that
-- the xlat* helpers in this file consume).
-- Not referenced by any function in the visible part of the file.
local Jamo = {
	-- initial consonants (choseong), U+1100..U+1112
	["\225\132\128"] = { comment = "HANGUL CHOSEONG KIYEOK", { "G", } }, -- 'ᄀ'
	["\225\132\129"] = { comment = "HANGUL CHOSEONG SSANGKIYEOK", { "GG", } }, -- 'ᄁ'
	["\225\132\130"] = { comment = "HANGUL CHOSEONG NIEUN", { "N", } }, -- 'ᄂ'
	["\225\132\131"] = { comment = "HANGUL CHOSEONG TIKEUT", { "D", } }, -- 'ᄃ'
	["\225\132\132"] = { comment = "HANGUL CHOSEONG SSANGTIKEUT", { "DD", } }, -- 'ᄄ'
	["\225\132\133"] = { comment = "HANGUL CHOSEONG RIEUL", { "R", } }, -- 'ᄅ'
	["\225\132\134"] = { comment = "HANGUL CHOSEONG MIEUM", { "M", } }, -- 'ᄆ'
	["\225\132\135"] = { comment = "HANGUL CHOSEONG PIEUP", { "B", } }, -- 'ᄇ'
	["\225\132\136"] = { comment = "HANGUL CHOSEONG SSANGPIEUP", { "BB", } }, -- 'ᄈ'
	["\225\132\137"] = { comment = "HANGUL CHOSEONG SIOS", { "S", } }, -- 'ᄉ'
	["\225\132\138"] = { comment = "HANGUL CHOSEONG SSANGSIOS", { "SS", } }, -- 'ᄊ'
	["\225\132\139"] = { comment = "HANGUL CHOSEONG IEUNG", { "", } }, -- 'ᄋ' (no Latin output)
	["\225\132\140"] = { comment = "HANGUL CHOSEONG CIEUC", { "J", } }, -- 'ᄌ'
	["\225\132\141"] = { comment = "HANGUL CHOSEONG SSANGCIEUC", { "JJ", } }, -- 'ᄍ'
	["\225\132\142"] = { comment = "HANGUL CHOSEONG CHIEUCH", { "C", } }, -- 'ᄎ'
	["\225\132\143"] = { comment = "HANGUL CHOSEONG KHIEUKH", { "K", } }, -- 'ᄏ'
	["\225\132\144"] = { comment = "HANGUL CHOSEONG THIEUTH", { "T", } }, -- 'ᄐ'
	["\225\132\145"] = { comment = "HANGUL CHOSEONG PHIEUPH", { "P", } }, -- 'ᄑ'
	["\225\132\146"] = { comment = "HANGUL CHOSEONG HIEUH", { "H", } }, -- 'ᄒ'

	-- vowels (jungseong), U+1161..U+1175
	["\225\133\161"] = { comment = "HANGUL JUNGSEONG A", { "A", } }, -- 'ᅡ'
	["\225\133\162"] = { comment = "HANGUL JUNGSEONG AE", { "AE", } }, -- 'ᅢ'
	["\225\133\163"] = { comment = "HANGUL JUNGSEONG YA", { "YA", } }, -- 'ᅣ'
	["\225\133\164"] = { comment = "HANGUL JUNGSEONG YAE", { "YAE", } }, -- 'ᅤ'
	["\225\133\165"] = { comment = "HANGUL JUNGSEONG EO", { "EO", } }, -- 'ᅥ'
	["\225\133\166"] = { comment = "HANGUL JUNGSEONG E", { "E", } }, -- 'ᅦ'
	["\225\133\167"] = { comment = "HANGUL JUNGSEONG YEO", { "YEO", } }, -- 'ᅧ'
	["\225\133\168"] = { comment = "HANGUL JUNGSEONG YE", { "YE", } }, -- 'ᅨ'
	["\225\133\169"] = { comment = "HANGUL JUNGSEONG O", { "O", } }, -- 'ᅩ'
	["\225\133\170"] = { comment = "HANGUL JUNGSEONG WA", { "WA", } }, -- 'ᅪ'
	["\225\133\171"] = { comment = "HANGUL JUNGSEONG WAE", { "WAE", } }, -- 'ᅫ'
	["\225\133\172"] = { comment = "HANGUL JUNGSEONG OE", { "OE", } }, -- 'ᅬ'
	["\225\133\173"] = { comment = "HANGUL JUNGSEONG YO", { "YO", } }, -- 'ᅭ'
	["\225\133\174"] = { comment = "HANGUL JUNGSEONG U", { "U", } }, -- 'ᅮ'
	["\225\133\175"] = { comment = "HANGUL JUNGSEONG WEO", { "WEO", } }, -- 'ᅯ'
	["\225\133\176"] = { comment = "HANGUL JUNGSEONG WE", { "WE", } }, -- 'ᅰ'
	["\225\133\177"] = { comment = "HANGUL JUNGSEONG WI", { "WI", } }, -- 'ᅱ'
	["\225\133\178"] = { comment = "HANGUL JUNGSEONG YU", { "YU", } }, -- 'ᅲ'
	["\225\133\179"] = { comment = "HANGUL JUNGSEONG EU", { "EU", } }, -- 'ᅳ'
	["\225\133\180"] = { comment = "HANGUL JUNGSEONG YI", { "YI", } }, -- 'ᅴ'
	["\225\133\181"] = { comment = "HANGUL JUNGSEONG I", { "I", } }, -- 'ᅵ'

	-- final consonants (jongseong), U+11A8..U+11C2
	["\225\134\168"] = { comment = "HANGUL JONGSEONG KIYEOK", { "G", } }, -- 'ᆨ'
	["\225\134\169"] = { comment = "HANGUL JONGSEONG SSANGKIYEOK", { "GG", } }, -- 'ᆩ'
	["\225\134\170"] = { comment = "HANGUL JONGSEONG KIYEOK-SIOS", { "GS", } }, -- 'ᆪ'
	["\225\134\171"] = { comment = "HANGUL JONGSEONG NIEUN", { "N", } }, -- 'ᆫ'
	["\225\134\172"] = { comment = "HANGUL JONGSEONG NIEUN-CIEUC", { "NJ", } }, -- 'ᆬ'
	["\225\134\173"] = { comment = "HANGUL JONGSEONG NIEUN-HIEUH", { "NH", } }, -- 'ᆭ'
	["\225\134\174"] = { comment = "HANGUL JONGSEONG TIKEUT", { "D", } }, -- 'ᆮ'
	["\225\134\175"] = { comment = "HANGUL JONGSEONG RIEUL", { "L", } }, -- 'ᆯ'
	["\225\134\176"] = { comment = "HANGUL JONGSEONG RIEUL-KIYEOK", { "LG", } }, -- 'ᆰ'
	["\225\134\177"] = { comment = "HANGUL JONGSEONG RIEUL-MIEUM", { "LM", } }, -- 'ᆱ'
	["\225\134\178"] = { comment = "HANGUL JONGSEONG RIEUL-PIEUP", { "LB", } }, -- 'ᆲ'
	["\225\134\179"] = { comment = "HANGUL JONGSEONG RIEUL-SIOS", { "LS", } }, -- 'ᆳ'
	["\225\134\180"] = { comment = "HANGUL JONGSEONG RIEUL-THIEUTH", { "LT", } }, -- 'ᆴ'
	["\225\134\181"] = { comment = "HANGUL JONGSEONG RIEUL-PHIEUPH", { "LP", } }, -- 'ᆵ'
	["\225\134\182"] = { comment = "HANGUL JONGSEONG RIEUL-HIEUH", { "LH", } }, -- 'ᆶ'
	["\225\134\183"] = { comment = "HANGUL JONGSEONG MIEUM", { "M", } }, -- 'ᆷ'
	["\225\134\184"] = { comment = "HANGUL JONGSEONG PIEUP", { "B", } }, -- 'ᆸ'
	["\225\134\185"] = { comment = "HANGUL JONGSEONG PIEUP-SIOS", { "BS", } }, -- 'ᆹ'
	["\225\134\186"] = { comment = "HANGUL JONGSEONG SIOS", { "S", } }, -- 'ᆺ'
	["\225\134\187"] = { comment = "HANGUL JONGSEONG SSANGSIOS", { "SS", } }, -- 'ᆻ'
	["\225\134\188"] = { comment = "HANGUL JONGSEONG IEUNG", { "NG", } }, -- 'ᆼ'
	["\225\134\189"] = { comment = "HANGUL JONGSEONG CIEUC", { "J", } }, -- 'ᆽ'
	["\225\134\190"] = { comment = "HANGUL JONGSEONG CHIEUCH", { "C", } }, -- 'ᆾ'
	["\225\134\191"] = { comment = "HANGUL JONGSEONG KHIEUKH", { "K", } }, -- 'ᆿ'
	["\225\135\128"] = { comment = "HANGUL JONGSEONG THIEUTH", { "T", } }, -- 'ᇀ'
	["\225\135\129"] = { comment = "HANGUL JONGSEONG PHIEUPH", { "P", } }, -- 'ᇁ'
	["\225\135\130"] = { comment = "HANGUL JONGSEONG HIEUH", { "H", } }, -- 'ᇂ'
}
-- Arabic-script characters keyed by decimal Unicode code point.
-- Each entry is { <character name>, <Latin transliteration> }; the second
-- element is absent for characters that have no transliteration assigned yet
-- (m.artrl skips those entries, m.ardec / m.arlst show only the name).
-- Fixed name typos: U+062D is HAH (was "Jah"), "Downwarts" -> "Downwards",
-- "Tree Dots" -> "Three Dots".
local alfabet = {
	-- punctuation
	[1548] = { "Arabic Comma", "," },
	[1563] = { "Arabic Semicolon", ";" },
	[1567] = { "Arabic Question Mark", "?" },
	-- letters
	[1569] = { "Arabic Letter Hamza", "’" },
	[1570] = { "Arabic Letter Alef With Madda Above", "’āā" },
	[1571] = { "Arabic Letter Alef With Hamza Above", "’ā" },
	[1572] = { "Arabic Letter Waw With Hamza Above", "’ū" },
	[1573] = { "Arabic Letter Alef With Hamza Below", "’ā" },
	[1574] = { "Arabic Letter Yeh With Hamza Above", "’ī" },
	[1575] = { "Arabic Letter Alef", "’" },
	[1576] = { "Arabic Letter Beh", "b" },
	[1577] = { "Arabic Letter Teh Marbuta", "ť" },
	[1578] = { "Arabic Letter Teh", "t" },
	[1579] = { "Arabic Letter Theh", "ṯ" },
	[1580] = { "Arabic Letter Jeem", "ǧ" },
	[1581] = { "Arabic Letter Hah", "ḥ" },
	[1582] = { "Arabic Letter Khah", "ẖ" },
	[1583] = { "Arabic Letter Dal", "d" },
	[1584] = { "Arabic Letter Thal", "ḏ" },
	[1585] = { "Arabic Letter Reh", "r" },
	[1586] = { "Arabic Letter Zain", "z" },
	[1587] = { "Arabic Letter Seen", "s" },
	[1588] = { "Arabic Letter Sheen", "š" },
	[1589] = { "Arabic Letter Sad", "ṣ" },
	[1590] = { "Arabic Letter Dad", "ḍ" },
	[1591] = { "Arabic Letter Tah", "ṭ" },
	[1592] = { "Arabic Letter Zah", "ẓ" },
	[1593] = { "Arabic Letter Ain", "`" },
	[1594] = { "Arabic Letter Ghain", "ḡ" },
	[1600] = { "Arabic Letter Tatweel (kashida)", "_" },
	[1601] = { "Arabic Letter Feh", "f" },
	[1602] = { "Arabic Letter Qaf", "q" },
	[1603] = { "Arabic Letter Kaf", "k" },
	[1604] = { "Arabic Letter Lam", "l" },
	[1605] = { "Arabic Letter Meem", "m" },
	[1606] = { "Arabic Letter Noon", "n" },
	[1607] = { "Arabic Letter Heh", "h" },
	[1608] = { "Arabic Letter Waw", "ū" },
	[1609] = { "Arabic Letter Alef Maksura (alef variant only for use in final position)", "ā" },
	[1610] = { "Arabic Letter Yeh", "ī" },
	-- vowel marks and other diacritics
	[1611] = { "Arabic Fathatan (final only)", "an" },
	[1612] = { "Arabic Dammatan (final only)", "un" },
	[1613] = { "Arabic Kasratan (final only)", "in" },
	[1614] = { "Arabic Fatha", "a" },
	[1615] = { "Arabic Damma", "u" },
	[1616] = { "Arabic Kasra", "i" },
	[1617] = { "Arabic Shadda", "×" },
	[1618] = { "Arabic Sukun", "-" },
	-- digits and numeric punctuation
	[1632] = { "Arabic-Indic Digit Zero", "0" },
	[1633] = { "Arabic-Indic Digit One", "1" },
	[1634] = { "Arabic-Indic Digit Two", "2" },
	[1635] = { "Arabic-Indic Digit Three", "3" },
	[1636] = { "Arabic-Indic Digit Four", "4" },
	[1637] = { "Arabic-Indic Digit Five", "5" },
	[1638] = { "Arabic-Indic Digit Six", "6" },
	[1639] = { "Arabic-Indic Digit Seven", "7" },
	[1640] = { "Arabic-Indic Digit Eight", "8" },
	[1641] = { "Arabic-Indic Digit Nine", "9" },
	[1642] = { "Arabic Percent Sign", "%" },
	[1643] = { "Arabic Decimal Separator", "," },
	[1644] = { "Arabic Thousands Separator", " " },
	-- named only: no transliteration assigned
	[1645] = { "Arabic Five Pointed Star" },
	[1648] = { "Arabic Letter Superscript Alef" },
	[1649] = { "Arabic Letter Alef Wasla" },
	[1650] = { "Arabic Letter Alef With Wavy Hamza Above" },
	[1651] = { "Arabic Letter Alef With Wavy Hamza Below" },
	[1652] = { "Arabic Letter High Hamza" },
	[1653] = { "Arabic Letter High Hamza Alef" },
	[1654] = { "Arabic Letter High Hamza Waw" },
	[1655] = { "Arabic Letter U With Hamza Above" },
	[1656] = { "Arabic Letter High Hamza Yeh" },
	[1657] = { "Arabic Letter Tteh" },
	[1658] = { "Arabic Letter Tteheh" },
	[1659] = { "Arabic Letter Beeh" },
	[1660] = { "Arabic Letter Teh With Ring" },
	[1661] = { "Arabic Letter Teh With Three Dots Above Downwards" },
	[1662] = { "Arabic Letter Peh" },
	[1663] = { "Arabic Letter Teheh" },
	[1664] = { "Arabic Letter Beheh" },
	[1665] = { "Arabic Letter Hah With Hamza Above" },
	[1666] = { "Arabic Letter Hah With Two Dots Vertical Above" },
	[1667] = { "Arabic Letter Nyeh" },
	[1668] = { "Arabic Letter Dyeh" },
	[1669] = { "Arabic Letter Hah With Three Dots Above" },
	[1670] = { "Arabic Letter Tcheh" },
	[1671] = { "Arabic Letter Tcheheh" },
	[1672] = { "Arabic Letter Ddal" },
	[1673] = { "Arabic Letter Dal With Ring" },
	[1674] = { "Arabic Letter Dal With Dot Below" },
	[1675] = { "Arabic Letter Dal With Dot Below And Small Tah" },
	[1676] = { "Arabic Letter Dahal" },
	[1677] = { "Arabic Letter Ddahal" },
	[1678] = { "Arabic Letter Dul" },
	[1679] = { "Arabic Letter Dal With Three Dots Above Downwards" },
	-- the list stops here; next code point would be U+0690 = 1680
}
--- Dumps the transliterable part of `alfabet` as Lua source text.
-- Code points 1500..1700 are scanned; entries that have no transliteration
-- (no second element) are left out. Each emitted entry has the rule-table
-- shape `["<char>"] = { -- <name>` / `{ "<translit>", },` / `},`.
-- @param frame Scribunto frame (unused)
-- @return the generated text wrapped in <pre>...</pre>
function m.artrl(frame)
	local entryFormat = "\t[\"%s\"] = { -- %s\n\t\t{ \"%s\", },\n\t},\n"
	local pieces = { "translation = {\n" }
	for codepoint = 1500, 1700 do
		local entry = alfabet[codepoint]
		local latin = entry and entry[2]
		if latin then
			pieces[#pieces + 1] = entryFormat:format(mw.ustring.char(codepoint), entry[1], latin)
		end
	end
	pieces[#pieces + 1] = "},"
	return "<pre>" .. table.concat(pieces) .. "</pre>"
end
--- Builds a wikitable that describes every code point of a text.
-- The text is the first unnamed argument of the invocation or, when that is
-- missing, the first unnamed argument of the parent frame.
-- Columns: the character, three quoted forms ("c", "c$", "^c"), the code
-- point in hex and decimal, and the name stored in `alfabet` (empty when the
-- code point is not listed there).
-- @param frame Scribunto frame object
-- @return wikitext of the table; nothing when no text was supplied (a message is logged instead)
function m.ardec(frame)
	-- getParent() yields nil for a frame without a caller, so guard the lookup
	local parent = frame:getParent()
	local text = frame.args[1] or (parent and parent.args[1])
	if not text or (#text == 0) then
		mw.log("Brak tekstu")
		return
	end
	local rowFormat = "\n|-\n| %s || \"%s\" || \"%s$\" || \"^%s\" || %x || %d || %s"
	local rows = { "\n{| class=wikitable\n! znak !! q !! a !! b !! hex !! dec !! name" }
	-- iterate with a plain for-in; the former local named `next` shadowed the builtin
	for codepoint in mw.ustring.gcodepoint(text) do
		local data = alfabet[codepoint] or { "" }
		local char = mw.ustring.char(codepoint)
		rows[#rows + 1] = string.format(rowFormat, char, char, char, char, codepoint, codepoint, data[1])
	end
	rows[#rows + 1] = "\n|}"
	return table.concat(rows, "")
end
--- Lists every character defined in `alfabet` as a wikitable.
-- Code points 1500..1700 are scanned and one row is produced per defined
-- entry: the character, three quoted forms ("c", "c$", "^c"), the code point
-- in hex and decimal, and the character name.
-- @param frame Scribunto frame (unused)
-- @return wikitext of the table
function m.arlst(frame)
	local rowFormat = "\n|-\n| %s || \"%s\" || \"%s$\" || \"^%s\" || %x || %d || %s"
	local rows = { "\n{| class=wikitable\n! znak !! q !! a !! b !! hex !! dec !! name" }
	for codepoint = 1500, 1700 do
		local entry = alfabet[codepoint]
		if entry then
			local glyph = mw.ustring.char(codepoint)
			rows[#rows + 1] = rowFormat:format(glyph, glyph, glyph, glyph, codepoint, codepoint, entry[1])
		end
	end
	rows[#rows + 1] = "\n|}"
	return table.concat(rows)
end
--- Rule-based transliteration with one character of look-behind and two
-- characters of look-ahead.
--
-- The input is read as "characters" made of one code point plus any directly
-- following code points flagged in the `combining` table of 'Moduł:IPA/data'.
-- For every character the longest prefix that is a key of `translation`
-- becomes its `known` part and the remaining code points its `rest`.
-- A rule list is a sequence of `{ replacement, a = pattern, b = pattern }`;
-- `a` is matched against the known part of the preceding character and `b`
-- against the concatenated known parts of the two following characters.
-- The first rule whose present restrictions all match wins; a character
-- without a matching rule is copied unchanged.
--
-- @param text string to convert
-- @param translation table: character -> rule list
-- @param preserveCombining when truthy, the `rest` of a character is re-emitted
--        after its replacement (only if the replacement is not empty)
-- @return the converted string; `text` itself when text/translation is missing or text is empty
local function xlat3(text, translation, preserveCombining)
	if not text or not translation or (#text == 0) then
		return text
	end

	local cache = {}
	-- Returns (creating on demand) the cached rules and pattern results of a character.
	local function cacheEntry(char)
		local entry = cache[char]
		if not entry then
			entry = {
				rules = translation[char] or { { char } },
				class = {},
			}
			cache[char] = entry
		end
		return entry
	end
	-- Tells whether `char` matches the pattern `restriction`, memoising the answer.
	local function checkRule(char, restriction)
		local class = cacheEntry(char).class
		local result = class[restriction]
		if result == nil then
			result = mw.ustring.match(char, restriction) and true or false
			class[restriction] = result -- was never stored before, so the cache was useless
		end
		return result
	end
	-- Picks the replacement of `current` given its neighbours.
	local function translate(after, current, before)
		for _, v in ipairs(cacheEntry(current).rules) do
			local a = not v.a or checkRule(after, v.a)
			local b = not v.b or checkRule(before, v.b)
			if a and b then
				return v[1]
			end
		end
		return current
	end

	local combining = mw.loadData( 'Moduł:IPA/data' ).combining
	local result = {}
	-- Appends a replacement and, when requested, the combining marks that followed the source character.
	local function emit(translated, rest)
		table.insert(result, translated)
		if (#translated > 0) and preserveCombining then
			table.insert(result, rest)
		end
	end
	-- Splits a list of code points into the longest known prefix and the rest.
	local function split(char)
		for j = #char, 1, -1 do
			local part = mw.ustring.char(unpack(char, 1, j))
			if translation[part] then
				local rest = ""
				if j < #char then
					rest = mw.ustring.char(unpack(char, j + 1, #char))
				end
				return { known = part, rest = rest }
			end
		end
		-- nothing recognised: keep the whole character as is
		return { known = mw.ustring.char(unpack(char)), rest = "" }
	end

	-- sliding window: after | current | before1 | before2
	local after = { known = "", rest = "" }
	local current = { known = "", rest = "" }
	local before1 = { known = "", rest = "" }
	local before2 = { known = "", rest = "" }

	local iterator = mw.ustring.gcodepoint(text)
	local codepoint = iterator()
	while codepoint do
		-- collect one base code point together with its combining marks
		local char = { codepoint }
		codepoint = iterator()
		while codepoint and combining[codepoint] do
			char[#char + 1] = codepoint
			codepoint = iterator()
		end
		-- shift the window and append the new character
		after, current, before1 = current, before1, before2
		before2 = split(char)
		if current.known ~= "" then
			emit(translate(after.known, current.known, before1.known..before2.known), current.rest)
		end
	end
	-- the last two characters are still waiting in the window; each one gets
	-- back its own combining marks (previously `current.rest` was used for both)
	emit(translate(current.known, before1.known, before2.known), before1.rest)
	emit(translate(before1.known, before2.known, ""), before2.rest)

	local output = table.concat(result, "")
	mw.log("return \""..output.."\"")
	return output
end
--- Rule-based transliteration with one character of context on each side.
--
-- The input is read as "characters" made of one code point plus any directly
-- following code points flagged in the `combining` table of 'Moduł:IPA/data'.
-- For every character the longest prefix that is a key of `translation`
-- becomes its `known` part and the remaining code points its `rest`.
-- A rule list is a sequence of `{ replacement, a = pattern, b = pattern }`;
-- `a` is matched against the known part of the preceding character, `b`
-- against the known part of the following one. The first rule whose present
-- restrictions all match wins; a character without a matching rule is copied
-- unchanged.
--
-- @param text string to convert
-- @param translation table: character -> rule list
-- @param preserveCombining when truthy, the `rest` of a character is re-emitted
--        after its replacement (only if the replacement is not empty)
-- @return the converted string; `text` itself when text/translation is missing or text is empty
local function xlat2(text, translation, preserveCombining)
	if not text or not translation or (#text == 0) then
		return text
	end

	local cache = {}
	-- Returns (creating on demand) the cached rules and pattern results of a character.
	local function cacheEntry(char)
		local entry = cache[char]
		if not entry then
			entry = {
				rules = translation[char] or { { char } },
				class = {},
			}
			cache[char] = entry
		end
		return entry
	end
	-- Tells whether `char` matches the pattern `restriction`, memoising the answer.
	local function checkRule(char, restriction)
		local class = cacheEntry(char).class
		local result = class[restriction]
		if result == nil then
			result = mw.ustring.match(char, restriction) and true or false
			class[restriction] = result -- was never stored before, so the cache was useless
		end
		return result
	end
	-- Picks the replacement of `current` given its neighbours.
	local function translate(after, current, before)
		for _, v in ipairs(cacheEntry(current).rules) do
			local a = not v.a or checkRule(after, v.a)
			local b = not v.b or checkRule(before, v.b)
			if a and b then
				return v[1]
			end
		end
		return current
	end

	local combining = mw.loadData( 'Moduł:IPA/data' ).combining
	local result = {}
	-- Appends a replacement and, when requested, the combining marks that followed the source character.
	local function emit(translated, rest)
		table.insert(result, translated)
		if (#translated > 0) and preserveCombining then
			table.insert(result, rest)
		end
	end
	-- Splits a list of code points into the longest known prefix and the rest.
	-- (The original passed an undefined `i` as start index to unpack; 1 is meant.)
	local function split(char)
		for j = #char, 1, -1 do
			local part = mw.ustring.char(unpack(char, 1, j))
			if translation[part] then
				local rest = ""
				if j < #char then
					rest = mw.ustring.char(unpack(char, j + 1, #char))
				end
				return { known = part, rest = rest }
			end
		end
		-- nothing recognised: keep the whole character as is
		return { known = mw.ustring.char(unpack(char)), rest = "" }
	end

	-- sliding window: after | current | before
	local after = { known = "", rest = "" }
	local current = { known = "", rest = "" }
	local before = { known = "", rest = "" }

	local iterator = mw.ustring.gcodepoint(text)
	local codepoint = iterator()
	while codepoint do
		-- collect one base code point together with its combining marks
		-- (`char` is local now; it used to leak into the global environment)
		local char = { codepoint }
		codepoint = iterator()
		while codepoint and combining[codepoint] do
			char[#char + 1] = codepoint
			codepoint = iterator()
		end
		-- shift the window and append the new character
		after, current, before = current, before, split(char)
		if current.known ~= "" then
			emit(translate(after.known, current.known, before.known), current.rest)
		end
	end
	-- the last character is still waiting in the window; nothing follows it
	emit(translate(current.known, before.known, ""), before.rest)

	local output = table.concat(result, "")
	mw.log("return \""..output.."\"")
	return output
end
--- Rule-based transliteration working on single code points.
--
-- Code points flagged in the `combining` table of 'Moduł:IPA/data' are not
-- translated: they are dropped, or - with `preserveCombining` - copied to the
-- output right after the translation of the character they followed.
-- Every other code point is looked up in `translation`. A rule list is a
-- sequence of `{ replacement, a = pattern, b = pattern }`; `a` is matched
-- against the preceding non-combining character, `b` against the following
-- one. Code point 0 stands in for the missing neighbour at both ends of the
-- text. The first rule whose present restrictions all match wins; without a
-- match the character is copied unchanged and a message is logged.
--
-- Fixes over the previous version:
--  * combining marks after the last character are emitted after (not before) its translation;
--  * text that starts with a combining mark no longer causes the code point 0
--    placeholder itself to be translated and written to the output;
--  * negative pattern checks are cached too (results are stored as booleans).
--
-- @param text string to convert
-- @param translation table: character -> rule list
-- @param preserveCombining when truthy, combining marks are kept in the output
-- @return the converted string; `text` itself when text/translation is missing or text is empty
local function xlat(text, translation, preserveCombining)
	if not text or not translation or (#text == 0) then
		return text
	end

	local cache = {}
	-- Returns (creating on demand) the cached character, rules and pattern results of a code point.
	local function cacheEntry(codepoint)
		local entry = cache[codepoint]
		if not entry then
			local char = mw.ustring.char(codepoint)
			entry = {
				ch = char,
				rules = translation[char] or { { char } },
				class = {},
			}
			cache[codepoint] = entry
		end
		return entry
	end
	-- Tells whether the character of `codepoint` matches `restriction`, memoising the answer.
	local function checkRule(codepoint, restriction)
		local entry = cacheEntry(codepoint)
		local result = entry.class[restriction]
		if result == nil then
			result = mw.ustring.match(entry.ch, restriction) and true or false
			entry.class[restriction] = result
		end
		return result
	end

	local combining = mw.loadData( 'Moduł:IPA/data' ).combining
	local result = {}
	-- sliding window of code points: after (previous) | current | before (next)
	local after, current, before = 0, 0, 0

	-- Appends the translation of `current` to the output.
	local function translate()
		local entry = cacheEntry(current)
		for _, v in ipairs(entry.rules) do
			local a = not v.a or checkRule(after, v.a)
			local b = not v.b or checkRule(before, v.b)
			if a and b then
				table.insert(result, v[1])
				return
			end
		end
		mw.log(entry.ch..": unknown rule, "..current)
		table.insert(result, entry.ch)
	end

	local skipped = {}
	-- Writes out the combining marks collected so far.
	local function flushSkipped()
		if #skipped > 0 then
			table.insert(result, mw.ustring.char( unpack(skipped) ))
			skipped = {}
		end
	end

	-- becomes true once a real character sits in `before`
	local analyze = false
	for cp in mw.ustring.gcodepoint(text) do
		if combining[cp] then
			if preserveCombining then
				table.insert(skipped, cp)
			end
		else
			after, current, before = current, before, cp
			if analyze then
				translate()
			end
			-- the marks collected so far followed the character just translated
			flushSkipped()
			analyze = true
		end
	end
	if analyze then
		-- the last character is still waiting in `before`
		after, current, before = current, before, 0
		translate()
	end
	-- marks that followed the last character go after its translation
	flushSkipped()

	local output = table.concat(result, "")
	mw.log("return \""..output.."\"")
	return output
end
-- Context-sensitive transcription rules, keyed by language code and then by
-- source character. Every character maps to an ordered rule list; a rule is
-- `{ replacement, a = <pattern>, b = <pattern> }`. As consumed by xlat/xlat2
-- in this file, `a` is tested against the preceding character and `b` against
-- the following one; a rule without restrictions always applies, and the
-- first applicable rule wins.
local trans5 = {
	ru = {
		["А"] = { { "A", } }, ["а"] = { { "a", } },
		["Б"] = { { "B", } }, ["б"] = { { "b", } },
		["В"] = { { "W", } }, ["в"] = { { "w", } },
		["Г"] = { { "G", } }, ["г"] = { { "g", } },
		["Д"] = { { "D", } }, ["д"] = { { "d", } },
		["Е"] = {
			{ "Je", a = "[%W]$", b = "^[%l]", },
			{ "JE", a = "[%W]$", b = "^[%u%W]", },
			{ "JE", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "E", a = "[ЖжЛлЦцЧчШшЩщ]$", },
			{ "IE", },
		},
		["е"] = {
			{ "jE", a = "[%W]$", b = "^[%u]", },
			{ "je", a = "[%W]$", b = "^[%l%W]", },
			{ "je", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "e", a = "[ЖжЛлЦцЧчШшЩщ]$", },
			{ "ie", },
		},
		["Ё"] = {
			{ "Jo", a = "[%W]$", b = "^[%l]", },
			{ "JO", a = "[%W]$", b = "^[%u%W]", },
			{ "JO", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "O", a = "[ЖжЛлЦцЧчШшЩщ]$", },
			{ "IO", },
		},
		["ё"] = {
			{ "jO", a = "[%W]$", b = "^[%u]", },
			{ "jo", a = "[%W]$", b = "^[%l%W]", },
			{ "jo", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "o", a = "[ЖжЛлЦцЧчШшЩщ]$", },
			{ "io", },
		},
		["Ж"] = { { "Ż", } }, ["ж"] = { { "ż", } },
		["З"] = { { "Z", } }, ["з"] = { { "z", } },
		["И"] = {
			{ "JI", a = "[Ь]$", },
			{ "jI", a = "[ь]$", },
			{ "Y", a = "[ЖжЦцШш]$", },
			{ "I", },
		},
		["и"] = {
			{ "Ji", a = "[Ь]$", },
			{ "ji", a = "[ь]$", },
			{ "y", a = "[ЖжЦцШш]$", },
			{ "i", },
		},
		["Й"] = { { "J", } }, ["й"] = { { "j", } },
		["К"] = { { "K", } }, ["к"] = { { "k", } },
		["Л"] = {
			{ "L", b = "^[ЕеЁёИиЬьЮюЯя]", },
			{ "Ł", },
		},
		["л"] = {
			{ "l", b = "^[ЕеЁёИиЬьЮюЯя]", },
			{ "ł", },
		},
		["М"] = { { "M", } }, ["м"] = { { "m", } },
		["Н"] = { { "N", } }, ["н"] = { { "n", } },
		["О"] = { { "O", } }, ["о"] = { { "o", } },
		["П"] = { { "P", } }, ["п"] = { { "p", } },
		["Р"] = { { "R", } }, ["р"] = { { "r", } },
		["С"] = { { "S", } }, ["с"] = { { "s", } },
		["Т"] = { { "T", } }, ["т"] = { { "t", } },
		["У"] = { { "U", } }, ["у"] = { { "u", } },
		["Ф"] = { { "F", } }, ["ф"] = { { "f", } },
		["Х"] = {
			{ "Ch", a = "[%W]$", b = "^[%l]", },
			{ "CH", },
		},
		["х"] = {
			{ "cH", a = "[%W]$", b = "^[%u]", },
			{ "ch", },
		},
		["Ц"] = { { "C", } }, ["ц"] = { { "c", } },
		["Ч"] = {
			{ "Cz", a = "[%W]$", b = "^[%l]", },
			{ "CZ", },
		},
		["ч"] = {
			{ "cZ", a = "[%W]$", b = "^[%u]", },
			{ "cz", },
		},
		["Ш"] = {
			{ "Sz", a = "[%W]$", b = "^[%l]", },
			{ "SZ", },
		},
		["ш"] = {
			{ "sZ", a = "[%W]$", b = "^[%u]", },
			{ "sz", },
		},
		["Щ"] = {
			{ "Szcz", a = "[%W]$", b = "^[%l]", },
			{ "SZCZ", },
		},
		["щ"] = {
			{ "sZCZ", a = "[%W]$", b = "^[%u]", },
			{ "szcz", },
		},
		["Ъ"] = { { "", } }, ["ъ"] = { { "", } },
		["Ы"] = { { "Y", } }, ["ы"] = { { "y", } },
		["Ь"] = {
			{ "", a = "[ЖжЛлЧчШшЩщ]$", },
			{ "", b = "^[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]", },
			{ "´", },
		},
		["ь"] = {
			{ "", a = "[ЖжЛлЧчШшЩщ]$", },
			{ "", b = "^[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]", },
			{ "´", },
		},
		["Э"] = { { "E", } }, ["э"] = { { "e", } },
		["Ю"] = {
			{ "Ju", a = "[%W]$", b = "^[%l]", },
			{ "JU", a = "[%W]$", b = "^[%u%W]", },
			{ "JU", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "U", a = "[Лл]$", },
			{ "IU", },
		},
		["ю"] = {
			{ "jU", a = "[%W]$", b = "^[%u]", },
			{ "ju", a = "[%W]$", b = "^[%l%W]", },
			{ "ju", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "u", a = "[Лл]$", },
			{ "iu", },
		},
		["Я"] = {
			{ "Ja", a = "[%W]$", b = "^[%l]", },
			{ "JA", a = "[%W]$", b = "^[%u%W]", },
			{ "JA", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "A", a = "[Лл]$", },
			{ "IA", },
		},
		["я"] = {
			{ "jA", a = "[%W]$", b = "^[%u]", },
			{ "ja", a = "[%W]$", b = "^[%l%W]", },
			{ "ja", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "a", a = "[Лл]$", },
			{ "ia", },
		},
		["’"] = { { "’", } },
	},
}
--- Entry point: converts the first unnamed argument with the `trans5` rules
-- of the language given in `lang`, using xlat.
-- Combining marks are kept only when `accents` equals "preserve".
-- @param frame Scribunto frame object
-- @return the converted text; the unchanged argument when it is missing or
--         empty, or when `lang` has no rule set
function m.trans5(frame)
	local args = frame.args
	local text, rules = args[1], trans5[args.lang]
	local keepAccents = (args.accents == "preserve")
	if text and rules and #text ~= 0 then
		return xlat(text, rules, keepAccents)
	end
	return text
end
--- Entry point: converts the first unnamed argument with the `trans5` rules
-- of the language given in `lang`, using xlat2.
-- Combining marks are kept only when `accents` equals "preserve".
-- @param frame Scribunto frame object
-- @return the converted text; the unchanged argument when it is missing or
--         empty, or when `lang` has no rule set
function m.trans6(frame)
	local args = frame.args
	local text, rules = args[1], trans5[args.lang]
	local keepAccents = (args.accents == "preserve")
	if text and rules and #text ~= 0 then
		return xlat2(text, rules, keepAccents)
	end
	return text
end
-- Transcription rules used by m.trans2, keyed by language code and then by
-- source character. Each character entry holds:
--   d          - the default replacement, used when no pattern rule matches;
--   [1..n]     - optional rules { pattern, replacement }, tried in order.
-- m.trans2 matches each pattern against a three-character fragment
-- "<previous><current><next>", so a pattern spells out the context on both
-- sides ("." = any neighbour, "[%W]" = non-alphanumeric neighbour, ...).
local trans2 = {
ru = {
["А"] = { d = "A", }, ["а"] = { d = "a", },
["Б"] = { d = "B", }, ["б"] = { d = "b", },
["В"] = { d = "W", }, ["в"] = { d = "w", },
["Г"] = { d = "G", }, ["г"] = { d = "g", },
["Д"] = { d = "D", }, ["д"] = { d = "d", },
["Е"] = {
{ "[%W]Е[%l]", "Je" },
{ "[%W]Е[%u%W]", "JE" },
{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]Е.", "JE" },
{ "[ЖжЛлЦцЧчШшЩщ]Е.", "E" },
d = "IE",
},
["е"] = {
{ "[%W]е[%u]", "jE" },
{ "[%W]е[%l%W]", "je" },
{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]е.", "je" },
{ "[ЖжЛлЦцЧчШшЩщ]е.", "e" },
d = "ie",
},
["Ё"] = {
{ "[%W]Ё[%l]", "Jo" },
{ "[%W]Ё[%u%W]", "JO" },
{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]Ё.", "JO" },
{ "[ЖжЛлЦцЧчШшЩщ]Ё.", "O" },
d = "IO",
},
["ё"] = {
{ "[%W]ё[%u]", "jO" },
{ "[%W]ё[%l%W]", "jo" },
{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]ё.", "jo" },
{ "[ЖжЛлЦцЧчШшЩщ]ё.", "o" },
d = "io",
},
["Ж"] = { d = "Ż", }, ["ж"] = { d = "ż", },
["З"] = { d = "Z", }, ["з"] = { d = "z", },
["И"] = {
{ "ЬИ.", "JI" },
{ "ьИ.", "jI" },
{ "[ЖжЦцШш]И.", "Y" },
d = "I",
},
["и"] = {
{ "Ьи.", "Ji" },
{ "ьи.", "ji" },
{ "[ЖжЦцШш]и.", "y" },
d = "i",
},
["Й"] = { d = "J", }, ["й"] = { d = "j", },
["К"] = { d = "K", }, ["к"] = { d = "k", },
["Л"] = {
{ ".Л[ЕеЁёИиЬьЮюЯя]", "L" },
d = "Ł",
},
["л"] = {
{ ".л[ЕеЁёИиЬьЮюЯя]", "l" },
d = "ł",
},
["М"] = { d = "M", }, ["м"] = { d = "m", },
["Н"] = { d = "N", }, ["н"] = { d = "n", },
["О"] = { d = "O", }, ["о"] = { d = "o", },
["П"] = { d = "P", }, ["п"] = { d = "p", },
["Р"] = { d = "R", }, ["р"] = { d = "r", },
["С"] = { d = "S", }, ["с"] = { d = "s", },
["Т"] = { d = "T", }, ["т"] = { d = "t", },
["У"] = { d = "U", }, ["у"] = { d = "u", },
["Ф"] = { d = "F", }, ["ф"] = { d = "f", },
["Х"] = {
{ "[%W]Х[%l]", "Ch" },
d = "CH",
},
["х"] = {
{ "[%W]х[%u]", "cH" },
d = "ch",
},
["Ц"] = { d = "C", }, ["ц"] = { d = "c", },
["Ч"] = {
{ "[%W]Ч[%l]", "Cz" },
d = "CZ",
},
["ч"] = {
{ "[%W]ч[%u]", "cZ" },
d = "cz",
},
["Ш"] = {
{ "[%W]Ш[%l]", "Sz" },
d = "SZ",
},
["ш"] = {
{ "[%W]ш[%u]", "sZ" },
d = "sz",
},
["Щ"] = {
{ "[%W]Щ[%l]", "Szcz" },
d = "SZCZ",
},
["щ"] = {
{ "[%W]щ[%u]", "sZCZ" },
d = "szcz",
},
["Ъ"] = { d = "", }, ["ъ"] = { d = "", },
["Ы"] = { d = "Y", }, ["ы"] = { d = "y", },
-- soft sign: dropped after the listed consonants or before a vowel/sign, otherwise "´"
["Ь"] = {
{ "[ЖжЛлЧчШшЩщ]Ь.", "" },
-- { "[ЖжЛлЧчШшЩщ]Ь.", "" },
{ ".Ь[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]", "" },
d = "´",
},
["ь"] = {
{ "[ЖжЛлЧчШшЩщ]ь.", "" },
-- { "[ЖжЛлЧчШшЩщ]ь.", "" },
{ ".ь[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]", "" },
d = "´",
},
["Э"] = { d = "E", }, ["э"] = { d = "e", },
["Ю"] = {
{ "[%W]Ю[%l]", "Ju" },
{ "[%W]Ю[%u%W]", "JU" },
{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]Ю.", "JU" },
{ "[Лл]Ю.", "U" },
d = "IU",
},
["ю"] = {
{ "[%W]ю[%u]", "jU" },
{ "[%W]ю[%l%W]", "ju" },
{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]ю.", "ju" },
{ "[Лл]ю.", "u" },
d = "iu",
},
["Я"] = {
{ "[%W]Я[%l]", "Ja" },
{ "[%W]Я[%u%W]", "JA" },
{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]Я.", "JA" },
{ "[Лл]Я.", "A" },
d = "IA",
},
["я"] = {
{ "[%W]я[%u]", "jA" },
{ "[%W]я[%l%W]", "ja" },
{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]я.", "ja" },
{ "[Лл]я.", "a" },
d = "ia",
},
["’"] = { d = "’", },
},
}
--- Entry point: converts the first unnamed argument with the `trans2` rules
-- of the language given in `lang`.
--
-- The text is processed one code point at a time with a window of
-- previous / current / next code point. For a character that has pattern
-- rules, the three window characters are joined into a fragment and the
-- first rule whose pattern matches the fragment supplies the replacement;
-- otherwise the character's default `d` is used. Characters without an entry
-- are copied unchanged and are then seen by their successor as the
-- placeholder character (see `unknown`).
-- Code points flagged in the `combining` table of 'Moduł:IPA/data' are
-- dropped, or - when `accents` equals "preserve" - re-emitted after the
-- translation of the character they followed.
--
-- Fix: combining marks following the last character were appended as numbers
-- (so their decimal code point ended up in the output); they are now
-- converted to characters like everywhere else.
--
-- @param frame Scribunto frame object
-- @return the converted text; the unchanged argument when it is missing or
--         empty, or when `lang` has no rule set
function m.trans2(frame)
	local text = frame.args[1]
	local trans = trans2[frame.args.lang]
	local preserveCombining = frame.args.accents == "preserve"
	if not text or not trans or (#text == 0) then
		return text
	end
	local combining = mw.loadData( 'Moduł:IPA/data' ).combining
	local result = {}
	-- placeholder neighbour used at both ends of the text and after unknown
	-- characters (presumably U+FFFD REPLACEMENT CHARACTER - confirm the literal)
	local unknown = mw.ustring.codepoint("�", 1)
	local after, current, before = unknown, unknown, unknown

	-- Appends the translation of `current` to the output.
	local function translate()
		local ch = mw.ustring.char(current)
		local rules = trans[ch]
		if not rules then
			mw.log(ch..": unknown, "..current)
			table.insert(result, ch)
			-- the next character will see the placeholder as its predecessor
			current = unknown
			return
		end
		if #rules > 0 then
			mw.log(after..", "..current..", "..before)
			local fragment = mw.ustring.char(after, current, before)
			for _, v in ipairs(rules) do
				if mw.ustring.match(fragment, v[1]) then
					mw.log(ch..": \""..fragment.."\" match("..v[1]..") -> "..v[2])
					table.insert(result, v[2])
					return
				end
				mw.log(ch..": \""..fragment.."\" does not match("..v[1]..") -> "..v[2])
			end
		end
		mw.log(ch..": default -> "..rules.d)
		table.insert(result, rules.d)
	end

	local skipped = {}
	-- Writes the collected combining marks to the output as characters.
	local function restoreSkipped()
		for _, v in ipairs(skipped) do
			mw.log("restore combing character "..v)
			table.insert(result, mw.ustring.char(v))
		end
		skipped = {}
	end

	local iterator = mw.ustring.gcodepoint(text)
	-- the first code point only primes the window; it is translated once its successor is known
	before = iterator()
	for cp in iterator do
		if combining[cp] then
			if preserveCombining then
				mw.log("preserve combing character "..cp)
				table.insert(skipped, cp)
				mw.log("preserved count "..#skipped)
			else
				mw.log("ignore combining character "..cp)
			end
		else
			after = current
			current = before
			before = cp
			translate()
			mw.log("preserved count "..#skipped)
			restoreSkipped()
		end
	end
	-- translate the last character, which has no successor
	after = current
	current = before
	before = unknown
	translate()
	restoreSkipped()
	return table.concat(result, "")
end
-- Character-for-character transliteration tables used by m.Transliteracja,
-- keyed by language code. Each table maps one source character to its Latin
-- replacement string; characters that are not listed are left unchanged.
--
-- NOTE(review): a few entries look suspicious and should be checked against the intended standard:
--  * ru/be "Ё"/"ё" and uk "Ї"/"ї" appear to map to themselves;
--  * be maps "ь" to a space while "Ь" maps to "′";
--  * sr "џ" maps to a value with a trailing space, and sr "Љ"/"љ" map to plain L/l while mk uses L̂/l̂;
--  * the be keys for "I"/"i", the mk keys for "S"/"s" and the el key for "Y" may be
--    Latin look-alikes rather than the Cyrillic/Greek letters - confirm the code points.
local transliterations = {
ru = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Ё", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Ŝ", ["Ъ"] = "″", ["Ы"] = "Y", ["Ь"] = "′", ["Э"] = "È", ["Ю"] = "Û", ["Я"] = "Â", ["’"] = "’", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ё"] = "ё", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "ŝ", ["ъ"] = "″", ["ы"] = "y", ["ь"] = "′", ["э"] = "è", ["ю"] = "û", ["я"] = "â", },
uk = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Ґ"] = "G", ["Д"] = "D", ["Е"] = "E", ["Є"] = "Ê", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["І"] = "Ì", ["Ї"] = "Ї", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Ŝ", ["Ь"] = "′", ["Ю"] = "Û", ["Я"] = "Â", ["’"] = "’", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["ґ"] = "g", ["д"] = "d", ["е"] = "e", ["є"] = "ê", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["і"] = "ì", ["ї"] = "ї", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "ŝ", ["ь"] = "′", ["ю"] = "û", ["я"] = "â", },
be = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Ё", ["Ж"] = "Ž", ["З"] = "Z", ["I"] = "Ì", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ў"] = "Ŭ", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Ы"] = "Y", ["Ь"] = "′", ["Э"] = "È", ["Ю"] = "Û", ["Я"] = "Â", ["’"] = "’", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ё"] = "ё", ["ж"] = "ž", ["з"] = "z", ["i"] = "ì", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ў"] = "ŭ", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["ы"] = "y", ["ь"] = " ", ["э"] = "è", ["ю"] = "û", ["я"] = "â", },
bg = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Ŝ", ["Ъ"] = "″", ["Ь"] = "´", ["Ю"] = "Û", ["Я"] = "Â", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "i", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "ŝ", ["ъ"] = "″", ["ь"] = "´", ["ю"] = "û", ["я"] = "â", },
sr = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Ђ"] = "Đ", ["Е"] = "E", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Ј"] = "J", ["К"] = "K", ["Л"] = "L", ["Љ"] = "L", ["М"] = "M", ["Н"] = "N", ["Њ"] = "N̂", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["Ћ"] = "Ć", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Џ"] = "D̂", ["Ш"] = "Š", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["ђ"] = "đ", ["е"] = "e", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["ј"] = "j", ["к"] = "k", ["л"] = "l", ["љ"] = "l", ["м"] = "m", ["н"] = "n", ["њ"] = "n̂", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["ћ"] = "ć", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["џ"] = "d̂ ",["ш"] = "š", },
mk = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Ѓ"] = "Ǵ", ["Д"] = "D", ["Е"] = "E", ["Ж"] = "Ž", ["З"] = "Z", ["S"] = "Ẑ", ["И"] = "I", ["Ј"] = "J̌", ["К"] = "K", ["Л"] = "L", ["Љ"] = "L̂", ["М"] = "M", ["Н"] = "N", ["Њ"] = "N̂", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["Ќ"] = "Ḱ", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Џ"] = "D̂", ["Ш"] = "Š", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["ѓ"] = "ǵ", ["д"] = "d", ["е"] = "e", ["ж"] = "ž", ["з"] = "z", ["s"] = "ẑ", ["и"] = "i", ["ј"] = "ǰ", ["к"] = "k", ["л"] = "l", ["љ"] = "l̂", ["м"] = "m", ["н"] = "n", ["њ"] = "n̂", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["ќ"] = "ḱ", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["џ"] = "d̂", ["ш"] = "š", },
el = {
-- ISO 843:1997 TL
["Α"] = "A", ["α"] = "a",
["Β"] = "V", ["β"] = "v", ["ϐ"] = "v",
["Γ"] = "G", ["γ"] = "g",
["Δ"] = "D", ["δ"] = "d",
["Ε"] = "E", ["ε"] = "e", ["ϵ"] = "e", ["϶"] = "e",
["Ζ"] = "Z", ["ζ"] = "z",
["Η"] = "Ī", ["η"] = "ī",
["Θ"] = "Th", ["θ"] = "th", ["ϴ"] = "Th", ["ϑ"] = "th",
["Ι"] = "I", ["ι"] = "i",
["Κ"] = "K", ["κ"] = "k", ["ϰ"] = "k", ["ϗ"] = "k",
["Λ"] = "L", ["λ"] = "l",
["Μ"] = "M", ["μ"] = "m",
["Ν"] = "N", ["ν"] = "n",
["Ξ"] = "X", ["ξ"] = "x",
["Ο"] = "O", ["ο"] = "o",
["Π"] = "P", ["π"] = "p", ["ϖ"] = "p",
["Ρ"] = "R", ["ρ"] = "r", ["ϱ"] = "r",
["Σ"] = "S", ["σ"] = "s", ["ϲ"] = "s", ["Ϛ"] = "S", ["ς"] = "s",
["Τ"] = "T", ["τ"] = "t",
["Y"] = "Y", ["υ"] = "y", ["ϒ"] = "Y",
["Φ"] = "F", ["φ"] = "f", ["ϕ"] = "f",
["Χ"] = "Ch", ["χ"] = "ch",
["Ψ"] = "Ps", ["ψ"] = "ps",
["Ω"] = "Ō", ["ω"] = "ō",
},
}
--- Transliterates frame.args[1] one character at a time.
-- The replacement table is looked up in the module-level `transliterations`
-- under frame.args.lang. The text is decomposed (NFD) before the lookup and
-- recomposed (NFC) afterwards; characters without an entry are kept as they are.
-- @param frame Scribunto frame; reads args[1] (text) and args.lang
-- @return the transliterated text, or args[1] unchanged (possibly nil) when the
--         text is missing or no table exists for the language
function m.Transliteracja(frame)
	local args = frame.args
	local source = args[1]
	local mapping = transliterations[args.lang]
	if source and mapping then
		local decomposed = mw.ustring.toNFD(source)
		-- Only the first result of gsub (the new string) is kept.
		local replaced = mw.ustring.gsub(decomposed, ".", mapping)
		return mw.ustring.toNFC(replaced)
	end
	return source
end
-- Transcription recipes keyed by language code, consumed by m.Transkrypcja.
-- The array part of a recipe holds { pattern, replacement } pairs that are
-- applied in order with mw.ustring.gsub; afterwards every remaining single
-- character is looked up in `simple`.
-- Rule order matters: a rule that fires replaces Cyrillic letters with Latin
-- ones, so later rules (and `simple`) no longer see them.
-- NOTE(review): the output spelling (W, Ż, Ł, SZCZ, ...) looks like a
-- Polish-style transcription of Russian - confirm against the intended standard.
local transcriptions = {
	ru = {
		simple = {
			["А"] = "A", ["а"] = "a",
			["Б"] = "B", ["б"] = "b",
			["В"] = "W", ["в"] = "w",
			["Г"] = "G", ["г"] = "g",
			["Д"] = "D", ["д"] = "d",
			["Е"] = "IE", ["е"] = "ie",
			["Ё"] = "IO", ["ё"] = "io",
			["Ж"] = "Ż", ["ж"] = "ż",
			["З"] = "Z", ["з"] = "z",
			["И"] = "I", ["и"] = "i",
			["Й"] = "J", ["й"] = "j",
			["К"] = "K", ["к"] = "k",
			["Л"] = "Ł", ["л"] = "ł",
			["М"] = "M", ["м"] = "m",
			["Н"] = "N", ["н"] = "n",
			["О"] = "O", ["о"] = "o",
			["П"] = "P", ["п"] = "p",
			["Р"] = "R", ["р"] = "r",
			["С"] = "S", ["с"] = "s",
			["Т"] = "T", ["т"] = "t",
			["У"] = "U", ["у"] = "u",
			["Ф"] = "F", ["ф"] = "f",
			["Х"] = "CH", ["х"] = "ch",
			["Ц"] = "C", ["ц"] = "c",
			["Ч"] = "CZ", ["ч"] = "cz",
			["Ш"] = "SZ", ["ш"] = "sz",
			["Щ"] = "SZCZ", ["щ"] = "szcz",
			["Ъ"] = "", ["ъ"] = "",
			["Ы"] = "Y", ["ы"] = "y",
			["Ь"] = "´", ["ь"] = "´",
			["Э"] = "E", ["э"] = "e",
			["Ю"] = "IU", ["ю"] = "iu",
			["Я"] = "IA", ["я"] = "ia",
		},
		-- Л directly before И, Е, Ё, Ю, Я or Ь becomes Latin L/l and the vowel
		-- is written plainly (Ь is kept here and removed by the clean-up rule
		-- further down). Any other Л falls through to `simple` and becomes Ł/ł.
		{ "ЛИ", "LI" },
		{ "Ли", "Li" },
		{ "ЛЕ", "LE" },
		{ "Ле", "Le" },
		{ "ЛЁ", "LO" },
		{ "Лё", "Lo" },
		{ "ЛЮ", "LU" },
		{ "Лю", "Lu" },
		{ "ЛЯ", "LA" },
		{ "Ля", "La" },
		{ "ЛЬ", "LЬ" },
		{ "Ль", "Lь" },
		{ "лИ", "lI" },
		{ "ли", "li" },
		{ "лЕ", "lE" },
		{ "ле", "le" },
		{ "лЁ", "lO" },
		{ "лё", "lo" },
		{ "лЮ", "lU" },
		{ "лю", "lu" },
		{ "лЯ", "lA" },
		{ "ля", "la" },
		{ "лЬ", "lЬ" },
		{ "ль", "lь" },
		-- Е at the start of a word (%f[%w]) becomes J + E; the case of the "E"
		-- follows the next character. The case-specific rule has to run first
		-- and the general rule takes whatever is left. A trailing frontier such
		-- as %f[%u%W] cannot express the general case: a frontier also requires
		-- the PREVIOUS character to be outside the set, and that character is
		-- the uppercase (resp. lowercase) letter just matched, so such a rule
		-- never matches at all.
		-- NOTE(review): these word-initial rules leave Latin letters behind, so
		-- a second iotated vowel right after a word-initial one (e.g. "Её") is
		-- not recognised by the after-vowel rules below and falls through to
		-- `simple`.
		{ "%f[%w]Е%f[%l]", "Je" },
		{ "%f[%w]Е", "JE" },
		{ "%f[%w]е%f[%u]", "jE" },
		{ "%f[%w]е", "je" },
		-- Е after a soft sign: the sign is dropped (the capture is not reused).
		{ "([Ьь])Е", "JE" },
		{ "([Ьь])е", "je" },
		-- Е after a vowel, Й, Ъ or Ь.
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя])Е", "%1JE" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя])е", "%1je" },
		-- Е after Ж, Ц, Ч, Ш, Щ is written as plain E.
		{ "([ЖжЦцЧчШшЩщ])Е", "%1E" },
		{ "([ЖжЦцЧчШшЩщ])е", "%1e" },
		-- { "([БВГДЗКМНПРСТФХбвгдзкмнпрстфх])Е", "%1IE" },
		-- { "([БВГДЗКМНПРСТФХбвгдзкмнпрстфх])е", "%1Ie" },
		-- Ё: same scheme as Е (word-initial, after Ь, after a vowel, after Ж/Ч/Ш/Щ).
		{ "%f[%w]Ё%f[%l]", "Jo" },
		{ "%f[%w]Ё", "JO" },
		{ "%f[%w]ё%f[%u]", "jO" },
		{ "%f[%w]ё", "jo" },
		{ "([Ьь])Ё", "JO" },
		{ "([Ьь])ё", "jo" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])Ё", "%1JO" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])ё", "%1jo" },
		{ "([ЖжЧчШшЩщ])Ё", "%1O" },
		{ "([ЖжЧчШшЩщ])ё", "%1o" },
		-- { "([БВГДЗКМНПРСТФХЦбвгдзкмнпрстфхц])Ё", "%1IO" },
		-- { "([БВГДЗКМНПРСТФХЦбвгдзкмнпрстфхц])ё", "%1io" },
		-- И after a soft sign becomes J + I; И after Ж, Ц, Ш becomes Y.
		{ "ЬИ", "JI" },
		{ "Ьи", "Ji" },
		{ "ьИ", "jI" },
		{ "ьи", "ji" },
		{ "([ЖжЦцШш])И", "%1Y" },
		{ "([ЖжЦцШш])и", "%1y" },
		-- Ю: word-initial, after Ь (sign dropped), after a vowel.
		{ "%f[%w]Ю%f[%l]", "Ju" },
		{ "%f[%w]Ю", "JU" },
		{ "%f[%w]ю%f[%u]", "jU" },
		{ "%f[%w]ю", "ju" },
		{ "[Ьь]Ю", "JU" },
		{ "[Ьь]ю", "ju" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])Ю", "%1JU" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])ю", "%1ju" },
		-- { "([ЖЧШЩБВГДЗКМНПРСТФХЦжчшщбвгдзкмнпрстфхц])Ю", "%1IU" },
		-- { "([ЖЧШЩБВГДЗКМНПРСТФХЦжчшщбвгдзкмнпрстфхц])ю", "%1iu" },
		-- Я: word-initial, after Ь (sign dropped), after a vowel.
		{ "%f[%w]Я%f[%l]", "Ja" },
		{ "%f[%w]Я", "JA" },
		{ "%f[%w]я%f[%u]", "jA" },
		{ "%f[%w]я", "ja" },
		{ "([Ьь])Я", "JA" },
		{ "([Ьь])я", "ja" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])Я", "%1JA" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])я", "%1ja" },
		-- { "([ЖЧШЩБВГДЗКМНПРСТФХЦжчшщбвгдзкмнпрстфхц])Я", "%1IA" },
		-- { "([ЖЧШЩБВГДЗКМНПРСТФХЦжчшщбвгдзкмнпрстфхц])я", "%1ia" },
		-- Clean-up: a soft sign still standing before a vowel, or after Latin
		-- L/l (produced by the Л rules above) or Ж, Ч, Ш, Щ, is removed.
		{ "[Ьь]([АаЕеЁёИиЙйОоУуЫыЬьЭэЮюЯя])", "%1" },
		{ "([LlЖЧШЩжчшщ])[Ьь]", "%1" },
		-- Word-initial Х, Ч, Ш, Щ: only the first Latin letter of the digraph
		-- keeps the case of the Cyrillic letter; the rest follows the next
		-- character. Case-specific rule first, general rule second, for the
		-- same frontier reason as explained for Е above.
		{ "%f[%w]Х%f[%l]", "Ch" },
		{ "%f[%w]Х", "CH" },
		{ "%f[%w]х%f[%u]", "cH" },
		{ "%f[%w]х", "ch" },
		{ "%f[%w]Ч%f[%l]", "Cz" },
		{ "%f[%w]Ч", "CZ" },
		{ "%f[%w]ч%f[%u]", "cZ" },
		{ "%f[%w]ч", "cz" },
		{ "%f[%w]Ш%f[%l]", "Sz" },
		{ "%f[%w]Ш", "SZ" },
		{ "%f[%w]ш%f[%u]", "sZ" },
		{ "%f[%w]ш", "sz" },
		{ "%f[%w]Щ%f[%l]", "Szcz" },
		{ "%f[%w]Щ", "SZCZ" },
		{ "%f[%w]щ%f[%u]", "sZCZ" },
		{ "%f[%w]щ", "szcz" },
	},
}
--- Transcribes frame.args[1] with the recipe stored in the module-level
-- `transcriptions` under frame.args.lang.
-- The ordered { pattern, replacement } rules of the recipe are applied first,
-- then every remaining character is mapped through the recipe's `simple` table.
-- @param frame Scribunto frame; reads args[1] (text) and args.lang
-- @return the transcribed text, or args[1] unchanged (possibly nil) when the
--         text is missing or no recipe exists for the language
function m.Transkrypcja(frame)
	local args = frame.args
	local text = args[1]
	local recipe = transcriptions[args.lang]
	if not (text and recipe) then
		return text
	end
	for _, rule in ipairs(recipe) do
		local pattern, replacement = rule[1], rule[2]
		-- Only the first result of gsub (the new string) is kept.
		text = mw.ustring.gsub(text, pattern, replacement)
	end
	local mapped = mw.ustring.gsub(text, ".", recipe.simple)
	return mapped
end
--- Loads one transformation table from the per-language data submodule
-- 'Moduł:Brudnopis/Paweł Ziemian/xlat/<lang>'.
-- Every failure is reported with mw.log and yields nil.
-- @param lang language code (non-empty string) selecting the data submodule
-- @param name key of the requested table inside that submodule
-- @return data, model - the table and "gsub" when the first value found in it
--         is a string, or "xlat" when it is a table; nil when the language or
--         name is missing, the submodule cannot be loaded, the entry is not a
--         table, the table is empty, or its first value has another type
local function loadXlatData(lang, name)
	if not lang or (#lang == 0) then
		mw.log("Brak kodu języka")
		return nil
	end
	if not name then
		mw.log("Brak wyboru tabeli")
		-- Nothing can be looked up without a name; going on would also raise
		-- in the log message below, which concatenates `name`.
		return nil
	end
	local dataModule = 'Moduł:Brudnopis/Paweł Ziemian/xlat/'..lang
	local success, module = pcall(mw.loadData, dataModule)
	if not success or not module then
		mw.log("Błąd ładowania modułu dla języka: "..lang)
		return nil
	end
	local data = module[name]
	-- A missing entry and a non-table entry (e.g. a plain string) are both
	-- unusable; pairs() below would raise on the latter.
	if type(data) ~= "table" then
		mw.log("Błąd ładowania tabeli '"..name.."' dla języka: "..lang)
		return nil
	end
	-- The model is decided by the first value pairs() yields. pairs is used on
	-- purpose: tables returned by mw.loadData do not support next() directly.
	for _, value in pairs(data) do
		local kind = type(value)
		if kind == "string" then
			return data, "gsub"
		elseif kind == "table" then
			return data, "xlat"
		end
		return nil
	end
	-- Empty table: no model can be determined.
	return nil
end
--- Shared implementation of the transliteration / transcription entry points.
-- Text, `lang` and `accents` are read from the invoking frame first and from
-- the parent frame as a fallback. The table named `method` is loaded for the
-- language with loadXlatData and applied according to its model.
-- @param frame Scribunto frame
-- @param method name of the table in the language data module
-- @return the transformed text; nothing when the text is empty, no table could
--         be loaded, or the model is not supported (each case is logged)
local function transform(frame, method)
	local pf = frame:getParent()
	local text = frame.args[1] or pf.args[1]
	if not text or (#text == 0) then
		mw.log("Brak tekstu")
		return
	end
	local lang = frame.args.lang or pf.args.lang
	local data, model = loadXlatData(lang, method)
	if not data then
		-- lang may be nil here (missing parameter); tostring keeps the log
		-- call itself from raising.
		mw.log("Brak przepisu transformacji '"..method.."' dla języka "..tostring(lang))
		return
	end
	if model == "gsub" then
		-- Per-character table lookup on the decomposed text, recomposed after.
		local result = mw.ustring.gsub(mw.ustring.toNFD(text), ".", data)
		return mw.ustring.toNFC(result)
	end
	if model == "xlat" then
		local preserveCombing = (frame.args.accents or pf.args.accents) == "preserve"
		-- Pass the table that was just loaded. The original passed an
		-- undefined name (`transcription`), i.e. nil.
		-- NOTE(review): xlat3 is defined elsewhere in this module; it is
		-- assumed to take (text, rules, preserveAccents) - confirm.
		return xlat3(text, data, preserveCombing)
	end
	mw.log("Nieobsługiwany model transformacji '"..tostring(model).."' dla języka "..lang)
end
--- Entry point: applies the "transliteracja" table of the requested language
-- to the text by delegating to transform.
-- @param frame Scribunto frame
-- @return whatever transform returns for the "transliteracja" method
m.transliteracja = function(frame)
	local method = "transliteracja"
	return transform(frame, method)
end
--- Entry point: applies the "transkrypcja" table of the requested language
-- to the text by delegating to transform.
-- @param frame Scribunto frame
-- @return whatever transform returns for the "transkrypcja" method
m.transkrypcja = function(frame)
	local method = "transkrypcja"
	return transform(frame, method)
end
--- Returns the text in decomposed (NFD) form, optionally upper-cased.
-- Parameters are read from the invoking frame first, then from its parent:
-- args[1] is the text; when both `upper` and `lang` are given the decomposed
-- text is upper-cased with the language object for `lang`.
-- @param frame Scribunto frame
-- @return the decomposed text; nothing (after logging) when the text is empty
function m.decompose(frame)
	local pf = frame:getParent()
	local input = frame.args[1] or pf.args[1]
	local upper = frame.args.upper or pf.args.upper
	local lang = frame.args.lang or pf.args.lang
	if not (input and #input > 0) then
		mw.log("Brak tekstu")
		return
	end
	local decomposed = mw.ustring.toNFD(input)
	if not (upper and lang) then
		return decomposed
	end
	local uppercased = mw.getLanguage(lang):uc(decomposed)
	return uppercased
end
--- Lists the code points of the text, each rendered with string.format and
-- joined with single spaces.
-- Parameters are read from the invoking frame first, then from its parent:
-- args[1] is the text, `format` the string.format pattern (default "%x").
-- @param frame Scribunto frame
-- @return the formatted code points; nothing (after logging) when the text is empty
function m.split(frame)
	local pf = frame:getParent()
	local text = frame.args[1] or pf.args[1]
	local fmt = frame.args.format or pf.args.format or "%x"
	if not (text and #text > 0) then
		mw.log("Brak tekstu")
		return
	end
	local pieces = {}
	for codepoint in mw.ustring.gcodepoint(text) do
		pieces[#pieces + 1] = string.format(fmt, codepoint)
	end
	return table.concat(pieces, " ")
end
--- Builds a diagnostic label "<name>/<decimal>/0x<hex>" for a code point.
-- <name> is the `comment` of the matching entry in the module-level Jamo
-- table, or the character itself when the table has no entry for it.
-- @param S code point (number)
-- @return the label string
local function diagjamo(S)
	local glyph = mw.ustring.char(S)
	local entry = Jamo[glyph]
	local label = glyph
	if entry then
		label = entry.comment
	end
	return string.format("%s/%d/0x%x", label, S, S)
end
--- Decomposes precomposed Hangul syllables into their conjoining jamo.
-- The text is args[1] of the invoking frame, or of its parent as a fallback.
-- Code points outside the syllable block are copied unchanged. Every code
-- point is also reported through mw.log for diagnostics.
-- @param frame Scribunto frame
-- @return the decomposed text; nothing (after logging) when the text is empty
function m.hangul(frame)
	local pf = frame:getParent()
	local text = frame.args[1] or pf.args[1]
	if not text or (#text == 0) then
		mw.log("Brak tekstu")
		return
	end
	-- http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf#G24646
	local SBase = 44032 -- 0xAC00
	local LBase = 4352 -- 0x1100
	local VBase = 4449 -- 0x1161
	local TBase = 4519 -- 0x11A7
	local SCount = 11172
	local LCount = 19
	local VCount = 21
	local TCount = 28
	local NCount = VCount * TCount
	-- Pieces are converted to strings one by one and joined at the end.
	-- Collecting all code points and calling mw.ustring.char(unpack(...)) once
	-- fails on long input with "too many results to unpack".
	local pieces = {}
	for S in mw.ustring.gcodepoint(text) do
		local SIndex = S - SBase
		if (0 <= SIndex) and (SIndex < SCount) then
			local L = LBase + math.floor(SIndex / NCount)
			local V = VBase + math.floor(math.fmod(SIndex, NCount) / TCount)
			local T = TBase + math.fmod(SIndex, TCount)
			if T ~= TBase then
				pieces[#pieces + 1] = mw.ustring.char(L, V, T)
				mw.log("S:"..diagjamo(S).." -> L:"..diagjamo(L)..", V:"..diagjamo(V)..", T:"..diagjamo(T))
			else
				-- T == TBase means the syllable has no trailing consonant.
				pieces[#pieces + 1] = mw.ustring.char(L, V)
				mw.log("S:"..diagjamo(S).." -> L:"..diagjamo(L)..", V:"..diagjamo(V))
			end
		else
			pieces[#pieces + 1] = mw.ustring.char(S)
			mw.log("S:"..S)
		end
	end
	return table.concat(pieces)
end
return m