Moduł:Brudnopis/Paweł Ziemian/xlat

Dokumentacja dla tego modułu może zostać utworzona pod nazwą Moduł:Brudnopis/Paweł Ziemian/xlat/opis
-- Table that collects this module's frame-taking functions
-- (m.artrl, m.ardec, m.arlst, m.trans5, m.trans6, m.trans2).
local m = {}

-- Romanisation table for conjoining Hangul jamo.
-- Keys are the UTF-8 byte sequences of single jamo written as decimal escapes
-- (the trailing comment on each line shows the character itself). Every value
-- carries the Unicode character name in `comment` plus a one-element rule list
-- whose only rule holds the Latin replacement in rule[1] - the same
-- { { replacement } } shape that the xlat functions in this file read.
-- The silent initial IEUNG maps to the empty string.
-- NOTE(review): nothing in the visible part of this file references Jamo.
local Jamo = {
		["\225\132\128"] = { comment = "HANGUL CHOSEONG KIYEOK", -- 'ᄀ'
			{ "G", },
		},
		["\225\132\129"] = { comment = "HANGUL CHOSEONG SSANGKIYEOK", -- 'ᄁ'
			{ "GG", },
		},
		["\225\132\130"] = { comment = "HANGUL CHOSEONG NIEUN", -- 'ᄂ'
			{ "N", },
		},
		["\225\132\131"] = { comment = "HANGUL CHOSEONG TIKEUT", -- 'ᄃ'
			{ "D", },
		},
		["\225\132\132"] = { comment = "HANGUL CHOSEONG SSANGTIKEUT", -- 'ᄄ'
			{ "DD", },
		},
		["\225\132\133"] = { comment = "HANGUL CHOSEONG RIEUL", -- 'ᄅ'
			{ "R", },
		},
		["\225\132\134"] = { comment = "HANGUL CHOSEONG MIEUM", -- 'ᄆ'
			{ "M", },
		},
		["\225\132\135"] = { comment = "HANGUL CHOSEONG PIEUP", -- 'ᄇ'
			{ "B", },
		},
		["\225\132\136"] = { comment = "HANGUL CHOSEONG SSANGPIEUP", -- 'ᄈ'
			{ "BB", },
		},
		["\225\132\137"] = { comment = "HANGUL CHOSEONG SIOS", -- 'ᄉ'
			{ "S", },
		},
		["\225\132\138"] = { comment = "HANGUL CHOSEONG SSANGSIOS", -- 'ᄊ'
			{ "SS", },
		},
		["\225\132\139"] = { comment = "HANGUL CHOSEONG IEUNG", -- 'ᄋ'
			{ "", },
		},
		["\225\132\140"] = { comment = "HANGUL CHOSEONG CIEUC", -- 'ᄌ'
			{ "J", },
		},
		["\225\132\141"] = { comment = "HANGUL CHOSEONG SSANGCIEUC", -- 'ᄍ'
			{ "JJ", },
		},
		["\225\132\142"] = { comment = "HANGUL CHOSEONG CHIEUCH", -- 'ᄎ'
			{ "C", },
		},
		["\225\132\143"] = { comment = "HANGUL CHOSEONG KHIEUKH", -- 'ᄏ'
			{ "K", },
		},
		["\225\132\144"] = { comment = "HANGUL CHOSEONG THIEUTH", -- 'ᄐ'
			{ "T", },
		},
		["\225\132\145"] = { comment = "HANGUL CHOSEONG PHIEUPH", -- 'ᄑ'
			{ "P", },
		},
		["\225\132\146"] = { comment = "HANGUL CHOSEONG HIEUH", -- 'ᄒ'
			{ "H", },
		},
		["\225\133\161"] = { comment = "HANGUL JUNGSEONG A", -- 'ᅡ'
			{ "A", },
		},
		["\225\133\162"] = { comment = "HANGUL JUNGSEONG AE", -- 'ᅢ'
			{ "AE", },
		},
		["\225\133\163"] = { comment = "HANGUL JUNGSEONG YA", -- 'ᅣ'
			{ "YA", },
		},
		["\225\133\164"] = { comment = "HANGUL JUNGSEONG YAE", -- 'ᅤ'
			{ "YAE", },
		},
		["\225\133\165"] = { comment = "HANGUL JUNGSEONG EO", -- 'ᅥ'
			{ "EO", },
		},
		["\225\133\166"] = { comment = "HANGUL JUNGSEONG E", -- 'ᅦ'
			{ "E", },
		},
		["\225\133\167"] = { comment = "HANGUL JUNGSEONG YEO", -- 'ᅧ'
			{ "YEO", },
		},
		["\225\133\168"] = { comment = "HANGUL JUNGSEONG YE", -- 'ᅨ'
			{ "YE", },
		},
		["\225\133\169"] = { comment = "HANGUL JUNGSEONG O", -- 'ᅩ'
			{ "O", },
		},
		["\225\133\170"] = { comment = "HANGUL JUNGSEONG WA", -- 'ᅪ'
			{ "WA", },
		},
		["\225\133\171"] = { comment = "HANGUL JUNGSEONG WAE", -- 'ᅫ'
			{ "WAE", },
		},
		["\225\133\172"] = { comment = "HANGUL JUNGSEONG OE", -- 'ᅬ'
			{ "OE", },
		},
		["\225\133\173"] = { comment = "HANGUL JUNGSEONG YO", -- 'ᅭ'
			{ "YO", },
		},
		["\225\133\174"] = { comment = "HANGUL JUNGSEONG U", -- 'ᅮ'
			{ "U", },
		},
		["\225\133\175"] = { comment = "HANGUL JUNGSEONG WEO", -- 'ᅯ'
			{ "WEO", },
		},
		["\225\133\176"] = { comment = "HANGUL JUNGSEONG WE", -- 'ᅰ'
			{ "WE", },
		},
		["\225\133\177"] = { comment = "HANGUL JUNGSEONG WI", -- 'ᅱ'
			{ "WI", },
		},
		["\225\133\178"] = { comment = "HANGUL JUNGSEONG YU", -- 'ᅲ'
			{ "YU", },
		},
		["\225\133\179"] = { comment = "HANGUL JUNGSEONG EU", -- 'ᅳ'
			{ "EU", },
		},
		["\225\133\180"] = { comment = "HANGUL JUNGSEONG YI", -- 'ᅴ'
			{ "YI", },
		},
		["\225\133\181"] = { comment = "HANGUL JUNGSEONG I", -- 'ᅵ'
			{ "I", },
		},
		["\225\134\168"] = { comment = "HANGUL JONGSEONG KIYEOK", -- 'ᆨ'
			{ "G", },
		},
		["\225\134\169"] = { comment = "HANGUL JONGSEONG SSANGKIYEOK", -- 'ᆩ'
			{ "GG", },
		},
		["\225\134\170"] = { comment = "HANGUL JONGSEONG KIYEOK-SIOS", -- 'ᆪ'
			{ "GS", },
		},
		["\225\134\171"] = { comment = "HANGUL JONGSEONG NIEUN", -- 'ᆫ'
			{ "N", },
		},
		["\225\134\172"] = { comment = "HANGUL JONGSEONG NIEUN-CIEUC", -- 'ᆬ'
			{ "NJ", },
		},
		["\225\134\173"] = { comment = "HANGUL JONGSEONG NIEUN-HIEUH", -- 'ᆭ'
			{ "NH", },
		},
		["\225\134\174"] = { comment = "HANGUL JONGSEONG TIKEUT", -- 'ᆮ'
			{ "D", },
		},
		["\225\134\175"] = { comment = "HANGUL JONGSEONG RIEUL", -- 'ᆯ'
			{ "L", },
		},
		["\225\134\176"] = { comment = "HANGUL JONGSEONG RIEUL-KIYEOK", -- 'ᆰ'
			{ "LG", },
		},
		["\225\134\177"] = { comment = "HANGUL JONGSEONG RIEUL-MIEUM", -- 'ᆱ'
			{ "LM", },
		},
		["\225\134\178"] = { comment = "HANGUL JONGSEONG RIEUL-PIEUP", -- 'ᆲ'
			{ "LB", },
		},
		["\225\134\179"] = { comment = "HANGUL JONGSEONG RIEUL-SIOS", -- 'ᆳ'
			{ "LS", },
		},
		["\225\134\180"] = { comment = "HANGUL JONGSEONG RIEUL-THIEUTH", -- 'ᆴ'
			{ "LT", },
		},
		["\225\134\181"] = { comment = "HANGUL JONGSEONG RIEUL-PHIEUPH", -- 'ᆵ'
			{ "LP", },
		},
		["\225\134\182"] = { comment = "HANGUL JONGSEONG RIEUL-HIEUH", -- 'ᆶ'
			{ "LH", },
		},
		["\225\134\183"] = { comment = "HANGUL JONGSEONG MIEUM", -- 'ᆷ'
			{ "M", },
		},
		["\225\134\184"] = { comment = "HANGUL JONGSEONG PIEUP", -- 'ᆸ'
			{ "B", },
		},
		["\225\134\185"] = { comment = "HANGUL JONGSEONG PIEUP-SIOS", -- 'ᆹ'
			{ "BS", },
		},
		["\225\134\186"] = { comment = "HANGUL JONGSEONG SIOS", -- 'ᆺ'
			{ "S", },
		},
		["\225\134\187"] = { comment = "HANGUL JONGSEONG SSANGSIOS", -- 'ᆻ'
			{ "SS", },
		},
		["\225\134\188"] = { comment = "HANGUL JONGSEONG IEUNG", -- 'ᆼ'
			{ "NG", },
		},
		["\225\134\189"] = { comment = "HANGUL JONGSEONG CIEUC", -- 'ᆽ'
			{ "J", },
		},
		["\225\134\190"] = { comment = "HANGUL JONGSEONG CHIEUCH", -- 'ᆾ'
			{ "C", },
		},
		["\225\134\191"] = { comment = "HANGUL JONGSEONG KHIEUKH", -- 'ᆿ'
			{ "K", },
		},
		["\225\135\128"] = { comment = "HANGUL JONGSEONG THIEUTH", -- 'ᇀ'
			{ "T", },
		},
		["\225\135\129"] = { comment = "HANGUL JONGSEONG PHIEUPH", -- 'ᇁ'
			{ "P", },
		},
		["\225\135\130"] = { comment = "HANGUL JONGSEONG HIEUH", -- 'ᇂ'
			{ "H", },
		},
}

-- Arabic script reference table, keyed by decimal Unicode code point.
-- entry[1] is the character name shown by m.ardec / m.arlst;
-- entry[2] is the Latin transliteration exported by m.artrl. Entries without
-- entry[2] are listed by name only and are skipped by m.artrl.
-- Character names follow the Unicode names ("Hah", "Three Dots", "Downwards").
local alfabet = {
		[1548] = { "Arabic Comma",
			"," },
		[1563] = { "Arabic Semicolon",
			";" },
		[1567] = { "Arabic Question Mark",
			"?" },
		[1569] = { "Arabic Letter Hamza",
			"’" },
		[1570] = { "Arabic Letter Alef With Madda Above",
			"’āā" },
		[1571] = { "Arabic Letter Alef With Hamza Above",
			"’ā" },
		[1572] = { "Arabic Letter Waw With Hamza Above",
			"’ū" },
		[1573] = { "Arabic Letter Alef With Hamza Below",
			"’ā" },
		[1574] = { "Arabic Letter Yeh With Hamza Above",
			"’ī" },
		[1575] = { "Arabic Letter Alef",
			"’" },
		[1576] = { "Arabic Letter Beh",
			"b" },
		[1577] = { "Arabic Letter Teh Marbuta",
			"ť" },
		[1578] = { "Arabic Letter Teh",
			"t" },
		[1579] = { "Arabic Letter Theh",
			"ṯ" },
		[1580] = { "Arabic Letter Jeem",
			"ǧ" },
		[1581] = { "Arabic Letter Hah",
			"ḥ" },
		[1582] = { "Arabic Letter Khah",
			"ẖ" },
		[1583] = { "Arabic Letter Dal",
			"d" },
		[1584] = { "Arabic Letter Thal",
			"ḏ" },
		[1585] = { "Arabic Letter Reh",
			"r" },
		[1586] = { "Arabic Letter Zain",
			"z" },
		[1587] = { "Arabic Letter Seen",
			"s" },
		[1588] = { "Arabic Letter Sheen",
			"š" },
		[1589] = { "Arabic Letter Sad",
			"ṣ" },
		[1590] = { "Arabic Letter Dad",
			"ḍ" },
		[1591] = { "Arabic Letter Tah",
			"ṭ" },
		[1592] = { "Arabic Letter Zah",
			"ẓ" },
		[1593] = { "Arabic Letter Ain",
			"`" },
		[1594] = { "Arabic Letter Ghain",
			"ḡ" },
		[1600] = { "Arabic Letter Tatweel (kashida)",
			"_" },
		[1601] = { "Arabic Letter Feh",
			"f" },
		[1602] = { "Arabic Letter Qaf",
			"q" },
		[1603] = { "Arabic Letter Kaf",
			"k" },
		[1604] = { "Arabic Letter Lam",
			"l" },
		[1605] = { "Arabic Letter Meem",
			"m" },
		[1606] = { "Arabic Letter Noon",
			"n" },
		[1607] = { "Arabic Letter Heh",
			"h" },
		[1608] = { "Arabic Letter Waw",
			"ū" },
		[1609] = { "Arabic Letter Alef Maksura (alef variant only for use in final position)",
			"ā" },
		[1610] = { "Arabic Letter Yeh",
			"ī" },
		[1611] = { "Arabic Fathatan (final only)",
			"an" },
		[1612] = { "Arabic Dammatan (final only)",
			"un" },
		[1613] = { "Arabic Kasratan (final only)",
			"in" },
		[1614] = { "Arabic Fatha",
			"a" },
		[1615] = { "Arabic Damma",
			"u" },
		[1616] = { "Arabic Kasra",
			"i" },
		[1617] = { "Arabic Shadda",
			"×" },
		[1618] = { "Arabic Sukun",
			"-" },
		[1632] = { "Arabic-Indic Digit Zero",
			"0" },
		[1633] = { "Arabic-Indic Digit One",
			"1" },
		[1634] = { "Arabic-Indic Digit Two",
			"2" },
		[1635] = { "Arabic-Indic Digit Three",
			"3" },
		[1636] = { "Arabic-Indic Digit Four",
			"4" },
		[1637] = { "Arabic-Indic Digit Five",
			"5" },
		[1638] = { "Arabic-Indic Digit Six",
			"6" },
		[1639] = { "Arabic-Indic Digit Seven",
			"7" },
		[1640] = { "Arabic-Indic Digit Eight",
			"8" },
		[1641] = { "Arabic-Indic Digit Nine",
			"9" },
		[1642] = { "Arabic Percent Sign",
			"%" },
		[1643] = { "Arabic Decimal Separator",
			"," },
		[1644] = { "Arabic Thousands Separator",
			" " },
		[1645] = { "Arabic Five Pointed Star",
			},
		[1648] = { "Arabic Letter Superscript Alef",
			},
		[1649] = { "Arabic Letter Alef Wasla",
			},
		[1650] = { "Arabic Letter Alef With Wavy Hamza Above",
			},
		[1651] = { "Arabic Letter Alef With Wavy Hamza Below",
			},
		[1652] = { "Arabic Letter High Hamza",
			},
		[1653] = { "Arabic Letter High Hamza Alef",
			},
		[1654] = { "Arabic Letter High Hamza Waw",
			},
		[1655] = { "Arabic Letter U With Hamza Above",
			},
		[1656] = { "Arabic Letter High Hamza Yeh",
			},
		[1657] = { "Arabic Letter Tteh",
			},
		[1658] = { "Arabic Letter Tteheh",
			},
		[1659] = { "Arabic Letter Beeh",
			},
		[1660] = { "Arabic Letter Teh With Ring",
			},
		[1661] = { "Arabic Letter Teh With Three Dots Above Downwards",
			},
		[1662] = { "Arabic Letter Peh",
			},
		[1663] = { "Arabic Letter Teheh",
			},
		[1664] = { "Arabic Letter Beheh",
			},
		[1665] = { "Arabic Letter Hah With Hamza Above",
			},
		[1666] = { "Arabic Letter Hah With Two Dots Vertical Above",
			},
		[1667] = { "Arabic Letter Nyeh",
			},
		[1668] = { "Arabic Letter Dyeh",
			},
		[1669] = { "Arabic Letter Hah With Three Dots Above",
			},
		[1670] = { "Arabic Letter Tcheh",
			},
		[1671] = { "Arabic Letter Tcheheh",
			},
		[1672] = { "Arabic Letter Ddal",
			},
		[1673] = { "Arabic Letter Dal With Ring",
			},
		[1674] = { "Arabic Letter Dal With Dot Below",
			},
		[1675] = { "Arabic Letter Dal With Dot Below And Small Tah",
			},
		[1676] = { "Arabic Letter Dahal",
			},
		[1677] = { "Arabic Letter Ddahal",
			},
		[1678] = { "Arabic Letter Dul",
			},
		[1679] = { "Arabic Letter Dal With Three Dots Above Downwards",
			},
		-- the table stops here; the next code point is U+0690 (decimal 1680)
	}

--- Dumps the transliterable entries of `alfabet` as Lua source text.
-- Walks code points 1500..1700 and, for every entry that has a Latin
-- transliteration (entry[2]), writes one rule-table item keyed by the
-- character itself, with the character name as a trailing comment.
-- @param frame unused
-- @return the generated text wrapped in <pre>...</pre>
function m.artrl(frame)
	local ITEM = "\t[\"%s\"] = { -- %s\n\t\t{ \"%s\", },\n\t},\n"
	local lines = { "translation = {\n" }

	for codepoint = 1500, 1700 do
		local entry = alfabet[codepoint]
		local latin = entry and entry[2]
		if latin then
			lines[#lines + 1] = ITEM:format(mw.ustring.char(codepoint), entry[1], latin)
		end
	end

	lines[#lines + 1] = "},"
	return "<pre>" .. table.concat(lines) .. "</pre>"
end

--- Renders a wikitable that decodes `text` code point by code point.
-- The text is taken from the first argument of the frame or, when that is
-- absent, from the first argument of the parent frame (if there is one).
-- Each row shows the character, three quoted variants of it (plain,
-- followed by "$", preceded by "^"), its code point in hex and decimal, and
-- the name from `alfabet` (empty when the code point is not listed there).
-- @param frame Scribunto frame object
-- @return wikitext of the table, or nothing when no text was supplied
function m.ardec(frame)
	local text = frame.args[1]
	if not text then
		-- getParent() may return nil (e.g. a frame without a caller), so guard it
		local parent = frame:getParent()
		text = parent and parent.args[1]
	end
	if not text or (#text == 0) then
		mw.log("Brak tekstu")
		return
	end

	local ROW = "\n|-\n| %s || \"%s\" || \"%s$\" || \"^%s\" || %x || %d || %s"
	local rows = { "\n{| class=wikitable\n! znak !! q !! a !! b !! hex !! dec !! name" }
	for codepoint in mw.ustring.gcodepoint(text) do
		local entry = alfabet[codepoint]
		local name = entry and entry[1] or ""
		local char = mw.ustring.char(codepoint)
		rows[#rows + 1] = string.format(ROW, char, char, char, char, codepoint, codepoint, name)
	end
	rows[#rows + 1] = "\n|}"
	return table.concat(rows, "")
end

--- Renders every entry of `alfabet` in the range 1500..1700 as a wikitable.
-- Columns: the character, three quoted variants of it (plain, followed by
-- "$", preceded by "^"), the code point in hex and decimal, and the name.
-- @param frame unused
-- @return wikitext of the table
function m.arlst(frame)
	local ROW = "\n|-\n| %s || \"%s\" || \"%s$\" || \"^%s\" || %x || %d || %s"
	local rows = { "\n{| class=wikitable\n! znak !! q !! a !! b !! hex !! dec !! name" }

	for codepoint = 1500, 1700 do
		local entry = alfabet[codepoint]
		if entry then
			local glyph = mw.ustring.char(codepoint)
			rows[#rows + 1] = ROW:format(glyph, glyph, glyph, glyph, codepoint, codepoint, entry[1])
		end
	end

	rows[#rows + 1] = "\n|}"
	return table.concat(rows)
end


--- Transliterates `text` cluster by cluster with a two-cluster look-ahead.
-- A cluster is one code point plus the combining code points that follow it
-- (as listed in the `combining` table of 'Moduł:IPA/data'). For each cluster
-- the longest leading part that is a key of `translation` is the "known"
-- part; the remaining combining marks are the "rest".
--
-- `translation[known]` is an ordered rule list. A rule applies when its
-- optional pattern `a` matches the preceding known part and its optional
-- pattern `b` matches the two following known parts concatenated; rule[1] of
-- the first applicable rule is the output. Unlisted clusters are copied.
--
-- NOTE(review): at the edges of the text the neighbour is the empty string,
-- so a restriction that needs a character (e.g. "[%W]$") does not hold there,
-- whereas xlat uses a NUL neighbour - confirm which behaviour is intended.
--
-- @param text input string; returned unchanged when nil or empty
-- @param translation rule table; `text` is returned unchanged when nil
-- @param preserveCombining when true, the "rest" of a cluster is appended
--        after its translation (unless the translation is empty)
-- @return the transliterated string
local function xlat3(text, translation, preserveCombining)
	if not text or not translation or (#text == 0) then
		return text
	end

	local cache = {}

	-- Returns (creating on demand) the cached rules and match results of a cluster.
	local function cacheEntry(cluster)
		local entry = cache[cluster]
		if not entry then
			entry = {
				rules = translation[cluster] or { { cluster } },
				class = {},
			}
			cache[cluster] = entry
		end
		return entry
	end

	-- Tells whether `cluster` matches `restriction`, remembering the answer.
	local function checkRule(cluster, restriction)
		local class = cacheEntry(cluster).class
		local verdict = class[restriction]
		if verdict == nil then
			verdict = mw.ustring.match(cluster, restriction) ~= nil
			class[restriction] = verdict
		end
		return verdict
	end

	-- Picks the output of the first rule whose context restrictions hold.
	local function translate(after, current, before)
		for _, rule in ipairs(cacheEntry(current).rules) do
			local a = not rule.a or checkRule(after, rule.a)
			local b = not rule.b or checkRule(before, rule.b)
			if a and b then
				return rule[1]
			end
		end

		return current
	end

	-- Splits a cluster (list of code points) into its longest known prefix
	-- and the remaining combining marks.
	local function split(codepoints)
		local count = #codepoints
		for j = count, 1, -1 do
			local part = mw.ustring.char(unpack(codepoints, 1, j))
			if translation[part] then
				local rest = ""
				if j < count then
					rest = mw.ustring.char(unpack(codepoints, j + 1, count))
				end
				return { known = part, rest = rest }
			end
		end
		-- nothing recognised: treat the whole cluster as the known part
		return { known = mw.ustring.char(unpack(codepoints)), rest = "" }
	end

	local combining = mw.loadData( 'Moduł:IPA/data' ).combining
	local output = {}

	-- Translates `cluster` in its context and appends the result; clusters
	-- with an empty known part are window padding and are skipped.
	local function emit(previous, cluster, following)
		if cluster.known == "" then
			return
		end
		local translated = translate(previous.known, cluster.known, following)
		table.insert(output, translated)
		if (#translated > 0) and preserveCombining then
			table.insert(output, cluster.rest)
		end
	end

	local after = { known = "", rest = "" }
	local current = { known = "", rest = "" }
	local before1 = { known = "", rest = "" }
	local before2 = { known = "", rest = "" }

	local nextCodepoint = mw.ustring.gcodepoint(text)
	local codepoint = nextCodepoint()
	while codepoint do
		-- gather one cluster: a code point and its trailing combining marks
		local cluster = { codepoint }
		codepoint = nextCodepoint()
		while codepoint and combining[codepoint] do
			table.insert(cluster, codepoint)
			codepoint = nextCodepoint()
		end

		-- shift the window and translate the cluster that now has both
		-- of its following neighbours available
		after, current, before1, before2 = current, before1, before2, split(cluster)
		emit(after, current, before1.known..before2.known)
	end

	-- the last two clusters never reached the `current` slot inside the loop;
	-- each is emitted with its own combining rest
	emit(current, before1, before2.known)
	emit(before1, before2, "")

	local result = table.concat(output, "")
	mw.log("return \""..result.."\"")
	return result
end

--- Transliterates `text` cluster by cluster with a one-cluster look-ahead.
-- A cluster is one code point plus the combining code points that follow it
-- (as listed in the `combining` table of 'Moduł:IPA/data'). For each cluster
-- the longest leading part that is a key of `translation` is the "known"
-- part; the remaining combining marks are the "rest".
--
-- `translation[known]` is an ordered rule list. A rule applies when its
-- optional pattern `a` matches the preceding known part and its optional
-- pattern `b` matches the following known part; rule[1] of the first
-- applicable rule is the output. Unlisted clusters are copied unchanged.
--
-- NOTE(review): at the edges of the text the neighbour is the empty string,
-- so a restriction that needs a character (e.g. "[%W]$") does not hold there,
-- whereas xlat uses a NUL neighbour - confirm which behaviour is intended.
--
-- @param text input string; returned unchanged when nil or empty
-- @param translation rule table; `text` is returned unchanged when nil
-- @param preserveCombining when true, the "rest" of a cluster is appended
--        after its translation (unless the translation is empty)
-- @return the transliterated string
local function xlat2(text, translation, preserveCombining)
	if not text or not translation or (#text == 0) then
		return text
	end

	local cache = {}

	-- Returns (creating on demand) the cached rules and match results of a cluster.
	local function cacheEntry(cluster)
		local entry = cache[cluster]
		if not entry then
			entry = {
				rules = translation[cluster] or { { cluster } },
				class = {},
			}
			cache[cluster] = entry
		end
		return entry
	end

	-- Tells whether `cluster` matches `restriction`, remembering the answer.
	local function checkRule(cluster, restriction)
		local class = cacheEntry(cluster).class
		local verdict = class[restriction]
		if verdict == nil then
			verdict = mw.ustring.match(cluster, restriction) ~= nil
			class[restriction] = verdict
		end
		return verdict
	end

	-- Picks the output of the first rule whose context restrictions hold.
	local function translate(after, current, before)
		for _, rule in ipairs(cacheEntry(current).rules) do
			local a = not rule.a or checkRule(after, rule.a)
			local b = not rule.b or checkRule(before, rule.b)
			if a and b then
				return rule[1]
			end
		end

		return current
	end

	-- Splits a cluster (list of code points) into its longest known prefix
	-- and the remaining combining marks.
	local function split(codepoints)
		local count = #codepoints
		for j = count, 1, -1 do
			local part = mw.ustring.char(unpack(codepoints, 1, j))
			if translation[part] then
				local rest = ""
				if j < count then
					rest = mw.ustring.char(unpack(codepoints, j + 1, count))
				end
				return { known = part, rest = rest }
			end
		end
		-- nothing recognised: treat the whole cluster as the known part
		return { known = mw.ustring.char(unpack(codepoints)), rest = "" }
	end

	local combining = mw.loadData( 'Moduł:IPA/data' ).combining
	local output = {}
	local after = { known = "", rest = "" }
	local current = { known = "", rest = "" }
	local before = { known = "", rest = "" }

	-- Translates the cluster in the `current` slot and appends the result.
	local function emitCurrent()
		local translated = translate(after.known, current.known, before.known)
		table.insert(output, translated)
		if (#translated > 0) and preserveCombining then
			table.insert(output, current.rest)
		end
	end

	local nextCodepoint = mw.ustring.gcodepoint(text)
	local codepoint = nextCodepoint()
	while codepoint do
		-- gather one cluster: a code point and its trailing combining marks
		local cluster = { codepoint }
		codepoint = nextCodepoint()
		while codepoint and combining[codepoint] do
			table.insert(cluster, codepoint)
			codepoint = nextCodepoint()
		end

		-- shift the window; `current` is empty only before the second cluster
		after, current, before = current, before, split(cluster)
		if current.known ~= "" then
			emitCurrent()
		end
	end

	-- the last cluster is still waiting in `before`: shift once more
	after, current, before = current, before, { known = "", rest = "" }
	emitCurrent()

	local result = table.concat(output, "")
	mw.log("return \""..result.."\"")
	return result
end

--- Transliterates `text` one code point at a time using context rules.
-- `translation[char]` is an ordered rule list. A rule applies when its
-- optional pattern `a` matches the preceding base character and its optional
-- pattern `b` matches the following base character; rule[1] of the first
-- applicable rule is the output. Characters without an entry are copied.
-- Code points listed in the `combining` table of 'Moduł:IPA/data' are not
-- translated and are invisible to the context; they are dropped, or, with
-- `preserveCombining`, re-emitted right after the output of the base
-- character they follow.
-- A missing neighbour (start or end of text) is represented by code point 0.
--
-- @param text input string; returned unchanged when nil or empty
-- @param translation rule table; `text` is returned unchanged when nil
-- @param preserveCombining keep combining code points in the output
-- @return the transliterated string
local function xlat(text, translation, preserveCombining)
	if not text or not translation or (#text == 0) then
		return text
	end

	local cache = {}

	-- Returns (creating on demand) the cached character, rules and match
	-- results of a code point.
	local function cacheEntry(codepoint)
		local entry = cache[codepoint]
		if not entry then
			local char = mw.ustring.char(codepoint)
			entry = {
				ch = char,
				rules = translation[char] or { { char } },
				class = {},
			}
			cache[codepoint] = entry
		end
		return entry
	end

	-- Tells whether the character matches `restriction`; the answer is stored
	-- as a boolean so that failed matches are cached as well.
	local function checkRule(codepoint, restriction)
		local entry = cacheEntry(codepoint)
		local verdict = entry.class[restriction]
		if verdict == nil then
			verdict = mw.ustring.match(entry.ch, restriction) ~= nil
			entry.class[restriction] = verdict
		end
		return verdict
	end

	local combining = mw.loadData( 'Moduł:IPA/data' ).combining
	local output = {}
	local after, current, before = 0, 0, 0

	-- Appends the translation of `current` given its neighbours.
	local function translate()
		local entry = cacheEntry(current)
		for _, rule in ipairs(entry.rules) do
			local a = not rule.a or checkRule(after, rule.a)
			local b = not rule.b or checkRule(before, rule.b)
			if a and b then
				table.insert(output, rule[1])
				return
			end
		end

		mw.log(entry.ch..": unknown rule, "..current)
		table.insert(output, entry.ch)
	end

	local skipped = {}

	-- Appends the combining marks collected so far and forgets them.
	local function flushSkipped()
		if #skipped > 0 then
			table.insert(output, mw.ustring.char( unpack(skipped) ))
			skipped = {}
		end
	end

	-- true once a base character is waiting in `before`; combining marks at
	-- the very start of the text must not trigger a translation of the
	-- placeholder code point 0
	local pending = false
	for cp in mw.ustring.gcodepoint(text) do
		if combining[cp] then
			if preserveCombining then
				table.insert(skipped, cp)
			end
		else
			after, current, before = current, before, cp
			if pending then
				translate()
			end
			-- the marks belong to the character that was just translated
			flushSkipped()
			pending = true
		end
	end

	-- translate the last base character first, then emit its trailing marks
	if pending then
		after, current, before = current, before, 0
		translate()
	end
	flushSkipped()

	local result = table.concat(output, "")
	mw.log("return \""..result.."\"")
	return result
end

-- Context-sensitive transliteration rules, keyed by language code and then by
-- source character; passed by m.trans5 / m.trans6 to xlat / xlat2.
-- Each character maps to an ordered rule list in which the first rule whose
-- restrictions hold wins:
--   rule[1]  replacement text
--   rule.a   optional pattern tested against the preceding character
--   rule.b   optional pattern tested against the following character
-- A rule without `a` and `b` always applies, so it acts as the default.
local trans5 = {
	ru = {
		["А"] = {
			{ "A", },
		},
		["а"] = {
			{ "a", },
		},
	
		["Б"] = {
			{ "B", },
		},
		["б"] = {
			{ "b", },
		},
		["В"] = {
			{ "W", },
		},
		["в"] = {
			{ "w", },
		},
		["Г"] = {
			{ "G", },
		},
		["г"] = {
			{ "g", },
		},
		["Д"] = {
			{ "D", },
		},
		["д"] = {
			{ "d", },
		},
		
		["Е"] = {
			{ "Je", a = "[%W]$", b = "^[%l]", },
			{ "JE", a = "[%W]$", b = "^[%u%W]", },
			{ "JE", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "E", a = "[ЖжЛлЦцЧчШшЩщ]$", },
			{ "IE", },
		},
		["е"] = {
			{ "jE", a = "[%W]$", b = "^[%u]", },
			{ "je", a = "[%W]$", b = "^[%l%W]", },
			{ "je", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "e", a = "[ЖжЛлЦцЧчШшЩщ]$", },
			{ "ie", },
		},
			
		["Ё"] = {
			{ "Jo", a = "[%W]$", b = "^[%l]", },
			{ "JO", a = "[%W]$", b = "^[%u%W]", },
			{ "JO", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "O", a = "[ЖжЛлЦцЧчШшЩщ]$", },
			{ "IO", },
		},
		["ё"] = {
			{ "jO", a = "[%W]$", b = "^[%u]", },
			{ "jo", a = "[%W]$", b = "^[%l%W]", },
			{ "jo", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "o", a = "[ЖжЛлЦцЧчШшЩщ]$", },
			{ "io", },
		},
		
		["Ж"] = {
			{ "Ż", },
		},
		["ж"] = {
			{ "ż", },
		},
		["З"] = {
			{ "Z", },
		},
		["з"] = {
			{ "z", },
		},
		
		["И"] = {
			{ "JI", a = "[Ь]$", },
			{ "jI", a = "[ь]$", },
			{ "Y", a = "[ЖжЦцШш]$", },
			{ "I", },
		},
		["и"] = {
			{ "Ji", a = "[Ь]$", },
			{ "ji", a = "[ь]$", },
			{ "y", a = "[ЖжЦцШш]$", },
			{ "i", },
		},
		
		["Й"] = {
			{ "J", },
		},
		["й"] = {
			{ "j", },
		},
		["К"] = {
			{ "K", },
		},
		["к"] = {
			{ "k", },
		},
		
		["Л"] = {
			{ "L", b = "^[ЕеЁёИиЬьЮюЯя]", },
			{ "Ł", },
		},
		["л"] = {
			{ "l", b = "^[ЕеЁёИиЬьЮюЯя]", },
			{ "ł", },
		},
		
		["М"] = {
			{ "M", },
		},
		["м"] = {
			{ "m", },
		},
		["Н"] = {
			{ "N", },
		},
		["н"] = {
			{ "n", },
		},
		["О"] = {
			{ "O", },
		},
		["о"] = {
			{ "o", },
		},
		["П"] = {
			{ "P", },
		},
		["п"] = {
			{ "p", },
		},
		["Р"] = {
			{ "R", },
		},
		["р"] = {
			{ "r", },
		},
		["С"] = {
			{ "S", },
		},
		["с"] = {
			{ "s", },
		},
		["Т"] = {
			{ "T", },
		},
		["т"] = {
			{ "t", },
		},
		["У"] = {
			{ "U", },
		},
		["у"] = {
			{ "u", },
		},
		["Ф"] = {
			{ "F", },
		},
		["ф"] = {
			{ "f", },
		},
		
		["Х"] = {
			{  "Ch", a = "[%W]$", b = "^[%l]", },
			{ "CH", },
		},
		["х"] = {
			{ "cH", a = "[%W]$", b = "^[%u]", },
			{ "ch", },
		},
		
		["Ц"] = {
			{ "C", },
		},
		["ц"] = {
			{ "c", },
		},
		
		["Ч"] = {
			{ "Cz", a = "[%W]$", b = "^[%l]", },
			{ "CZ", },
		},
		["ч"] = {
			{ "cZ", a = "[%W]$", b = "^[%u]", },
			{ "cz", },
		},
		
		["Ш"] = {
			{ "Sz", a = "[%W]$", b = "^[%l]", },
			{ "SZ", },
		},
		["ш"] = {
			{ "sZ", a = "[%W]$", b = "^[%u]", },
			{ "sz", },
		},
	
		["Щ"] = {
			{ "Szcz", a = "[%W]$", b = "^[%l]", },
			{ "SZCZ", },
		},
		["щ"] = {
			{ "sZCZ", a = "[%W]$", b = "^[%u]", },
			{ "szcz", },
		},
		
		["Ъ"] = {
			{ "", },
		},
		["ъ"] = {
			{ "", },
		},
		["Ы"] = {
			{ "Y", },
		},
		["ы"] = {
			{ "y", },
		},
		
		["Ь"] = {
			{ "", a = "[ЖжЛлЧчШшЩщ]$", },
			{ "", b = "^[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]", },
			{ "´", },
		},
		["ь"] = {
			{ "", a = "[ЖжЛлЧчШшЩщ]$", },
			{ "", b = "^[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]", },
			{ "´", },
		},
		
		["Э"] = {
			{ "E", },
		},
		["э"] = {
			{ "e", },
		},
		
		["Ю"] = {
			{ "Ju", a = "[%W]$", b = "^[%l]", },
			{ "JU", a = "[%W]$", b = "^[%u%W]", },
			{ "JU", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "U", a = "[Лл]$", },
			{ "IU", },
		},
		["ю"] = {
			{ "jU", a = "[%W]$", b = "^[%u]", },
			{ "ju", a = "[%W]$", b = "^[%l%W]", },
			{ "ju", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "u", a = "[Лл]$", },
			{ "iu", },
		},
	
		["Я"] = {
			{ "Ja", a = "[%W]$", b = "^[%l]", },
			{ "JA", a = "[%W]$", b = "^[%u%W]", },
			{ "JA", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "A", a = "[Лл]$", },
			{ "IA", },
		},
		["я"] = {
			{ "jA", a = "[%W]$", b = "^[%u]", },
			{ "ja", a = "[%W]$", b = "^[%l%W]", },
			{ "ja", a = "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]$", },
			{ "a", a = "[Лл]$", },
			{ "ia", },
		},
		
		["’"] = {
			{ "’", },
		},
	},
}

--- Transliterates the first frame argument with the `trans5` rules via xlat.
-- Arguments read from frame.args: [1] the text, `lang` the key into `trans5`,
-- `accents` - combining marks are kept only when it equals "preserve".
-- @param frame Scribunto frame object
-- @return the transliterated text; the text itself (possibly nil) when it is
--         missing or empty or when there is no rule set for `lang`
function m.trans5(frame)
	local args = frame.args
	local source = args[1]
	local rules = trans5[args.lang]
	local keepAccents = (args.accents == "preserve")

	if source and rules and #source ~= 0 then
		return xlat(source, rules, keepAccents)
	end
	return source
end

--- Transliterates the first frame argument with the `trans5` rules via xlat2.
-- Arguments read from frame.args: [1] the text, `lang` the key into `trans5`,
-- `accents` - combining marks are kept only when it equals "preserve".
-- @param frame Scribunto frame object
-- @return the transliterated text; the text itself (possibly nil) when it is
--         missing or empty or when there is no rule set for `lang`
function m.trans6(frame)
	local args = frame.args
	local source = args[1]
	local rules = trans5[args.lang]
	local keepAccents = (args.accents == "preserve")

	if source and rules and #source ~= 0 then
		return xlat2(source, rules, keepAccents)
	end
	return source
end


-- Rule set consumed by m.trans2, keyed by language code and then by source
-- character. Array entries are { pattern, replacement } pairs; the pattern is
-- matched against the three-character fragment previous..current..next that
-- m.trans2 builds, and the first matching pair wins. `d` is the replacement
-- used when the character has no pairs or none of them matches.
local trans2 = {
	ru = {
		["А"] = { d = "A", }, ["а"] = { d = "a", },
		["Б"] = { d = "B", }, ["б"] = { d = "b", },
		["В"] = { d = "W", }, ["в"] = { d = "w", },
		["Г"] = { d = "G", }, ["г"] = { d = "g", },
		["Д"] = { d = "D", }, ["д"] = { d = "d", },
		
		["Е"] = {
			{ "[%W]Е[%l]", "Je" },
			{ "[%W]Е[%u%W]", "JE" },
			{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]Е.", "JE" },
			{ "[ЖжЛлЦцЧчШшЩщ]Е.", "E" },
			d = "IE",
		},
		["е"] = {
			{ "[%W]е[%u]", "jE" },
			{ "[%W]е[%l%W]", "je" },
			{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]е.", "je" },
			{ "[ЖжЛлЦцЧчШшЩщ]е.", "e" },
			d = "ie",
		},
			
		["Ё"] = {
			{ "[%W]Ё[%l]", "Jo" },
			{ "[%W]Ё[%u%W]", "JO" },
			{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]Ё.", "JO" },
			{ "[ЖжЛлЦцЧчШшЩщ]Ё.", "O" },
			d = "IO",
		},
		["ё"] = {
			{ "[%W]ё[%u]", "jO" },
			{ "[%W]ё[%l%W]", "jo" },
			{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]ё.", "jo" },
			{ "[ЖжЛлЦцЧчШшЩщ]ё.", "o" },
			d = "io",
		},
		
		["Ж"] = { d = "Ż", }, ["ж"] = { d = "ż", },
		["З"] = { d = "Z", }, ["з"] = { d = "z", },
		
		["И"] = {
			{ "ЬИ.", "JI" },
			{ "ьИ.", "jI" },
			{ "[ЖжЦцШш]И.", "Y" },
			d = "I",
		},
		["и"] = {
			{ "Ьи.", "Ji" },
			{ "ьи.", "ji" },
			{ "[ЖжЦцШш]и.", "y" },
			d = "i",
		},
		
		["Й"] = { d = "J", }, ["й"] = { d = "j", },
		["К"] = { d = "K", }, ["к"] = { d = "k", },
		
		["Л"] = {
			{ ".Л[ЕеЁёИиЬьЮюЯя]", "L" },
			d = "Ł",
		},
		["л"] = {
			{ ".л[ЕеЁёИиЬьЮюЯя]", "l" },
			d = "ł",
		},
		
		["М"] = { d = "M", }, ["м"] = { d = "m", },
		["Н"] = { d = "N", }, ["н"] = { d = "n", },
		["О"] = { d = "O", }, ["о"] = { d = "o", },
		["П"] = { d = "P", }, ["п"] = { d = "p", },
		["Р"] = { d = "R", }, ["р"] = { d = "r", },
		["С"] = { d = "S", }, ["с"] = { d = "s", },
		["Т"] = { d = "T", }, ["т"] = { d = "t", },
		["У"] = { d = "U", }, ["у"] = { d = "u", },
		["Ф"] = { d = "F", }, ["ф"] = { d = "f", },
		
		["Х"] = {
			{ "[%W]Х[%l]", "Ch" },
			d = "CH",
		},
		["х"] = {
			{ "[%W]х[%u]", "cH" },
			d = "ch",
		},
		
		["Ц"] = { d = "C", }, ["ц"] = { d = "c", },
		
		["Ч"] = {
			{ "[%W]Ч[%l]", "Cz" },
			d = "CZ",
		},
		["ч"] = {
			{ "[%W]ч[%u]", "cZ" },
			d = "cz",
		},
		
		["Ш"] = {
			{ "[%W]Ш[%l]", "Sz" },
			d = "SZ",
		},
		["ш"] = {
			{ "[%W]ш[%u]", "sZ" },
			d = "sz",
		},
	
		["Щ"] = {
			{ "[%W]Щ[%l]", "Szcz" },
			d = "SZCZ",
		},
		["щ"] = {
			{ "[%W]щ[%u]", "sZCZ" },
			d = "szcz",
		},
		
		["Ъ"] = { d = "", }, ["ъ"] = { d = "", },
		["Ы"] = { d = "Y", }, ["ы"] = { d = "y", },
		
		["Ь"] = {
			{ "[ЖжЛлЧчШшЩщ]Ь.", "" },
--			{ "[ЖжЛлЧчШшЩщ]Ь.", "" },
			{ ".Ь[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]", "" },
			d = "´",
		},
		["ь"] = {
			{ "[ЖжЛлЧчШшЩщ]ь.", "" },
--			{ "[ЖжЛлЧчШшЩщ]ь.", "" },
			{ ".ь[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]", "" },
			d = "´",
		},
		
		["Э"] = { d = "E", }, ["э"] = { d = "e", },
		
		["Ю"] = {
			{ "[%W]Ю[%l]", "Ju" },
			{ "[%W]Ю[%u%W]", "JU" },
			{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]Ю.", "JU" },
			{ "[Лл]Ю.", "U" },
			d = "IU",
		},
		["ю"] = {
			{ "[%W]ю[%u]", "jU" },
			{ "[%W]ю[%l%W]", "ju" },
			{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]ю.", "ju" },
			{ "[Лл]ю.", "u" },
			d = "iu",
		},
	
		["Я"] = {
			{ "[%W]Я[%l]", "Ja" },
			{ "[%W]Я[%u%W]", "JA" },
			{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]Я.", "JA" },
			{ "[Лл]Я.", "A" },
			d = "IA",
		},
		["я"] = {
			{ "[%W]я[%u]", "jA" },
			{ "[%W]я[%l%W]", "ja" },
			{ "[АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя]я.", "ja" },
			{ "[Лл]я.", "a" },
			d = "ia",
		},
		
		["’"] = { d = "’", },
	},
}

--- Transliterates the first frame argument with the `trans2` rule set.
-- Arguments read from frame.args: [1] the text, `lang` the key into `trans2`,
-- `accents` - combining marks are kept only when it equals "preserve".
-- Every base character is matched, together with its previous and next base
-- character, against the { pattern, replacement } pairs of its rule entry;
-- when none matches, the entry's default `d` is used. Characters without an
-- entry are copied and afterwards count as the placeholder for their right
-- neighbour's context. Code points listed in the `combining` table of
-- 'Moduł:IPA/data' are skipped and, when preserved, re-emitted after the
-- output of the base character they follow.
-- @param frame Scribunto frame object
-- @return the transliterated text; the text itself (possibly nil) when it is
--         missing or empty or when there is no rule set for `lang`
function m.trans2(frame)
	local text = frame.args[1]
	local trans = trans2[frame.args.lang]
	local preserveCombing = frame.args.accents == "preserve"
	if not text or not trans or (#text == 0) then
		return text
	end

	local combining = mw.loadData( 'Moduł:IPA/data' ).combining
	local result = {}
	-- placeholder code point standing for "no / unknown neighbour"
	-- (presumably U+FFFD, taken from the literal below)
	local unknown = mw.ustring.codepoint("�", 1)
	local after, current, before = unknown, unknown, unknown

	-- Appends the translation of `current` given `after` (the previous
	-- character) and `before` (the next character).
	local function translate()
		local ch = mw.ustring.char(current)
		local rules = trans[ch]
		if not rules then
			mw.log(ch..": unknown, "..current)
			table.insert(result, ch)
			-- hide the untranslated character from the next context
			current = unknown
			return
		end

		if #rules > 0 then
			mw.log(after..", "..current..", "..before)
			local fragment = mw.ustring.char(after, current, before)
			for _, v in ipairs(rules) do
				if mw.ustring.match(fragment, v[1]) then
					mw.log(ch..": \""..fragment.."\" match("..v[1]..") -> "..v[2])
					table.insert(result, v[2])
					return
				end
				mw.log(ch..": \""..fragment.."\" does not match("..v[1]..") -> "..v[2])
			end
		end

		mw.log(ch..": default -> "..rules.d)
		table.insert(result, rules.d)
	end

	local skipped = {}

	-- Re-emits the collected combining marks as characters and forgets them.
	local function restoreSkipped()
		for _, v in ipairs(skipped) do
			mw.log("restore combing character  "..v)
			table.insert(result, mw.ustring.char(v))
		end
		skipped = {}
	end

	local iterator = mw.ustring.gcodepoint(text)
	-- the first code point only primes the look-ahead slot
	before = iterator()
	for cp in iterator do
		if combining[cp] then
			if preserveCombing then
				mw.log("preserve combing character  "..cp)
				table.insert(skipped, cp)
				mw.log("preserved count "..#skipped)
			else
				mw.log("ignore combining character   "..cp)
			end
		else
			after = current
			current = before
			before = cp
			translate()
			mw.log("preserved count "..#skipped)
			restoreSkipped()
		end
	end

	-- the last base character is still waiting in `before`
	after = current
	current = before
	before = unknown
	translate()
	-- trailing marks must be emitted as characters, not as code point numbers
	restoreSkipped()

	return table.concat(result, "")
end

local transliterations = {
	ru = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Ё", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Ŝ", ["Ъ"] = "″", ["Ы"] = "Y", ["Ь"] = "′", ["Э"] = "È", ["Ю"] = "Û", ["Я"] = "Â", ["’"] = "’", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ё"] = "ё", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "ŝ", ["ъ"] = "″", ["ы"] = "y", ["ь"] = "′", ["э"] = "è", ["ю"] = "û", ["я"] = "â", },
	uk = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Ґ"] = "G", ["Д"] = "D", ["Е"] = "E", ["Є"] = "Ê", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["І"] = "Ì", ["Ї"] = "Ї", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Ŝ", ["Ь"] = "′", ["Ю"] = "Û", ["Я"] = "Â", ["’"] = "’", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["ґ"] = "g", ["д"] = "d", ["е"] = "e", ["є"] = "ê", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["і"] = "ì", ["ї"] = "ї", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "ŝ", ["ь"] = "′", ["ю"] = "û", ["я"] = "â", },
	be = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Ё", ["Ж"] = "Ž", ["З"] = "Z", ["I"] = "Ì", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ў"] = "Ŭ", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Ы"] = "Y", ["Ь"] = "′", ["Э"] = "È", ["Ю"] = "Û", ["Я"] = "Â", ["’"] = "’", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ё"] = "ё", ["ж"] = "ž", ["з"] = "z", ["i"] = "ì", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ў"] = "ŭ", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["ы"] = "y", ["ь"] = " ", ["э"] = "è", ["ю"] = "û", ["я"] = "â", },
	bg = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Ŝ", ["Ъ"] = "″", ["Ь"] = "´", ["Ю"] = "Û", ["Я"] = "Â", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "i", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "ŝ", ["ъ"] = "″", ["ь"] = "´", ["ю"] = "û", ["я"] = "â", },
	sr = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Ђ"] = "Đ", ["Е"] = "E", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Ј"] = "J", ["К"] = "K", ["Л"] = "L", ["Љ"] = "L", ["М"] = "M", ["Н"] = "N", ["Њ"] = "N̂", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["Ћ"] = "Ć", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Џ"] = "D̂", ["Ш"] = "Š", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["ђ"] = "đ", ["е"] = "e", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["ј"] = "j", ["к"] = "k", ["л"] = "l", ["љ"] = "l", ["м"] = "m", ["н"] = "n", ["њ"] = "n̂", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["ћ"] = "ć", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["џ"] = "d̂ ",["ш"] = "š", },
	mk = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Ѓ"] = "Ǵ", ["Д"] = "D", ["Е"] = "E", ["Ж"] = "Ž", ["З"] = "Z", ["S"] = "Ẑ", ["И"] = "I", ["Ј"] = "J̌", ["К"] = "K", ["Л"] = "L", ["Љ"] = "L̂", ["М"] = "M", ["Н"] = "N", ["Њ"] = "N̂", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["Ќ"] = "Ḱ", ["У"] = "U", ["Ф"] = "F", ["Х"] = "H", ["Ц"] = "C", ["Ч"] = "Č", ["Џ"] = "D̂", ["Ш"] = "Š", ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["ѓ"] = "ǵ", ["д"] = "d", ["е"] = "e", ["ж"] = "ž", ["з"] = "z", ["s"] = "ẑ", ["и"] = "i", ["ј"] = "ǰ", ["к"] = "k", ["л"] = "l", ["љ"] = "l̂", ["м"] = "m", ["н"] = "n", ["њ"] = "n̂", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["ќ"] = "ḱ", ["у"] = "u", ["ф"] = "f", ["х"] = "h", ["ц"] = "c", ["ч"] = "č", ["џ"] = "d̂", ["ш"] = "š", },
	el = {
		-- ISO 843:1997 TL
		["Α"] = "A", ["α"] = "a",
		["Β"] = "V", ["β"] = "v", ["ϐ"] = "v",
		["Γ"] = "G", ["γ"] = "g",
		["Δ"] = "D", ["δ"] = "d",
		["Ε"] = "E", ["ε"] = "e", ["ϵ"] = "e", ["϶"] = "e",
		["Ζ"] = "Z", ["ζ"] = "z",
		["Η"] = "Ī", ["η"] = "ī",
		["Θ"] = "Th", ["θ"] = "th", ["ϴ"] = "Th", ["ϑ"] = "th",
		["Ι"] = "I", ["ι"] = "i",
		["Κ"] = "K", ["κ"] = "k", ["ϰ"] = "k", ["ϗ"] = "k",
		["Λ"] = "L", ["λ"] = "l",
		["Μ"] = "M", ["μ"] = "m",
		["Ν"] = "N", ["ν"] = "n",
		["Ξ"] = "X", ["ξ"] = "x",
		["Ο"] = "O", ["ο"] = "o",
		["Π"] = "P", ["π"] = "p", ["ϖ"] = "p",
		["Ρ"] = "R", ["ρ"] = "r", ["ϱ"] = "r",
		["Σ"] = "S", ["σ"] = "s", ["ϲ"] = "s", ["Ϛ"] = "S", ["ς"] = "s",
		["Τ"] = "T", ["τ"] = "t",
		["Y"] = "Y", ["υ"] = "y", ["ϒ"] = "Y",
		["Φ"] = "F", ["φ"] = "f", ["ϕ"] = "f",
		["Χ"] = "Ch", ["χ"] = "ch",
		["Ψ"] = "Ps", ["ψ"] = "ps",
		["Ω"] = "Ō", ["ω"] = "ō",
	},
	
	
}

--- Transliterates text with one of the built-in per-character tables.
-- The text is read from `frame.args[1]` and the language code from
-- `frame.args.lang`; the code selects an entry of `transliterations`.
-- @param frame invocation frame providing `args`
-- @return the transliterated text in NFC form; when the text or the table for
--         the requested language is missing, the first argument is returned
--         unchanged (which may be nil)
function m.Transliteracja(frame)
	local args = frame.args
	local input = args[1]
	local map = transliterations[args.lang]
	if input and map then
		-- Work on the decomposed form so every code point is looked up on its
		-- own, then recompose the outcome.
		local decomposed = mw.ustring.toNFD(input)
		local replaced = mw.ustring.gsub(decomposed, ".", map)
		return mw.ustring.toNFC(replaced)
	end
	return input
end

-- Built-in transcription recipes keyed by language code (used by
-- m.Transkrypcja).  A recipe consists of:
--   * an array part: ordered { pattern, replacement } pairs that are applied
--     one after another with mw.ustring.gsub, so every rule sees the output
--     of the rules before it and the order is significant;
--   * `simple`: a per-character table applied last to the letters that the
--     rules left untouched.
-- The replacements (Ш -> SZ, Ж -> Ż, Л -> Ł ...) look like a Polish
-- transcription of Russian — TODO confirm the intended standard.
local transcriptions = {
	ru = {
		simple = {
			["А"] = "A", ["а"] = "a",
			["Б"] = "B", ["б"] = "b",
			["В"] = "W", ["в"] = "w",
			["Г"] = "G", ["г"] = "g",
			["Д"] = "D", ["д"] = "d",
			["Е"] = "IE", ["е"] = "ie",
			["Ё"] = "IO", ["ё"] = "io",
			["Ж"] = "Ż", ["ж"] = "ż",
			["З"] = "Z", ["з"] = "z",
			["И"] = "I", ["и"] = "i",
			["Й"] = "J", ["й"] = "j",
			["К"] = "K", ["к"] = "k",
			["Л"] = "Ł", ["л"] = "ł",
			["М"] = "M", ["м"] = "m",
			["Н"] = "N", ["н"] = "n",
			["О"] = "O", ["о"] = "o",
			["П"] = "P", ["п"] = "p",
			["Р"] = "R", ["р"] = "r",
			["С"] = "S", ["с"] = "s",
			["Т"] = "T", ["т"] = "t",
			["У"] = "U", ["у"] = "u",
			["Ф"] = "F", ["ф"] = "f",
			["Х"] = "CH", ["х"] = "ch",
			["Ц"] = "C", ["ц"] = "c",
			["Ч"] = "CZ", ["ч"] = "cz",
			["Ш"] = "SZ", ["ш"] = "sz",
			["Щ"] = "SZCZ", ["щ"] = "szcz",
			["Ъ"] = "", ["ъ"] = "",
			["Ы"] = "Y", ["ы"] = "y",
			["Ь"] = "´", ["ь"] = "´",
			["Э"] = "E", ["э"] = "e",
			["Ю"] = "IU", ["ю"] = "iu",
			["Я"] = "IA", ["я"] = "ia",
		},

		-- Л followed by И/Е/Ё/Ю/Я or a soft sign becomes a Latin L/l (the
		-- `simple` table would otherwise give Ł/ł).  The vowel is converted
		-- together with it; the soft sign is kept and removed by a later rule.
		{ "ЛИ", "LI" },
		{ "Ли", "Li" },
		{ "ЛЕ", "LE" },
		{ "Ле", "Le" },
		{ "ЛЁ", "LO" },
		{ "Лё", "Lo" },
		{ "ЛЮ", "LU" },
		{ "Лю", "Lu" },
		{ "ЛЯ", "LA" },
		{ "Ля", "La" },
		{ "ЛЬ", "LЬ" },
		{ "Ль", "Lь" },
		{ "лИ", "lI" },
		{ "ли", "li" },
		{ "лЕ", "lE" },
		{ "ле", "le" },
		{ "лЁ", "lO" },
		{ "лё", "lo" },
		{ "лЮ", "lU" },
		{ "лю", "lu" },
		{ "лЯ", "lA" },
		{ "ля", "la" },
		{ "лЬ", "lЬ" },
		{ "ль", "lь" },

		-- Word-initial letters (Е here; Ё, Ю, Я, Х, Ч, Ш, Щ further down).
		-- A frontier %f[set] matches only where the PRECEDING character is not
		-- in the set.  Directly after an upper-case letter a test such as
		-- %f[%u%W] can therefore never succeed (and likewise %f[%l%W] after a
		-- lower-case one).  Each group handles the "next letter has the other
		-- case" situation first and then uses the bare word-initial pattern as
		-- the fallback for everything else (same case, non-letter, end of
		-- text).
		-- NOTE(review): a vowel already converted to Latin by an earlier rule
		-- is not recognised by the Cyrillic vowel sets below (e.g. the second
		-- letter of a word-initial «ею»); this is not addressed here.
		{ "%f[%w]Е%f[%l]", "Je" },
		{ "%f[%w]Е", "JE" },
		{ "%f[%w]е%f[%u]", "jE" },
		{ "%f[%w]е", "je" },
		-- After a soft sign (which is dropped) and after a vowel or hard sign.
		{ "([Ьь])Е", "JE" },
		{ "([Ьь])е", "je" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя])Е", "%1JE" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЬьЭэЮюЯя])е", "%1je" },
		-- After Ж/Ц/Ч/Ш/Щ the vowel is written without the leading I/J.
		{ "([ЖжЦцЧчШшЩщ])Е", "%1E" },
		{ "([ЖжЦцЧчШшЩщ])е", "%1e" },
		-- Disabled: the remaining positions are left to the `simple` table.
--		{ "([БВГДЗКМНПРСТФХбвгдзкмнпрстфх])Е", "%1IE" },
--		{ "([БВГДЗКМНПРСТФХбвгдзкмнпрстфх])е", "%1Ie" },

		-- Ё: same scheme as Е.
		{ "%f[%w]Ё%f[%l]", "Jo" },
		{ "%f[%w]Ё", "JO" },
		{ "%f[%w]ё%f[%u]", "jO" },
		{ "%f[%w]ё", "jo" },
		{ "([Ьь])Ё", "JO" },
		{ "([Ьь])ё", "jo" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])Ё", "%1JO" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])ё", "%1jo" },
		{ "([ЖжЧчШшЩщ])Ё", "%1O" },
		{ "([ЖжЧчШшЩщ])ё", "%1o" },
--		{ "([БВГДЗКМНПРСТФХЦбвгдзкмнпрстфхц])Ё", "%1IO" },
--		{ "([БВГДЗКМНПРСТФХЦбвгдзкмнпрстфхц])ё", "%1io" },

		-- И after a soft sign: the soft sign turns into J/j.
		{ "ЬИ", "JI" },
		{ "Ьи", "Ji" },
		{ "ьИ", "jI" },
		{ "ьи", "ji" },

		-- И after Ж/Ц/Ш is written Y/y.
		{ "([ЖжЦцШш])И", "%1Y" },
		{ "([ЖжЦцШш])и", "%1y" },

		-- Ю: same scheme as Е.
		{ "%f[%w]Ю%f[%l]", "Ju" },
		{ "%f[%w]Ю", "JU" },
		{ "%f[%w]ю%f[%u]", "jU" },
		{ "%f[%w]ю", "ju" },
		{ "[Ьь]Ю", "JU" },
		{ "[Ьь]ю", "ju" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])Ю", "%1JU" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])ю", "%1ju" },
--		{ "([ЖЧШЩБВГДЗКМНПРСТФХЦжчшщбвгдзкмнпрстфхц])Ю", "%1IU" },
--		{ "([ЖЧШЩБВГДЗКМНПРСТФХЦжчшщбвгдзкмнпрстфхц])ю", "%1iu" },

		-- Я: same scheme as Е.
		{ "%f[%w]Я%f[%l]", "Ja" },
		{ "%f[%w]Я", "JA" },
		{ "%f[%w]я%f[%u]", "jA" },
		{ "%f[%w]я", "ja" },
		{ "([Ьь])Я", "JA" },
		{ "([Ьь])я", "ja" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])Я", "%1JA" },
		{ "([АаЕеЁёИиЙйОоУуЪъЫыЭэЮюЯя])я", "%1ja" },
--		{ "([ЖЧШЩБВГДЗКМНПРСТФХЦжчшщбвгдзкмнпрстфхц])Я", "%1IA" },
--		{ "([ЖЧШЩБВГДЗКМНПРСТФХЦжчшщбвгдзкмнпрстфхц])я", "%1ia" },

		-- Drop a soft sign that precedes a vowel, and one that follows a
		-- Latin L/l (produced by the Л rules above) or Ж/Ч/Ш/Щ.
		{ "[Ьь]([АаЕеЁёИиЙйОоУуЫыЬьЭэЮюЯя])", "%1" },
		{ "([LlЖЧШЩжчшщ])[Ьь]", "%1" },

		-- Word-initial letters that become digraphs: only the first Latin
		-- letter follows the case of the source letter when the next letter
		-- has the other case (e.g. Х + lower case -> "Ch").
		{ "%f[%w]Х%f[%l]", "Ch" },
		{ "%f[%w]Х", "CH" },
		{ "%f[%w]х%f[%u]", "cH" },
		{ "%f[%w]х", "ch" },

		{ "%f[%w]Ч%f[%l]", "Cz" },
		{ "%f[%w]Ч", "CZ" },
		{ "%f[%w]ч%f[%u]", "cZ" },
		{ "%f[%w]ч", "cz" },

		{ "%f[%w]Ш%f[%l]", "Sz" },
		{ "%f[%w]Ш", "SZ" },
		{ "%f[%w]ш%f[%u]", "sZ" },
		{ "%f[%w]ш", "sz" },

		{ "%f[%w]Щ%f[%l]", "Szcz" },
		{ "%f[%w]Щ", "SZCZ" },
		{ "%f[%w]щ%f[%u]", "sZCZ" },
		{ "%f[%w]щ", "szcz" },
	},
	
}

--- Transcribes text with the built-in recipe of the requested language.
-- The text is read from `frame.args[1]`, the language code from
-- `frame.args.lang`; the code selects an entry of `transcriptions`.
-- @param frame invocation frame providing `args`
-- @return the transcribed text; when the text or the recipe is missing, the
--         first argument is returned unchanged (which may be nil)
function m.Transkrypcja(frame)
	local args = frame.args
	local output = args[1]
	local recipe = transcriptions[args.lang]
	if not (output and recipe) then
		return output
	end

	-- Ordered context rules first: each one works on the result of the
	-- previous substitution.
	for _, rule in ipairs(recipe) do
		local pattern, replacement = rule[1], rule[2]
		output = mw.ustring.gsub(output, pattern, replacement)
	end

	-- Finally map the characters one by one; the parentheses drop the
	-- substitution count returned by gsub.
	return (mw.ustring.gsub(output, ".", recipe.simple))
end

--- Loads one transformation table of a language from its data submodule.
-- The submodule title is 'Moduł:Brudnopis/Paweł Ziemian/xlat/' followed by
-- `lang`; it is loaded with mw.loadData under pcall, so a missing module is
-- logged instead of raising an error.
-- @param lang language code; nil or an empty string is rejected
-- @param name key of the requested table inside the data module; nil is
--        rejected
-- @return the table and the name of its model: "gsub" when the first entry
--         visited by pairs is a string, "xlat" when it is a table; a single
--         nil when anything is missing, the table is empty or the entry has
--         another type
local function loadXlatData(lang, name)
	if not lang or (#lang == 0) then
		mw.log("Brak kodu języka")
		return nil
	end

	if not name then
		mw.log("Brak wyboru tabeli")
		-- Give up here: the code below indexes the module with `name` and
		-- concatenates it into a log message, which raises an error for nil.
		return nil
	end

	local dataModule = 'Moduł:Brudnopis/Paweł Ziemian/xlat/'..lang
	local success, module = pcall(mw.loadData, dataModule)
	if not success or not module then
		mw.log("Błąd ładowania modułu dla języka: "..lang)
		return nil
	end

	local data = module[name]
	if not data then
		mw.log("Błąd ładowania tabeli '"..name.."' dla języka: "..lang)
		return nil
	end

	-- The model is inferred from the type of the first entry.  pairs is used
	-- on purpose: tables returned by mw.loadData are documented to support
	-- pairs/ipairs but not next() or the # operator.
	for _, v in pairs(data) do
		local t = type(v)
		if t == "string" then
			return data, "gsub"
		elseif t == "table" then
			return data, "xlat"
		end
		return nil
	end

	-- Empty table: there is nothing to infer the model from.
	return nil
end

--- Shared implementation of the template-facing transformations.
-- The text (first positional argument) and the `lang` / `accents` options
-- are taken from the invoking frame and, when absent there, from its parent
-- frame.  The table for `lang` and `method` is fetched with loadXlatData and
-- applied according to the model it reports.
-- @param frame invocation frame
-- @param method name of the table to load ("transliteracja" or
--        "transkrypcja" for the callers in this file)
-- @return the transformed text, or nothing when the text is missing/empty or
--         no usable table is found (the reason is written with mw.log)
local function transform(frame, method)
	local pf = frame:getParent()
	local text = frame.args[1] or pf.args[1]
	if not text or (#text == 0) then
		mw.log("Brak tekstu")
		return
	end

	local lang = frame.args.lang or pf.args.lang
	local data, model = loadXlatData(lang, method)
	if not data then
		-- `lang` may be nil at this point, so it must not be concatenated
		-- directly.
		mw.log("Brak przepisu transformacji '"..tostring(method).."' dla języka "..tostring(lang))
		return
	end

	if model == "gsub" then
		-- Per-character table: look up every code point of the decomposed
		-- text and recompose the result.
		text = mw.ustring.toNFD(text)
		local result = mw.ustring.gsub(text, ".", data)
		return mw.ustring.toNFC(result)
	end

	if model == "xlat" then
		local preserveCombining = (frame.args.accents or pf.args.accents) == "preserve"
		-- Hand over the table that was just loaded.  xlat3 is defined
		-- elsewhere in this file; presumably its second parameter is the rule
		-- table — TODO confirm against its definition.
		return xlat3(text, data, preserveCombining)
	end

	mw.log("Nieobsługiwany model transformacji '"..tostring(model).."' dla języka "..tostring(lang))
end

--- Template entry point: applies the "transliteracja" table of the requested
-- language via transform.
-- @param frame invocation frame
-- @return whatever transform returns for this method
function m.transliteracja(frame)
	local method = "transliteracja"
	return transform(frame, method)
end

--- Template entry point: applies the "transkrypcja" table of the requested
-- language via transform.
-- @param frame invocation frame
-- @return whatever transform returns for this method
function m.transkrypcja(frame)
	local method = "transkrypcja"
	return transform(frame, method)
end

--- Returns the text in decomposed (NFD) form, optionally upper-cased.
-- Arguments are looked up in the invoking frame first and in its parent
-- frame second: the text (1), `upper` and `lang`.  Upper-casing happens only
-- when both `upper` and `lang` are given and uses the language object of
-- `lang`.
-- @param frame invocation frame
-- @return the decomposed text, or nothing when the text is missing or empty
function m.decompose(frame)
	local parent = frame:getParent()
	local function arg(key)
		return frame.args[key] or parent.args[key]
	end

	local input = arg(1)
	local upper = arg("upper")
	local lang = arg("lang")

	if not input or (#input == 0) then
		mw.log("Brak tekstu")
		return
	end

	local decomposed = mw.ustring.toNFD(input)
	if not (upper and lang) then
		return decomposed
	end

	local uppercased = mw.getLanguage(lang):uc(decomposed)
	return uppercased
end

--- Lists the code points of the text, each formatted with string.format.
-- The text (1) and the optional `format` argument are looked up in the
-- invoking frame first and in its parent frame second; the format defaults
-- to "%x".
-- @param frame invocation frame
-- @return the formatted code points joined with single spaces, or nothing
--         when the text is missing or empty
function m.split(frame)
	local parent = frame:getParent()
	local text = frame.args[1] or parent.args[1]
	local fmt = frame.args.format or parent.args.format or "%x"
	if not text or (#text == 0) then
		mw.log("Brak tekstu")
		return
	end

	local pieces = {}
	for codepoint in mw.ustring.gcodepoint(text) do
		pieces[#pieces + 1] = string.format(fmt, codepoint)
	end

	return table.concat(pieces, " ")
end

--- Builds a diagnostic label "<name>/<decimal>/0x<hex>" for a code point.
-- The name is the `comment` of the matching `Jamo` entry when there is one,
-- otherwise the character itself.
-- @param S code point (number)
-- @return the formatted label
local function diagjamo(S)
	local glyph = mw.ustring.char(S)
	local entry = Jamo[glyph]
	local label = glyph
	if entry then
		label = entry.comment
	end
	return string.format("%s/%d/0x%x", label, S, S)
end

--- Decomposes precomposed Hangul syllables into their jamo.
-- Every code point in the range SBase .. SBase + SCount - 1 is replaced by
-- its leading consonant, vowel and (when present) trailing consonant, using
-- the arithmetic of the Unicode reference linked below; all other code
-- points are copied unchanged.  Each step is also written with mw.log.
-- The text is the first positional argument of the invoking frame or, when
-- absent there, of its parent frame.
-- @param frame invocation frame
-- @return the decomposed text, or nothing when the text is missing or empty
function m.hangul(frame)
	local pf = frame:getParent()
	local text = frame.args[1] or pf.args[1]
	if not text or (#text == 0) then
		mw.log("Brak tekstu")
		return
	end
	
	-- http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf#G24646
	local SBase = 44032 -- 0xAC00
	local LBase = 4352  -- 0x1100
	local VBase = 4449  -- 0x1161
	local TBase = 4519  -- 0x11A7
	local SCount = 11172
	local LCount = 19
	local VCount = 21
	local TCount = 28
	local NCount = VCount * TCount
	
	local result = {}
	for S in mw.ustring.gcodepoint(text) do
		local SIndex = S - SBase
		if (0 <= SIndex) and (SIndex < SCount) then
			local L = LBase + math.floor(SIndex / NCount)
			local V = VBase + math.floor(math.fmod(SIndex, NCount) / TCount)
			local T = TBase + math.fmod(SIndex, TCount)
			table.insert(result, L)
			table.insert(result, V)
			-- T == TBase means the syllable has no trailing consonant.
			if T ~= TBase then
				table.insert(result, T)
				mw.log("S:"..diagjamo(S).." -> L:"..diagjamo(L)..", V:"..diagjamo(V)..", T:"..diagjamo(T))
			else
				mw.log("S:"..diagjamo(S).." -> L:"..diagjamo(L)..", V:"..diagjamo(V))
			end
		else
			table.insert(result, S)
			mw.log("S:"..S)
		end
	end

	-- unpack() passes every element as a separate argument and Lua rejects
	-- very long argument lists ("too many results to unpack"), so the code
	-- points are converted in bounded slices and joined afterwards.
	local SLICE = 1000
	local pieces = {}
	local total = #result
	for first = 1, total, SLICE do
		local last = math.min(first + SLICE - 1, total)
		pieces[#pieces + 1] = mw.ustring.char(unpack(result, first, last))
	end

	return table.concat(pieces)
end

return m