-- Moduł:Xlat
--
-- Z Wikipedii, wolnej encyklopedii
-- Dokumentacja modułu [stwórz] [odśwież]
--- Translates `text` character by character using context-sensitive rules.
--
-- The input is split into "characters": one code point followed by any code
-- points listed in the `combining` table of 'Moduł:IPA/data'. A five-slot
-- sliding window (two preceding, current, two following) is moved over them,
-- so every character is translated with up to two neighbours on each side as
-- context.
--
-- @param text              string to translate
-- @param translation       table indexed by character string; each value is a
--                          sequence of rules. A rule is a table whose [1] is
--                          the replacement and whose optional fields `a` and
--                          `b` are mw.ustring patterns matched against the two
--                          preceding and the two following characters.
--                          The first rule whose conditions hold wins.
-- @param preserveCombining when truthy, the part of a character that was not
--                          a key of `translation` (its trailing combining
--                          marks) is appended after a non-empty replacement
-- @return the translated string; `text` itself when `text` or `translation`
--         is missing or `text` is empty
local function xlat(text, translation, preserveCombining)
	if not text or not translation or (#text == 0) then
		return text
	end

	-- Per-call cache: string -> { rules = ..., class = { pattern -> boolean } }
	local cache = {}

	local function cacheEntry(char)
		local entry = cache[char]
		if not entry then
			entry = {
				-- characters without rules translate to themselves
				rules = translation[char] or { { char } },
				class = {},
			}
			cache[char] = entry
		end
		return entry
	end

	local function loadRules(char)
		return cacheEntry(char).rules
	end

	-- Tells whether the context string `char` matches pattern `restriction`.
	local function checkRule(char, restriction)
		local class = cacheEntry(char).class
		local result = class[restriction]
		if result == nil then
			result = mw.ustring.match(char, restriction) and true or false
			-- remember the verdict so the pattern is evaluated once per context
			class[restriction] = result
		end

		mw.log("RULE << "..char.." >> match << "..restriction.." >> "..(result and "SUCCESS" or "FAIL"))
		return result
	end

	-- `after` is the text preceding `current`, `before` the text following it
	-- (the current character stands after `after` and before `before`).
	local function translate(after, current, before)
		for _, rule in ipairs(loadRules(current)) do
			local a = not rule.a or checkRule(after, rule.a)
			local b = not rule.b or checkRule(before, rule.b)
			if a and b then
				return rule[1]
			end
		end

		return current
	end

	local combining = mw.loadData( 'Moduł:IPA/data' ).combining

	local result = {}

	-- Translates one window entry and appends the outcome to `result`.
	local function emit(preceding, entry, following)
		local translated = translate(preceding, entry.known, following)
		mw.log("X trans: "..preceding.." - "..entry.known.." - "..following.." → "..(translated or "<NIL>"))
		-- a rule without a replacement yields nothing instead of crashing on #nil
		if translated then
			table.insert(result, translated)
			if (#translated > 0) and preserveCombining then
				table.insert(result, entry.rest)
			end
		end
	end

	-- Sentinel for window slots that are not filled yet.
	local nothing = { known = "", rest = "" }
	local after2 = nothing
	local after1 = nothing
	local current = nothing
	local before1 = nothing
	local before2 = nothing

	local iterator = mw.ustring.gcodepoint(text)
	mw.log("X start")
	local codepoint = iterator()
	mw.log("X next: "..(codepoint and string.format("U+%04X", codepoint) or "<NIL>"))
	while codepoint do
		-- read the next "character": a code point plus following combining marks
		local char = { codepoint }
		while true do
			codepoint = iterator()
			mw.log("X next: "..(codepoint and string.format("U+%04X", codepoint) or "<NIL>"))
			if not codepoint or not combining[codepoint] then
				break
			end
			table.insert(char, codepoint)
		end

		-- shift window
		after2, after1, current, before1 = after1, current, before1, before2

		-- the longest prefix of the character that is a key of `translation`
		-- becomes `known`; whatever follows it becomes `rest`
		local entry
		for j = #char, 1, -1 do
			local part = mw.ustring.char(unpack(char, 1, j))
			if translation[part] then
				entry = {
					known = part,
					rest = j < #char and mw.ustring.char(unpack(char, j + 1, #char)) or "",
				}
				break
			end
		end
		if not entry then
			-- use whole unrecognized character
			entry = { known = mw.ustring.char(unpack(char)), rest = "" }
		end
		before2 = entry

		if current.known ~= "" then
			emit(after2.known..after1.known, current, before1.known..before2.known)
		end
	end

	-- The last two window slots have not been translated yet; flush them,
	-- each with its own `rest`.
	emit(after1.known..current.known, before1, before2.known)
	emit(current.known..before1.known, before2, "")

	mw.log("X stop")
	local output = table.concat(result, "")
	mw.log("return \""..output.."\"")
	return output
end

--- Loads one translation table for a language.
--
-- The table is read from the data module 'Moduł:Xlat/<lang>' under key `name`.
-- Its model is guessed from the type of the first entry that `pairs` yields:
-- a string value means "gsub", a table value means "xlat".
--
-- @param lang language code (non-empty string)
-- @param name key of the table inside the data module
-- @return data, model on success; nil when an argument is missing, the data
--         module cannot be loaded, the table is absent, or no model could be
--         determined (every failure except the last is reported via mw.log)
local function loadXlatData(lang, name)
	if not lang or (#lang == 0) then
		mw.log("Brak kodu języka")
		return nil
	end

	if not name then
		mw.log("Brak wyboru tabeli")
		-- without a table name nothing can be looked up; bail out instead of
		-- failing later while concatenating nil into a log message
		return nil
	end

	local dataModule = 'Moduł:Xlat/'..lang
	local success, module = pcall(mw.loadData, dataModule)
	if not success or not module then
		mw.log("Błąd ładowania modułu dla języka: "..lang)
		return nil
	end

	local data = module[name]
	if not data then
		mw.log("Błąd ładowania tabeli '"..name.."' dla języka: "..lang)
		return nil
	end

	-- Only the first entry is inspected; pairs() is used (rather than next())
	-- because tables returned by mw.loadData must be traversed through it.
	local model = false
	for _, v in pairs(data) do
		local t = type(v)
		if t == "string" then
			model = "gsub"
		elseif t == "table" then
			model = "xlat"
		end

		break
	end

	if not model then
		return nil
	end

	return data, model
end

--- Decomposes precomposed Hangul syllables into their conjoining jamo.
--
-- Every code point in the range SBase .. SBase+SCount-1 is replaced by its
-- leading consonant, vowel and (when present) trailing consonant; all other
-- code points are copied unchanged.
--
-- @param text string to process
-- @return the string with Hangul syllables decomposed
local function jamo(text)
	-- http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf#G24646
	local SBase = 44032 -- 0xAC00
	local LBase = 4352  -- 0x1100
	local VBase = 4449  -- 0x1161
	local TBase = 4519  -- 0x11A7
	local SCount = 11172
	local VCount = 21
	local TCount = 28
	local NCount = VCount * TCount

	local char = mw.ustring.char

	-- Encoded pieces are collected and joined at the end. Passing every code
	-- point to a single mw.ustring.char(unpack(...)) call would fail on long
	-- inputs, because unpack cannot return arbitrarily many values.
	local pieces = {}
	for S in mw.ustring.gcodepoint(text) do
		local SIndex = S - SBase
		if (0 <= SIndex) and (SIndex < SCount) then
			local L = LBase + math.floor(SIndex / NCount)
			local V = VBase + math.floor((SIndex % NCount) / TCount)
			local T = TBase + SIndex % TCount
			if T ~= TBase then
				pieces[#pieces + 1] = char(L, V, T)
				mw.log("S:"..S.." -> L:"..L..", V:"..V..", T:"..T)
			else
				-- T == TBase means the syllable has no trailing consonant
				pieces[#pieces + 1] = char(L, V)
				mw.log("S:"..S.." -> L:"..L..", V:"..V)
			end
		else
			pieces[#pieces + 1] = char(S)
			mw.log("S:"..S)
		end
	end

	return table.concat(pieces)
end

--- Normalizes `text` according to `method`.
--
-- @param text   string to normalize
-- @param method false         -> text is returned untouched
--               nil or "NFC"  -> mw.ustring.toNFC
--               true or "NFD" -> mw.ustring.toNFD
--               "JAMO"        -> jamo()
-- @return the normalized string
-- Raises an error for any other `method`.
local function decompose(text, method)
	if method == false then
		-- normalization explicitly switched off
		return text
	end

	if method == nil or method == "NFC" then
		-- composite characters are the default
		return mw.ustring.toNFC(text)
	end

	if method == true or method == "NFD" then
		return mw.ustring.toNFD(text)
	end

	if method == "JAMO" then
		return jamo(text)
	end

	error("unsupported decomposition method: "..method)
end

--- Runs the transformation `method` on the text passed to the module.
--
-- The text (first positional argument) and the `lang` and `accents` arguments
-- are read from the frame itself and, when absent there, from its parent
-- frame. The translation table is fetched with loadXlatData(lang, method) and
-- applied according to its model:
--   "gsub" - every character of the NFD-normalized text is looked up in the
--            table; the result is NFC-normalized
--   "xlat" - the text is normalized as requested by data[0].decompose and
--            translated by xlat(); data[0].keepAccents, overridable with
--            accents=preserve, decides whether combining marks are kept
--
-- @param frame  Scribunto frame object
-- @param method name of the table in the language data module
-- @return the transformed text, or nothing when the text or the translation
--         table is missing (the reason is written with mw.log)
local function transform(frame, method)
	-- a frame without a parent yields nil here; treat it as "no parent args"
	local parent = frame:getParent()
	local parentArgs = parent and parent.args or {}

	local text = frame.args[1] or parentArgs[1]
	if not text or (#text == 0) then
		mw.log("Brak tekstu")
		return
	end

	local lang = frame.args.lang or parentArgs.lang
	local data, model = loadXlatData(lang, method)
	if not data then
		-- lang may be nil on this path, hence tostring()
		mw.log("Brak przepisu transformacji '"..method.."' dla języka "..tostring(lang))
		return
	end

	mw.log(model.." "..text)
	if model == "gsub" then
		-- keep only the first result of gsub (the string, not the match count)
		local result = mw.ustring.gsub(mw.ustring.toNFD(text), ".", data)
		return mw.ustring.toNFC(result)
	end

	if model == "xlat" then
		-- entry 0 of the table carries the options of the translation
		local options = data[0]
		local keepAccents = options and options.keepAccents or false
		local accents = frame.args.accents or parentArgs.accents
		if accents then
			-- an explicit argument overrides the table's setting
			keepAccents = accents == "preserve"
		end
		text = decompose(text, options and options.decompose or false)
		local result = xlat(text, data, keepAccents)
		return mw.ustring.toNFC(result)
	end

	mw.log("Nieobsługiwany model transformacji '"..model.."' dla języka "..lang)
end

-- Export table of the module.
local p = {}

-- Applies the "transliteracja" table of the requested language.
function p.transliteracja(frame)
	return transform(frame, "transliteracja")
end

-- Applies the "transkrypcja" table of the requested language.
function p.transkrypcja(frame)
	return transform(frame, "transkrypcja")
end

return p