-- Old Chinese pronunciation module. See {{zh-pron}}. Data is stored at Module:zh/data/och-pron-BS and Module:zh/data/och-pron-ZS, and in their subpages.


-- Table of public functions returned at the end of this module.
local export = {}
local m_string_utils = require("Module:string utilities")

-- Short local aliases for the string helpers used throughout this module.
local codepoint = mw.ustring.codepoint
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local len = m_string_utils.len
local u = mw.ustring.char

-- Background colours for the generated tables: colour_1 is used for the
-- full-width title row, colour_2 for row-header cells and colour_3 for
-- ordinary data cells.
local colour_1 = "#ccece6"
local colour_2 = "#E0F3F3"
local colour_3 = "#F7FCFB"

--- Wrap text in a span tagged as Chinese (class "Hani", lang "zh").
-- An empty string is returned unchanged so that no empty span is emitted.
-- @param text string to wrap
-- @return the wrapped text, or "" when text is ""
local function zh_fmt(text)
	if text == "" then
		return ""
	end
	return '<span class="Hani" lang="zh">' .. text .. '</span>'
end

--- Format one reconstruction string for display between slashes.
-- The first space-separated word of reading_temp is taken as the
-- reconstruction (a leading "*" is stripped); any remaining words are
-- treated as a trailing note.
-- @param reading_temp raw reconstruction string from the data module
-- @param text the whole term being displayed (only its length is used)
-- @param system unused by this function
-- @param i position of the current character within text
-- @param return_note when truthy, append the trailing note words
-- @param index ordinal of this reading for the character (defaults to 1)
-- @return the formatted string
local function insert_pron(reading_temp, text, system, i, return_note, index)
	-- Remove the space that directly follows either combining mark in the
	-- class, so the mark stays in the same word as what comes after it.
	local cleaned = gsub(reading_temp, "([̥̊]) ", "%1")
	local words = mw.text.split(cleaned, " ")
	local IPA = gsub(table.remove(words, 1), "^%*", "")

	-- "/*" opens the transcription: only on the first character, and there
	-- either for every reading (single-character text) or the first reading.
	local opening = ""
	if i == 1 and (len(text) == 1 or (index or 1) == 1) then
		opening = "/*"
	end

	-- "/" closes the transcription after the last character of text.
	local closing = ""
	if len(text) == i then
		closing = "/"
	end

	local note = ""
	if #words > 0 and return_note then
		note = " " .. table.concat(words, " ")
	end

	return opening .. IPA .. closing .. note
end

--- Build one table row (a positional list of cell strings) for a reading.
-- Slot 1 is deliberately nil: position 1 in the field lists of export.ipa
-- is the table title, so the cells start at position 2.
-- @param titlechar the character the reading belongs to
-- @param reading one entry of the data module for that character
-- @param system "BS" selects the 7-slot layout; anything else the 10-slot one
-- @param reading_index number (or numeric string) of this reading
-- @param count total number of readings for the character
-- @param i unused by this function
-- @return the row table
local function pron_table(titlechar, reading, system, reading_index, count, i)
	local char_cell = '<b>' .. zh_fmt(titlechar) .. '</b>'
	local ordinal_cell = reading_index .. "/" .. count

	if system == "BS" then
		-- Tone letters X and H in field 2 are shown as superscripts.
		local second_cell = '‹ <i>' .. gsub(reading[2], '([XH])', '<sup>%1</sup>') .. '</i> ›'
		local ipa_cell = '<span class="IPAchar">' ..
			gsub(insert_pron(reading[3], titlechar, "BS", 1, true), "ˤ", "ˁ") .. '</span>'
		return {
			nil,
			char_cell,
			ordinal_cell,
			reading[1],
			second_cell,
			ipa_cell,
			reading[4]
		}
	end

	-- Character-class body shared by the two patterns below.
	local han = "一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮯯𰀀-𱍏"
	-- 1. wrap runs of these characters (and commas) in a Chinese span;
	-- 2. insert a line break after a comma preceded by six characters that
	--    are neither ">" nor ",";
	-- 3. turn every single such character into a wikilink.
	local notes = gsub(reading[7], "([" .. han .. ",]+)", zh_fmt("%1"))
	notes = gsub(notes, "([^>,][^>,][^>,][^>,][^>,][^>,],)", "%1<br>")
	notes = gsub(notes, "([" .. han .. "])", "[[%1]]")

	return {
		nil,
		char_cell,
		ordinal_cell,
		reading[1],
		zh_fmt("[[" .. reading[2] .. "]]"),
		zh_fmt("[[" .. reading[3] .. "]]"),
		reading[4],
		zh_fmt("[[" .. reading[5] .. "]]"),
		'<span class="IPAchar">/*' .. reading[6] .. '/</span>',
		notes
	}
end

--- Load the data subpage for one character in one reconstruction system.
-- @param system system code used in the module path (e.g. "BS" or "ZS")
-- @param ch the character whose subpage is wanted
-- @return the value returned by the data module, or false if it cannot be loaded
local function get_data(system, ch)
	local path = string.format("Module:zh/data/och-pron-%s/%s", system, ch)
	local ok, loaded = pcall(require, path)
	if not ok then
		return false
	end
	return loaded
end

--- Build the collapsible Old Chinese tables for the current page title.
-- One table is produced per reconstruction system ("BS", then "ZS") for which
-- at least one reading is found.
-- @param index_text reading selection: ";" separates characters; within a
--   character, "," optionally separates the BS and ZS selections. Each
--   selection is "y" (all readings), "n" (stop collecting readings for that
--   system) or reading numbers joined with "+". Characters without a
--   selection default to "y".
-- @param preview text appended inside the toggle line (nil is treated as "")
-- @return wikitext for the toggle line plus the tables, or "" if no data
function export.ipa(index_text, preview)
	-- A missing preview used to abort on concatenation, even for pages
	-- where nothing is displayed.
	preview = preview or ""
	local titlechar = mw.title.getCurrentTitle().text
	local reading_index = mw.text.split(index_text, ";")
	local output_text = {}
	local systems = { "BS", "ZS" }
	local indiv_pronunciation = { ["BS"] = {}, ["ZS"] = {} }
	local rand = ""

	-- Row labels per system. Entry 1 is the table title; entries 2..n label
	-- the cells found at the same positions in the rows built by pron_table.
	local fields = {
		
		["BS"] = {
			"ระบบ[[w:วิลเลียม เอช. แบกซ์เตอร์|แบกซ์เตอร์]]–[[w:ลอเรนต์ ซาการ์|ซาการ์]] 1.1 " ..
				"([http://ocbaxtersagart.lsait.lsa.umich.edu/ 2014])",
			"<small>อักขระ</small>",
			"<small>การออกเสียงที่</small>",
			"<small>ปักกิ่งใหม่<br/>(พินอิน)</small>",
			"<small>จีนยุคกลาง</small>",
			"<small>จีนเก่า</small>",
			"<small>อังกฤษ</small>"
		},
	
		["ZS"] = {
			"ระบบ[[w:เจิ้งจาง ซ่างฟาง|เจิ้งจาง]] (2003)",
			"<small>อักขระ</small>",
			"<small>การออกเสียงที่</small>",
			"<small>หมายเลข</small>",
			"<small>ส่วนประกอบ<br>สัทศาสตร์</small>",
			"<small>กลุ่มสัมผัส</small>",
			"<small>กลุ่มย่อยสัมผัส</small>",
			"<small>สัมผัสจีนยุคกลาง<br>ที่สอดคล้อง</small>",
			"<small>จีนเก่า</small>",
			"<small>หมายเหตุ</small>"
		}
	}
	
	for system_seq, system in ipairs(systems) do
		for i, cp in ipairs { codepoint(titlechar, 1, -1) } do
			local ch = u(cp)
			local data_module = get_data(system, ch)
			if data_module then
				local count = #data_module
				local reading_number = reading_index[i] and (mw.text.split(reading_index[i], ',')[system_seq] or reading_index[i]) or "y"
				if reading_number == "y" then
					for reading_no, reading in ipairs(data_module) do
						table.insert(indiv_pronunciation[system], pron_table(ch, reading, system, reading_no, count, i))
					end
				elseif reading_number == "n" then
					break
				else
					for indiv_number in mw.text.gsplit(reading_number, '%+') do
						local reading = data_module[tonumber(indiv_number)]
						if not reading then
							-- Fail with a readable message instead of an
							-- "attempt to index nil" inside pron_table.
							error(("Old Chinese (%s) reading '%s' requested for %s, but the data has %d reading(s)")
								:format(system, indiv_number, ch, count))
						end
						table.insert(indiv_pronunciation[system], pron_table(ch, reading, system, indiv_number, count, i))
					end
				end
			end
		end
		if indiv_pronunciation[system][1] then
			-- Drop rows whose cells (positions 2..n) are all identical to an
			-- earlier row. The key is built without mutating the row, so it
			-- does not depend on how "#" treats the nil in slot 1.
			local seen, results = {}, {}
			local value_eff
			for _, value in ipairs(indiv_pronunciation[system]) do
				local cells = {}
				for k = 2, #fields[system] do
					cells[#cells + 1] = value[k] or ""
				end
				value_eff = table.concat(cells)
				if not seen[value_eff] then
					seen[value_eff] = true
					results[#results + 1] = value
				end
			end
			-- The element id suffix is derived once, from the last row of the
			-- first system that has data; every non-alphanumeric character
			-- is replaced by its code point.
			rand = rand ~= "" and rand or gsub("oc-" .. value_eff, "[^A-Za-z0-9]", codepoint)
			local fmt = {
				header = '\n{| class="wikitable mw-collapsible mw-collapsed" id="mw-customcollapsible-oc' .. rand ..
					'" style="width:100%; margin:0; text-align:center; border-collapse: collapse; border-style: hidden;"',
				lv1 = '\n|-\n! style="background-color:' .. colour_1 .. '" colspan=' .. #results+1 .. '|',
				lv2 = '\n|-\n! style="background-color:' .. colour_2 .. '; width:8em"|',
				lv3 = '\n| style="background-color:' .. colour_3 .. '"|',
				closing = '\n|}',
				
				BS_note = '\n|-\n|colspan=' .. #results+1 ..
					[=[ style="text-align:left; font-size:90%"|<div class="toccolours mw-collapsible mw-collapsed">
					'''''Notes''' for Old Chinese notations in the Baxter–Sagart system:''
					<div class="mw-collapsible-content">
					* Parentheses "()" indicate uncertain presence;<br>
					* Square brackets "[]" indicate uncertain identity, e.g. *[t] as coda may in fact be *-t or *-p;<br>
					* Angle brackets "&lt;>" indicate infix;<br>
					* Hyphen "-" indicates morpheme boundary;<br>
					* Period "." indicates syllable boundary.</div>
					</div>]=]
			}
		
			table.insert(output_text, fmt.header)
			for field_index, field in ipairs(fields[system]) do
				if match(field, "small") then
					-- One table row per field: the label, then that field's
					-- cell from every reading.
					local field_set = {}
					for _, result in ipairs(results) do
						-- Keep an empty cell for a missing value so the
						-- remaining cells stay under the right reading.
						table.insert(field_set, result[field_index] or "")
					end
					-- Rows that are empty for every reading are omitted.
					if table.concat(field_set) ~= "" then
						table.insert(output_text, fmt.lv2 .. field .. fmt.lv3 .. table.concat(field_set, fmt.lv3))
					end
				else
					-- The title entry spans the whole table.
					table.insert(output_text, fmt.lv1 .. field)
				end
			end
			table.insert(output_text, (system == "BS" and fmt.BS_note or "") .. fmt.closing)
		end
	end
	local fold = '\n* <div title="expand" class="mw-customtoggle-oc' .. rand .. '"> ' ..
		'[[w:ภาษาจีนเก่า|จีนเก่า]]<span style="float:right; border:1px solid #ccc; border-radius:1px;' ..
		' padding:0 0; font-size:90%">▼</span>' .. preview .. '</div>'
	-- Empty links ("[[]]") left over from blank data fields are removed.
	return output_text[1] and fold .. gsub(table.concat(output_text), "%[%[%]%]", "") or ""
end

--- Return a short Old Chinese transcription, read from field 6 of the "ZS" data.
-- @param text the term, or a frame-like table whose args[1] is the term;
--   wiki links are stripped before lookup
-- @param reconstruction if truthy, used verbatim instead of looking up data
-- @param no_intro if truthy, omit the leading "OC" label
-- @param index optional reading selection: "," separates characters; each
--   item is "y" (all readings) or reading numbers joined with "+"
-- @return the label plus transcription, or nil when a character has no data
function export.retrieve_pron(text, reconstruction, no_intro, index)
	if type(text) == "table" then text = text.args[1] end
	text = require("Module:links").remove_links(text)
	local retrieve_result = {}
	local intro = no_intro and "" or "<span style=\"border-bottom: 1px dotted #000; cursor:help\" title=\"จีนเก่า\">OC</span> "
	if not reconstruction then
		local index_set
		if index and index ~= "y" then
			index_set = mw.text.split(index, ",")
		end
		for char_index, cp in ipairs { codepoint(text, 1, -1) } do
			local char_pronunciation = {}
			local ch = u(cp)
			local data_module = get_data("ZS", ch)
			if data_module then
				local reading_no = index_set and index_set[char_index] or "y"
				if reading_no ~= "y" then
					-- "+" is a pattern quantifier; escape it, as the other
					-- functions in this module do, so it is clearly literal.
					for number in mw.text.gsplit(reading_no, "%+") do
						local reading = data_module[tonumber(number)]
						if not reading then
							-- Fail with a readable message rather than an
							-- "attempt to index nil".
							error(("Old Chinese (ZS) reading '%s' requested for %s, but the data has %d reading(s)")
								:format(number, ch, #data_module))
						end
						table.insert(char_pronunciation, reading[6])
					end
				else
					for _, reading in ipairs(data_module) do
						table.insert(char_pronunciation, reading[6])
					end
				end
			else
				-- No transcription can be given if any character lacks data.
				return nil
			end
			-- Alternative readings are listed as "a, *b" for a single
			-- character and as "a/b" inside a longer term.
			table.insert(retrieve_result, table.concat(char_pronunciation, len(text) == 1 and ", *" or "/"))
		end
	end
	return intro .. (reconstruction or "*" .. table.concat(retrieve_result, " "))
end

--- Build the inline Old Chinese lines, one per system ("BS", then "ZS").
-- A system is shown only if every character of text has data in it.
-- @param text the term whose characters are looked up
-- @param index reading selection, in the same format as for export.ipa:
--   ";" separates characters, "," optionally separates the BS and ZS
--   selections, and each selection is "y", "n" or numbers joined with "+"
-- @return wikitext for the lines, or nil when no system produced a result
function export.generate_show(text, index)
	local index_set = mw.text.split(index, ";")
	local extract_results = { ["BS"] = {}, ["ZS"] = {} }
	local result = {}
	-- Field of a data entry that holds the reconstruction in each system.
	local position = { ["BS"] = 3, ["ZS"] = 6 }
	
	local fmt = {
		beginning = {
			["BS"] = "\n*: <small>(''[[w:วิลเลียม เอช. แบกซ์เตอร์|แบกซ์เตอร์]]–[[w:ลอเรนต์ ซาการ์|ซาการ์]]'')</small>: " ..
				'<span style="font-size:95%">',
			["ZS"] = "\n*: <small>(''[[w:เจิ้งจาง ซ่างฟาง|เจิ้งจาง]]'')</small>: " ..
				'<span style="font-size:95%">',
		},
		ending = {
			["BS"] = "</span>", ["ZS"] = "</span>"
		}
	}
	
	for system_seq, system in ipairs({ "BS", "ZS" }) do
		for i, cp in ipairs { codepoint(text, 1, -1) } do
			local char_pronunciation = {}
			local ch = u(cp)
			local data_module = get_data(system, ch)
			if data_module then
				local existing_pron = {}
				-- Ordinal of the reading being added; kept in its own local
				-- instead of overwriting the index parameter.
				local ordinal = 0
				-- Append one reading unless the same reconstruction string
				-- was already added for this character.
				local function add_reading(reading)
					ordinal = ordinal + 1
					local reading_temp = reading[position[system]]
					if not existing_pron[reading_temp] then
						table.insert(char_pronunciation, insert_pron(reading_temp, text, system, i, false, ordinal))
						existing_pron[reading_temp] = true
					end
				end

				local reading_number = index_set[i] and (mw.text.split(index_set[i], ',')[system_seq] or index_set[i]) or "y"
				if reading_number == "y" then
					for _, reading in ipairs(data_module) do
						add_reading(reading)
					end
				elseif reading_number == "n" then
					break
				else
					for indiv_number in mw.text.gsplit(reading_number, '%+') do
						local reading = data_module[tonumber(indiv_number)]
						if not reading then
							-- Fail with a readable message rather than an
							-- "attempt to index nil".
							error(("Old Chinese (%s) reading '%s' requested for %s, but the data has %d reading(s)")
								:format(system, indiv_number, ch, #data_module))
						end
						add_reading(reading)
					end
				end
				table.insert(extract_results[system], table.concat(char_pronunciation, len(text) == 1 and ", " or "|"))
			else
				-- A character without data disables the whole system.
				extract_results[system] = {}
				break
			end
		end
		if extract_results[system][1] then
			table.insert(result, fmt.beginning[system] .. 
				'<span class="IPAchar">' .. 
				table.concat(extract_results[system], "&nbsp; ") .. 
				 '</span>' .. 
				 fmt.ending[system])
		end
	end

	if not result[1] then
		return nil
	end
	-- Tidy the "|" separators between alternative readings: drop a "*"
	-- right after one and a "/" right before one, then pad each separator.
	local shown = gsub(table.concat(result), "|%*", "|")
	shown = gsub(shown, "/|", "|")
	shown = gsub(shown, "|", '<span style="padding-left:2px; padding-right:2px">|</span>')
	return shown
end

--- Produce a link to an Old Chinese term via Module:zh/link.
-- The displayed term is prefixed with "*" and its transliteration comes from
-- export.retrieve_pron.
-- @param frame the invoking frame; its parent's args are used when arg is not given
-- @param arg optional argument table used instead of the parent frame's args
-- @return whatever Module:zh/link's link function returns
function export.link(frame, arg)
	local args = arg or frame:getParent().args
	local text = args[1]
	local meaning = args[2] or args['gloss'] or nil
	local lit = args['lit'] or nil

	local zh_link = require("Module:zh/link").link
	local link_args = {
		"*" .. text,
		-- Absent options are passed on as false.
		tr = export.retrieve_pron(text, args["tr"] or false, args["no_intro"] or false, args["id"] or false),
		gloss = meaning,
		lit = lit
	}
	return zh_link(frame, nil, link_args, mw.title.getCurrentTitle().subpageText)
end

return export