local export = {}
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local gsplit = m_str_utils.gsplit
local gsub = m_str_utils.gsub
local sub = m_str_utils.sub
local match = m_str_utils.match
local len = m_str_utils.len
local lower = m_str_utils.lower
local split = m_str_utils.split
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
-- Combining tone diacritics, concatenated into one string so that callers can
-- wrap it in "[" .. "]" and use it as a pattern character class.
local poj_tone_marks = "́" .. "̀" .. "̂" .. "̌" .. "̄" .. "̍" .. "̋" .. "̆"
-- Lookup table keyed by a POJ-style initial (some keys include a following "i",
-- e.g. "chi", "si"); the empty string is the key for a syllable with no initial.
-- NOTE(review): judging by the name, the values are the Phofsit Daibuun
-- spellings of those initials; the code that consumes this table is outside
-- this chunk, so confirm against the caller.
local psdb_initial = {
["p"] = "'p", ["ph"] = "ph", ["b"] = "'b",
["t"] = "'d", ["th"] = "'t",
["k"] = "'k", ["kh"] = "'q", ["g"] = "'g",
["chi"] = "c", ["ch"] = "z",
["chhi"] = "ch", ["chh"] = "zh",
["si"] = "s", ["s"] = "s",
["j"] = "j",
["l"] = "l", ["h"] = "'h",
["m"] = "m", ["n"] = "n", ["ng"] = "ng",
[""] = "'"
}
--- Look up the replacement spelling for one final-plus-tone key.
-- `text` is a final followed by a tone written either as a digit or as one of
-- the numerals 一..八 (which are converted to digits first). Finals that are
-- nasalised (ⁿ), end in m/n/ng, or are checked (p/t/k/h with tone 4 or 8) are
-- reduced to a base key in the table below and the result is re-assembled
-- around the looked-up spelling.
-- @param text final .. tone
-- @return the spelling, or nothing/nil when the key is not in the table
local function psdb_final(text)
	local spellings = {
		-- plain single vowels and syllabic nasals, tones 1 2 3 5 7
		["a1"] = "af", ["a2"] = "ar", ["a3"] = "ax", ["a5"] = "aa", ["a7"] = "a",
		["i1"] = "y", ["i2"] = "ie", ["i3"] = "ix", ["i5"] = "ii", ["i7"] = "i",
		["u1"] = "w", ["u2"] = "uo", ["u3"] = "ux", ["u5"] = "uu", ["u7"] = "u",
		["e1"] = "ef", ["e2"] = "ea", ["e3"] = "ex", ["e5"] = "ee", ["e7"] = "e",
		["oo1"] = "of", ["oo2"] = "or", ["oo3"] = "ox", ["oo5"] = "oo", ["oo7"] = "o",
		["o1"] = "oy", ["o2"] = "oir", ["o3"] = "oix", ["o5"] = "ooi", ["o7"] = "oi",
		["ng1"] = "'ngf", ["ng2"] = "'ngr", ["ng3"] = "'ngx", ["ng5"] = "'ngg", ["ng7"] = "'ng",
		["m1"] = "'mf", ["m2"] = "'mr", ["m3"] = "'mx", ["m5"] = "'mm", ["m7"] = "'m",
		-- vowel clusters, tones 1 2 3 5 7
		["ai1"] = "ay", ["ai2"] = "ae", ["ai3"] = "aix", ["ai5"] = "aai", ["ai7"] = "ai",
		["au1"] = "aw", ["au2"] = "ao", ["au3"] = "aux", ["au5"] = "aau", ["au7"] = "au",
		["ia1"] = "iaf", ["ia2"] = "iar", ["ia3"] = "iax", ["ia5"] = "iaa", ["ia7"] = "ia",
		["iau1"] = "iaw", ["iau2"] = "iao", ["iau3"] = "iaux", ["iau5"] = "iaau", ["iau7"] = "iau",
		["io1"] = "ioy", ["io2"] = "ioir", ["io3"] = "ioix", ["io5"] = "iooi", ["io7"] = "ioi",
		["iu1"] = "iw", ["iu2"] = "iuo", ["iu3"] = "iux", ["iu5"] = "iuu", ["iu7"] = "iu",
		["oa1"] = "oaf", ["oa2"] = "oar", ["oa3"] = "oax", ["oa5"] = "oaa", ["oa7"] = "oa",
		["oai1"] = "oay", ["oai2"] = "oae", ["oai3"] = "oaix", ["oai5"] = "oaai", ["oai7"] = "oai",
		["oe1"] = "oef", ["oe2"] = "oea", ["oe3"] = "oex", ["oe5"] = "oee", ["oe7"] = "oe",
		["ui1"] = "uy", ["ui2"] = "uie", ["ui3"] = "uix", ["ui5"] = "uii", ["ui7"] = "ui",
		-- nasal-ending finals that are listed whole instead of being decomposed
		["ian1"] = "iefn", ["ian2"] = "iern", ["ian3"] = "iexn", ["ian5"] = "ieen", ["ian7"] = "ien",
		["iong1"] = "iofng", ["iong2"] = "iorng", ["iong3"] = "ioxng", ["iong5"] = "ioong", ["iong7"] = "iong",
		-- checked finals on o / io that are listed whole, tones 4 and 8
		["op4"] = "ob", ["op8"] = "op",
		["ot4"] = "od", ["ot8"] = "ot",
		["ok4"] = "og", ["ok8"] = "ok",
		["iop4"] = "iob", ["iop8"] = "iop",
		["iot4"] = "iod", ["iot8"] = "iot",
		["iok4"] = "iog", ["iok8"] = "iok",
	}
	local numeral_to_digit = {["一"] = "1", ["二"] = "2", ["三"] = "3", ["四"] = "4", ["五"] = "5", ["六"] = "6", ["七"] = "7", ["八"] = "8"}
	text = gsub(text, "[一二三四五六七八]", numeral_to_digit)

	-- A bare "o" + tone key is looked up under "oo" + tone in the decomposed cases.
	local function widen_o(key)
		return (gsub(key, "^o([12357])$", "oo%1"))
	end

	-- Shared handling of checked finals. The stop is stripped, the remainder is
	-- looked up as a tone-7 key, and the stop is appended again (rewritten to
	-- b/d/g/q when `rewrite_stop` is set).
	local function checked(tone_digit, rewrite_stop)
		local stem = gsub(text, "[ptkh](ⁿ?)" .. tone_digit .. "$", "%1") .. "7"
		local stop = match(text, "([ptkh])ⁿ?" .. tone_digit .. "$")
		if rewrite_stop then
			stop = gsub(stop, "[ptkh]",{p = "b", t = "d", k = "g", h = "q"})
		end
		local lead = ""
		if find(stem, "ⁿ") then
			lead = "v"
			stem = widen_o((gsub(stem, "ⁿ", "")))
		end
		local spelled = spellings[stem]
		if spelled then
			return lead .. spelled .. stop
		end
	end

	-- Nasalised, unchecked: drop ⁿ and prefix "v".
	if find(text, "ⁿ[12357]$") then
		local spelled = spellings[widen_o((gsub(text, "ⁿ", "")))]
		if spelled then
			return "v" .. spelled
		end
		return
	end

	-- m / n / ng ending (other than the whole-listed ian / iong): look up the
	-- vowel part and put the nasal ending back afterwards.
	if find(text, ".[mn]g?[12357]$") and not (find(text, "^ian[12357]$") or find(text, "^iong[12357]$")) then
		local nasal = match(text, "([mn]g?)[12357]$")
		local spelled = spellings[widen_o((gsub(text, "[mn]g?([12357])$", "%1")))]
		if spelled then
			return spelled .. nasal
		end
		return
	end

	-- Checked tones; the o / io + p/t/k keys are excluded here and fall through
	-- to the direct lookup at the bottom.
	if find(text, "[ptkh]ⁿ?4$") and not find(text, "^i?o[ptk]4$") then
		return checked("4", true)
	end
	if find(text, "[ptkh]ⁿ?8$") and not find(text, "^i?o[ptk]8$") then
		return checked("8", false)
	end

	return spellings[text]
end
--- Check that the tone marks in a POJ string sit on the expected letter.
-- The input is decomposed, a fixed sequence of rewrites moves any tone mark
-- that is on the wrong letter of a vowel cluster, and the result is recomposed.
-- If that differs from the input, an error naming the suggested spelling is
-- raised.
-- @param text POJ string, or nil
-- @return `text` unchanged when it is already in the expected form; nil when
--         `text` is nil
-- @raise error with the suggested replacement when the input differs from it
function export.poj_check_invalid(text)
	if not text then
		return nil
	end

	-- A trailing "-" is appended so the word-final rules, which need a
	-- following delimiter, also apply to the last syllable; it is removed again
	-- before the comparison.
	local fixed = toNFD(text) .. "-"
	local tone = "[" .. poj_tone_marks .. "]"
	local tone_without_vline = gsub(tone, "̍", "")
	local page_name = mw.title.getCurrentTitle().text

	-- Each rule has four captures; the rewrite swaps the second and third.
	-- The rules are applied in this order.
	local rules = {
		"([oO])([ae])(" .. tone_without_vline .. ")([ⁿ%-/ ])",
		"([oO])(" .. tone .. ")([ae])([imnptkh][gh]?ⁿ?)",
		"([oO]a)(i)(" .. tone .. ")(h?ⁿ?)",
		"([aA])([iu])(" .. tone .. ")(h?ⁿ?)",
		"([iI])(" .. tone .. ")([aou])(u?[mnptkh]?g?ⁿ?)",
		"([iI]a)(u)(" .. tone .. ")(h?ⁿ?)",
		"([uU])(i)(" .. tone .. ")([hⁿ]?)",
		"([eE])(e)(" .. tone .. ")(h?ⁿ?)",
		"([eE])(re)(" .. tone_without_vline .. ")([%-/ ])",
	}
	for _, rule in ipairs(rules) do
		fixed = gsub(fixed, rule, "%1%3%2%4")
	end
	-- Disabled rule kept from the original:
	-- fixed = gsub(fixed, "([oO]" .. tone .. ")[ou·]", "%1͘")

	-- On pages whose title contains 子 or 仔 (except 明仔早), a non-initial
	-- syllable "á" is expected to be written as 仔.
	if title ~= nil or true then
		if find(page_name, "[子仔]") and page_name ~= "明仔早" then
			fixed = gsub(fixed, "%-" .. toNFD("á") .. "([%- /])", "-仔%1")
		end
	end

	fixed = toNFC(gsub(fixed, "-$", ""))
	if text ~= fixed then
		error("Invalid POJ input \"" .. text .. "\": please change it to \"" .. fixed .. "\"")
	end
	return text
end
--- Convert a POJ string to its Tâi-lô spelling.
-- Works on the decomposed form so tone marks can be matched and moved
-- separately from their base letters, then recomposes the result.
-- @param text POJ string, or a frame-like table whose `args[1]` is the string
-- @return the converted string in NFC
function export.poj_to_tl_conv(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local opt_tone = "[" .. poj_tone_marks .. "]?"
	local raised = {
		["e"] = "i", ["E"] = "I", ["o"] = "u", ["O"] = "U"
	}
	-- Replace a leading vowel capture by its raised counterpart.
	local function raise_first(vowel, rest)
		return raised[vowel] .. rest
	end
	-- Same, for a vowel captured between a prefix and a suffix.
	local function raise_middle(before, vowel, after)
		return before .. raised[vowel] .. after
	end

	local tl = toNFD((gsub(text, "#", "")))
	tl = gsub(tl, "仔", "á")
	-- A parenthesised syllable is written out three times, hyphen-joined.
	tl = gsub(tl, "%(([^%)]+)%)", "%1-%1-%1")
	-- ek / eng -> ik / ing (eng only for the first e of a hyphen-delimited part).
	tl = gsub(tl, "([eE])(̍?k)", raise_first)
	tl = gsub(tl, "^([^eE]*)([eE])(" .. opt_tone .. "ng)", raise_middle)
	tl = gsub(tl, "(-[^eE]*)([eE])(" .. opt_tone .. "ng)", raise_middle)
	-- oa / oe -> ua / ue.
	tl = gsub(tl, "([oO])(" .. opt_tone .. "[ae])", raise_first)
	-- Move the tone mark one letter to the right in u+a/e/i and e+re.
	tl = gsub(tl, "([uU])(" .. opt_tone .. ")([aei])", "%1%3%2")
	tl = gsub(tl, "([eE])(" .. opt_tone .. ")(re)", "%1%3%2")
	-- Dotted o -> oo, ⁿ -> nn (placed before a preceding h), ch -> ts.
	tl = gsub(tl, "([oO]" .. opt_tone .. ")͘", "%1o")
	tl = gsub(tl, "(h?)ⁿ", "nn%1")
	tl = gsub(tl, "[cC]h", {["ch"] = "ts", ["Ch"] = "Ts"})
	tl = gsub(tl, "̆", "̋")
	-- Put spaces around a slash that is directly followed by a non-space.
	tl = gsub(tl,'/([^ ])',' / %1')
	return toNFC(tl)
end
--- Check whether an initial + final pair is listed as valid for a location.
-- A syllable is accepted when its initial is in the shared initial list or in
-- the location's extra initials, AND its final is in the shared final list or
-- in the location's extra finals.
--
-- Locations without extension tables (e.g. "Zhao'an", which only has a sort
-- code) are checked against the shared lists alone instead of raising an
-- "attempt to index a nil value" error. Locations without a sort code (e.g.
-- "Klang") use the location name as the category sort key instead of raising
-- a nil-concatenation error.
-- @param initial syllable initial ("" for none)
-- @param final syllable final without tone
-- @param loc location name, e.g. "Xiamen"
-- @return a category wikilink string when the syllable is not listed, else nil
function export.poj_check_syllable(initial, final, loc)
	-- Initials accepted for every location.
	local validInitials = {
		["p"] = 1, ["ph"] = 1, ["m"] = 1, ["b"] = 1,
		["t"] = 1, ["th"] = 1, ["n"] = 1, ["l"] = 1,
		["ch"] = 1, ["s"] = 1,
		["k"] = 1, ["kh"] = 1, ["ng"] = 1, ["g"] = 1, ["h"] = 1, [""] = 1,
	}
	-- Additional initials per location.
	local moreValidInitials = { }
	moreValidInitials["Xiamen"] = { ["chh"] = 1, }
	moreValidInitials["Xiamen-d"] = { ["chh"] = 1, ["j"] = 1, }
	moreValidInitials["Tong'an"] = { ["chh"] = 1, }
	moreValidInitials["Quanzhou"] = { ["chh"] = 1, }
	moreValidInitials["Jinjiang"] = { ["chh"] = 1, }
	moreValidInitials["Nan'an"] = { ["chh"] = 1, }
	moreValidInitials["Hui'an"] = { ["chh"] = 1, }
	moreValidInitials["Yongchun"] = { ["chh"] = 1, }
	moreValidInitials["Zhangpu"] = { ["j"] = 1, }
	moreValidInitials["Changtai"] = { ["chh"] = 1, ["j"] = 1 }
	moreValidInitials["Zhangzhou"] = { ["chh"] = 1, ["j"] = 1 }
	moreValidInitials["Taipei"] = { ["chh"] = 1, }
	moreValidInitials["Kaohsiung"] = { ["chh"] = 1, ["j"] = 1, }
	moreValidInitials["Hongmaogang"] = { ["chh"] = 1, ["j"] = 1, }
	moreValidInitials["Dalinpu"] = { ["chh"] = 1, ["j"] = 1, }
	moreValidInitials["Tainan"] = { ["chh"] = 1, ["j"] = 1, }
	moreValidInitials["Lukang"] = { ["chh"] = 1, }
	moreValidInitials["Yilan"] = { ["chh"] = 1, ["j"] = 1, }
	moreValidInitials["Kinmen"] = { ["chh"] = 1, }
	moreValidInitials["Longyan"] = { ["chh"] = 1, }
	moreValidInitials["Singapore"] = { ["chh"] = 1, ["j"] = 1 }
	moreValidInitials["Penang"] = {
		["chh"] = 1,
		["f"] = 1, ["d"] = 1, ["j"] = 1, ["sh"] = 1,
		["r"] = 1, ["w"] = 1, ["y"] = 1,
	}
	moreValidInitials["Klang"] = { ["chh"] = 1, }
	moreValidInitials["Philippines"] = { ["chh"] = 1, }
	-- Finals accepted for every location.
	local validFinals = {
		["a"] = 1, ["ah"] = 1, ["ai"] = 1, ["aiⁿ"] = 1, ["ak"] = 1,
		["am"] = 1, ["an"] = 1, ["aⁿ"] = 1, ["ang"] = 1, ["ap"] = 1,
		["at"] = 1, ["au"] = 1, ["auh"] = 1, ["e"] = 1, ["eh"] = 1,
		["eng"] = 1, ["i"] = 1, ["ia"] = 1, ["iah"] = 1, ["iak"] = 1,
		["iam"] = 1, ["ian"] = 1, ["iaⁿ"] = 1, ["iang"] = 1, ["iap"] = 1,
		["iat"] = 1, ["iau"] = 1, ["iauⁿ"] = 1, ["ih"] = 1, ["im"] = 1,
		["in"] = 1, ["iⁿ"] = 1, ["io"] = 1, ["ioh"] = 1, ["iok"] = 1,
		["iong"] = 1, ["ip"] = 1, ["it"] = 1, ["iu"] = 1, ["m"] = 1,
		["ng"] = 1, ["o"] = 1, ["o͘"] = 1, ["oa"] = 1, ["oah"] = 1,
		["oai"] = 1, ["oan"] = 1, ["oaⁿ"] = 1, ["oat"] = 1,
		["oe"] = 1, ["oeh"] = 1, ["oh"] = 1, ["ok"] = 1, ["oⁿ"] = 1,
		["ong"] = 1, ["u"] = 1, ["uh"] = 1, ["ui"] = 1, ["uiⁿ"] = 1,
		["un"] = 1, ["ut"] = 1,
	}
	-- Additional finals per location.
	local moreValidFinals = { }
	moreValidFinals["Xiamen"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ehⁿ"] = 1,
		["ek"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1, ["ngh"] = 1,
		["oaih"] = 1, ["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["oehⁿ"] = 1, ["o͘h"] = 1, ["ohⁿ"] = 1,
		["uih"] = 1, ["uihⁿ"] = 1,
	}
	moreValidFinals["Xiamen-d"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ehⁿ"] = 1,
		["ek"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1, ["ngh"] = 1,
		["oaih"] = 1, ["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["oehⁿ"] = 1, ["o͘h"] = 1, ["ohⁿ"] = 1,
		["uih"] = 1, ["uihⁿ"] = 1,
	}
	moreValidFinals["Tong'an"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ek"] = 1,
		["er"] = 1, ["erh"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["ir"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1,
		["ngh"] = 1, ["oaih"] = 1, ["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["oang"] = 1, ["oehⁿ"] = 1,
		["o͘h"] = 1, ["ohⁿ"] = 1, ["uih"] = 1,
	}
	moreValidFinals["Quanzhou"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ehⁿ"] = 1, ["er"] = 1,
		["erh"] = 1, ["erm"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["ir"] = 1, ["irh"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1,
		["ngh"] = 1, ["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["oang"] = 1, ["o͘h"] = 1, ["ohⁿ"] = 1,
		["uih"] = 1, ["uihⁿ"] = 1,
	}
	moreValidFinals["Jinjiang"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["iahⁿ"] = 1,
		["iauh"] = 1, ["iauhⁿ"] = 1, ["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1,
		["mh"] = 1, ["ngh"] = 1, ["oaihⁿ"] = 1, ["oang"] = 1, ["o͘h"] = 1,
		["ohⁿ"] = 1, ["uih"] = 1, ["uihⁿ"] = 1,
		["erm"] = 1,
	}
	moreValidFinals["Nan'an"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["aihⁿ"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["er"] = 1, ["erh"] = 1,
		["erm"] = 1, ["ern"] = 1, ["ert"] = 1, ["erng"] = 1, ["erk"] = 1,
		["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["ir"] = 1, ["irh"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["iuhⁿ"] = 1, ["mh"] = 1,
		["ngh"] = 1, ["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["oang"] = 1, ["o͘h"] = 1, ["ohⁿ"] = 1,
		["uih"] = 1, ["uihⁿ"] = 1,
	}
	moreValidFinals["Hui'an"] = {
		["ahⁿ"] = 1, ["aihⁿ"] = 1, ["auhⁿ"] = 1, ["eⁿ"] = 1, ["ehⁿ"] = 1, ["er"] = 1, ["erh"] = 1,
		["ern"] = 1, ["ert"] = 1, ["em"] = 1, ["en"] = 1, ["ep"] = 1, ["et"] = 1, ["iahⁿ"] = 1,
		["iauh"] = 1, ["iauhⁿ"] = 1, ["ihⁿ"] = 1, ["ir"] = 1, ["irh"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1,
		["iuhⁿ"] = 1, ["mh"] = 1,["ngh"] = 1, ["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["oang"] = 1, ["o͘h"] = 1,
		["ohⁿ"] = 1, ["uih"] = 1, ["uihⁿ"] = 1,
	}
	moreValidFinals["Yongchun"] = {
		["auⁿ"] = 1, ["ek"] = 1, ["er"] = 1, ["erh"] = 1, ["erm"] = 1,
		["ern"] = 1, ["ert"] = 1, ["iahⁿ"] = 1, ["ihⁿ"] = 1, ["ir"] = 1,
		["iuh"] = 1, ["iuⁿ"] = 1, ["ngh"] = 1, ["oang"] = 1, ["o͘h"] = 1,
		["ohⁿ"] = 1, ["oaiⁿ"] = 1, ["uih"] = 1, ["uihⁿ"] = 1,
	}
	moreValidFinals["Zhangpu"] = {
		["ahⁿ"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ee"] = 1, ["eeh"] = 1,
		["ehⁿ"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1,
		["iauhⁿ"] = 1, ["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1,
		["mh"] = 1, ["ngh"] = 1, ["oaiⁿ"] = 1,
		["ohⁿ"] = 1, ["om"] = 1, ["op"] = 1,
	}
	moreValidFinals["Changtai"] = {
		["ahⁿ"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1,
		["ehⁿ"] = 1, ["ek"] = 1, ["eⁿ"] = 1,
		["iahⁿ"] = 1, ["iauhⁿ"] = 1, ["ihⁿ"] = 1, ["ioⁿ"] = 1, ["iuh"] = 1,
		["mh"] = 1, ["ngh"] = 1,
		["oaiⁿ"] = 1, ["oeⁿ"] = 1, ["ohⁿ"] = 1, ["om"] = 1,
	}
	moreValidFinals["Zhangzhou"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ee"] = 1,
		["eeh"] = 1, ["ehⁿ"] = 1, ["ek"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1,
		["iauh"] = 1, ["iauhⁿ"] = 1, ["ihⁿ"] = 1, ["ioⁿ"] = 1, ["iuh"] = 1,
		["mh"] = 1, ["ngh"] = 1, ["oaih"] = 1, ["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["o͘h"] = 1,
		["ohⁿ"] = 1, ["om"] = 1, ["op"] = 1,
	}
	moreValidFinals["Longyan"] = {
		["auⁿ"] = 1, ["ee"] = 1, ["ie"] = 1, ["iee"] = 1, ["oee"] = 1,
		["ieⁿ"] = 1, ["oeⁿ"] = 1, ["ioa"] = 1, ["ioaⁿ"] = 1,
		["ioⁿ"] = 1, ["ir"] = 1,
	}
	moreValidFinals["Taipei"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ehⁿ"] = 1,
		["ek"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1, ["ihⁿ"] = 1,
		["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1, ["ngh"] = 1, ["oaih"] = 1,
		["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["o͘h"] = 1, ["ohⁿ"] = 1, ["om"] = 1, ["op"] = 1,
		["uih"] = 1,
	}
	moreValidFinals["Kaohsiung"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ehⁿ"] = 1,
		["ek"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1, ["ngh"] = 1,
		["oaih"] = 1, ["oaiⁿ"] = 1, ["oaihⁿ"] = 1, ["oeⁿ"] = 1, ["o͘h"] = 1, ["ohⁿ"] = 1, ["om"] = 1,
		["op"] = 1,
	}
	-- NOTE(review): the original listed "oaiⁿ" twice here; the duplicate is
	-- dropped. Confirm whether a different final was intended.
	moreValidFinals["Hongmaogang"] = {
		["ahⁿ"] = 1, ["aihⁿ"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ehⁿ"] = 1,
		["ek"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1, ["ngh"] = 1,
		["oaiⁿ"] = 1, ["ohⁿ"] = 1, ["uih"] = 1,
	}
	moreValidFinals["Dalinpu"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["aihⁿ"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ehⁿ"] = 1,
		["ek"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["iuhⁿ"] = 1, ["mh"] = 1, ["ngh"] = 1,
		["oaih"] = 1, ["oaiⁿ"] = 1, ["oaihⁿ"] = 1, ["oahⁿ"] = 1, ["oak"] = 1, ["oeⁿ"] = 1,
		["o͘h"] = 1, ["ohⁿ"] = 1, ["uih"] = 1, ["uⁿ"] = 1,
	}
	moreValidFinals["Tainan"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ehⁿ"] = 1,
		["ek"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["iuh"] = 1, ["ioⁿ"] = 1, ["mh"] = 1, ["ngh"] = 1,
		["oaih"] = 1, ["oaiⁿ"] = 1, ["oaihⁿ"] = 1, ["oeⁿ"] = 1, ["ohⁿ"] = 1, ["om"] = 1,
		["uih"] = 1,
	}
	moreValidFinals["Lukang"] = {
		["ahⁿ"] = 1, ["auⁿ"] = 1, ["er"] = 1, ["erh"] = 1,
		["ek"] = 1, ["eⁿ"] = 1, ["iauhⁿ"] = 1, ["ir"] = 1,
		["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["o͘h"] = 1,
		["oaihⁿ"] = 1, ["oeⁿ"] = 1, ["ohⁿ"] = 1, ["erm"] = 1,
		["uih"] = 1, ["uiⁿ"] = 1,
	}
	moreValidFinals["Yilan"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["aihⁿ"] = 1, ["auⁿ"] = 1, ["auhⁿ"] = 1,
		["ehⁿ"] = 1, ["ek"] = 1, ["eⁿ"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1,
		["iauhⁿ"] = 1, ["ihⁿ"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1,
		["ngh"] = 1, ["o͘h"] = 1, ["oaiⁿ"] = 1, ["oaih"] = 1, ["oaihⁿ"] = 1,
		["oeⁿ"] = 1, ["ohⁿ"] = 1, ["om"] = 1, ["op"] = 1, ["uiⁿ"] = 1,
	}
	moreValidFinals["Kinmen"] = {
		["ahⁿ"] = 1, ["aih"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ek"] = 1,
		["er"] = 1, ["erh"] = 1, ["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1,
		["ihⁿ"] = 1, ["ir"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1,
		["ngh"] = 1, ["oaih"] = 1, ["oaihⁿ"] = 1, ["oaiⁿ"] = 1, ["oang"] = 1, ["oehⁿ"] = 1,
		["o͘h"] = 1, ["ohⁿ"] = 1, ["uih"] = 1,
	}
	moreValidFinals["Singapore"] = {
		["ahⁿ"] = 1, ["auhⁿ"] = 1, ["auⁿ"] = 1, ["ee"] = 1, ["ehⁿ"] = 1,
		["ek"] = 1, ["eⁿ"] = 1, ["er"] = 1, ["erh"] = 1, ["ern"] = 1,
		["iahⁿ"] = 1, ["iauh"] = 1, ["iauhⁿ"] = 1, ["ihⁿ"] = 1, ["ioⁿ"] = 1,
		["ir"] = 1, ["iuh"] = 1, ["iuⁿ"] = 1, ["mh"] = 1, ["ngh"] = 1, ["oaiⁿ"] = 1,
		["oang"] = 1, ["oehⁿ"] = 1, ["oeⁿ"] = 1, ["ohⁿ"] = 1, ["uih"] = 1,
	}
	moreValidFinals["Penang"] = {
		["aih"] = 1, ["ee"] = 1, ["eeh"] = 1, ["eek"] = 1, ["eeng"] = 1,
		["ei"] = 1, ["ek"] = 1, ["em"] = 1, ["en"] = 1, ["eⁿ"] = 1,
		["eoi"] = 1, ["er"] = 1, ["erh"] = 1, ["ern"] = 1, ["ert"] = 1,
		["et"] = 1, ["ik"] = 1, ["ing"] = 1, ["ioⁿ"] = 1, ["oaiⁿ"] = 1, ["oang"] = 1,
		["o͘h"] = 1, ["oi"] = 1, ["oiⁿ"] = 1, ["om"] = 1, ["on"] = 1,
		["ot"] = 1, ["ou"] = 1, ["uk"] = 1, ["um"] = 1, ["ung"] = 1,
		["y"] = 1, ["yn"] = 1,
	}
	moreValidFinals["Klang"] = {
		["auⁿ"] = 1, ["ek"] = 1, ["eⁿ"] = 1, ["er"] = 1, ["erh"] = 1,
		["erm"] = 1, ["ern"] = 1, ["iahⁿ"] = 1, ["ihⁿ"] = 1,
		["ir"] = 1, ["iuⁿ"] = 1, ["ngh"] = 1, ["oang"] = 1,
		["ohⁿ"] = 1, ["uih"] = 1,
	}
	moreValidFinals["Philippines"] = moreValidFinals["Jinjiang"]
	-- Category sort key per location.
	local loc_code = {
		["Xiamen"] = "x",
		["Xiamen-d"] = "a",
		["Tong'an"] = "d",
		["Quanzhou"] = "q",
		["Jinjiang"] = "c",
		["Nan'an"] = "n",
		["Hui'an"] = "h",
		["Zhangpu"] = "u",
		["Changtai"] = "o",
		["Zhangzhou"] = "z",
		["Zhao'an"] = "Z",
		["Taipei"] = "t",
		["Kaohsiung"] = "k",
		["Hongmaogang"] = "H",
		["Dalinpu"] = "D",
		["Tainan"] = "n",
		["Kinmen"] = "j",
		["Longyan"] = "l",
		["Lukang"] = "L",
		["Singapore"] = "s",
		["Penang"] = "p",
		["Philippines"] = "f",
		["Yilan"] = "y",
		["Yongchun"] = "Y",
	}

	-- Fall back to empty extension tables so a location that is missing from
	-- either table is validated against the shared lists only.
	local extraInitials = moreValidInitials[loc] or {}
	local extraFinals = moreValidFinals[loc] or {}
	local initialOk = validInitials[initial] or extraInitials[initial]
	local finalOk = validFinals[final] or extraFinals[final]
	if not (initialOk and finalOk) then
		--error("The syllable " .. initial .. "+" .. final .. " does not appear to be a valid " .. loc .. " POJ syllable.")
		-- Locations without a dedicated sort code are sorted by their name.
		local sort_key = loc_code[loc] or tostring(loc)
		return "[[Category:Hokkien terms needing pronunciation attention|" .. sort_key .. "]]"
	end
	return nil
end
--- Build the wikitext for the Hokkien part of a pronunciation box.
-- The input is one or more POJ readings separated by "/". A reading may be
-- prefixed with a comma-separated list of location codes and a colon, e.g.
-- "xm,qz:..." (codes are the keys of `location_list`). For each reading the
-- POJ, Tâi-lô, Phofsit Daibuun (when convertible) and per-location IPA lines
-- are emitted.
-- @param text POJ string, or a frame-like table whose `args[1]` is the string
--        and whose `args[2]`, when absent or empty, adds one extra "*" of
--        list nesting to every emitted line
-- @return the concatenated wikitext
-- @raise error from `export.poj_check_invalid` for badly marked POJ, and for
--        location codes that are aliases or unknown
function export.generate_all(text)
	local nan_pronunc
	if type(text) == "table" then
		text, nan_pronunc = text.args[1], text.args[2]
	end
	-- Location code -> internal location name.
	local location_list = {
		["ax"] = "Anxi",
		["ct"] = "Changtai",
		["ha"] = "Hui'an",
		["hc"] = "Hsinchu",
		["jj"] = "Jinjiang",
		["kh"] = "Kaohsiung",
		["kl"] = "Klang",
		["km"] = "Kinmen",
		["ly"] = "Longyan",
		["lk"] = "Lukang",
		["md"] = "Medan",
		["mg"] = "Magong",
		["ml"] = "Mainland",
		["na"] = "Nan'an",
		["ph"] = "Philippines",
		["pn"] = "Penang",
		["px"] = "Pingxi",
		["qz"] = "Quanzhou",
		["qzd"] = "Quanzhou-d",
		["sg"] = "Singapore",
		["sghmg"] = "Hongmaogang",
		["sgdlp"] = "Dalinpu",
		["sx"] = "Sanxia",
		["ta"] = "Tong'an",
		["tc"] = "Taichung",
		["tn"] = "Tainan",
		["tp"] = "Taipei",
		["ts"] = "Tamsui",
		["tt"] = "Taitung",
		["wh"] = "Wanhua",
		["wq"] = "Wuqi",
		["xm"] = "Xiamen",
		["xmd"] = "Xiamen-d",
		["yc"] = "Yongchun",
		["yl"] = "Yilan",
		["za"] = "Zhao'an",
		["zp"] = "Zhangpu",
		["zz"] = "Zhangzhou",
		["zzd"] = "Zhangzhou-d",
		["tw"] = "Taiwan",
		["twt"] = "Taiwan-t",
		["twk"] = "Taiwan-k",
		["twv"] = "Taiwan-v",
		["twvt"] = "Taiwan-vt",
		["twvk"] = "Taiwan-vk",
		["twd"] = "Taiwan-d",
		["twdt"] = "Taiwan-dt",
		["twdk"] = "Taiwan-dk",
		["twr"] = "Taiwan-r",
		["twrt"] = "Taiwan-rt",
		["twrk"] = "Taiwan-rk",
		["twq"] = "Taiwan-Q",
		["twz"] = "Taiwan-Z",
	}
	-- Codes that are rejected with a "maybe you meant" hint.
	-- NOTE(review): the hint for "cj" is "qj", which is not a key of
	-- location_list above — confirm the intended target code.
	local location_alias = {
		["xz"] = "hc", ["sj"] = "hc", ["st"] = "hc",
		["kx"] = "kh",
		["gm"] = "km", ["jm"] = "km", ["qm"] = "km",
		["ln"] = "ly",["lg"] = "lk",
		["mk"] = "mg",
		["cj"] = "qj",
		["ss"] = "sx", ["sk"] = "sx",
		["tz"] = "tc", ["tj"] = "tc",
		["tl"] = "tn",
		["em"] = "xm", ["am"] = "xm", ["hm"] = "xm",
		["il"] = "yl",
		["lc"] = "zz",
		["cc"] = "zz (Zhangzhou) or qz (Quanzhou)",
		["cz"] = "zz (Zhangzhou) or qz (Quanzhou)"
	}
	-- Internal location name -> displayed (italic, linked) label.
	local location_link = {
		["Anxi"] = "''[[w:Anxi County|Anxi]]''",
		["Changtai"] = "''[[w:Changtai District|Changtai]]''",
		["Hongmaogang"] = "''[[w:zh:紅毛港 (高雄市)|Hongmaogang]]''",
		["Dalinpu"] = "''[[w:zh:大林蒲|Dalinpu]]''",
		["Hsinchu"] = "''[[w:Hsinchu|Hsinchu]]''",
		["Hui'an"] = "''[[w:Hui'an County|Hui'an]]''",
		["Jinjiang"] = "''[[w:Jinjiang, Fujian|Jinjiang]]''",
		["Kaohsiung"] = "''[[w:Kaohsiung|Kaohsiung]]''",
		["Kinmen"] = "''[[w:Kinmen|Kinmen]]''",
		["Klang"] = "''[[w:Klang (city)|Klang]]''",
		["Longyan"] = "''[[w:Longyan dialect|Longyan]]''",
		["Lukang"] = "''[[w:Lukang|Lukang]]''",
		["Magong"] = "''[[w:Magong|Magong]]''",
		["Medan"] = "''[[w:Medan Hokkien|Medan]]''",
		["Mainland"] = "''[[w:Amoy dialect|Xiamen]]'', ''[[w:Quanzhou dialect|Quanzhou]]'', ''[[w:Zhangzhou dialect|Zhangzhou]]''",
		["Nan'an"] = "''[[w:Nan'an, Fujian|Nan'an]]''",
		["Penang"] = "''[[w:Penang Hokkien|Penang]]''",
		["Philippines"] = "''[[w:Philippine Hokkien|Philippines]]''",
		["Pingxi"] = "''[[w:Pingxi District|Pingxi]]''",
		["Quanzhou"] = "''[[w:Quanzhou dialect|Quanzhou]]''",
		["Quanzhou-d"] = "''dated in [[w:Quanzhou dialect|Quanzhou]]''",
		["Sanxia"] = "''[[w:Sanxia|Sanxia]]''",
		["Singapore"] = "''[[w:Singaporean Hokkien|Singapore]]''",
		["Taichung"] = "''[[w:Taichung|Taichung]]''",
		["Tainan"] = "''[[w:Tainan|Tainan]]''",
		["Taipei"] = "''[[w:Taipei|Taipei]]''",
		["Tamsui"] = "''[[w:Tamsui District|Tamsui]]''",
		["Taitung"] = "''[[w:Taitung City|Taitung]]''",
		["Tong'an"] = "''[[w:Tong'an District|Tong'an]]''",
		["Wanhua"] = "''[[w:Wanhua District|Wanhua]]''",
		["Wuqi"] = "''[[w:Wuqi District|Wuqi]]''",
		["Xiamen"] = "''[[w:Amoy dialect|Xiamen]]''",
		["Xiamen-d"] = "''dated in [[w:Amoy dialect|Xiamen]]''",
		["Yilan"] = "''[[w:Yilan|Yilan]]''",
		["Yongchun"] = "''[[w:Yongchun County|Yongchun]]''",
		["Zhao'an"] = "''[[w:Zhao'an County|Zhao'an]]''",
		["Zhangpu"] = "''[[w:Zhangpu County|Zhangpu]]''",
		["Zhangzhou"] = "''[[w:Zhangzhou dialect|Zhangzhou]]''",
		["Zhangzhou-d"] = "''dated in [[w:Zhangzhou dialect|Zhangzhou]]''",
		["Taiwan"] = "''General [[w:Taiwanese Hokkien|Taiwanese]]''",
		["Taiwan-t"] = "''General [[w:Taiwanese Hokkien|Taiwanese]]''",
		["Taiwan-k"] = "''General [[w:Taiwanese Hokkien|Taiwanese]]''",
		["Taiwan-v"] = "''variant in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-vt"] = "''variant in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-vk"] = "''variant in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-d"] = "''dated in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-dt"] = "''dated in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-dk"] = "''dated in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-r"] = "''rare in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-rt"] = "''rare in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-rk"] = "''rare in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-Q"] = "''[[w:Quanzhou dialect|Quanzhou]]-like accent in [[w:Taiwanese Hokkien|Taiwan]]''",
		["Taiwan-Z"] = "''[[w:Zhangzhou dialect|Zhangzhou]]-like accent in [[w:Taiwanese Hokkien|Taiwan]]''",
	}
	-- Locations for which an IPA line is generated.
	local IPA_available = {
		["Xiamen"] = true,
		["Tong'an"] = true,
		["Quanzhou"] = true,
		["Jinjiang"] = true,
		["Nan'an"] = true,
		["Hui'an"] = true,
		["Yongchun"] = true,
		["Zhangzhou"] = true,
		["Changtai"] = true,
		["Zhangpu"] = true,
		["Longyan"] = true,
		["Taipei"] = true,
		["Kaohsiung"] = true,
		["Hongmaogang"] = true,
		["Dalinpu"] = true,
		["Tainan"] = true,
		["Kinmen"] = true,
		["Lukang"] = true,
		["Yilan"] = true,
		["Singapore"] = true,
		["Penang"] = true,
		["Klang"] = true,
		["Philippines"] = true,
	}
	-- Umbrella labels that stand for several IPA dialects. "Xiamen-d" is
	-- listed so that it reaches generate_IPA with its "-d" suffix intact.
	local IPA_expansion = {
		['Taiwan'] = { 'Taipei', 'Kaohsiung' },
		['Xiamen-d'] = { 'Xiamen-d' },
		['Mainland'] = { 'Xiamen', 'Quanzhou', 'Zhangzhou' }
	}
	-- One extra level of list nesting when the second argument is absent/empty.
	local ast = (not nan_pronunc or nan_pronunc == "") and "*" or ""
	local formatting = {
		LV_two = {
			leading = "\n" .. ast .. "* <small>(\'\'[[w:Hokkien|Hokkien]]\'\'",
			trailing = ")</small>",
		},
		POJ = {
			leading = "\n" .. ast .. "** <small>''[[w:Pe̍h-ōe-jī|Pe̍h-ōe-jī]]''</small>: <span style=\"font-family: Consolas, monospace;\">",
			trailing = "</span>",
		},
		TL = {
			leading = "\n" .. ast .. "** <small>''[[w:Tâi-lô|Tâi-lô]]''</small>: <span style=\"font-family: Consolas, monospace;\">",
			trailing = "</span>",
		},
		PSDB = {
			leading = "\n" .. ast .. "** <small>''[[w:Phofsit Daibuun|Phofsit Daibuun]]''</small>: <span style=\"font-family: Consolas, monospace;\">",
			trailing = "</span>",
		},
		IPA = {
			leading = "\n" .. ast .. "** <small>[[วิกิพจนานุกรม:สัทอักษรสากล|สัทอักษรสากล]] (",
			trailing = ")</small>: ",
		}
	}
	-- Locations used for a reading that carries no location prefix.
	local IPA_available_list = { "Xiamen", "Quanzhou", "Zhangzhou", "Taiwan" }

	export.poj_check_invalid(text)

	-- Split the input into readings and resolve each reading's location codes.
	local all_readings, locations, output_text = {}, {}, {}
	for i, reading in ipairs(split(text, "/", true)) do
		if find(reading, ":") then
			local reading_part = split(reading, ":", true)
			locations[i] = {}
			all_readings[i] = reading_part[2]
			for location_abbrev in gsplit(reading_part[1], ",", true) do
				if location_alias[location_abbrev] then
					error("Invalid Hokkien location code: " .. location_abbrev .. ", maybe you meant: " .. location_alias[location_abbrev])
				end
				if not location_list[location_abbrev] then
					error("The region label '" .. location_abbrev .. "' cannot be found. Please see [[Template:zh-pron#Parameters]].")
				end
				table.insert(locations[i], location_list[location_abbrev])
			end
		else
			locations[i] = IPA_available_list
			all_readings[i] = reading
		end
	end

	if not find(text, ":") then
		-- No location prefixes anywhere: one shared block for the whole input.
		table.insert(output_text, formatting.LV_two.leading .. formatting.LV_two.trailing ..
			formatting.POJ.leading .. export.poj_display(text) .. formatting.POJ.trailing ..
			formatting.TL.leading .. export.poj_to_tl_conv(text) .. formatting.TL.trailing)
		-- Inputs containing "--" get no PSDB or IPA lines.
		if not find(text, "%-%-") then
			local psdb_hash = export.poj_to_psdb_conv(text)
			if not find(psdb_hash, "error") then
				table.insert(output_text, formatting.PSDB.leading .. psdb_hash .. formatting.PSDB.trailing)
			end
			for _, IPA_location in ipairs(IPA_available_list) do
				IPA_location = IPA_location == "Taiwan" and { "Taipei", "Kaohsiung" } or { IPA_location }
				for _, location in ipairs(IPA_location) do
					table.insert(output_text, formatting.IPA.leading .. location_link[location] .. formatting.IPA.trailing)
					local reading_IPA_hash = {}
					for poj_reading in gsplit(text, "/", true) do
						table.insert(reading_IPA_hash, export.generate_IPA(poj_reading, location))
					end
					table.insert(output_text, table.concat(reading_IPA_hash, ", "))
					-- Several unlabelled readings: flag the entry, sorted by count.
					if #reading_IPA_hash > 1 then
						table.insert(output_text, string.format("[[Category:Hokkien terms needing pronunciation attention|%d]]", #reading_IPA_hash))
					end
				end
			end
		end
	else
		-- At least one reading is location-labelled: one block per reading.
		for i, poj_reading in ipairs(all_readings) do
			table.insert(output_text, formatting.LV_two.leading)
			local location_hash = {}
			for _, location_name in ipairs(locations[i]) do
				table.insert(location_hash, location_link[location_name])
			end
			table.insert(output_text, ": " .. table.concat(location_hash, ", ") .. formatting.LV_two.trailing)
			table.insert(output_text, formatting.POJ.leading .. export.poj_display(poj_reading) .. formatting.POJ.trailing ..
				formatting.TL.leading .. export.poj_to_tl_conv(poj_reading) .. formatting.TL.trailing)
			if not find(poj_reading, "%-%-") then
				local psdb_hash = export.poj_to_psdb_conv(poj_reading)
				if not find(psdb_hash, "error") then
					table.insert(output_text, formatting.PSDB.leading .. psdb_hash .. formatting.PSDB.trailing)
				end
				-- IPA string -> list of location labels sharing it, plus the
				-- IPA strings in first-seen order so the emitted lines follow
				-- the order of the location list rather than hash order.
				local IPA_readings, IPA_order = {}, {}
				for _, location_name in ipairs(locations[i]) do
					-- Collapse Taiwan-* labels: a t/k suffix selects one city,
					-- anything else matching the pattern becomes plain "Taiwan".
					location_name = gsub(location_name, '^Taiwan%-[vdr]?([tk]?)$', { ['t'] = 'Taipei', ['k'] = 'Kaohsiung', [''] = 'Taiwan' })
					-- The parentheses keep gsub's second return value (the
					-- match count) out of the list.
					local targets = IPA_expansion[location_name] or { (gsub(location_name, '%-d$', '')) }
					for _, location in ipairs(targets) do
						local base_location = gsub(location, '%-d$', '')
						if IPA_available[base_location] then
							local poj_to_ipa = export.generate_IPA(poj_reading, location)
							local links = IPA_readings[poj_to_ipa]
							if links then
								table.insert(links, location_link[base_location])
							else
								IPA_readings[poj_to_ipa] = { location_link[base_location] }
								table.insert(IPA_order, poj_to_ipa)
							end
						end
					end
				end
				for _, reading in ipairs(IPA_order) do
					table.insert(output_text, formatting.IPA.leading .. table.concat(IPA_readings[reading], ", ") ..
						formatting.IPA.trailing .. reading)
				end
			end
		end
	end
	return table.concat(output_text)
end
--- Convert a POJ reading into an IPA transcription for one Hokkien variety.
--
-- The text is split into words (at "," / "..." boundaries) and syllables (at
-- "-" or space). Each syllable is split into initial, final and tone; the
-- initial and final are looked up in the IPA tables (location-specific
-- entries such as "Kaohsiung-o" take precedence over the generic ones), and
-- the citation tone value is emitted, followed by "-<sandhi value>" for
-- non-final syllables whose sandhi value differs from the citation value.
--
-- @param text      POJ string, or a frame-like table; in the latter case the
--                  text is read from `text.args[1]` and the location from
--                  `text.args["loc"]`.
-- @param location  Key into the tone tables below (e.g. "Xiamen", "Taipei").
--                  A trailing "-d" is stripped, except for "Xiamen-d".
-- @return          `<span class="IPA">/…/</span>` with tone digits turned into
--                  superscripts, followed by whatever
--                  `export.poj_check_syllable` returned for each syllable.
-- Raises an error for syllables that cannot be parsed, for inconsistent
-- nasality, for unknown initials/finals, and for tones (or sandhi tones) that
-- have no value in the chosen location.
function export.generate_IPA(text, location)
	-- (Wyang) I can't seem to find an example where 'triple' is used.. The code is below:
	--if match(p[i], "%(") then
	--	p[i] = gsub(p[i], "[%(%)]", "")
	--	triple[i] = true
	--end
	--if triple[i] then
	--	if tone[i] == "一" then
	--		ipa[i] = (initial[i] .. final[i] .. "一至七 " .. initial[i] .. final[i] .. "一至七 " .. initial[i] .. final[i] .. (i == #tone and "一" or "一至七"))
	--	elseif tone[i] == "二" then
	--		ipa[i] = (initial[i] .. final[i] .. "二至一 " .. initial[i] .. final[i] .. "二至一 " .. initial[i] .. final[i] .. (i == #tone and "二" or "二至一"))
	--	elseif tone[i] == "三" then
	--		ipa[i] = (initial[i] .. final[i] .. "三至二 " .. initial[i] .. final[i] .. "三至二 " .. initial[i] .. final[i] .. (i == #tone and "三" or "三至二"))
	--	elseif tone[i] == "四A" then
	--		ipa[i] = (initial[i] .. final[i] .. "四至八 " .. initial[i] .. final[i] .. "四至八 " .. initial[i] .. final[i] .. (i == #tone and "四" or "四至八"))
	--	elseif tone[i] == "四B" then
	--		final[i] = gsub(final[i], "ʔ", "(ʔ)")
	--		ipa[i] = (initial[i] .. final[i] .. "四至二 " .. initial[i] .. final[i] .. "四至二 " .. initial[i] .. final[i] .. (i == #tone and "四" or "四至二"))
	--	elseif tone[i] == "五" then
	--		if loc == "Quanzhou" or loc == "Taipei" then
	--			ipa[i] = (initial[i] .. final[i] .. "五 " .. initial[i] .. final[i] .. "五至三 " .. initial[i] .. final[i] .. (i == #tone and "五" or "五至三"))
	--		else
	--			ipa[i] = (initial[i] .. final[i] .. "五 " .. initial[i] .. final[i] .. "五至七 " .. initial[i] .. final[i] .. (i == #tone and "五" or "五至七"))
	--		end
	--	elseif tone[i] == "七" then
	--		ipa[i] = (initial[i] .. final[i] .. "七至一 " .. initial[i] .. final[i] .. "七至三 " .. initial[i] .. final[i] .. (i == #tone and "七" or "七至三"))
	--	elseif tone[i] == "八A" then
	--		ipa[i] = (initial[i] .. final[i] .. "八至四 " .. initial[i] .. final[i] .. "八至四 " .. initial[i] .. final[i] .. (i == #tone and "八" or "八至四"))
	--	elseif tone[i] == "八B" then
	--		final[i] = gsub(final[i], "ʔ", "(ʔ)")
	--		ipa[i] = (initial[i] .. final[i] .. "八至五 " .. initial[i] .. final[i] .. "八至三 " .. initial[i] .. final[i] .. (i == #tone and "八" or "八至三"))
	--	end
	--end
	if type(text) == "table" then text, location = text.args[1], text.args["loc"] end

	-- Tone class keyed by "<combining tone mark><stop coda>" as captured by get_tone.
	local tone_from_mark = {
		[""] = "1",
		["́"] = "2",
		["̀"] = "3",
		["p"] = "4A", ["t"] = "4A", ["k"] = "4A",
		["h"] = "4B",
		["̂"] = "5",
		["̌"] = "6",
		["̄"] = "7",
		["̍p"] = "8A", ["̍t"] = "8A", ["̍k"] = "8A",
		["̍h"] = "8B",
		["̋"] = "9",
		["̆"] = "9",
	}

	-- POJ initial -> IPA. "<location>-<initial>" keys override the generic entry.
	local initial_ipa = {
		["p"] = "p", ["ph"] = "pʰ", ["m"] = "m", ["b"] = "b", ["f"] = "f",
		["t"] = "t", ["th"] = "tʰ", ["n"] = "n", ["l"] = "l", ["d"] = "d",
		["ch"] = "t͡s", ["chh"] = "t͡sʰ", ["j"] = "d͡z", ["s"] = "s", ["sh"] = "ʃ",
		["k"] = "k", ["kh"] = "kʰ", ["ng"] = "ŋ", ["g"] = "ɡ",
		["h"] = "h", ["r"] = "ɹ", ["w"] = "w", ["y"] = "j", [""] = "",
		["Kaohsiung-j"] = "z",
	}

	-- POJ final -> IPA. "<location>-<final>" keys override the generic entry.
	local final_ipa = {
		["a"] = "a", ["ah"] = "aʔ", ["ahⁿ"] = "ãʔ",
		["ai"] = "ai", ["aih"] = "aiʔ", ["aiⁿ"] = "ãi", ["aihⁿ"] = "ãiʔ",
		["ak"] = "ak̚", ["am"] = "am", ["an"] = "an", ["aⁿ"] = "ã",
		["ang"] = "aŋ", ["ap"] = "ap̚", ["at"] = "at̚",
		["au"] = "au", ["auh"] = "auʔ", ["auhⁿ"] = "ãuʔ", ["auⁿ"] = "ãu",
		["e"] = "e", ["ee"] = "ɛ", ["eeh"] = "ɛʔ",
		["eek"] = "ɛk̚", ["eeng"] = "ɛŋ",
		["eh"] = "eʔ", ["ehⁿ"] = "ẽʔ", ["ei"] = "ei", ["ek"] = "iɪk̚",
		["em"] = "ɛm", ["en"] = "ɛn", ["eⁿ"] = "ẽ",
		["eng"] = "iɪŋ", ["eoi"] = "ɵy", ["er"] = "ə",
		["erh"] = "əʔ", ["erm"] = "əm", ["ern"] = "ən",
		["ert"] = "ət̚", ["erk"] = "ək̚", ["et"] = "ɛt̚", ["ep"] = "ep̚",
		["eu"] = "eu", ["euⁿ"] = "ẽu",
		["i"] = "i", ["ia"] = "ia", ["iah"] = "iaʔ",
		["iahⁿ"] = "iãʔ", ["iak"] = "iak̚",
		["iam"] = "iam", ["ian"] = "iɛn", ["iaⁿ"] = "iã",
		["iang"] = "iaŋ", ["iap"] = "iap̚", ["iat"] = "iɛt̚",
		["iau"] = "iau", ["iauh"] = "iauʔ", ["iauhⁿ"] = "iãuʔ", ["iauⁿ"] = "iãu",
		["ie"] = "ie", ["iee"] = "iɛ", ["ieⁿ"] = "iɛ̃",
		["ih"] = "iʔ", ["ihⁿ"] = "ĩʔ",
		["im"] = "im", ["in"] = "in", ["iⁿ"] = "ĩ", ["ing"] = "iŋ",
		["io"] = "io", ["ioa"] = "iua", ["ioaⁿ"] = "iuã", ["ioh"] = "ioʔ", ["io͘h"] = "iɔʔ", ["io͘"] = "iɔ",
		["iop"] = "iop", ["iok"] = "iɔk̚", ["ioⁿ"] = "iɔ̃", ["iom"] = "iom", ["iong"] = "iɔŋ",
		["ip"] = "ip̚", ["ir"] = "ɯ", ["irh"] = "ɯʔ", ["irn"] = "ən", ["it"] = "it̚",
		["iu"] = "iu", ["iua"] = "iua", ["iuh"] = "iuʔ", ["iuⁿ"] = "iũ", ["iuaⁿ"] = "iuã", ["iuhⁿ"] = "iũʔ",
		-- "ie" and "iee" used to be repeated here with the same values as above;
		-- repeated keys in a constructor have no defined assignment order, so
		-- only the entry that was not a duplicate is kept.
		["ieeⁿ"] = "iɛ̃",
		["m"] = "m̩", ["mh"] = "m̩ʔ",
		["ng"] = "ŋ̍", ["ngh"] = "ŋ̍ʔ",
		["o"] = "o", ["o͘"] = "ɔ", ["oa"] = "ua", ["oah"] = "uaʔ", ["oahⁿ"] = "uãʔ", ["oai"] = "uai",
		["oaih"] = "uaiʔ", ["oaihⁿ"] = "uãiʔ", ["oaiⁿ"] = "uãi", ["oan"] = "uan", ["oaⁿ"] = "uã",
		["oang"] = "uaŋ", ["oat"] = "uat̚", ["oak"] = "uak̚",
		["oe"] = "ue", ["oeh"] = "ueʔ", ["oehⁿ"] = "uẽʔ", ["oeⁿ"] = "uẽ", ["oee"] = "uɛ",
		["oh"] = "oʔ", ["o͘h"] = "ɔʔ", ["ohⁿ"] = "ɔ̃ʔ", ["oi"] = "ɔi", ["oiⁿ"] = "ɔ̃i",
		["ok"] = "ɔk̚", ["om"] = "ɔm", ["oⁿ"] = "ɔ̃", ["ong"] = "ɔŋ", ["op"] = "ɔp̚",
		["ot"] = "ɔt̚", ["ou"] = "ou",
		["u"] = "u", ["uⁿ"] = "ũ", ["uh"] = "uʔ", ["uhⁿ"] = "ũʔ",
		["ui"] = "ui", ["uiⁿ"] = "uĩ", ["uih"] = "uiʔ", ["uihⁿ"] = "uĩʔ",
		["uk"] = "ok̚", ["um"] = "om",
		["un"] = "un", ["ung"] = "oŋ", ["ut"] = "ut̚",
		["y"] = "y", ["yn"] = "yn",
		["Zhangpu-e"] = "iei",
		["Zhangpu-eⁿ"] = "ɛ̃", ["Zhangzhou-eⁿ"] = "ɛ̃", ["Penang-eⁿ"] = "ɛ̃",
		["Zhangpu-ehⁿ"] = "ɛ̃ʔ", ["Zhangzhou-ehⁿ"] = "ɛ̃ʔ",
		["Kaohsiung-o"] = "ɤ", ["Kaohsiung-io"] = "iɤ",
		["Kaohsiung-oh"] = "ɤʔ", ["Kaohsiung-ioh"] = "iɤʔ",
		["Tainan-o"] = "ɤ", ["Tainan-io"] = "iɤ",
		["Tainan-oh"] = "ɤʔ", ["Tainan-ioh"] = "iɤʔ",
		["Zhangpu-oe"] = "uɛ",
		["Zhangpu-oeⁿ"] = "uɛ̃",
		["Zhangpu-oeh"] = "uɛʔ",
		["Zhangpu-eng"] = "ɛŋ", ["Zhangpu-ek"] = "ɛk̚",
		["Singapore-eng"] = "eŋ", ["Singapore-ek"] = "ek̚",
		["Penang-eng"] = "eŋ", ["Penang-ek"] = "ek̚", ["Penang-ik"] = "ik̚",
		["Zhangpu-o͘"] = "ɔu", ["Zhangpu-oⁿ"] = "ɔ̃u",
		["Changtai-eng"] = "eŋ", ["Changtai-ek"] = "ek̚",
		["Changtai-o"] = "ɔ",
		["Changtai-io"] = "iɔ",
		["Changtai-oh"] = "ɔʔ",
		["Changtai-ioh"] = "iɔʔ",
		["Changtai-o͘"] = "eu", ["Changtai-o͘ⁿ"] = "ẽu",
		["Hui'an-eng"] = "eŋ", ["Hui'an-ek"] = "ek̚",
		["Hui'an-en"] = "en", ["Hui'an-et"] = "et̚",
		["Hui'an-em"] = "em",
		["Longyan-ong"] = "oŋ", ["Longyan-ok"] = "ok̚",
		["Longyan-iong"] = "ioŋ", ["Longyan-iok"] = "iok̚",
		["Longyan-oⁿ"] = "õ", ["Longyan-ioⁿ"] = "iõ",
		["Longyan-oeⁿ"] = "uɛ̃",
		["Longyan-ir"] = "z̩",
		["Lukang-ir"] = "ɨ",
		["Klang-eng"] = "eŋ", ["Klang-ek"] = "ek̚",
	}

	-- Per-location map: citation tone -> tone class it changes to in sandhi
	-- position. Keys ending in "d" apply when the next syllable is the
	-- diminutive 仔.
	local tone_sandhi = { }
	-- (Wyang) I'm not sure about the 'Xd' ones, when tone X is followed by the diminutive 仔.
	tone_sandhi["Xiamen"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "7", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3",
	}
	tone_sandhi["Xiamen-d"] = tone_sandhi["Xiamen"]
	tone_sandhi["Tong'an"] = { -- 2 and 4 are special cases
		["1"] = "7", ["3"] = "10",
		["5"] = "9", ["7"] = "9", ["8A"] = "11", ["8B"] = "11",
	}
	tone_sandhi["Quanzhou"] = {
		["1"] = "1", ["2"] = "5", ["3"] = "2", ["4A"] = "8A", ["4B"] = "4B",
		["5"] = "6", ["6"] = "6", ["7"] = "6", ["8A"] = "S", ["8B"] = "S",
	}
	tone_sandhi["Jinjiang"] = {
		["1"] = "1", ["2"] = "5", ["3"] = "2", ["4A"] = "8A", ["4B"] = "4B",
		["5"] = "S1", ["6"] = "S1", ["7"] = "S1", ["8A"] = "S2", ["8B"] = "S2",
	}
	tone_sandhi["Nan'an"] = {
		["1"] = "1", ["2"] = "5", ["3"] = "2", ["4A"] = "4A", ["4B"] = "4B",
		["5"] = "6", ["6"] = "6", ["7"] = "6", ["8A"] = "S", ["8B"] = "S",
	}
	tone_sandhi["Hui'an"] = {
		["1"] = "1", ["2"] = "5", ["3"] = "2", ["4A"] = "5", ["4B"] = "4B",
		["5"] = "6", ["6"] = "6", ["7"] = "6", ["8A"] = "S", ["8B"] = "S",
	}
	tone_sandhi["Yongchun"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "S1", ["4A"] = "8B", ["4B"] = "8B",
		["5"] = "7", ["7"] = "3", ["8A"] = "S2", ["8B"] = "S2",
	}
	tone_sandhi["Zhangzhou"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "S", ["4B"] = "2",
		["5"] = "7", ["7"] = "3", ["8A"] = "3", ["8B"] = "3",
		["4Bd"] = "1", ["8Bd"] = "7",
	}
	tone_sandhi["Changtai"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "7", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3",
	}
	tone_sandhi["Zhangpu"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "4A", ["4B"] = "4B",
		["5"] = "7", ["7"] = "3", ["8A"] = "7", ["8B"] = "7",
	}
	tone_sandhi["Longyan"] = {
		["1"] = "1",
		["5"] = "5", ["6"] = "5", ["8A"] = "2", ["8B"] = "2",
	}
	tone_sandhi["Taipei"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "3", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3", ["9"] = "9",
		["3d"] = "1", ["4Bd"] = "1", ["5d"] = "7", ["7d"] = "7", ["8Bd"] = "7",
	}
	tone_sandhi["Kaohsiung"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "7", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3", ["9"] = "9",
		["3d"] = "1", ["4Bd"] = "1", ["5d"] = "7", ["7d"] = "7", ["8Bd"] = "7",
	}
	tone_sandhi["Hongmaogang"] = {
		["1"] = "1", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "3", ["6"] = "3", ["7"] = "3", ["8A"] = "S", ["8B"] = "S",
	}
	tone_sandhi["Dalinpu"] = {
		["1"] = "1", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "3", ["6"] = "3", ["7"] = "3", ["8A"] = "S", ["8B"] = "S",
	}
	tone_sandhi["Tainan"] = {
		["1"] = "7", ["2"] = "S1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "7", ["7"] = "S2", ["8A"] = "S3", ["8B"] = "S3", ["9"] = "9",
	}
	tone_sandhi["Lukang"] = {
		["1"] = "1", ["2"] = "8A", ["3"] = "S1", ["4A"] = "4A", ["4B"] = "S1",
		["5"] = "S2", ["6"] = "S2", ["7"] = "S2", ["8A"] = "S3", ["8B"] = "S3", ["9"] = "9",
	}
	tone_sandhi["Yilan"] = {
		["1"] = "7", ["2"] = "S", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "7", ["7"] = "3", ["8A"] = "4A", ["8B"] = "4B", ["9"] = "9",
	}
	tone_sandhi["Kinmen"] = { -- 3 and 4B are special cases
		["1"] = "7", ["2"] = "5", ["4A"] = "8A",
		["5"] = "3", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3"
	}
	tone_sandhi["Singapore"] = { --Xiamen/Zhangzhou-like
		["1"] = "7", ["2"] = "5", ["3"] = "2", ["4A"] = "8As", ["4B"] = "2",
		["5"] = "3", ["7"] = "3", ["8A"] = "3", ["8B"] = "3"
	}
	tone_sandhi["Penang"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "1", ["4A"] = "8A", ["4B"] = "8B",
		["5"] = "7", ["6"] = "6", ["7"] = "3", ["8A"] = "4A", ["8B"] = "4B", ["9"] = "9"
	}
	tone_sandhi["Klang"] = {
		["1"] = "1", ["2"] = "S2", ["3"] = "S3", ["4A"] = "S3", ["4B"] = "S3",
		["5"] = "S1", ["7"] = "S1", ["8A"] = "8B", ["8B"] = "8B",
	}
	tone_sandhi["Philippines"] = tone_sandhi["Jinjiang"]

	-- Per-location map: tone class -> tone value digits.
	local tone_value = { }
	tone_value["Xiamen"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "4", ["8B"] = "4",
	}
	tone_value["Xiamen-d"] = tone_value["Xiamen"]
	tone_value["Tong'an"] = {
		["1"] = "44", ["2"] = "31", ["3"] = "112", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "53", ["8B"] = "53",
		["9"] = "11", ["10"] = "42", ["11"] = "1", ["12"] = "4" --sandhi-only tones
	}
	tone_value["Quanzhou"] = {
		["1"] = "33", ["2"] = "554", ["3"] = "41", ["4A"] = "5", ["4B"] = "5",
		["5"] = "24", ["6"] = "22", ["7"] = "41", ["8A"] = "24", ["8B"] = "24",
		["S"] = "2", --sandhi-only
	}
	tone_value["Jinjiang"] = {
		["1"] = "33", ["2"] = "554", ["3"] = "41", ["4A"] = "5", ["4B"] = "5",
		["5"] = "24", ["6"] = "33", ["7"] = "41", ["8A"] = "24", ["8B"] = "24",
		["S1"] = "22", ["S2"] = "2", --sandhi-only
	}
	tone_value["Nan'an"] = {
		["1"] = "33", ["2"] = "554", ["3"] = "31", ["4A"] = "5", ["4B"] = "5",
		["5"] = "24", ["6"] = "22", ["7"] = "31", ["8A"] = "23", ["8B"] = "23",
		["S"] = "2", --sandhi-only
	}
	tone_value["Hui'an"] = {
		["1"] = "33", ["2"] = "54", ["3"] = "21", ["4A"] = "4", ["4B"] = "4",
		["5"] = "24", ["6"] = "22", ["7"] = "21", ["8A"] = "23", ["8B"] = "23",
		["S"] = "2", --sandhi-only
	}
	tone_value["Yongchun"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "24", ["8B"] = "4",
		["S1"] = "53", ["S2"] = "21", --sandhi-only
	}
	tone_value["Zhangzhou"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "13", ["7"] = "22", ["8A"] = "121", ["8B"] = "121",
		["S"] = "5", --sandhi-only
	}
	tone_value["Changtai"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "3", ["8B"] = "3",
	}
	tone_value["Zhangpu"] = {
		["1"] = "55", ["2"] = "53", ["3"] = "11", ["4A"] = "32", ["4B"] = "32",
		["5"] = "213", ["7"] = "33", ["8A"] = "14", ["8B"] = "14",
	}
	tone_value["Longyan"] = {
		["1"] = "334", ["2"] = "21", ["3"] = "213", ["4A"] = "5", ["4B"] = "5",
		["5"] = "11", ["6"] = "53", ["7"] = "55", ["8A"] = "32", ["8B"] = "32",
		["S"] = "34", --sandhi-only
	}
	tone_value["Taipei"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "11", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "33", ["8A"] = "4", ["8B"] = "4", ["9"] = "35"
	}
	tone_value["Kaohsiung"] = {
		["1"] = "44", ["2"] = "41", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "23", ["7"] = "33", ["8A"] = "4", ["8B"] = "4", ["9"] = "35"
	}
	tone_value["Hongmaogang"] = {
		["1"] = "33", ["2"] = "51", ["3"] = "11", ["4A"] = "31", ["4B"] = "31",
		["5"] = "13", ["6"] = "31", ["7"] = "11", ["8A"] = "5", ["8B"] = "5",
		["S"] = "1", --sandhi-only
	}
	tone_value["Dalinpu"] = {
		["1"] = "33", ["2"] = "51", ["3"] = "11", ["4A"] = "31", ["4B"] = "31",
		["5"] = "13", ["6"] = "31", ["7"] = "11", ["8A"] = "5", ["8B"] = "5",
		["S"] = "1", --sandhi-only
	}
	tone_value["Tainan"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "33", ["8A"] = "4", ["8B"] = "4", ["9"] = "35",
		["S1"] = "55", ["S2"] = "11", ["S3"] = "1", --sandhi-only
	}
	tone_value["Lukang"] = {
		["1"] = "33", ["2"] = "55", ["3"] = "31", ["4A"] = "5", ["4B"] = "5",
		["5"] = "24", ["6"] = "33", ["7"] = "31", ["8A"] = "35", ["8B"] = "35", ["9"] = "35",
		["S1"] = "53", ["S2"] = "22", ["S3"] = "2", --sandhi-only
	}
	tone_value["Yilan"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "21", ["4A"] = "2", ["4B"] = "2",
		["5"] = "24", ["7"] = "33", ["8A"] = "5", ["8B"] = "5", ["9"] = "35",
		["S"] = "55", --sandhi-only
	}
	tone_value["Kinmen"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "12", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "54", ["8B"] = "54"
	}
	tone_value["Singapore"] = { --Xiamen/Zhangzhou-like
		["1"] = "44", ["2"] = "42", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "43", ["8B"] = "43", ["8As"] = "4"
	}
	tone_value["Penang"] = {
		["1"] = "33", ["2"] = "445", ["3"] = "21", ["4A"] = "3", ["4B"] = "3",
		["5"] = "23", ["6"] = "55", ["7"] = "21", ["8A"] = "4", ["8B"] = "4", ["9"] = "5"
	}
	tone_value["Klang"] = {
		["1"] = "33", ["2"] = "53", ["3"] = "31", ["4A"] = "53", ["4B"] = "53",
		["5"] = "24", ["7"] = "31", ["8A"] = "3", ["8B"] = "3",
		["S1"] = "22", ["S2"] = "34", ["S3"] = "54", --sandhi-only
	}
	tone_value["Philippines"] = tone_value["Jinjiang"]

	-- Sandhi tones that depend on the FOLLOWING syllable's tone (`post`).
	-- Returns nil when there is no following syllable or no special rule, in
	-- which case the caller falls back to the tone_sandhi table.
	local function get_sandhi_from_post(location, current, post)
		if post then
			if location == "Tong'an" then
				if current == "2" then
					if find(post, "^[15]$") or find(post, "^8[AB]$") then
						return "7"
					else
						return "5"
					end
				elseif find(current, "^4[AB]$") then
					if post == "2" then
						return "10"
					else
						return "12"
					end
				end
			elseif location == "Kinmen" then
				if current == "3" or current == "4B" then
					if find(post, "^[12]$") or find(post, "^4[AB]$") then
						return "1"
					else
						return "2"
					end
				end
			elseif location == "Longyan" then
				if current == "2" then
					if post == "2" or post == "5" then
						return "3"
					else
						return "2"
					end
				elseif current == "3" then
					if post == "2" or post == "5" then
						return "3"
					else
						return "2"
					end
				elseif find(current, "^4[AB]$") then
					if post == "2" or post == "5" then
						return current
					else
						return "S"
					end
				elseif current == "7" then
					if post == "2" or post == "5" then
						return "7"
					else
						return "1"
					end
				end
			end
		end
	end

	-- Reduce a syllable rime (initial already removed, tone marks still
	-- present) to its tone class via tone_from_mark. When the mark/coda
	-- combination is not in the table the callback returns nil and gsub
	-- leaves the matched text unchanged.
	local function get_tone(text)
		local tone = gsub(text, "^[^" .. poj_tone_marks .. "ptkh]+([" .. poj_tone_marks .. "]?)[^" .. poj_tone_marks .. "ptkh]*([ptkh]?)ⁿ?", function(tone_symbol, coda)
			return tone_from_mark[tone_symbol .. coda] end)
		return tone
	end

	-- Add the nasalisation mark to a final that follows a nasal initial.
	-- Syllabic m/ng finals are returned unchanged.
	local function nasalize(final)
		if find(final, "^mh?$") or find(final, "^ngh?$") then return final end
		if find(final, "o͘h?$") then
			final = gsub(final, "͘", "")
		elseif find(final, "oh?$") then
			error("Invalid POJ: nasal initial cannot go with -" .. final)
		elseif find(final, "eeh?$") then
			final = gsub(final, "ee", "e")
		end
		return final .. "ⁿ"
	end

	local formatting = {
		leading = "<span class=\"IPA\">/",
		trailing = "/</span>"
	}
	local tone_superscript = { ["1"] = "¹", ["2"] = "²", ["3"] = "³", ["4"] = "⁴", ["5"] = "⁵", ["-"] = "⁻" }
	local word_result = {}
	local attention = {}

	if location ~= 'Xiamen-d' then
		location = gsub(location, '%-d$', '')
	end

	-- Normalise separators: syllables end up joined by "-", words by " ".
	text = gsub(text, " ", "-")
	text = gsub(text, ",", "#")
	text = gsub(text, "%-?%.%.%.%-?", "#")
	text = gsub(text, "#$", "")
	text = gsub(text, "#%-?", " ")
	text = toNFD(lower(text))

	for word in gsplit(text, " ", true) do
		local initial, final, tone, diminutive, sandhi, result = {}, {}, {}, {}, {}, {}
		local syllables = split(word, "-", true)
		syllables.length = #syllables

		for index, syllable in ipairs(syllables) do
			if syllable == "仔" then
				syllable = "a".."́"
				diminutive[index] = true
			end
			local original_syllable = syllable
			syllable = gsub(syllable, "[" .. poj_tone_marks .. "]", "")

			if not find(syllable, "[aeiouy]") then
				-- No vowel letter: the final must be a syllabic nasal.
				final[index] = match(syllable, "^[ckmnpst]?h?h?(ngh?)$") or match(syllable, "^h?(mh?)$")
				if not final[index] then
					-- Previously this fell through to len(nil) and failed
					-- with an unrelated-looking message.
					error("Cannot recognise " .. original_syllable .. ".")
				end
				initial[index] = syllable ~= final[index] and sub(syllable, 1, len(syllable) - len(final[index])) or "" --original code: "ʔ"
			else
				initial[index] = match(syllable, "^[bcdfgjklmnprstwy]?[gh]?h?")
				final[index] = sub(syllable, len(initial[index]) + 1, -1)
			end

			tone[index] = get_tone(sub(original_syllable, len(initial[index]) + 1, -1))

			local nasal_initial = match(initial[index], "^[mn]g?$")
			if nasal_initial then
				if find(final[index], "ⁿ") then
					error("Too much nasality in POJ. " .. original_syllable .. " should be " .. gsub(original_syllable, "ⁿ", ""))
				end
				if location ~= "Penang" and location ~= "Philippines" and location ~= "Singapore" then --exception for Penang, Philippines and Singapore
					final[index] = nasalize(final[index])
				end
			end

			if location == "Longyan" and find(final[index], "h$") then
				final[index] = gsub(final[index], "h", "")
			end

			local nasal_final = match(final[index], "^[mn]") or match(final[index], "ⁿ")
			local not_nasal_initial = match(initial[index], "^[blg]$")
			if ((nasal_initial and not nasal_final) or (not_nasal_initial and nasal_final)) and (location ~= "Penang" and location ~= "Philippines" and location ~= "Singapore") then --exception for Penang, Philippines and Singapore
				error("POJ error: nasality of initial and final not synchronized.")
			end

			table.insert(attention, export.poj_check_syllable(initial[index], final[index], location))

			-- An unknown initial used to leave nil here and only fail later,
			-- at concatenation time, without naming the culprit.
			local poj_initial = initial[index]
			initial[index] = initial_ipa[location .. "-" .. poj_initial] or initial_ipa[poj_initial]
				or error("Cannot recognise initial " .. poj_initial .. " in " .. original_syllable .. ".")
			final[index] = final_ipa[location .. "-" .. final[index]] or final_ipa[final[index]]
				or error("Cannot recognise " .. final[index] .. ".")

			-- A glottal stop is marked optional in non-final syllables.
			if index < syllables.length then
				final[index] = gsub(final[index], "ʔ", "(ʔ)")
			end
		end

		for index = 1, syllables.length do
			local citation_tone = tone[index]
			local citation_value = tone_value[location][citation_tone]
			if not citation_value then
				error("Tone " .. citation_tone .. " has no value for " .. location .. ".")
			end
			sandhi[index] = citation_value

			local sandhi_hash = get_sandhi_from_post(location, citation_tone, tone[index+1])
				or tone_sandhi[location][citation_tone..(diminutive[index+1] and "d" or "")]
				or tone_sandhi[location][citation_tone]

			if index < syllables.length then
				local sandhi_value = sandhi_hash and tone_value[location][sandhi_hash]
				if not sandhi_value then
					error("Tone sandhi of tone " .. citation_tone .. " is not defined for " .. location .. ".")
				end
				if sandhi_value ~= citation_value then
					sandhi[index] = citation_value .. "-" .. sandhi_value
				end
			end

			table.insert(result, initial[index] .. final[index] .. sandhi[index])
		end
		table.insert(word_result, table.concat(result, " "))
	end

	-- Only the IPA span is passed through the superscript substitution; the
	-- attention markup is appended afterwards, untouched.
	return (gsub(formatting.leading .. table.concat(word_result, " ") ..
		formatting.trailing, "[12345%-]", tone_superscript)) .. table.concat(attention)
end
--- Convert POJ text to PSDB romanisation.
--
-- `text` may hold several readings separated by "/"; each reading may hold
-- several space-separated parts, each a "-"-joined run of syllables. "#"
-- boundary markers are ignored, "--" introduces a neutral-tone syllable
-- (output prefixed with "~"), 仔 is treated as "á", and a syllable wrapped in
-- parentheses is written out three times (triplication).
--
-- @param text  POJ string, or a frame-like table whose `args[1]` is the text.
-- @return      The converted readings joined with ", ". Any syllable whose
--              initial or final cannot be converted contributes the literal
--              text "error", which callers search for to suppress the output.
function export.poj_to_psdb_conv(text)
	if type(text) == "table" then text = text.args[1] end

	-- Combining tone marks are replaced by CJK placeholder characters so
	-- that the tone can be detected and then stripped together with the
	-- precomposed accented letters. The acute mark is included so that the
	-- 貳 placeholder, which the tone detection below already understands, is
	-- actually produced for decomposed input.
	local mark_placeholder = {
		["̍"] = "捌", ["̂"] = "伍", ["̄"] = "柒", ["̀"] = "叁", ["́"] = "貳",
	}
	local mark_class = "[" .. "̍" .. "̂" .. "̄" .. "̀" .. "́" .. "]"

	-- Accented letter / placeholder -> bare letter (or nothing).
	local strip_tone = {["á"] = "a", ["í"] = "i", ["ú"] = "u", ["é"] = "e", ["ó"] = "o", ["ḿ"] = "m", ["ń"] = "n", ["貳"] = "", ["à"] = "a", ["ì"] = "i", ["ù"] = "u", ["è"] = "e", ["ò"] = "o", ["ǹ"] = "n", ["叁"] = "", ["â"] = "a", ["î"] = "i", ["û"] = "u", ["ê"] = "e", ["ô"] = "o", ["伍"] = "", ["ā"] = "a", ["ī"] = "i", ["ū"] = "u", ["ē"] = "e", ["ō"] = "o", ["柒"] = "", ["捌"] = ""}

	local readings = split(lower(text), "/", true)
	for i = 1, #readings do
		-- will ignore # boundary marker
		local parts = split(gsub(readings[i], "#", ""), " ", true)
		for j = 1, #parts do
			local initial = {}
			local final = {}
			local psdb = {}
			local tone = {}
			local tonesandhi = {}
			local neutral = {}
			parts[j] = gsub(parts[j], "%-%-", "-0")
			local p = split(parts[j], "-", true)
			local ar = {}
			local triple = {}

			-- One PSDB syllable for syllable `k` carrying tone `tone_name`.
			-- Both halves fall back to "error", so every call site reports
			-- unconvertible input the same way instead of raising.
			local function psdb_syllable(k, tone_name)
				return (psdb_initial[initial[k]] or "error")
					.. (psdb_final(final[k] .. tone_name) or "error")
			end

			for i, item in ipairs(p) do
				if find(item, "仔") then
					item = gsub(item, "仔", "á")
					ar[i] = true
				end
				if find(item, "%(") then
					item = gsub(item, "[%(%)]", "")
					triple[i] = true
				end
				if find(item, "^0") then
					item = gsub(item, "0", "")
					neutral[i] = true
				end

				-- Normalise the various spellings of o͘ to "oo".
				item = gsub(item, "ớ", "óo")
				item = gsub(item, "ờ", "òo")
				item = gsub(item, "ơ̂", "ôo")
				item = gsub(item, "ơ̄", "ōo")
				item = gsub(item, "ơ̍", "o̍o")
				item = gsub(item, "ơ", "oo")
				item = gsub(item, "͘", "o")
				item = gsub(item, mark_class, mark_placeholder)

				-- Stop-final syllables are tone 4 or 8; otherwise the tone
				-- follows the diacritic, defaulting to tone 1.
				if find(item, "[aeiou][捌]?[ptkh]") or find(item, "[^aeiou][mn][捌]?g?[ptkh]") then
					if find(item, "捌") then
						tone[i] = "八"
					else
						tone[i] = "四"
					end
				elseif find(item, "[áíúéóḿń貳]") then
					tone[i] = "二"
				elseif find(item, "[àìùèòǹ叁]") then
					tone[i] = "三"
				elseif find(item, "[âîûêô伍]") then
					tone[i] = "五"
				elseif find(item, "[āīūēō柒]") then
					tone[i] = "七"
				else
					tone[i] = "一"
				end
				item = gsub(item, "[áíúéóḿńàìùèòǹâîûêôāīūēō貳叁伍柒捌]", strip_tone)

				-- Split the toneless syllable into initial and final.
				if sub(item,1,3) == "chh" then
					initial[i] = "chh"
					final[i] = sub(item,4,-1)
				elseif sub(item,1,1) == "m" then
					if sub(item,2,2) == "h" then
						initial[i] = ""
						final[i] = "mh"
					elseif sub(item,2,2) == "" then
						initial[i] = ""
						final[i] = "m"
					else
						initial[i] = "m"
						final[i] = sub(item,2,-1)
					end
				elseif sub(item,1,2) == "ng" then
					if sub(item,3,3) == "h" then
						initial[i] = ""
						final[i] = "ngh"
					elseif sub(item,3,3) == "" then
						initial[i] = ""
						final[i] = "ng"
					else
						initial[i] = "ng"
						final[i] = sub(item,3,-1)
					end
				elseif find(item, "^[ptkc]h") then
					initial[i] = sub(item,1,2)
					final[i] = sub(item,3,-1)
				elseif find(item, "^[npbtkgjshl]") then
					initial[i] = sub(item,1,1)
					final[i] = sub(item,2,-1)
				else
					initial[i] = ""
					final[i] = item
				end

				-- ch/chh/s before i use the "chi"/"chhi"/"si" initial keys.
				if find(initial[i], "^chh?$") or initial[i] == "s" then
					if find(final[i], "^i") then
						initial[i] = initial[i] .. "i"
					end
				end
				p[i] = item
			end

			for i = 1, #p do
				if tone[i] == "一" then
					tonesandhi[i] = "七"
				elseif tone[i] == "二" then
					tonesandhi[i] = "一"
				elseif tone[i] == "三" then
					tonesandhi[i] = ar[i+1] and "一" or "二"
				elseif tone[i] == "四" then
					tonesandhi[i] = "八"
				elseif tone[i] == "五" then
					tonesandhi[i] = "七"
				elseif tone[i] == "七" then
					tonesandhi[i] = ar[i+1] and "七" or "三"
				elseif tone[i] == "八" then
					tonesandhi[i] = "四"
				end

				-- The last syllable keeps its citation tone; others take sandhi.
				local surface_tone = i == #tone and tone[i] or tonesandhi[i]

				if triple[i] then
					local tonesandhi1 = nil
					if tone[i] == "五" then
						tonesandhi1 = "五"
					elseif tone[i] == "七" then
						tonesandhi1 = "一"
					end
					psdb[i] = psdb_syllable(i, tonesandhi1 or tonesandhi[i])
						.. psdb_syllable(i, tonesandhi[i])
						.. psdb_syllable(i, surface_tone)
				else
					psdb[i] = psdb_syllable(i, surface_tone)
				end
				if neutral[i] then
					psdb[i] = "~" .. psdb_syllable(i, "七")
				end --psdb[i] = p[i]
			end
			parts[j] = table.concat(psdb, "")
		end

		-- Tidy up the apostrophes emitted by the initial/final tables.
		readings[i] = table.concat(parts, " ")
		readings[i] = gsub(readings[i], "'+", "'")
		readings[i] = gsub(readings[i], "^'", "")
		readings[i] = gsub(readings[i], "([^a-z])'", "%1")
		readings[i] = gsub(readings[i], "([^pbdtkqgczsjlmnhaeiou])'([aeiouwy])", "%1%2")
		readings[i] = gsub(readings[i], "([^aeiouwy])'([ptkbdqmn])", "%1%2")
		readings[i] = gsub(readings[i], "([^aeiouwyn])'g", "%1g")
		readings[i] = gsub(readings[i], "([^aeiouwypcz])'h", "%1h")
	end
	return (gsub(table.concat(readings, ", "),'/([^ ])',' / %1'))
end
--- Format POJ text for display.
-- Readings are separated by "/" in the input and joined with " / " in the
-- output. In each reading a leading "label:" prefix and "#" markers are
-- dropped and a parenthesised syllable is written out three times; a reading
-- that contains no space is wrapped in a link to its own Hokkien section.
-- @param text  POJ string, or a frame-like table whose `args[1]` is the text.
-- @return      The display string.
function export.poj_display(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local pieces = split((gsub(text, "仔", "á")), "/", true)
	for idx, piece in ipairs(pieces) do
		piece = gsub(piece, "[%a,]+:", "")
		piece = gsub(piece, "#", "")
		piece = gsub(piece, "%(([^%)]+)%)", "%1-%1-%1")
		if not find(piece, " ") then
			piece = "[[" .. piece .. "#Hokkien|" .. piece .. "]]"
		end
		pieces[idx] = piece
	end

	return table.concat(pieces, " / ")
end
--- Check a Peng'im syllable against the inventory of initials and finals.
-- A final is accepted when it is in the common set or in the extra set
-- listed for `loc`.
-- @param initial  Peng'im initial ("" for none).
-- @param final    Peng'im final.
-- @param loc      Location name; used to pick the extra finals and as the
--                 category sort key.
-- @return         nil when the syllable is accepted, otherwise a
--                 "Teochew terms needing pronunciation attention" category
--                 link sorted under `loc`.
function export.pengim_check_syllable(initial, final, loc)
	local validInitials = {
		["b"] = 1, ["p"] = 1, ["m"] = 1, ["bh"] = 1,
		["d"] = 1, ["t"] = 1, ["n"] = 1, ["l"] = 1,
		["g"] = 1, ["k"] = 1, ["ng"] = 1, ["gh"] = 1, ["h"] = 1,
		["z"] = 1, ["c"] = 1, ["s"] = 1, ["r"] = 1, [""] = 1,
	}
	local validFinals = {
		["a"] = 1, ["i"] = 1, ["u"] = 1, ["ê"] = 1, ["o"] = 1,
		["ah"] = 1, ["ih"] = 1, ["uh"] = 1, ["êh"] = 1, ["oh"] = 1,
		["oi"] = 1, ["ai"] = 1, ["ou"] = 1, ["au"] = 1,
		["ia"] = 1, ["iu"] = 1, ["uê"] = 1, ["ua"] = 1, ["ui"] = 1, ["uai"] = 1,
		["oih"] = 1, ["auh"] = 1, ["uêh"] = 1, ["uah"] = 1,
		["ang"] = 1, ["êng"] = 1, ["ong"] = 1,
		["iang"] = 1, ["iong"] = 1, ["ung"] = 1, ["uang"] = 1,
		["ag"] = 1, ["êg"] = 1, ["og"] = 1, ["iag"] = 1,
		["iog"] = 1, ["ug"] = 1, ["uag"] = 1,
		["an"] = 1, ["in"] = 1, ["ên"] = 1,
		["oun"] = 1, ["ain"] = 1, ["aun"] = 1,
		["iun"] = 1, ["ian"] = 1, ["uên"] = 1, ["uin"] = 1, ["uan"] = 1, ["uain"] = 1,
		["ng"] = 1, ["m"] = 1,
	}
	-- Finals accepted only for particular locations.
	local moreValidFinals = {
		["Chaozhou"] = {
			["e"] = 1, ["iê"] = 1, ["iou"] = 1, ["iêh"] = 1,
			["eng"] = 1, ["am"] = 1, ["ing"] = 1, ["iêng"] = 1,
			["im"] = 1, ["iêm"] = 1, ["uêng"] = 1, ["uam"] = 1,
			["eg"] = 1, ["ab"] = 1, ["ig"] = 1, ["iêg"] = 1,
			["ib"] = 1, ["iêb"] = 1, ["uêg"] = 1, ["uab"] = 1,
			["oin"] = 1, ["iên"] = 1,
		},
		["Shantou"] = {
			["e"] = 1, ["io"] = 1, ["iau"] = 1, ["ioh"] = 1,
			["eng"] = 1, ["am"] = 1, ["ing"] = 1,
			["im"] = 1, ["iam"] = 1, ["uam"] = 1,
			["eg"] = 1, ["ab"] = 1, ["ig"] = 1,
			["ib"] = 1, ["iab"] = 1, ["uab"] = 1,
			["oin"] = 1, ["ion"] = 1,
		},
		["Chenghai"] = {
			["e"] = 1, ["iê"] = 1, ["iou"] = 1, ["iêh"] = 1,
			["eng"] = 1, ["ing"] = 1,
			["eg"] = 1, ["ig"] = 1,
			["oin"] = 1, ["iên"] = 1,
		},
		["Raoping"] = {
			["e"] = 1, ["io"] = 1, ["iau"] = 1, ["ioh"] = 1,
			["eng"] = 1, ["am"] = 1, ["ing"] = 1,
			["im"] = 1, ["iam"] = 1, ["uam"] = 1,
			["eg"] = 1, ["ab"] = 1, ["ig"] = 1,
			["ib"] = 1, ["iab"] = 1, ["uab"] = 1,
			["oin"] = 1, ["ion"] = 1,
		},
		["Jieyang"] = {
			["e"] = 1, ["io"] = 1, ["iau"] = 1, ["ioh"] = 1,
			["am"] = 1, ["im"] = 1, ["iam"] = 1, ["uêng"] = 1,
			["uam"] = 1, ["ab"] = 1, ["ib"] = 1, ["iab"] = 1,
			["uêg"] = 1, ["uab"] = 1, ["ion"] = 1,
		},
		["Chaoyang"] = {
			["io"] = 1, ["iau"] = 1, ["ioh"] = 1,
			["am"] = 1, ["ing"] = 1, ["im"] = 1, ["iam"] = 1,
			["uêng"] = 1, ["uam"] = 1, ["ab"] = 1, ["ig"] = 1,
			["ib"] = 1, ["iab"] = 1, ["uêg"] = 1, ["uab"] = 1,
			["ion"] = 1,
		},
	}

	-- A location without an entry above simply has no extra finals; indexing
	-- the missing entry directly used to raise "attempt to index a nil value"
	-- whenever the final was not in the common set.
	local extraFinals = moreValidFinals[loc] or {}
	if validInitials[initial] and (validFinals[final] or extraFinals[final]) then
		return nil
	end
	-- `loc or ""` keeps the concatenation from failing when no location is given.
	return "[[Category:Teochew terms needing pronunciation attention|" .. (loc or "") .. "]]"
end
-- Peng'im letter pairs that export.pengim_to_ipa_conv rewrites before its
-- character-by-character pass. Each key is used as a gsub pattern and each
-- value as the replacement. The table is walked with pairs(), so the order in
-- which the entries are applied is not specified.
local pengim_to_ipa_two_letters_above = {
["gh"] = "ɡ", ["bh"] = "β", ["ng"] = "ŋ",
["ao"] = "au",
}
-- Single-character Peng'im -> IPA mapping, looked up once per character by
-- export.pengim_to_ipa_conv. Characters without an entry are left as they are.
local pengim_to_ipa_one_letter = {
	--initials
	["m"] = "m", ["n"] = "n",
	["b"] = "p", ["d"] = "t", ["g"] = "k",
	["p"] = "pʰ", ["t"] = "tʰ", ["k"] = "kʰ",
	["s"] = "s", ["h"] = "h",
	["r"] = "d͡z",
	["z"] = "t͡s",
	["c"] = "t͡sʰ",
	["l"] = "l",
	--vowels
	["a"] = "a",
	["ê"] = "e",
	["e"] = "ɯ",
	["i"] = "i",
	["o"] = "o",
	["u"] = "u",
	--tones (citation value, then sandhi value after "⁻")
	["1"] = "³³⁻²³",
	-- This key used to be listed twice ("⁵²⁻³⁵", then "⁵²⁻²¹"). Lua leaves
	-- the assignment order of repeated constructor keys undefined, so only
	-- the entry that took effect in practice (the later one) is kept.
	["2"] = "⁵²⁻²¹",
	["3"] = "²¹³⁻⁵⁵",
	["4"] = "²⁻⁴",
	["5"] = "⁵⁵⁻¹¹",
	["6"] = "³⁵⁻¹¹",
	["7"] = "¹¹",
	["8"] = "⁴⁻²",
}
-- Citation tone values per location, keyed by tone number.
local pengim_tone_value = {}
pengim_tone_value["Chaozhou"] = {
	["1"] = "33", ["2"] = "53", ["3"] = "213", ["4"] = "2",
	["5"] = "55", ["6"] = "35", ["7"] = "11", ["8"] = "5"
}
pengim_tone_value["Shantou"] = pengim_tone_value["Chaozhou"]
pengim_tone_value["Chenghai"] = pengim_tone_value["Chaozhou"]
pengim_tone_value["Jieyang"] = pengim_tone_value["Chaozhou"]
pengim_tone_value["Chaoyang"] = {
	["1"] = "33", ["2"] = "53", ["3"] = "31", ["4"] = "2",
	["5"] = "55", ["6"] = "313", ["7"] = "11", ["8"] = "5"
}

-- Sandhi tone values per location. Tones 2, 3 and 4 have an "A" and a "B"
-- variant; tone 7 maps to "" for Chaozhou and Shantou.
local pengim_tone_sandhi = {}
pengim_tone_sandhi["Chaozhou"] = {
	["1"] = "23",
	["2A"] = "23", ["2B"] = "35",
	["3A"] = "31", ["3B"] = "53",
	["4A"] = "3", ["4B"] = "5",
	["5"] = "11", ["6"] = "21", ["7"] = "", ["8"] = "2"
}
pengim_tone_sandhi["Shantou"] = {
	["1"] = "23",
	["2A"] = "35", ["2B"] = "35",
	["3A"] = "55", ["3B"] = "55",
	["4A"] = "5", ["4B"] = "5",
	["5"] = "11", ["6"] = "21", ["7"] = "", ["8"] = "2"
}
-- These two used to alias pengim_tone_value["Chaozhou"] (the citation
-- values), which has none of the "2A"/"2B"-style keys a sandhi lookup needs;
-- they now share the Chaozhou sandhi table instead.
pengim_tone_sandhi["Chenghai"] = pengim_tone_sandhi["Chaozhou"]
pengim_tone_sandhi["Jieyang"] = pengim_tone_sandhi["Chaozhou"]
pengim_tone_sandhi["Chaoyang"] = {
	["1"] = "23",
	["2A"] = "31", ["2B"] = "31",
	["3A"] = "55", ["3B"] = "55",
	["4A"] = "5", ["4B"] = "5",
	["5"] = "11", ["6"] = "33", ["7"] = "33", ["8"] = "2"
}
-- Clean-up substitutions that export.pengim_to_ipa_conv applies after the
-- character-by-character conversion; each key is a gsub pattern and each value
-- its replacement. The table is walked with pairs(), so the order of
-- application is not specified.
--   * "β" (produced from "bh" by pengim_to_ipa_two_letters_above) becomes "b".
--   * p / k / h directly followed by a run made up only of ², ⁴ and ⁻ and
--     then one character in the range " " to "/" become p̚ / k̚ / ʔ. In
--     pengim_to_ipa_one_letter only tones 4 ("²⁻⁴") and 8 ("⁴⁻²") consist
--     solely of those characters.
local pengim_to_ipa_fix = {
["β"] = "b",
["p([²⁴⁻]+[ -/])"] = "p̚%1",
["k([²⁴⁻]+[ -/])"] = "k̚%1",
["h([²⁴⁻]+[ -/])"] = "ʔ%1",
}
-- Per-character replacements that export.pengim_to_ipa_conv applies inside a
-- span of vowels followed by "n" (and an optional ʔ) plus tone superscripts:
-- each vowel is replaced by its nasalised form and the "n" itself is dropped.
-- The trailing comments give the Peng'im letter behind each IPA vowel.
local pengim_to_ipa_nasal = {
["a"] = "ã",
["e"] = "ẽ", -- ê
["ɯ"] = "ɯ̃", -- e
["i"] = "ĩ",
["o"] = "õ",
["u"] = "ũ",
["n"] = "",
}
--- Convert Peng'im text to an IPA string.
-- Steps, in order: multi-letter substitutions, per-character mapping of
-- letters and tone digits, clean-up substitutions, nasalisation of vowel + n
-- sequences, then removal of the sandhi half of the tone before "/" and ","
-- boundaries (the "⁻²¹" sandhi value is the one exception that is kept).
-- @param text  Peng'im string; "/" separates alternatives, which come out
--              joined as "/, /".
-- @return      The IPA string wrapped in slashes.
function export.pengim_to_ipa_conv(text)
	local kept_sandhi = "⁻²¹"

	-- NOTE(review): pairs() order is unspecified, so overlapping keys in
	-- these tables could be applied in either order.
	for pattern, replacement in pairs(pengim_to_ipa_two_letters_above) do
		text = gsub(text, pattern, replacement)
	end

	text = gsub(text, "([234]) ([^12345678 ]+)2$", "%1 %22")

	-- This should work, but it doesn't convert the tone number in "diên1":
	-- ipa = gsub(text, ".", pengim_to_ipa_one_letter)
	local ipa = gsub(text, ".", function(letter)
		local mapped = pengim_to_ipa_one_letter[letter]
		if mapped then
			return mapped
		end
		return letter
	end)
	ipa = ipa .. "/"

	for pattern, replacement in pairs(pengim_to_ipa_fix) do
		ipa = gsub(ipa, pattern, replacement)
	end

	ipa = gsub(ipa, "[aeiouɯ]+nʔ?[¹²³⁴⁵⁻]+[ -/]", function(span)
		return gsub(span, ".", pengim_to_ipa_nasal)
	end)

	-- Before a "/" only the kept sandhi value survives.
	ipa = gsub(ipa, "(⁻[¹²³⁴⁵]+)/", function(tail)
		if tail ~= kept_sandhi then
			return "/"
		end
		return tail .. "/"
	end)

	-- Same treatment before commas, which are dropped afterwards.
	ipa = gsub(ipa, ",", "#")
	ipa = gsub(ipa, "(⁻[¹²³⁴⁵]+)#", function(tail)
		if tail ~= kept_sandhi then
			return ""
		end
		return tail
	end)
	ipa = gsub(ipa, "#", "")

	ipa = gsub(ipa, "/$", "")
	ipa = gsub(ipa, "/", "/, /")
	return "/" .. ipa .. "/"
end
--- Format Peng'im text for display: put spaces around a "/" that follows a
-- tone digit, wrap each run of tone digits in <sup>…</sup>, and drop "#".
-- @param text  Peng'im string.
-- @return      The display string.
function export.pengim_display(text)
	local spaced = gsub(text, "([1-8])/", "%1 / ")
	-- note: originally [1-8-]+ but it seems like websites have the final tone within parentheses, if at all
	local superscripted = gsub(spaced, "[1-8]+", "<sup>%0</sup>")
	local cleaned = gsub(superscripted, "#", "")
	return cleaned
end
--- Converts `text` to a POJ-like romanisation that shows tone with combining
-- diacritics instead of digits.
--
-- "#" is removed, "/" separates words and " " separates syllables. In each
-- syllable the consonants and vowels are respelled, a temporary "符" marker is
-- put where the diacritic belongs, and the marker is then exchanged for the
-- mark selected by the last digit of the syllable (which is dropped).
--
-- @param text string to convert; tone digits are expected to be 1-8
-- @return NFC-normalised string with the words joined by " / "
-- Raises an error when the tone digit of a syllable has no entry in the
-- tone-mark table.
function export.pengim_to_pojlike_conv(text)
	-- kind of based on MTR (http://www.ispeakmin.com/bbs/viewthread.php?tid=2784)
	text = gsub(text, "#", "")
	local words = split(text, "/", true)
	local tone_marks = {
		['1'] = '',
		['2'] = '́',
		['3'] = '̀',
		['4'] = '',
		['5'] = '̂',
		['6'] = '̆', -- this is a breve; MTR: breve; current hokkien dialect convention: hacek; missionary: tilde or breve??
		['7'] = '̄',
		['8'] = '̍'
	}
	-- Replacement callback for '符(.*)(%d)': the marker becomes the tone mark
	-- and the digit is dropped.
	local function get_tone_mark(a, num)
		local mark = tone_marks[num]
		if mark == nil then
			-- %d also matches digits without an entry above; previously this
			-- failed with "attempt to concatenate a nil value".
			error("Unrecognised Peng'im tone number: " .. num)
		end
		return mark .. a
	end
	local function convert_final(x, c, t) -- convert final -g and -b (but not -ng)
		if c == 'b' then
			c = 'p'
		elseif c == 'g' then
			c = 'k'
		end
		return x .. c .. t
	end
	local cons_correspondences = { ['bh']='b', ['gh']='g', ['b']='p', ['d']='t',
		['g']='k', ['p']='ph', ['t']='th', ['k']='kh', ['z']='ts', ['c']='tsh',
		['r']='j' }
	-- The matched "n" itself is discarded and replaced by a superscript n.
	local function nasalization(_n, h, t) return h .. 'ⁿ' .. t end
	for word_index, word in ipairs(words) do
		local syllables = split(word, " ", true)
		for syllable_index, syllable in ipairs(syllables) do
			-- Initials that are not keys of cons_correspondences stay as-is.
			syllable = gsub(syllable, '^[bdgptkzcr]h?', cons_correspondences)
			syllable = gsub(syllable, '([^n])([bg])(%d)', convert_final)
			syllable = gsub(syllable, '[êe]', { ['ê']='e', ['e']='ṳ' } )
			syllable = gsub(syllable, 'ao', 'au' )
			syllable = gsub(syllable, '(n)(h?)(%d)', nasalization)
			-- Decide where the tone mark goes and leave a marker there.
			if find(syllable, 'uai') then
				syllable = gsub(syllable, 'uai', 'ua符i')
			elseif find(syllable, '[aiueoṳ][aiueoṳ]') then
				syllable = gsub(syllable, '([aouṳ])i', '%1符i') -- ?i
				syllable = gsub(syllable, 'i([aoeuṳ])', 'i%1符') -- i?
				syllable = gsub(syllable, '([ao])([uṳ])', '%1符%2') -- ?u
				syllable = gsub(syllable, '([uṳ])([ae])', '%1%2符') -- u?
			elseif find(syllable, '[^aiueoṳ][aiueoṳ]') or find(syllable, '^[aiueoṳ]') then
				syllable = gsub(syllable, '([aiueoṳ])', '%1符')
			elseif find(syllable, 'ngh?%d') then
				syllable = gsub(syllable, 'ng(h?)(%d)', 'n符g%1%2')
			elseif find(syllable, '[^aiueoṳ]h?%d') then
				syllable = gsub(syllable, '([^aiueoṳ])(h?)(%d)', '%1符%2%3')
			end
			syllable = gsub(syllable, '符(.*)(%d)', get_tone_mark)
			-- A marker survives when the syllable has no tone digit, or when
			-- more than one placement rule fired; never let it reach the
			-- output.
			syllable = gsub(syllable, '符', '')
			syllables[syllable_index] = syllable
		end
		words[word_index] = table.concat(syllables, ' ')
	end
	return toNFC(table.concat(words, ' / '))
end
return export