local export = {}
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsplit = mw.text.gsplit
local lower = mw.ustring.lower
local upper = mw.ustring.upper
local len = mw.ustring.len
-- Orthography reference: https://en.wikipedia.org/wiki/Bouyei_language
-- initialConv maps an orthographic syllable initial (the key) to its IPA
-- transcription (the value). export.ipa indexes this table with the exact
-- initial string it captured and raises an error when no entry exists.
local initialConv = {
-- labials
['b'] = 'p',
['p'] = 'pʰ',
['mb'] = 'ɓ',
['m'] = 'm',
['f'] = 'f',
['v'] = 'v',
['w'] = 'w', -- to support /v/ ~ [w] in some cases
-- alveolars
['d'] = 't',
['t'] = 'tʰ',
['nd'] = 'ɗ',
['n'] = 'n',
['sl'] = 'ɬ',
['l'] = 'l',
-- velars
['g'] = 'k',
['k'] = 'kʰ',
['ng'] = 'ŋ',
['h'] = 'x',
['hr'] = 'ɣ',
-- palatals
['j'] = 't͡ɕ',
['q'] = 't͡ɕʰ',
['ny'] = 'ɲ',
['x'] = 'ɕ',
['y'] = 'j',
-- sibilants
['z'] = 't͡s',
['c'] = 't͡sʰ',
['s'] = 's',
['r'] = 'z',
-- initials written with a trailing y or v
['by'] = 'pʲ',
['my'] = 'mʲ',
['qy'] = 'ˀj',
['gv'] = 'kʷ',
['ngv'] = 'ŋʷ',
['qv'] = 'ˀv',
-- a syllable with no written initial gets a glottal stop
[''] = 'ʔ',
}
-- rimeConv maps an orthographic rime (vowel nucleus plus optional coda, with
-- the tone letter already removed) to its IPA transcription. export.ipa
-- indexes this table with the exact rime string and raises an error when no
-- entry exists.
local rimeConv = {
-- single vowels
['a'] = 'a',
['o'] = 'o',
['ô'] = 'ɔ',
['ee'] = 'e',
['ê'] = 'ɛ', -- e in Chinese loanwords
['i'] = 'i',
['î'] = 'z̩',
['u'] = 'u',
['e'] = 'ɯ',
-- vowel + vowel
['aai'] = 'aːi',
['ai'] = 'ɐi',
['oi'] = 'oːi',
['ei'] = 'ɯi',
['aau'] = 'aːu',
['au'] = 'ɐu',
['eeu'] = 'eːu',
['iu'] = 'iu',
['ae'] = 'ɐɯ',
['ie'] = 'iə',
['ue'] = 'uə',
['ea'] = 'ɯə',
-- rimes ending in -m
['aam'] = 'aːm',
['am'] = 'ɐm',
['oom'] = 'oːm',
['om'] = 'ɔm',
['eem'] = 'eːm',
['iam'] = 'iəm',
['im'] = 'im',
['uam'] = 'uəm',
['um'] = 'um',
['eam'] = 'ɯəm',
-- rimes ending in -n
['aan'] = 'aːn',
['an'] = 'ɐn',
['oon'] = 'oːn',
['on'] = 'ɔn',
['een'] = 'eːn',
['ian'] = 'iən',
['in'] = 'in',
['uan'] = 'uən',
['un'] = 'un',
['ean'] = 'ɯən',
['en'] = 'ɯn',
-- rimes ending in -ng
['aang'] = 'aːŋ',
['ang'] = 'ɐŋ',
['oong'] = 'oːŋ',
['ong'] = 'ɔŋ',
['eeng'] = 'eːŋ',
['iang'] = 'iəŋ',
['ing'] = 'iŋ',
['uang'] = 'uəŋ',
['ung'] = 'uŋ',
['eang'] = 'ɯəŋ',
['eng'] = 'ɯŋ',
-- rimes ending in -b (unreleased p)
['aab'] = 'aːp̚',
['ab'] = 'ɐp̚',
['oob'] = 'oːp̚',
['ob'] = 'ɔp̚',
['eeb'] = 'eːp̚',
['iab'] = 'iəp̚',
['ib'] = 'ip̚',
['uab'] = 'uəp̚',
['ub'] = 'up̚',
['eab'] = 'ɯəp̚',
['eb'] = 'ɯp̚',
-- rimes ending in -d (unreleased t)
['aad'] = 'aːt̚',
['ad'] = 'ɐt̚',
['ood'] = 'oːt̚',
['od'] = 'ɔt̚',
['eed'] = 'eːt̚',
['iad'] = 'iət̚',
['id'] = 'it̚',
['uad'] = 'uət̚',
['ud'] = 'ut̚',
['ead'] = 'ɯət̚',
['ed'] = 'ɯt̚',
-- rimes ending in -g (unreleased k)
['ag'] = 'ɐk̚',
['og'] = 'ɔk̚',
['eeg'] = 'ek̚',
['ig'] = 'ik̚',
['ug'] = 'uk̚',
['eg'] = 'ɯk̚',
-- NOTE(review): the remaining rimes look like the ones used to spell
-- Chinese loanwords — confirm against the orthography reference
['ia'] = 'ia',
['io'] = 'io',
['iao'] = 'iɐu',
['ua'] = 'ua',
['ui'] = 'ui',
['uai'] = 'uɐi',
['ao'] = 'aːu',
['ou'] = 'əu',
['er'] = 'ɚ',
}
-- toneConv maps the tone letter written at the end of a syllable to its
-- contour in Chao tone letters. The key is whatever get_tone returns as the
-- tone; the '' key covers syllables on which get_tone found no tone letter.
local toneConv = {
['l'] = '˨˦',
['z'] = '˩',
['c'] = '˥˧',
['x'] = '˧˩',
['s'] = '˧˥',
['h'] = '˧',
['t'] = '˧˥',
[''] = '˧',
-- y, f, j and q are the four letters export.ipa singles out with its
-- "^[yfjq]$" check (presumably the loanword tones — TODO confirm)
['y'] = '˧',
['f'] = '˧˩',
['j'] = '˥˧',
['q'] = '˨˦',
}
--- Separate the trailing tone letter from a single orthographic syllable.
-- A tone letter is one of l z c x s h t y f j q in final position, and it is
-- only split off when at least one a-z letter stands directly before it.
-- @param syllable string: one syllable, already lower-cased
-- @return string: the syllable without its tone letter
-- @return string: the tone letter, or "" when none could be split off
local function get_tone(syllable)
	local toneless, tone = match(syllable, "([a-z]+)([lzcxshtyfjq])$")
	if not toneless then
		-- Either there is no tone letter, or the syllable consists of nothing
		-- but a would-be tone letter (e.g. "l"). Hand the syllable back
		-- untouched rather than returning nil, so the caller can report it
		-- with a readable error instead of crashing on a nil value.
		return syllable, ""
	end
	return toneless, tone
end
--- Break an orthographic word into syllables.
-- Apostrophes are treated as explicit syllable breaks, and a break is also
-- inserted between a vowel-plus-one-non-vowel sequence and a following vowel.
-- The result is then split on hyphens and spaces.
-- @param text string: the word or phrase to split
-- @return an iterator (from mw.text.gsplit) yielding one syllable per step
local function syllabify(text)
	local with_breaks = gsub(text, "'", " ")
	local separated = gsub(with_breaks, "([aeiou][^aeiou])([aeiou])", "%1 %2")
	-- Disabled rule, kept for reference:
	--text = gsub(text, "([lzcxshtyfjqbdg])([^aeiou])", "%1 %2")
	return gsplit(separated, "[- ]")
end
--- Convert a Bouyei word in standard orthography to a phonemic IPA string.
-- The text is lower-cased and split into syllables; each syllable is broken
-- into initial, rime and tone letter, and the three parts are looked up in
-- initialConv, rimeConv and toneConv.
-- @param text string: the orthographic form
-- @return string: the transcription, syllables joined by "." inside slashes
-- Raises an error when a syllable does not fit the initial+rime pattern or
-- when its initial or rime has no entry in the conversion tables.
function export.ipa(text)
	text = string.lower(text)
	local syllables = {}
	for syllable in syllabify(text) do
		local toneless, tone = get_tone(syllable)
		if not toneless or not tone then
			-- get_tone could not split the syllable at all
			error(syllable .. " cannot be recognized")
		end

		local initial, rime = match(toneless, "^([mnshbq]?[bpfwdtlgkrjyxzc]?v?)([aeiou][aeiou]?[ioubdgmnr]?g?)$")
		if not initial or not rime then
			error(toneless .. " cannot be recognized")
		end

		-- With one of the tone letters y/f/j/q the bare vowels e, o and i are
		-- read through the ê / ô / î entries of rimeConv (ê is annotated there
		-- as the e of Chinese loanwords). î applies only after z, c, s or r.
		-- The adjustment has to be made on the rime: the initial pattern above
		-- never captures a vowel.
		if find(tone, "^[yfjq]$") then
			if rime == "e" then
				rime = "ê"
			elseif rime == "o" then
				rime = "ô"
			elseif rime == "i" and find(initial, "^[zcsr]$") then
				rime = "î"
			end
		end

		local initial_ipa, rime_ipa, tone_value = initialConv[initial], rimeConv[rime], toneConv[tone]
		if not initial_ipa then
			error(initial .. " is not a valid initial")
		elseif not rime_ipa then
			error(rime .. " is not a valid rime")
		end
		table.insert(syllables, initial_ipa .. rime_ipa .. tone_value)
	end
	return "/" .. table.concat(syllables, ".") .. "/"
end
--- Template entry point: render the pronunciation line for a Bouyei term.
-- Reads the first positional argument of the calling template as the
-- orthographic form and falls back to the current page title when it is
-- absent. The transcription from export.ipa is formatted by Module:IPA for
-- language code "pcc" and returned as a "* "-prefixed list item.
-- @param frame the Scribunto frame object of the #invoke call
-- @return string: wikitext for one bulleted pronunciation line
function export.show(frame)
	local spec = { [1] = { } }
	local parsed = require("Module:parameters").process(frame:getParent().args, spec)

	-- No explicit form given: use the title of the page being rendered.
	local word = parsed[1] or mw.title.getCurrentTitle().text

	local format_ipa = require("Module:IPA").format_IPA_full
	local bouyei = require("Module:languages").getByCode("pcc")
	local transcription = export.ipa(word)

	return string.format("* %s", format_ipa {
		lang = bouyei,
		items = {{ pron = transcription }},
	})
end
return export