-- Table of public fields and functions exposed by this module.
local export = {}

-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char

-- Combining marks used for tone and length marking.
local ACUTE = u(0x0301)  -- U+0301 COMBINING ACUTE ACCENT
local CIRC = u(0x0302)   -- U+0302 COMBINING CIRCUMFLEX ACCENT
local MACRON = u(0x0304) -- U+0304 COMBINING MACRON
-- U+0324 COMBINING DIAERESIS BELOW.
-- NOTE(review): the mark stripped at the end of export.apply_tone looks like
-- a different code point (the IPA syllabicity mark is U+0329) -- confirm that
-- U+0324 is really the intended value here.
local SYLL = u(0x0324)

-- All combining marks that may sit on a vowel.
export.diacritic = table.concat({MACRON, ACUTE, CIRC})

-- Characters treated as vowels that carry no tone mark yet; these are the
-- characters export.apply_tone attaches a tone mark to. '.' and SYLL are
-- included so that they behave like vowels.
export.toneless_vowel = "aeiouāēīōūAEIOUĀĒĪŌŪ." .. SYLL

-- Every character treated as part of a vowel: the toneless ones, the
-- precomposed toned vowels, and the bare combining marks.
local toned_vowel = "áéíóúâêîôûḗṓÁÉÍÓÚÂÊÎÔÛḖṒ"
export.vowel = table.concat({export.toneless_vowel, toned_vowel, export.diacritic})
-- Split a word into a list of syllables.
--
-- A syllable is a (possibly empty) run of non-vowel characters followed by a
-- run of vowel characters, as defined by export.vowel. '.' counts as a vowel
-- so that it can be used to denote syllabic m, e.g. "um.ndeni" is split into
-- { "u", "m.", "nde", "ni" }. Non-vowel characters left over at the end of the
-- word are attached to the last syllable, so concatenating the result always
-- reproduces `word`.
--
-- A word that contains no vowel at all is returned as a single syllable (an
-- empty word gives an empty list) instead of failing with a nil concatenation.
function export.split_syllables(word)
	local non_vowels = "[^" .. export.vowel .. "]*"

	local syllables = {}
	for syll in mw.ustring.gmatch(word, non_vowels .. "[" .. export.vowel .. "%.]+") do
		syllables[#syllables + 1] = syll
	end

	if #syllables == 0 then
		-- Nothing matched, so there is no last syllable to attach the
		-- remainder to; treat the whole word as one vowelless syllable.
		if word ~= "" then
			syllables[1] = word
		end
		return syllables
	end

	-- Attach any trailing non-vowel characters to the final syllable.
	syllables[#syllables] = syllables[#syllables] .. mw.ustring.match(word, non_vowels .. "$")
	return syllables
end
-- Set of syllable onsets that act as depressor consonants. Every listed onset
-- is also accepted with a following "w". Built once when the module loads
-- instead of on every call.
local depressor_onsets = {}
for _, onset in ipairs({"bh", "d", "dl", "g", "gc", "gq", "gx", "hh", "j", "mb", "mv", "nd", "ndl", "ng", "ngc", "ngq", "ngx", "nj", "nz", "v", "z"}) do
	depressor_onsets[onset] = true
	depressor_onsets[onset .. "w"] = true
end

-- Apply depressor tone shift to a tone pattern.
--
-- syllables: list of syllable strings (as produced by export.split_syllables).
-- pattern:   list of tone letters ("H", "L", "F"), one per syllable.
--
-- For every H syllable whose onset is a depressor consonant and whose
-- following syllable does not start with a depressor consonant, the H is
-- lowered to L and moved one syllable to the right: it becomes H when the
-- following syllable lies before the penult, and F when the following syllable
-- is the penult. The following syllable is only changed if it was L.
--
-- `pattern` is modified in place and also returned.
-- Raises an error when the two lists differ in length.
local function depressor_shift(syllables, pattern)
	if #pattern ~= #syllables then
		error("Number of syllables and number of tones do not match.")
	end

	-- The onset is the leading run of non-vowel characters. It is extracted
	-- with a character-aware match: mixing the byte length (#) with
	-- mw.ustring.sub would miscount syllables containing multi-byte vowels.
	local onset_pattern = "^[^" .. export.vowel .. "]*"
	local onsets = {}
	for i, syll in ipairs(syllables) do
		onsets[i] = mw.ustring.match(syll, onset_pattern)
	end

	local count = #onsets
	for i = 1, count do
		-- onsets[i + 1] is nil past the last syllable; the lookup then yields
		-- nil, which counts as "not a depressor".
		if pattern[i] == "H" and depressor_onsets[onsets[i]] and not depressor_onsets[onsets[i + 1]] then
			local remaining = count - i
			if remaining > 2 then
				-- The next syllable is before the penult.
				pattern[i] = "L"
				if pattern[i + 1] == "L" then
					pattern[i + 1] = "H"
				end
			elseif remaining == 2 then
				-- The next syllable is the penult.
				pattern[i] = "L"
				if pattern[i + 1] == "L" then
					pattern[i + 1] = "F"
				end
			end
		end
	end

	return pattern
end
-- Add tone diacritics to a word.
--
-- word:    the word to mark; it is split with export.split_syllables.
-- pattern: string of tone letters, one per syllable: "H" (acute), "F"
--          (circumflex) or "L" (unmarked). Defaults to all "L".
-- shift:   whether to run depressor_shift on the pattern first. Defaults to
--          true; only an explicit false disables it.
--
-- Returns the marked word as an NFC string.
-- Raises an error when the number of tones differs from the number of
-- syllables, or when the pattern contains a letter other than H, L or F.
function export.apply_tone(word, pattern, shift)
	-- `shift or true` would turn an explicit false into true, hence the nil test.
	if shift == nil then
		shift = true
	end

	local syllables = export.split_syllables(word)
	-- Splitting on the empty string yields one entry per tone letter.
	local tones = mw.text.split(pattern or mw.ustring.rep("L", #syllables), "")

	if #syllables ~= #tones then
		error("The word \"" .. table.concat(syllables) .. "\" and the tone pattern " .. table.concat(tones) .. " have different numbers of syllables.")
	end

	if shift then
		tones = depressor_shift(syllables, tones)
	end

	-- Captures each character that can receive a tone mark.
	local markable = "([" .. export.toneless_vowel .. "])"

	for i = 1, #tones do
		local tone = tones[i]
		if tone == "H" then
			syllables[i] = mw.ustring.gsub(syllables[i], markable, "%1" .. ACUTE)
		elseif tone == "F" then
			local marked = mw.ustring.gsub(syllables[i], markable, "%1" .. CIRC)
			-- Delete macron under circumflex: decompose, drop the macron,
			-- then recompose.
			local decomposed = mw.ustring.toNFD(marked)
			local without_macron = mw.ustring.gsub(decomposed, MACRON, "")
			syllables[i] = mw.ustring.toNFC(without_macron)
		elseif tone ~= "L" then
			error("Invalid character \"" .. tone .. "\" in tone pattern string.")
		end
	end

	-- Recompose the whole word, then remove the combining mark given in the
	-- literal below. Only the string result of gsub is returned, not the count.
	local composed = mw.ustring.toNFC(table.concat(syllables))
	local result = mw.ustring.gsub(composed, "̩", "")
	return result
end
-- Strip the tone diacritics from a word.
--
-- The word is split with export.split_syllables. In each syllable every '.'
-- is removed and the rest is decomposed (NFD); the syllable's final character
-- then decides its tone: a combining acute gives "H", a combining circumflex
-- gives "F" (the mark is dropped in both cases), anything else gives "L".
--
-- Returns a single two-element table: the stripped word (NFC) followed by the
-- tone pattern, a string consisting of H, L, and F.
function export.split_tone(word)
	local letters = {}
	local marks = {}

	for idx, syll in ipairs(export.split_syllables(word)) do
		local without_dots = mw.ustring.gsub(syll, "%.", "")
		local decomposed = mw.ustring.toNFD(without_dots)
		local length = mw.ustring.len(decomposed)
		local last = mw.ustring.sub(decomposed, length)

		if last == ACUTE or last == CIRC then
			marks[idx] = (last == ACUTE) and "H" or "F"
			-- Keep everything except the trailing tone mark.
			letters[idx] = mw.ustring.sub(decomposed, 1, length - 1)
		else
			marks[idx] = "L"
			letters[idx] = decomposed
		end
	end

	return {mw.ustring.toNFC(table.concat(letters)), table.concat(marks)}
end
-- Return the module table holding the public fields and functions defined above.
return export