local export = {}
local m_izh = require("Module:izh")
local m_IPA = require("Module:IPA")
local gsub_lookahead = require("Module:gsub lookahead")
local lang = m_izh.lang
local U = mw.ustring.char
--- <<< DATA START >>> ---
-- IPA length and stress marks.
local LONG = "ː"
local SEMILONG = "ˑ"
local STRESS_PRIMARY = "ˈ"
local STRESS_SECONDARY = "ˌ"
-- Combining diacritics, built from their Unicode code points.
local FRONTAL = U(0x0308) -- combining diaeresis
local NONSYLLABIC = U(0x032F) -- combining inverted breve below
local TIE = U(0x0361) -- combining double inverted breve (joins the two halves of an affricate)
local VERYSHORT = U(0x0306) -- combining breve
local PALATAL = "ʲ"
local IPA_VOWELS = "ɑeiouyæøɨə"
-- Private-use code points used as internal placeholder characters while a
-- transcription is being built; the processing functions below insert them
-- and later replace or strip them.
local AUTO_STRESS = U(0xEEEE)
local VIRTUAL_BREAK = U(0xEEEC)
local J_PALATALIZE = U(0xEEEA)
local REALLY_JUST_PALATAL = U(0xEEE8)
local VIRTUAL_BREAK_UNGEMINATE = U(0xEEE6)
-- Character lists (no surrounding brackets) meant to be embedded in pattern sets.
local IPA_CONSONANTS = m_izh.consonants .. "ɫʃʒ"
local IPA_CONSONANTS_GEMINATABLE = m_izh.consonants_geminatable .. "ɫ" .. "z" .. "ž"
-- Markup characters recognised in the respelling input.
local PALATALIZE = "'"
local UNGEMINATE = "/"
-- Pattern fragments matching zero-or-more / one-or-more code points in U+0300..U+036F.
local ANY_DIACRITICS = "[" .. U(0x0300) .. "-" .. U(0x036F) .. "]*"
local SOME_DIACRITICS = "[" .. U(0x0300) .. "-" .. U(0x036F) .. "]+"
--- <<< DATA END >>> ---
--- <<< COMMON START >>> ---
--- Split a word or phrase into a list of syllable strings.
-- Scans `word` character by character (case-insensitively, but copying the
-- original characters into the result) and starts a new syllable whenever a
-- consonant+vowel onset, an affricate, a vowel following another nucleus, or
-- a separator symbol is encountered.
-- @param word string to split
-- @param keep_sep_symbols `true` to keep every separator character at the
--   start of the following syllable; a string to keep only the separator
--   characters literally contained in it; anything else drops separators
-- @return sequence of syllable strings
local function split_syllables(word, keep_sep_symbols)
    local consonant = "[" .. IPA_CONSONANTS .. "]"
    local consonant_diacritic = "[" .. U(0x030A) .. U(0x0325) .. "]"
    local vowel = m_izh.vowel
    local sep_symbols = m_izh.sep_symbols .. VIRTUAL_BREAK
    local vowel_sequences = m_izh.vowel_sequences
    local ungeminate_marks = "[" .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. "]"

    -- The patterns never change inside the loop, so build them once.
    local cv_pattern = "^" .. consonant .. consonant_diacritic .. "*[" .. PALATALIZE .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. J_PALATALIZE .. PALATAL .. "]*" .. vowel
    local consonant_pattern = "^" .. consonant
    local affricate_pattern = "^" .. consonant .. TIE .. consonant
    local vowel_pattern = "^" .. vowel
    local vowel_pair_pattern = "^(" .. vowel .. ungeminate_marks .. "*" .. vowel .. ")"
    local sep_pattern = "^[" .. sep_symbols .. "]"

    local lowered = mw.ustring.lower(word)
    -- Positions are character indexes, so the bound must be the character
    -- count rather than the byte length.
    local length = mw.ustring.len(word)

    local res = {}
    local syllable = ""
    local pos = 1
    local found_vowel = false

    local function flush()
        if #syllable > 0 then table.insert(res, syllable) end
        syllable = ""
    end

    while pos <= length do
        local char = mw.ustring.sub(word, pos, pos)
        if mw.ustring.find(lowered, cv_pattern, pos) then
            -- CV: end current syllable if we have found a vowel
            if found_vowel then
                flush()
                found_vowel = false
            end
            syllable = syllable .. char
            pos = pos + 1
        elseif mw.ustring.find(lowered, consonant_pattern, pos) then
            if mw.ustring.find(lowered, affricate_pattern, pos) then
                -- /t͡s/: starts a new syllable only after a nucleus; otherwise
                -- it joins the pending onset (which may hold a kept separator
                -- or earlier consonants) instead of overwriting it.
                if found_vowel then
                    flush()
                end
                syllable = syllable .. mw.ustring.sub(word, pos, pos + 2)
                pos = pos + 3
                found_vowel = false
            else
                -- C: continue
                syllable = syllable .. char
                pos = pos + 1
            end
        elseif mw.ustring.find(lowered, vowel_pattern, pos) then
            if found_vowel then
                -- already found a vowel, end current syllable
                flush()
            end
            found_vowel = true
            -- check for diphthongs or long vowels
            local seq_ok = false
            local search_from = mw.ustring.gsub(mw.ustring.sub(lowered, pos), ungeminate_marks, "")
            for _, v in pairs(vowel_sequences) do
                if mw.ustring.find(search_from, "^" .. v) then
                    seq_ok = true
                    break
                end
            end
            -- Fall back to a single vowel if the sequence cannot be located
            -- in the unstripped text instead of raising an error.
            local sequence = seq_ok and mw.ustring.match(lowered, vowel_pair_pattern, pos)
            if sequence then
                local total = mw.ustring.len(sequence)
                syllable = syllable .. mw.ustring.sub(word, pos, pos + total - 1)
                pos = pos + total
            else
                syllable = syllable .. char
                pos = pos + 1
            end
        elseif mw.ustring.find(lowered, sep_pattern, pos) then
            -- separates syllables
            flush()
            local keep = keep_sep_symbols == true
            if not keep and type(keep_sep_symbols) == "string" then
                -- plain search: separators such as "." and "-" are magic in patterns
                keep = keep_sep_symbols:find(char, 1, true) ~= nil
            end
            syllable = keep and char or ""
            pos = pos + 1
            found_vowel = false
        else
            -- ?: continue
            syllable = syllable .. char
            pos = pos + 1
        end
    end
    flush()
    return res
end
--- Earliest spelling normalisation: ties "ts" into an affricate and turns an
-- UNGEMINATE marker between a vowel and "i" into VIRTUAL_BREAK_UNGEMINATE.
-- @param text input spelling
-- @return the rewritten text
local function zeroth_round_of_common_replacements(text)
    local affricated = mw.ustring.gsub(text, "ts", "t͡s")
    local before_i = "([" .. m_izh.vowels .. m_izh.consonants .. "])(" .. m_izh.vowel .. ")" .. UNGEMINATE .. "i"
    local rewritten = mw.ustring.gsub(affricated, before_i, "%1%2" .. VIRTUAL_BREAK_UNGEMINATE .. "i")
    return rewritten
end
--- Replace orthographic letters with their IPA counterparts.
-- "nk"/"ng" get a velar nasal, and the letters a, ä, ö, v, ь and the
-- apostrophe ’ are mapped through a lookup table.
-- @param text text to convert
-- @return the converted text
local function first_round_of_common_replacements(text)
    local velar_nasal = {
        ["nk"] = "ŋk",
        ["ng"] = "ŋg",
    }
    local letter_to_ipa = {
        ["a"] = "ɑ",
        ["ä"] = "æ",
        ["ö"] = "ø",
        ["v"] = "ʋ",
        ["ь"] = "ɨ",
        ["’"] = ".",
        -- "-" is deliberately not mapped to STRESS_SECONDARY here
    }
    local converted = mw.ustring.gsub(text, "n[kg]", velar_nasal)
    converted = mw.ustring.gsub(converted, "[aäövь’]", letter_to_ipa)
    return converted
end
--- Second batch of letter-to-IPA replacements (sibilants, h before a length
-- mark, optional palatal merging, dark l before PALATAL, and g -> ɡ).
-- @param text text to convert
-- @param do_palatal_repls when truthy, n/ʃ/ʒ followed by PALATAL are replaced
--   by the single symbols ɲ/ɕ/ʑ
-- @return the converted text
local function second_round_of_common_replacements(text, do_palatal_repls)
    local sibilants = { ["c"] = "t͡ʃ", ["š"] = "ʃ", ["ž"] = "ʒ" }
    local result = mw.ustring.gsub(text, "[cšž]", sibilants)
    result = mw.ustring.gsub(result, "h([" .. LONG .. SEMILONG .. "])", "x%1")
    if do_palatal_repls then
        local palatals = { ["n"] = "ɲ", ["ʃ"] = "ɕ", ["ʒ"] = "ʑ" }
        result = mw.ustring.gsub(result, "([nʃʒ])" .. PALATAL, palatals)
    end
    result = mw.ustring.gsub(result, "ɫ" .. PALATAL, "l" .. PALATAL)
    result = mw.ustring.gsub(result, "g", "ɡ")
    return result
end
--- Replace "j" after a consonant matching `filter` with a PALATAL mark.
-- When the character after the "j" is not PALATALIZE, a LONG mark is added
-- after the PALATAL mark as well.
-- @param text text to process
-- @param filter pattern fragment for the preceding consonant (e.g. "[ln]")
-- @return the results of mw.ustring.gsub (rewritten text, match count)
local function automatic_palatalization(text, filter)
    local function palatalize(preceding, following)
        local mark = PALATAL
        if following ~= PALATALIZE then
            mark = mark .. LONG
        end
        return preceding .. mark .. following
    end
    return mw.ustring.gsub(text, "(" .. filter .. ")j(.?)", palatalize)
end
--- Apply explicitly marked palatalization.
-- PALATALIZE after a consonant becomes PALATAL, leftover PALATALIZE markers
-- are removed and doubled PALATAL marks are collapsed. Afterwards a "t"
-- separated from a palatalized "t" by a stress/break character is
-- palatalized too.
-- @param text text to process
-- @return the rewritten text
local function manual_palatalization(text)
    local result = text
    if mw.ustring.find(result, PALATALIZE) then
        local marked_consonant = "([" .. IPA_CONSONANTS .. "])" .. PALATALIZE
        result = mw.ustring.gsub(result, marked_consonant, "%1" .. PALATAL)
        result = mw.ustring.gsub(result, PALATALIZE, "")
        result = mw.ustring.gsub(result, PALATAL .. PALATAL, PALATAL)
    end
    local split_geminate = "(t)([" .. STRESS_SECONDARY .. AUTO_STRESS .. ".])(t" .. PALATAL .. ")"
    result = mw.ustring.gsub(result, split_geminate, "%1" .. PALATAL .. "%2%3")
    return result
end
-- Pattern fragments for the diphthongs that receive a NONSYLLABIC mark.
local IPA_diphthongs = {
    "[ɑeouyæø]i",
    "[ɑeio]u",
    "[æeiø]y"
}

--- Mark long vowels and diphthongs.
-- A doubled IPA vowel becomes the vowel plus LONG; every diphthong listed in
-- IPA_diphthongs (optionally with a VERYSHORT mark on its first element)
-- gets a NONSYLLABIC mark appended.
-- @param text text to process
-- @return the rewritten text
local function long_vowels_and_diphthongs(text)
    text = mw.ustring.gsub(text, "([" .. IPA_VOWELS .. "])%1", "%1" .. LONG)
    for _, diphthong in ipairs(IPA_diphthongs) do
        local mod_diphthong
        if mw.ustring.find(diphthong, "%]$") then
            -- Pattern ends in a set: allow an optional VERYSHORT before it.
            mod_diphthong = mw.ustring.gsub(diphthong, "(.)(%[[^%]]-%])", "%1" .. VERYSHORT .. "?%2")
            -- Chain on the previous result; restarting from `diphthong` would
            -- throw away the substitution made on the line above.
            mod_diphthong = mw.ustring.gsub(mod_diphthong, "(%[[^%]]-%])(%[[^%]]-%])", "%1" .. VERYSHORT .. "?%2")
        else
            -- Pattern ends in a plain character: insert the optional
            -- VERYSHORT right before that last character.
            mod_diphthong = mw.ustring.sub(diphthong, 1, -2) .. VERYSHORT .. "?" .. mw.ustring.sub(diphthong, -1, -1)
        end
        text = mw.ustring.gsub(text, "(" .. mod_diphthong .. ")", "%1" .. NONSYLLABIC)
    end
    return text
end
--- Rewrite doubled letters as letter + LONG and move a LONG mark that
-- precedes PALATAL so that it follows it.
-- @param text text to process
-- @return the rewritten text
local function long_consonants(text)
    local lengthened = mw.ustring.gsub(text, "(%a)%1", "%1" .. LONG)
    local reordered = mw.ustring.gsub(lengthened, LONG .. PALATAL, PALATAL .. LONG)
    return reordered
end
--- Assimilate "n" to a following labial or velar stop, also across
-- whitespace, AUTO_STRESS and hyphens.
-- @param text text to process
-- @return the rewritten text
local function standard_sandhi(text)
    local boundary = "[%s" .. AUTO_STRESS .. "-]*"
    local result = mw.ustring.gsub(text, "n(" .. boundary .. "[mpb])", "m%1")
    result = mw.ustring.gsub(result, "n(" .. boundary .. "[kgɡ̊])", "ŋ%1")
    return result
end
--- Finalise stress marks.
-- AUTO_STRESS and "-" become STRESS_SECONDARY (a "." directly after a
-- hyphen is dropped), STRESS_PRIMARY is put at the start of the text and
-- after every space, and the result is NFC-normalised.
-- @param text text to process
-- @return the stressed, normalised text
local function add_primary_stress(text)
    local marked = mw.ustring.gsub(text, AUTO_STRESS, "-")
    marked = mw.ustring.gsub(marked, "-%.", "-")
    marked = mw.ustring.gsub(marked, "-", STRESS_SECONDARY)
    local per_word = mw.ustring.gsub(marked, " ", " " .. STRESS_PRIMARY)
    return mw.ustring.toNFC(STRESS_PRIMARY .. per_word)
end
--- Tell whether a syllable starts with a space, AUTO_STRESS or a hyphen.
-- @param syllable syllable string
-- @return the results of mw.ustring.find (truthy on match, nil otherwise)
local function is_stressed_syllable(syllable)
    local stress_prefix = "^[ " .. AUTO_STRESS .. "-]"
    return mw.ustring.find(syllable, stress_prefix)
end
--- Insert AUTO_STRESS markers in place.
-- Counts unstressed syllables since the last stressed one; the second
-- unstressed syllable in a row gets an AUTO_STRESS prefix unless the
-- syllable after it is already stressed. The last syllable is never marked.
-- @param syllables sequence of syllable strings (modified in place)
local function add_secondary_stress(syllables)
    local unstressed_run = 0
    for index = 1, #syllables - 1 do
        local syllable = syllables[index]
        if index == 1 or is_stressed_syllable(syllable) then
            unstressed_run = 0
        else
            unstressed_run = unstressed_run + 1
            if unstressed_run == 2 then
                unstressed_run = 0
                if not is_stressed_syllable(syllables[index + 1]) then
                    syllables[index] = AUTO_STRESS .. syllable
                end
            end
        end
    end
end
--- Remove virtual breaks and recompute automatic secondary stress.
-- Text without VIRTUAL_BREAK / VIRTUAL_BREAK_UNGEMINATE is returned as is.
-- Otherwise AUTO_STRESS and VIRTUAL_BREAK are stripped,
-- VIRTUAL_BREAK_UNGEMINATE becomes UNGEMINATE, and the text is re-split and
-- re-stressed.
-- @param text text to process
-- @return the cleaned text
local function clean_virtual_break(text)
    if not mw.ustring.find(text, "[" .. VIRTUAL_BREAK .. VIRTUAL_BREAK_UNGEMINATE .. "]") then
        return text
    end
    local without_marks = mw.ustring.gsub(text, "[" .. AUTO_STRESS .. VIRTUAL_BREAK .. "]", "")
    local cleaned = mw.ustring.gsub(without_marks, VIRTUAL_BREAK_UNGEMINATE, UNGEMINATE)
    local syllables = split_syllables(cleaned, true)
    add_secondary_stress(syllables)
    return table.concat(syllables)
end
--- Strip VIRTUAL_BREAK, UNGEMINATE and VIRTUAL_BREAK_UNGEMINATE characters.
-- @param text text to process
-- @return the results of mw.ustring.gsub (cleaned text, match count)
local function clean_ungeminate(text)
    local markers = "[" .. VIRTUAL_BREAK .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. "]"
    return mw.ustring.gsub(text, markers, "")
end
--- Geminate the onset consonant after a short stressed syllable (in place).
-- When a stressed syllable of the shape (C)V is followed by an unstressed
-- syllable whose geminatable onset precedes two vowels, the onset is moved
-- to the end of the stressed syllable and replaced by `diacritic` in the
-- following one (CVCVV -> CVC:VV).
-- @param syllables sequence of syllable strings (modified in place)
-- @param diacritic string that replaces the moved onset
local function do_gemination(syllables, diacritic)
    local vowel = m_izh.vowel
    local onset = "[" .. IPA_CONSONANTS_GEMINATABLE .. "][" .. PALATALIZE .. J_PALATALIZE .. "]*"
    local short_open = "^[ " .. AUTO_STRESS .. "-]?[" .. IPA_CONSONANTS .. PALATALIZE .. J_PALATALIZE .. TIE .. "]*" .. vowel .. "$"
    local after_short_stressed = false
    for index, syllable in ipairs(syllables) do
        local stressed = index == 1 or is_stressed_syllable(syllable)
        if after_short_stressed and not stressed then
            -- the onset must be followed by two vowels, possibly spanning
            -- into the next syllable
            local lookahead = syllable .. (syllables[index + 1] or "")
            if mw.ustring.find(lookahead, "^" .. onset .. vowel .. vowel) then
                local cg = mw.ustring.match(syllable, "^(" .. onset .. ")")
                syllables[index - 1] = syllables[index - 1] .. cg
                syllables[index] = mw.ustring.gsub(syllable, "^" .. cg, diacritic)
            end
        end
        after_short_stressed = stressed and mw.ustring.find(syllable, short_open)
    end
end
-- Diacritic pair appended to reduced word-final vowels:
-- U+0325 (combining ring below) followed by U+0306 (combining breve).
local REDUCED = U(0x0325) .. U(0x0306)
-- Final vowels that have a reduced form, keyed by the orthographic vowel.
-- The reduction code below also uses this table to test whether a vowel
-- belongs to this set.
local reduce_final_vowel = {
["o"] = "o" .. REDUCED,
["ö"] = "ø" .. REDUCED,
["u"] = "u" .. REDUCED,
["y"] = "y" .. REDUCED,
}
--- Reduce a vowel letter.
-- @param letter vowel to reduce
-- @param filter optional list of characters; when given, only letters in
--   this list turn into schwa
-- @return "ə" when there is no filter or the letter is in it, otherwise the
--   letter followed by VERYSHORT
local function to_schwa(letter, filter)
    if not filter or mw.ustring.find(letter, "[" .. filter .. "]") then
        return "ə"
    end
    return letter .. VERYSHORT
end
--- Iterate over a syllable list word by word.
-- A new word starts at every syllable that begins with whitespace; that
-- leading whitespace is removed from the first syllable of each word.
-- @param syllables sequence of syllable strings
-- @return iterator function yielding one table of syllables per word
local function split_syllables_by_words(syllables)
    local cursor = 1
    return function()
        if cursor > #syllables then
            return nil
        end
        local word = { (mw.ustring.gsub(syllables[cursor], "^%s+", "")) }
        cursor = cursor + 1
        while cursor <= #syllables and not mw.ustring.find(syllables[cursor], "^%s") do
            word[#word + 1] = syllables[cursor]
            cursor = cursor + 1
        end
        return word
    end
end
--- Run `fn` on the syllables of each word separately and rebuild the list.
-- `out_syllables` is emptied and refilled in place; the first syllable of
-- every word after the first gets its leading space back.
-- @param out_syllables sequence of syllable strings (modified in place)
-- @param fn function called with the syllable table of one word; it may
--   modify that table
local function do_by_word_syllables(out_syllables, fn)
    -- move the current contents aside so the table can be refilled
    local snapshot = {}
    for key, value in pairs(out_syllables) do
        snapshot[key] = value
        out_syllables[key] = nil
    end
    local word_count = 0
    for word in split_syllables_by_words(snapshot) do
        fn(word)
        word_count = word_count + 1
        for position, syllable in ipairs(word) do
            local prefix = ""
            if word_count > 1 and position == 1 then
                prefix = " "
            end
            table.insert(out_syllables, prefix .. syllable)
        end
    end
end
--- Tell whether a syllable starts with a consonant followed by TIE.
-- @param syllable syllable string, or nil/false
-- @return `syllable` itself when it is falsy, otherwise the match position
--   or nil
local function begins_with_affricate(syllable)
    if not syllable then
        return syllable
    end
    local tied_onset = "^[" .. IPA_CONSONANTS .. "]" .. TIE
    return (mw.ustring.find(syllable, tied_onset))
end
-- Shared driver for vowel reduction within one word.
-- Walks the syllables in order, tracking whether the previous syllable was
-- stressed and/or "long", and rewrites the selected syllables in place.
-- For a selected syllable, the first run of vowels (nucleus) and everything
-- after it (coda) are passed to `replacement(nucleus, coda, index)`, whose
-- return value replaces that part of the syllable.
-- @param syllables sequence of syllable strings for one word (modified in place)
-- @param replacement callback deciding the reduced form
local function do_reduction_internal(syllables, replacement)
local prev_was_stressed = false
local prev_was_long = false
local syllables_since_last_stressed = 0
for index, syllable in ipairs(syllables) do
-- the first syllable always counts as stressed
local stressed = index == 1 or is_stressed_syllable(syllable)
local final = index == #syllables
if stressed then
syllables_since_last_stressed = 0
else
syllables_since_last_stressed = syllables_since_last_stressed + 1
end
-- an affricate onset also makes the previous syllable count as long
prev_was_long = prev_was_long or begins_with_affricate(syllable)
if mw.ustring.find(syllable, "^j'") and prev_was_long then
-- hack. /Cj'/ is one consonant.
local previous_syllable = syllables[index - 1]
if mw.ustring.find(previous_syllable, m_izh.vowel .. "[" .. IPA_CONSONANTS .. "]") then
-- re-evaluate length: two vowels, or a vowel followed by two consonants
prev_was_long = mw.ustring.find(previous_syllable, m_izh.vowel .. m_izh.vowel) or mw.ustring.find(previous_syllable, m_izh.vowel .. "[" .. IPA_CONSONANTS .. "][" .. IPA_CONSONANTS .. "]")
end
end
-- reduce an unstressed syllable either right after a long stressed
-- syllable, or when it is a non-initial final syllable that is more than
-- one syllable away from the stress or follows a long syllable
if not stressed and ((prev_was_stressed and prev_was_long) or (index > 1 and final and (syllables_since_last_stressed > 1 or prev_was_long))) then
syllables[index] = mw.ustring.gsub(syllable, "(" .. m_izh.vowel .. "+)(.*)", function (nucleus, coda) return replacement(nucleus, coda, index) end)
end
-- reduce the next syllable only if the current syllable is stressed and not short
prev_was_stressed = stressed
-- computed from the syllable as it was before any reduction above:
-- long means a vowel followed by a consonant or another vowel
prev_was_long = mw.ustring.find(syllable, m_izh.vowel .. "[" .. IPA_CONSONANTS .. m_izh.vowels .. "]")
end
end
-- Phonetic vowel reduction for one word, built on do_reduction_internal.
-- The callback receives the nucleus and coda of a syllable selected for
-- reduction and returns the reduced replacement.
-- @param syllables sequence of syllable strings for one word (modified in place)
local function do_reduction_word(syllables)
do_reduction_internal(syllables, function (nucleus, coda, index)
local final = index == #syllables
local never_open = false
-- doubled identical vowel: keep only its first character
if mw.ustring.find(nucleus, "(" .. m_izh.vowel .. ")%1") then
return mw.ustring.sub(nucleus, 1, 1) .. coda
end
-- two different vowels: the second one is moved into the coda unless it is
-- "i", in which case it is dropped and the syllable is never treated as open
if mw.ustring.find(nucleus, m_izh.vowel .. m_izh.vowel) then
if mw.ustring.sub(nucleus, 2) ~= "i" then
coda = mw.ustring.sub(nucleus, 2) .. coda
else
never_open = true
end
nucleus = mw.ustring.sub(nucleus, 1, 1)
end
local open = #coda == 0 and not never_open
if final then
if open then
-- reduced, but simply drop it
-- NOTE: the parenthesised expression is always a string, so the trailing
-- `or coda` is never reached (and coda is empty here anyway).
return (reduce_final_vowel[nucleus] or "") or coda
else
if coda == "" and reduce_final_vowel[nucleus] then
-- /oi/, /ui/, /yi/, /øi/
return reduce_final_vowel[nucleus]
end
local reduced
-- "e" is kept; a/ä become schwa; other vowels get VERYSHORT (see to_schwa)
if nucleus == "e" then
reduced = "e"
else
reduced = to_schwa(nucleus, "aä")
end
return reduced .. coda
end
else
-- non-final syllable: the outcome depends on the following syllable
local next_syllable = syllables[index + 1]
local next_syllable_starts_with_vowel = mw.ustring.find(next_syllable, "^[ -]?%.?" .. m_izh.vowel)
local next_syllable_stressed = is_stressed_syllable(next_syllable)
-- open = does not end in a consonant and is not followed by an affricate onset
local next_syllable_open = not (mw.ustring.find(next_syllable, "[" .. IPA_CONSONANTS .. "]$") or begins_with_affricate(syllables[index + 2]))
if next_syllable_starts_with_vowel then
-- hiatus: leave the vowel untouched
return nucleus .. coda
elseif next_syllable_stressed then
return to_schwa(nucleus, "aäe") .. coda
elseif next_syllable_open then
-- no filter: always schwa
return to_schwa(nucleus) .. coda
else
return to_schwa(nucleus, "aäe") .. coda
end
end
end)
end
--- Coalesce a word-final bare "a"/"ä" syllable into the preceding syllable.
-- Applies only when the word has more than one syllable and the penultimate
-- syllable is not stressed. If the last character of the penultimate
-- syllable has an entry in the lookup for the final vowel, that character
-- is replaced by the doubled lookup result, a leading AUTO_STRESS is
-- removed, and the final syllable is deleted.
-- @param syllables sequence of syllable strings for one word (modified in place)
local function do_coalesce_rhyme_word(syllables)
    local count = #syllables
    local vowel = mw.ustring.match(syllables[count], "^[aä]$")
    if not vowel or count <= 1 then
        return
    end
    if is_stressed_syllable(syllables[count - 1] .. syllables[count]) then
        return
    end
    local penult = syllables[count - 1]
    local prefinal = mw.ustring.sub(penult, -1)
    local replacement
    if vowel == "a" then
        replacement = ({ ["i"] = "e", ["u"] = "o", ["o"] = "o" })[prefinal]
    elseif vowel == "ä" then
        replacement = ({ ["i"] = "e", ["y"] = "ö", ["ö"] = "ö" })[prefinal]
    end
    if not replacement then
        return
    end
    local merged = mw.ustring.sub(penult, 1, -2) .. replacement .. replacement
    syllables[count - 1] = mw.ustring.gsub(merged, "^" .. AUTO_STRESS, "")
    syllables[count] = nil
end
-- Reduction variant used for rhymes, built on do_reduction_internal.
-- Vowels are left unchanged except that a final open syllable loses its
-- vowel unless that vowel is a key of reduce_final_vowel.
-- @param syllables sequence of syllable strings for one word (modified in place)
local function do_reduction_rhyme_word(syllables)
do_reduction_internal(syllables, function (nucleus, coda, index)
local final = index == #syllables
-- doubled identical vowel: unchanged
if mw.ustring.find(nucleus, "(" .. m_izh.vowel .. ")%1") then
return nucleus .. coda
end
-- two different vowels: unchanged when the second is "i"; otherwise the
-- second vowel is treated as part of the coda
if mw.ustring.find(nucleus, m_izh.vowel .. m_izh.vowel) then
if mw.ustring.sub(nucleus, 2) ~= "i" then
coda = mw.ustring.sub(nucleus, 2) .. coda
else
return nucleus .. coda
end
nucleus = mw.ustring.sub(nucleus, 1, 1)
end
local open = #coda == 0
if final and open then
-- reduced, but simply drop it
-- NOTE: the parenthesised expression is always a string, so the trailing
-- `or coda` is never reached (and coda is empty here anyway).
return (reduce_final_vowel[nucleus] and nucleus or "") or coda
else
return nucleus .. coda
end
end)
end
--- Drop a reducible word-final vowel and merge the last two syllables.
-- Does nothing for one-syllable words, for words that do not end in a
-- non-vowel followed by a vowel, or when the final vowel is a key of
-- reduce_final_vowel. Otherwise the reduction is simulated on a copy; if
-- the reduced last syllable no longer ends in a vowel, it is appended to
-- the penultimate syllable (plus REALLY_JUST_PALATAL when the original
-- ended in "i") and the last syllable is removed.
-- @param syllables sequence of syllable strings for one word (modified in place)
local function do_final_vowel_dropping_word(syllables)
    local count = #syllables
    if count == 1 then
        return
    end
    local ends_in_cv = "[^" .. m_izh.vowels .. "]" .. m_izh.vowel .. "$"
    if not mw.ustring.find(table.concat(syllables, ""), ends_in_cv) then
        return
    end
    local last = syllables[count]
    if reduce_final_vowel[mw.ustring.sub(last, -1, -1)] then
        return
    end
    -- simulate the reduction on a copy so the input is only touched on success
    local reduced = {}
    for position = 1, count do
        reduced[position] = syllables[position]
    end
    do_reduction_word(reduced)
    if mw.ustring.find(reduced[#reduced], m_izh.vowel .. "$") then
        return
    end
    local leftovers = ""
    if mw.ustring.find(last, "i$") then
        leftovers = REALLY_JUST_PALATAL
    end
    local merged = syllables[count - 1] .. reduced[#reduced] .. leftovers
    syllables[count - 1] = mw.ustring.gsub(merged, "^" .. AUTO_STRESS, "")
    syllables[count] = nil
end
-- The four functions below apply a per-word operation to a syllable list
-- that may span several words, via do_by_word_syllables. Each modifies
-- `syllables` in place and returns nothing.

-- Apply do_reduction_word to every word.
local function do_reduction(syllables)
do_by_word_syllables(syllables, do_reduction_word)
end
-- Apply do_reduction_rhyme_word to every word.
local function do_reduction_rhyme(syllables)
do_by_word_syllables(syllables, do_reduction_rhyme_word)
end
-- Apply do_coalesce_rhyme_word to every word.
local function do_coalesce_rhyme(syllables)
do_by_word_syllables(syllables, do_coalesce_rhyme_word)
end
-- Apply do_final_vowel_dropping_word to every word.
local function do_final_vowel_dropping(syllables)
do_by_word_syllables(syllables, do_final_vowel_dropping_word)
end
--- Decide for every "l" whether it is velar (ɫ), palatalized or plain,
-- based on the neighbouring vowels.
-- Text without "l", or text that already contains "l" followed by
-- PALATALIZE, is returned unchanged. An "l" between a vowel and a consonant
-- is resolved first from the preceding vowel; every remaining "l" is
-- classified by the pair (environment before, environment after) looked up
-- in a conversion table. Palatalized results are emitted as "l" followed by
-- PALATALIZE.
-- @param text spelling to process
-- @return the text with its "l" characters classified
local function do_narrow_l(text)
    -- failsafe
    if not mw.ustring.find(text, "l") then return text end
    if mw.ustring.find(text, "l" .. PALATALIZE) then return text end

    local velar_l = "ɫ"
    local palatal_l = U(0xEEEF) -- temporary placeholder, expanded at the end

    -- "l" before a consonant: decided by the preceding vowel alone
    text = mw.ustring.gsub(text, "([aouäöyь])l(" .. m_izh.consonant .. ")", function (before, after)
        if after == "l" or after == "j" then
            return before .. "l" .. after
        elseif mw.ustring.find(before, "[aouь]") then
            return before .. velar_l .. after
        else
            return before .. palatal_l .. after
        end
    end)

    -- character positions of every remaining "l"
    local length = mw.ustring.len(text)
    local l_indexes = {}
    local i = 1
    while true do
        local index = mw.ustring.find(text, "l", i)
        if index == nil then break end
        table.insert(l_indexes, index)
        i = index + 1
    end

    local env_tags = {
        ["a"] = "a", ["o"] = "a", ["u"] = "a", ["i"] = "i", ["j"] = "j",
        ["ä"] = "ä", ["ö"] = "ä", ["y"] = "ä", ["e"] = "e", [" "] = "_",
        ["-"] = "_", ["ь"] = "a"
    }

    -- Build, for each "l" in order, a two-character environment string:
    -- the tag of what precedes it followed by the tag of what follows it.
    -- The trailing space guarantees every "l" gets a following tag.
    local env = {}
    local cleaned = mw.ustring.gsub(text, "[^aeiouäöyjlь -]", "") .. " "
    local env_index = 1
    local current_env = "_"
    local current_env_before = "_"
    -- indexes of the l's still waiting for their "after" tag
    local backburner, backburner_count = {}, 0
    for c in mw.ustring.gmatch(cleaned, ".") do
        if c == "l" then
            env[env_index] = current_env_before
            backburner_count = backburner_count + 1
            backburner[backburner_count] = env_index
            env_index = env_index + 1
        else
            current_env = env_tags[c] or "_"
            for b = 1, backburner_count do
                local back_index = backburner[b]
                env[back_index] = env[back_index] .. current_env
            end
            backburner_count = 0
            -- "j" never serves as the "before" environment
            if current_env ~= "j" then
                current_env_before = current_env
            end
        end
    end

    local l_conv = {
        ["i_"] = palatal_l, ["_i"] = palatal_l, ["äi"] = palatal_l,
        ["ei"] = palatal_l, ["aj"] = palatal_l, ["äj"] = palatal_l,
        ["ij"] = palatal_l, ["ej"] = palatal_l, ["ie"] = palatal_l,
        ["oj"] = palatal_l, ["uj"] = palatal_l,
        ["a_"] = velar_l, ["_a"] = velar_l, ["aa"] = velar_l,
        ["ia"] = velar_l, ["ea"] = velar_l, ["ae"] = velar_l
    }

    -- Rebuild the text, substituting each "l" by its classified form.
    -- `source` is a local (the original assigned to an undeclared global here).
    local source = text
    local pieces = {}
    i = 1
    for l_number, l_index in ipairs(l_indexes) do
        pieces[#pieces + 1] = mw.ustring.sub(source, i, l_index - 1)
        pieces[#pieces + 1] = l_conv[env[l_number]] or "l"
        i = l_index + 1
    end
    pieces[#pieces + 1] = mw.ustring.sub(source, i, length)
    text = table.concat(pieces)

    text = mw.ustring.gsub(text, palatal_l .. palatal_l, "ll" .. PALATALIZE)
    text = mw.ustring.gsub(text, palatal_l, "l" .. PALATALIZE)
    return text
end
-- Lookup used by do_additional_reduction: vowel that results (doubled) when
-- the keyed vowel is merged with a following "a" or "ä".
local reduce_a_diphthong = {
["e"] = "e", ["i"] = "e",
["o"] = "o", ["ö"] = "ö",
["u"] = "o", ["y"] = "ö",
}
-- Same, for a keyed vowel merged with a following "e".
local reduce_e_diphthong = {
["u"] = "o", ["y"] = "ö",
}
-- Merge vowel hiatus across syllable boundaries (in place).
-- For an unstressed, non-final syllable at most two syllables after the last
-- stressed one whose vowel run is exactly one vowel, and whose following
-- syllable starts (optionally after UNGEMINATE) with a/ä or e: if the vowel
-- has an entry in the matching lookup table, it is replaced by the doubled
-- lookup value, the rest of the following syllable is appended, and the
-- following syllable is emptied. Emptied syllables are removed at the end.
-- @param syllables sequence of syllable strings (modified in place)
local function do_additional_reduction(syllables)
-- /VA/ (V != A) never in the same syllable
local last_stressed = 1
for i = 1, #syllables - 1 do
if i == 1 or is_stressed_syllable(syllables[i]) then
last_stressed = i
else
local nucleus = mw.ustring.match(syllables[i], m_izh.vowel .. "+")
if i - last_stressed <= 2 and nucleus then
-- only single-vowel nuclei qualify; nucleus becomes nil otherwise
nucleus = select(3, mw.ustring.find(nucleus, "^" .. UNGEMINATE .. "?(" .. m_izh.vowel .. ")$"))
if nucleus then
local next_syllable_onset, next_syllable_onset_end, consequent = mw.ustring.find(syllables[i + 1], "^" .. UNGEMINATE .. "?([aeä])")
if next_syllable_onset then
if mw.ustring.find(consequent, "[aä]") and reduce_a_diphthong[nucleus] then
-- absorb the next syllable minus its initial vowel
syllables[i] = mw.ustring.gsub(syllables[i], nucleus, reduce_a_diphthong[nucleus] .. reduce_a_diphthong[nucleus]) .. mw.ustring.sub(syllables[i + 1], next_syllable_onset_end + 1)
syllables[i + 1] = ""
elseif consequent == "e" and reduce_e_diphthong[nucleus] then
syllables[i] = mw.ustring.gsub(syllables[i], nucleus, reduce_e_diphthong[nucleus] .. reduce_e_diphthong[nucleus]) .. mw.ustring.sub(syllables[i + 1], next_syllable_onset_end + 1)
syllables[i + 1] = ""
end
end
end
end
end
end
-- remove empty syllables
-- (compacts non-empty entries to the front, then clears the tail)
local i, j = 1, 1
while i <= #syllables do
if mw.ustring.len(syllables[i]) > 0 then
syllables[j] = syllables[i]
j = j + 1
end
i = i + 1
end
while j < i do
syllables[j] = nil
j = j + 1
end
end
--- Map a consonant through `map` while keeping an attached PALATAL mark.
-- @param map table from base consonant to replacement string
-- @param consonant consonant, optionally followed by PALATAL
-- @return the mapped consonant followed by the original PALATAL mark, if any
local function pass_diacritics_through(map, consonant)
    local split_pattern = "([" .. IPA_CONSONANTS .. "])([" .. PALATAL .. "]?)"
    local base, palatal_mark = mw.ustring.match(consonant, split_pattern)
    return map[base] .. palatal_mark
end
-- Character lists (no brackets) for embedding in pattern sets.
-- Consonants treated as voiced by the voicing rules below.
local voiced_consonants = "jlɫmnŋrvʋ"
-- All sounds treated as voiced: IPA vowels, orthographic vowels and the
-- voiced consonants above.
local voiced_sounds = IPA_VOWELS .. m_izh.vowels .. voiced_consonants
-- Compute voicing of obstruents from their context.
-- All of b/d/g/z/ž are first normalised to their voiceless counterparts;
-- k/p/t/s/š are then turned into semivoiced or voiced forms according to
-- the rules commented below, and a word-initial voiced obstruent is
-- devoiced again at the end.
-- @param text text to process
-- @return the rewritten text
local function do_voicing(text)
text = mw.ustring.gsub(text, "[bdgzž]", { ["b"] = "p", ["d"] = "t", ["g"] = "k", ["z"] = "s", ["ž"]="š" })
local voice = { ["k"] = "g", ["p"] = "b", ["t"] = "d", ["s"] = "z", ["š"] = "ž" }
-- semivoiced = voiced letter carrying a combining ring (above or below)
local semivoice = { ["k"] = "g̊", ["p"] = "b̥", ["t"] = "d̥", ["s"] = "z̥", ["š"] = "ž̥" }
local consonants_to_voice = "[kptsš][" .. PALATAL .. "]?"
local vowel = "[" .. IPA_VOWELS .. m_izh.vowels .. "]"
-- k/p/t/s/š is semivoiced if it follows a voiced sound and is followed by a short vowel or a voiced consonant
-- (the callback returns the replacement plus the `after` capture for gsub_lookahead)
text = gsub_lookahead(text, "([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. PALATAL .. "?[" .. AUTO_STRESS .. "-]?)(" .. consonants_to_voice .. ")([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. ".?)",
function (before, consonant, after)
-- two vowels in a row after the consonant: leave it voiceless
if mw.ustring.find(after, vowel .. ANY_DIACRITICS .. vowel) then
return before .. consonant, after
else
return before .. pass_diacritics_through(semivoice, consonant), after
end
end)
-- k/p/t/s/š is semivoiced if it follows a voiced sound and is not followed by anything
text = mw.ustring.gsub(text, "([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. PALATAL .. "?[" .. AUTO_STRESS .. "-]?)(" .. consonants_to_voice .. ")$",
function (before, consonant)
return before .. pass_diacritics_through(semivoice, consonant)
end)
-- k/p/t/s/š is voiced if it follows a voiced sound and the next sound in the next word is a voiced sound
-- k/p/t/s/š is semivoiced if it follows a voiced sound and the next sound in the next word is not a voiced sound
-- NOTE(review): the else branch below returns the consonant unchanged rather
-- than a semivoiced form — confirm which of comment and code is intended.
text = gsub_lookahead(text, "([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. "[" .. AUTO_STRESS .. "-]?)(" .. consonants_to_voice .. ")([%s" .. AUTO_STRESS .. "-]+)(.)",
function (before, consonant, space, after)
if mw.ustring.find(after, "^[" .. voiced_sounds .. "]") then
return before .. pass_diacritics_through(voice, consonant) .. space, after
else
return before .. consonant .. space, after
end
end)
-- devoice word-initial
text = mw.ustring.gsub(text, "^([bdgzž])[" .. U(0x030a) .. U(0x0325) .. "]?", { ["b"] = "p", ["d"] = "t", ["g"] = "k", ["z"] = "s", ["ž"]="š" })
return text
end
-- Voicing rules for the narrow Ala-Laukaa transcription.
-- @param text text to process
-- @return the rewritten text
local function do_phonetic_alalaukaa_voicing(text)
local voice = { ["k"] = "g", ["p"] = "b", ["t"] = "d", ["s"] = "z", ["š"] = "ž" }
-- a voiced sound with optional diacritics, LONG and PALATAL marks
local voiced_pre = "[" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. LONG .. "?" .. PALATAL .. "?"
-- k, p, t, s get voiced before j, l, r, v, if preceded by a voiced sound
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")([ptsk])([jlɫrv]" .. PALATALIZE .. "?)",
function (before, consonant, after)
-- "j" followed by PALATALIZE does not trigger voicing
if after == "j" .. PALATALIZE then
return before .. consonant .. after
end
return before .. pass_diacritics_through(voice, consonant) .. after
end)
-- word-final s, t get voiced
-- (at the end of the text, or before whitespace/hyphen followed by an
-- optional stress mark and a voiced sound)
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")s$", "%1z")
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")s([%s-][ˈˌ]?[" .. voiced_sounds .. "])", "%1z%2")
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")t$", "%1d")
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")t([%s-][ˈˌ]?[" .. voiced_sounds .. "])", "%1d%2")
return text
end
--- Replace vowels depending on whether they are followed by LONG.
-- @param text text to process
-- @param vowels_find characters (for a pattern set) to look for
-- @param vowels_short replacement table used when the next character is not LONG
-- @param vowels_long replacement table used when the next character is LONG
-- @return whatever gsub_lookahead returns for the rewritten text
local function do_vowel_replacements(text, vowels_find, vowels_short, vowels_long)
    local function pick(vowel, post)
        local replacements = vowels_short
        if post == LONG then
            replacements = vowels_long
        end
        return replacements[vowel], post
    end
    return gsub_lookahead(text, "([" .. vowels_find .. "])(.?)", pick)
end
--- Normalise PALATAL marks at the end of processing.
-- REALLY_JUST_PALATAL becomes PALATAL, LONG is moved after PALATAL, runs of
-- PALATAL are collapsed, and a consonant separated by ".", STRESS_SECONDARY
-- or AUTO_STRESS from a palatalized copy of itself is palatalized too.
-- @param text text to process
-- @return the rewritten text
local function cleanup_palatal(text)
    local result = mw.ustring.gsub(text, REALLY_JUST_PALATAL, PALATAL)
    result = mw.ustring.gsub(result, LONG .. PALATAL, PALATAL .. LONG)
    result = mw.ustring.gsub(result, PALATAL .. "+", PALATAL)
    local split_geminate = "([" .. IPA_CONSONANTS .. "])([." .. STRESS_SECONDARY .. AUTO_STRESS .. "])%1" .. PALATAL
    result = mw.ustring.gsub(result, split_geminate, "%1" .. PALATAL .. "%2%1" .. PALATAL)
    return result
end
--- <<< COMMON END >>> ---
--- <<< DIALECTS START >>> ---
-- narrow_level 0 = broad, 1 = rhyme, 2 = narrow
-- Ala-Laukaa
-- Convert a respelling to IPA for the Ala-Laukaa dialect.
-- @param text lowercase respelling
-- @param narrow_level 0, 1 or 2; levels above 0 add syllable-based
--   processing, level 2 additionally applies the phonetic (narrow) rules
-- @return the IPA string with stress marks added
local function IPA_alalaukaa(text, narrow_level)
if narrow_level <= 1 then
-- drop bare PALATALIZE markers; "j" + PALATALIZE keeps only the marker
text = mw.ustring.gsub(text, "j?" .. PALATALIZE, { [PALATALIZE] = "", ["j" .. PALATALIZE] = PALATALIZE })
end
-- "h" is dropped after n and r
text = mw.ustring.gsub(text, "([nr])h", "%1")
text = mw.ustring.gsub(zeroth_round_of_common_replacements(text), VIRTUAL_BREAK_UNGEMINATE, VIRTUAL_BREAK)
if narrow_level > 0 then
if narrow_level > 1 then
text = do_narrow_l(text)
-- every remaining plain "l" gets PALATALIZE (one marker per geminate),
-- doubled markers are collapsed, and "l" before "j" loses the marker
text = mw.ustring.gsub(mw.ustring.gsub(text, "l", "l" .. PALATALIZE), "l" .. PALATALIZE .. "l" .. PALATALIZE, "ll" .. PALATALIZE)
text = mw.ustring.gsub(text, PALATALIZE .. PALATALIZE, PALATALIZE)
text = mw.ustring.gsub(text, "l" .. PALATALIZE .. "j", "lj")
text = do_phonetic_alalaukaa_voicing(text)
end
-- protect "j" + PALATALIZE as one placeholder character while syllabifying
text = mw.ustring.gsub(text, "j" .. PALATALIZE, J_PALATALIZE)
local syllables = split_syllables(text, true)
add_secondary_stress(syllables)
if narrow_level > 1 then
do_final_vowel_dropping(syllables)
do_gemination(syllables, LONG)
do_additional_reduction(syllables)
do_reduction(syllables)
elseif narrow_level == 1 then
do_final_vowel_dropping(syllables)
do_coalesce_rhyme(syllables)
do_reduction_rhyme(syllables)
end
text = table.concat(syllables)
-- restore the protected sequence
text = mw.ustring.gsub(text, J_PALATALIZE, "j" .. PALATALIZE)
if narrow_level > 1 then
text = automatic_palatalization(text, "[ln]") -- palatalization
text = mw.ustring.gsub(text, "h([kg])", "x%1")
end
text = clean_virtual_break(text)
end
text = clean_ungeminate(text)
text = mw.ustring.gsub(text, "j" .. PALATALIZE, PALATALIZE)
text = manual_palatalization(text)
text = first_round_of_common_replacements(text)
text = long_vowels_and_diphthongs(text)
text = long_consonants(text)
text = second_round_of_common_replacements(text, narrow_level > 1)
if narrow_level > 1 then
-- short e/o/ø get a lowering diacritic; long ones stay plain
local vowels_short = { ["e"] = "e̞", ["o"] = "o̞", ["ø"] = "ø̞" }
local vowels_long = { ["e"] = "e", ["o"] = "o", ["ø"] = "ø" }
text = do_vowel_replacements(text, "eoø", vowels_short, vowels_long)
text = mw.ustring.gsub(text, "[sz]", { ["s"] = "s̠", ["z"] = "z̠" })
text = standard_sandhi(text)
-- "j" after a consonant becomes "i" before a space/AUTO_STRESS/hyphen
-- boundary or at the end of the text
text = mw.ustring.gsub(text, "([" .. IPA_CONSONANTS .. "]" .. ANY_DIACRITICS .. PALATAL .. "?)j%f[ " .. AUTO_STRESS .. "-]", "%1i")
text = mw.ustring.gsub(text, "([" .. IPA_CONSONANTS .. "]" .. ANY_DIACRITICS .. PALATAL .. "?)j$", "%1i")
end
-- "j" after a vowel becomes non-syllabic "i" at the end of the text, before
-- a consonant (optionally across stress marks) and before a space
text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j$", "%1i" .. NONSYLLABIC)
text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j(" .. STRESS_PRIMARY .. "?" .. STRESS_SECONDARY .. "?[" .. IPA_CONSONANTS .. "])", "%1i" .. NONSYLLABIC .. "%2")
text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j ", "%1i" .. NONSYLLABIC .. " ")
text = cleanup_palatal(text)
return add_primary_stress(text)
end
-- Soikkola
-- Convert a respelling to IPA for the Soikkola dialect.
-- @param text lowercase respelling
-- @param narrow_level 0, 1 or 2; levels above 0 add syllable-based
--   processing, level 2 additionally applies the phonetic (narrow) rules
-- @return the IPA string with stress marks added
local function IPA_soikkola(text, narrow_level)
text = zeroth_round_of_common_replacements(text)
if narrow_level > 0 then
if narrow_level > 1 then
text = do_narrow_l(text)
text = mw.ustring.gsub(text, "h([kg])", "x%1")
end
-- protect "j" + PALATALIZE as one placeholder character while syllabifying
text = mw.ustring.gsub(text, "j" .. PALATALIZE, J_PALATALIZE)
local syllables = split_syllables(text, true)
add_secondary_stress(syllables)
if narrow_level > 1 then
do_gemination(syllables, SEMILONG)
end
text = table.concat(syllables)
text = mw.ustring.gsub(text, VIRTUAL_BREAK_UNGEMINATE, VIRTUAL_BREAK)
-- restore the protected sequence
text = mw.ustring.gsub(text, J_PALATALIZE, "j" .. PALATALIZE)
end
-- PALATALIZE is kept only after "l"; "j" + PALATALIZE collapses to the bare
-- marker; after any other character the marker is removed
text = mw.ustring.gsub(text, "(.)" .. PALATALIZE,
function (preceding)
if preceding == "l" then
return preceding .. PALATALIZE
elseif preceding == "j" then
return PALATALIZE
else
return preceding
end
end)
text = manual_palatalization(text)
if narrow_level > 1 then text = do_voicing(text) end
if narrow_level > 0 then text = clean_virtual_break(text) end
text = first_round_of_common_replacements(text)
text = clean_ungeminate(text)
text = long_vowels_and_diphthongs(text)
text = long_consonants(text)
text = second_round_of_common_replacements(text, narrow_level > 1)
if narrow_level > 1 then
-- short e/o/ø get a lowering diacritic, long ones a raising diacritic
local vowels_short = { ["e"] = "e̞", ["o"] = "o̞", ["ø"] = "ø̞" }
local vowels_long = { ["e"] = "e̝", ["o"] = "o̝", ["ø"] = "ø̝" }
text = do_vowel_replacements(text, "eoø", vowels_short, vowels_long)
-- s/z become ʃ/ʒ except directly after TIE (i.e. inside an affricate)
text = mw.ustring.gsub(mw.ustring.gsub(text, "^s", "ʃ"), "([^" .. TIE .. "])s", "%1ʃ")
text = mw.ustring.gsub(mw.ustring.gsub(text, "^z", "ʒ"), "([^" .. TIE .. "])z", "%1ʒ")
text = standard_sandhi(text)
end
-- "j" after a vowel becomes non-syllabic "i" at the end of the text, before
-- a consonant (optionally across stress marks) and before a space
text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j$", "%1i" .. NONSYLLABIC)
text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j(" .. STRESS_PRIMARY .. "?" .. STRESS_SECONDARY .. "?[" .. IPA_CONSONANTS .. "])", "%1i" .. NONSYLLABIC .. "%2")
text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j ", "%1i" .. NONSYLLABIC .. " ")
return add_primary_stress(text)
end
-- Hevaha
--- Convert a respelling to IPA for the Hevaha dialect.
-- Starts from the Soikkola output, turns half-long marks into full length
-- marks and, at narrow level 2, removes the ring diacritic from b/d/ʒ/ɡ
-- directly before l or r.
-- @param text lowercase respelling
-- @param narrow_level 0, 1 or 2 (passed on to IPA_soikkola)
-- @return the IPA string
local function IPA_hevaha(text, narrow_level)
    local ipa = IPA_soikkola(text, narrow_level)
    ipa = mw.ustring.gsub(ipa, "ˑ", "ː")
    if narrow_level > 1 then
        local ring_below = U(0x0325)
        local ring_above = U(0x030A)
        ipa = mw.ustring.gsub(ipa, "([bdʒ])" .. ring_below .. "([lr])", "%1%2")
        ipa = mw.ustring.gsub(ipa, "ɡ" .. ring_above .. "([lr])", "ɡ%1")
    end
    return ipa
end
-- Ylä-Laukaa
-- Placeholder for the Ylä-Laukaa dialect: always raises an error.
-- The parameters mirror the other IPA_* functions but are not used.
local function IPA_ylalaukaa(text, narrow_level)
error("Ylä-Laukaa not implemented") -- TODO
end
--- <<< DIALECTS END >>> ---
--- <<< INTERFACE START >>> ---
--- Replace every ASCII "g" with the IPA letter "ɡ".
-- @param ipa transcription string
-- @return the results of mw.ustring.gsub (rewritten text, match count)
local function cleanup_IPA(ipa)
    local ascii_g, ipa_g = "g", "ɡ"
    return mw.ustring.gsub(ipa, ascii_g, ipa_g)
end
--- Remove PALATALIZE and UNGEMINATE markers before hyphenating.
-- @param text respelling
-- @return the results of mw.ustring.gsub (cleaned text, match count)
local function cleanup_for_hyphenate(text)
    return mw.ustring.gsub(text, "[" .. PALATALIZE .. UNGEMINATE .. "]", "")
end
--- Remove PALATALIZE and UNGEMINATE markers as well as hyphens.
-- @param text respelling
-- @return the results of mw.ustring.gsub (cleaned text, match count)
local function cleanup_for_hyphenate_int(text)
    return mw.ustring.gsub(text, "[" .. PALATALIZE .. UNGEMINATE .. "-]", "")
end
--- Drop a final "i" that follows o, u, y or ö.
-- This lets final /oi/, /ui/, /yi/, /øi/ in a respelling match titles
-- written with plain <o>, <u>, <y>, <ö>.
-- @param sp spelling to normalise
-- @return the normalised spelling (a single value)
local function cleanup_for_hyphenate_final(sp)
    local trimmed = mw.ustring.gsub(sp, "([ouyö])i$", "%1")
    return trimmed
end
--- Adapt a respelling so that its letters match the page title.
-- A final "i" missing from the title is dropped. If the title is entirely
-- lowercase the spelling is simply lowercased; otherwise each letter of the
-- spelling that equals (ignoring case) the next unused title letter is
-- replaced by that title letter, so the title's capitalisation is kept.
-- @param sp respelling (may contain non-letter markup characters)
-- @param title page title
-- @return the adapted spelling
local function match_spelling_with_title_for_hyphenation(sp, title)
    if mw.ustring.find(sp, "i$") and not mw.ustring.find(title, "i$") then
        sp = mw.ustring.gsub(sp, "i$", "")
    end
    if mw.ustring.lower(title) == title then
        return mw.ustring.lower(sp)
    end

    -- find letters in title
    local letters = {}
    for letter in mw.ustring.gmatch(title, "%a") do
        letters[#letters + 1] = letter
    end

    local respelled = {}
    local letter_index = 1
    for character in mw.ustring.gmatch(sp, ".") do
        local title_letter = letters[letter_index]
        -- title_letter is nil once the title's letters are used up; the
        -- remaining spelling characters are then copied through unchanged
        if title_letter
            and mw.ustring.match(character, "%a")
            and mw.ustring.lower(title_letter) == mw.ustring.lower(character) then
            respelled[#respelled + 1] = title_letter
            letter_index = letter_index + 1
        else
            respelled[#respelled + 1] = character
        end
    end
    return table.concat(respelled)
end
--- Check whether a respelling corresponds to the page title.
-- Both sides are lowercased and passed through cleanup_for_hyphenate_final;
-- the respelling additionally loses its markup characters, hyphens and dots.
-- @param sp respelling
-- @param title page title
-- @return true when the two normalised strings are equal
local function hyphenate_matches(sp, title)
    local without_dots = mw.ustring.gsub(cleanup_for_hyphenate_int(sp), "%.", "")
    local from_spelling = cleanup_for_hyphenate_final(mw.ustring.lower(without_dots))
    local from_title = cleanup_for_hyphenate_final(mw.ustring.lower(title))
    return from_spelling == from_title
end
--- Split a respelling into syllables for hyphenation.
-- @param text respelling
-- @return sequence of syllable strings, without separator symbols
local function hyphenate(text)
    -- Parentheses keep only the cleaned string: cleanup_for_hyphenate also
    -- returns gsub's match count, which would otherwise be passed on as the
    -- keep_sep_symbols argument of split_syllables.
    return split_syllables((cleanup_for_hyphenate(text)))
end
--- Spell out LONG-marked geminatable consonants as doubled letters.
-- A consonant (with optional PALATALIZE) followed by LONG becomes the
-- consonant twice plus the marker; "j" becomes "ij" instead.
-- @param text text to process
-- @return the results of mw.ustring.gsub (rewritten text, match count)
local function spell_long_consonants(text)
    local long_consonant = "([" .. m_izh.consonants_geminatable .. "])" .. "(" .. PALATALIZE .. "?)" .. LONG
    local function double(c, p)
        if c == "j" then
            return "ij"
        end
        return c .. c .. p
    end
    return mw.ustring.gsub(text, long_consonant, double)
end
--- Extract the rhyme from a form's rhyme transcription.
-- Primary stress marks are removed, the text is cut after the last
-- STRESS_SECONDARY (if any), and everything from the first IPA vowel
-- onwards is returned.
-- @param tuple table with a `rhyme` transcription string
-- @return the rhyme string, or nil when no IPA vowel is found
local function generate_rhyme(tuple)
    local ipa = mw.ustring.gsub(cleanup_IPA(tuple.rhyme), STRESS_PRIMARY, "")
    local last_secondary = mw.ustring.find(ipa, STRESS_SECONDARY .. "[^" .. STRESS_SECONDARY .. "]*$")
    if last_secondary ~= nil then
        ipa = mw.ustring.sub(ipa, last_secondary + 1)
    end
    local first_vowel = mw.ustring.find(ipa, "[" .. IPA_VOWELS .. "]")
    if first_vowel == nil then
        return nil
    end
    return mw.ustring.sub(ipa, first_vowel)
end
--- Generate broad, rhyme and narrow transcriptions for a list of forms.
-- A form starting with "-" is treated as a suffix and one ending with "-"
-- as a prefix: the hyphen is stripped before conversion and re-attached to
-- the broad and narrow results (without the leading primary stress for
-- suffixes); such forms get no rhyme.
-- @param fn dialect function called as fn(form, narrow_level)
-- @param forms sequence of respellings
-- @param variety variety name stored in the result
-- @return table { forms = { {broad, rhyme, narrow}, ... }, varieties = { variety } }
local function make_IPAs(fn, forms, variety)
    local transcriptions = {}
    for _, form in ipairs(forms) do
        local spelling = mw.ustring.lower(form)
        local is_suffix = mw.ustring.find(spelling, "^%-")
        local is_prefix = mw.ustring.find(spelling, "%-$")
        if is_suffix then
            spelling = mw.ustring.gsub(spelling, "^%-", "")
        end
        if is_prefix then
            spelling = mw.ustring.gsub(spelling, "%-$", "")
        end

        local entry = {}
        entry.broad = fn(spelling, 0)
        entry.rhyme = fn(spelling, 1)
        entry.narrow = fn(spelling, 2)

        if is_prefix then
            entry.broad = entry.broad .. "-"
            entry.rhyme = nil
            entry.narrow = entry.narrow .. "-"
        end
        if is_suffix then
            entry.broad = "-" .. mw.ustring.gsub(entry.broad, "^" .. STRESS_PRIMARY, "")
            entry.rhyme = nil
            entry.narrow = "-" .. mw.ustring.gsub(entry.narrow, "^" .. STRESS_PRIMARY, "")
        end
        transcriptions[#transcriptions + 1] = entry
    end
    return {
        forms = transcriptions,
        varieties = { variety }
    }
end
--- Format the transcriptions of one variety as a wikitext list item.
-- Every form contributes its broad transcription in slashes and its narrow
-- transcription in square brackets.
-- @param tuple result of make_IPAs
-- @param title page title (not used by this function)
-- @param has_spaces passed to the IPA formatter as `no_count`
-- @return wikitext line starting with "* "
local function format_IPAs(tuple, title, has_spaces)
    local dialects = require("Module:accent qualifier").format_qualifiers(lang, tuple.varieties)
    local items = {}
    for _, form in ipairs(tuple.forms) do
        items[#items + 1] = { pron = "/" .. cleanup_IPA(form.broad) .. "/" }
        items[#items + 1] = { pron = "[" .. cleanup_IPA(form.narrow) .. "]" }
    end
    local formatted = m_IPA.format_IPA_full { lang = lang, items = items, no_count = has_spaces }
    return "* " .. dialects .. " " .. formatted
end
--- Resolve a list parameter against a fallback.
-- @param param list given by the user (may be nil)
-- @param fallback value returned when `param` is nil or empty
-- @param allow_dash when falsy, a list consisting of the single item "-"
--   yields a new empty table
-- @return `fallback`, an empty table, or `param` itself
local function get_arg_list(param, fallback, allow_dash)
    local missing = not param or #param == 0
    if missing then
        return fallback
    end
    local lone_dash = #param == 1 and param[1] == "-"
    if lone_dash and not allow_dash then
        return {}
    end
    return param
end
-- Supported varieties. Each entry is
-- { parameter code, display name, conversion function, optional }.
-- In export.show, a variety flagged optional is only output when forms are
-- supplied under its own parameter; the others fall back to the default
-- spellings.
local varieties = {
{"A", "อาลา-เลากา", IPA_alalaukaa, false}, --Ala-Laukaa
{"S", "โซยก์โกลา", IPA_soikkola, false}, --Soikkola
{"H", "เฮวานา", IPA_hevaha, true}, --Hevaha
-- {"Y", "อือแล-เลากา", IPA_ylalaukaa, true}, --Ylä-Laukaa
}
-- rhymes only for these varieties
-- (keys are display names and must match the names in `varieties` exactly)
local varieties_with_rhymes = {
["อาลา-เลากา"] = true,
["โซยก์โกลา"] = true
}
--- Look up a variety entry by its parameter code.
-- @param variety_code code such as "A", "S" or "H"
-- @return the matching entry of `varieties`
-- Raises an error when the code is unknown.
local function get_variety(variety_code)
    for _, variety in ipairs(varieties) do
        if variety[1] == variety_code then
            return variety
        end
    end
    -- tostring() keeps the message intact for nil or non-string codes, where
    -- plain concatenation would itself fail with an unrelated error
    error("Unrecognized variety code: " .. tostring(variety_code))
end
--- Return the display name registered for a variety code.
-- @param variety_code code understood by get_variety
-- @return the second element of the matching variety entry
function export.get_variety(variety_code)
	local entry = get_variety(variety_code)
	return entry[2]
end
--- Tell whether any of the given variety names is listed in `varieties_with_rhymes`.
-- @param varieties sequence of variety display names
-- @return true if at least one name is listed, false otherwise
local function allow_rhyme_for_varieties(varieties)
	local allowed = false
	for _, name in ipairs(varieties) do
		if varieties_with_rhymes[name] then
			allowed = true
			break
		end
	end
	return allowed
end
--- Transcribe a single form for one variety.
-- @param form          spelling to transcribe
-- @param variety_code  code understood by get_variety
-- @param transcription optional key ("broad", "rhyme" or "narrow"); when
--                      given, only that field of the result is returned
-- @return the {broad=, rhyme=, narrow=} table, or the selected field
function export.generate_one(form, variety_code, transcription)
	local variety = get_variety(variety_code)
	local name, fn = variety[2], variety[3]

	local generated = make_IPAs(fn, { form }, name)
	local first = generated.forms[1]

	if not transcription then
		return first
	end
	return first[transcription]
end
--- Transcribe several forms for one variety.
-- @param forms         sequence of spellings to transcribe
-- @param variety_code  code understood by get_variety
-- @param transcription optional key ("broad", "rhyme" or "narrow"); when
--                      given, each result table is replaced by that field
-- @return sequence of {broad=, rhyme=, narrow=} tables, or of selected fields
function export.generate_multiple(forms, variety_code, transcription)
	local variety = get_variety(variety_code)
	local name, fn = variety[2], variety[3]

	local generated = make_IPAs(fn, forms, name).forms
	if not transcription then
		return generated
	end

	-- Replace each entry in place; a missing field leaves nil at that index.
	for i = 1, #generated do
		generated[i] = generated[i][transcription]
	end
	return generated
end
--- Template entry point: builds the pronunciation lines for an entry.
-- Arguments are read from the parent frame: positional spellings (1=),
-- per-variety spellings (A=, S=, H=, Y=) and title= (debugging or
-- demonstration only). Y= is accepted, but only varieties present in the
-- `varieties` table are processed.
-- @param frame frame object of the invocation
-- @return wikitext: one "* " line per processed variety, then a rhymes line
--         and a hyphenation line when available, joined by newlines and
--         followed by the formatted categories
function export.show(frame)
	local page_title = mw.title.getCurrentTitle().text
	-- Nothing in this function adds categories; the list is still formatted at the end.
	local categories = {}

	local params = {
		[1] = { list = true },
		A = { list = true }, -- Ala-Laukaa
		S = { list = true }, -- Soikkola
		H = { list = true }, -- Hevaha
		Y = { list = true }, -- Ylä-Laukaa
		title = {}, -- for debugging or demonstration only
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	local title = args["title"] or page_title
	local spellings = get_arg_list(args[1], { mw.ustring.lower(title) }, true)

	-- One transcription tuple per variety; optional varieties have no fallback.
	local IPAs = {}
	for _, variety in ipairs(varieties) do
		local param, name, fn, optional = unpack(variety)
		local fallback = nil
		if not optional then
			fallback = spellings
		end
		local forms = get_arg_list(args[param], fallback, true)
		if forms then
			IPAs[#IPAs + 1] = make_IPAs(fn, forms, name)
		end
	end

	local has_spaces = mw.ustring.find(title, " ")
	local hyphenation = {}
	local rhymes = {}

	-- Hyphenation and rhymes are only derived for titles without spaces.
	if not has_spaces then
		local candidate = spellings[1]
		if not hyphenate_matches(candidate, title) then
			-- try to geminate
			local syllables = split_syllables(candidate, true)
			do_gemination(syllables, LONG)
			candidate = spell_long_consonants(clean_ungeminate(table.concat(syllables)))
		end
		if hyphenate_matches(candidate, title) then
			table.insert(hyphenation, hyphenate(match_spelling_with_title_for_hyphenation(candidate, title)))
		end

		-- Collect each distinct rhyme once, in first-seen order.
		local seen_rhymes = {}
		for _, tuple in ipairs(IPAs) do
			if allow_rhyme_for_varieties(tuple.varieties) then
				for _, form in ipairs(tuple.forms) do
					if form.rhyme then
						local rhyme = generate_rhyme(form)
						if not seen_rhymes[rhyme] then
							seen_rhymes[rhyme] = true
							rhymes[#rhymes + 1] = rhyme
						end
					end
				end
			end
		end
	end

	local results = {}
	for _, tuple in ipairs(IPAs) do
		results[#results + 1] = format_IPAs(tuple, title, has_spaces)
	end

	if #rhymes > 0 then
		-- get all possible syllable counts from syllabifications
		local seen_counts = {}
		local sylcounts = {}
		for _, h in ipairs(hyphenation) do
			local count = #h
			if count > 0 and not seen_counts[count] then
				seen_counts[count] = true
				sylcounts[#sylcounts + 1] = count
			end
		end

		local rhymeobjs = {}
		for i, rhyme in ipairs(rhymes) do
			rhymeobjs[i] = { rhyme = rhyme }
		end

		local rhyme_line = require("Module:rhymes").format_rhymes(
			{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts })
		results[#results + 1] = "* " .. rhyme_line
	end

	if #hyphenation > 0 then
		local hyphs = {}
		for i, h in ipairs(hyphenation) do
			hyphs[i] = { hyph = h }
		end

		local hyph_line = require("Module:hyphenation").format_hyphenations(
			{ lang = lang, hyphs = hyphs, caption = "การแบ่งพยางค์" })
		results[#results + 1] = "* " .. hyph_line
	end

	local body = table.concat(results, "\n")
	return body .. require("Module:utilities").format_categories(categories, lang)
end
--- <<< INTERFACE END >>> ---
-- Hand the table of public functions back to whoever loads this module.
return export