-- Module table; the functions attached to it further down are what this file returns.
local export = {}
-- Short alias for mw.ustring.char, used below to write characters by code point.
local u = mw.ustring.char
-- Alias for mw.ustring.gsub; rsub() below wraps it.
local rsubn = mw.ustring.gsub
-- Thin wrapper around rsubn(): performs the same substitution but hands back
-- only the first return value (the resulting string).
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to exactly one result.
	return (rsubn(term, foo, bar))
end
-- Main Cyrillic → Thai-script table. export.tr() applies it one character at
-- a time, and export.reverse_tr() inverts it with pairs().
-- "↶" is a placeholder written in front of the vowel signs เ/แ; export.tr()
-- later swaps that vowel sign with the consonant preceding the placeholder.
-- Every key appears exactly once: repeated keys in a table constructor are
-- redundant at best and hide which value is meant at worst.
local regular_tt = {
	-- capital letters
	["А"]="ั", ["Б"]="บ", ["В"]="ว", ["Г"]="ฮ", ["Ґ"]="กฺ", ["Д"]="ด", -- Ukrainian в = /ʋ ~ w/
	["Е"]="↶แ", ["Є"]="ย↶แ", ["Ж"]="ชฺ", ["З"]="ซฺ",
	["И"]="ึ", ["І"]="ิ", ["Ї"]="ยิ", ["Й"]="ย",
	["К"]="ก", ["Л"]="ล", ["М"]="ม", ["Н"]="น", ["О"]="อ̂",
	["П"]="ป", ["Р"]="ร", ["С"]="ซ", ["Т"]="ต", ["У"]="ุ", ["Ф"]="ฟ",
	["Х"]="ฅ", ["Ц"]="ต͜ซ", ["Ч"]="ช", ["Ш"]="ฌ", ["Щ"]="ฌฺ",
	["Ь"]=u(0x02B9), ["Ю"]="ยุ", ["Я"]="ยั",
	-- small letters (same values as the capitals: the output has no case)
	["а"]="ั", ["б"]="บ", ["в"]="ว", ["г"]="ฮ", ["ґ"]="กฺ", ["д"]="ด",
	["е"]="↶แ", ["є"]="ย↶แ", ["ж"]="ชฺ", ["з"]="ซฺ",
	["и"]="ึ", ["і"]="ิ", ["ї"]="ยิ", ["й"]="ย",
	["к"]="ก", ["л"]="ล", ["м"]="ม", ["н"]="น", ["о"]="อ̂",
	["п"]="ป", ["р"]="ร", ["с"]="ซ", ["т"]="ต", ["у"]="ุ", ["ф"]="ฟ",
	["х"]="ฅ", ["ц"]="ต͜ซ", ["ч"]="ช", ["ш"]="ฌ", ["щ"]="ฌฺ",
	["ь"]=u(0x02B9), ["ю"]="ยุ", ["я"]="ยั",
	-- apostrophes: ASCII apostrophe, right single quotation mark (U+2019) and
	-- modifier letter apostrophe (U+02BC) → modifier letter double prime (U+02BA)
	["'"]=u(0x02BA), [u(0x2019)]=u(0x02BA), [u(0x02BC)]=u(0x02BA),
	-- others
	[u(0x02CA)]="", [u(0x0301)]="", [u(0x0341)]="", -- acute
	-- Ukrainian style quotes
	['«']='“', ['»']='”',
}
-- Kept apart from `regular_tt` so that these letters do not interfere with the
-- reverse transliteration, which is built from `regular_tt` only.
-- Every key appears exactly once; a repeated key in a table constructor
-- silently discards one of its values.
local obsolete_tt = {
	-- obsolete letters, pre-reform
	["Ё"]="Ë", ["ё"]="ë",
	["Э"]="↶เ", ["э"]="↶เ",
	-- obsolete letters, Middle Ukrainian
	-- NOTE(review): Ъ/ъ, Ы/ы and Ѣ/ѣ were previously also listed in the
	-- pre-reform group (ʺ, "ึ", "Ě"/"ě"); those entries were overwritten by
	-- the ones below, so the values below are the ones that were in effect.
	-- Confirm that Latin output is really what is wanted for these letters.
	["Ѥ"]='Je', ["ѥ"]='je', ["Ъ"]='ʺ', ["ъ"]='ʺ', ["Ы"]='Y', ["ы"]='y', ["Ѣ"]='I', ["ѣ"]='i',
	["Ѧ"]='Ja', ["ѧ"]='ja', ["Ѩ"]='Ja', ["ѩ"]='ja', ["Ѫ"]='U', ["ѫ"]='u', ["Ѭ"]='Ju', ["ѭ"]='ju',
	-- the small psi is keyed on "ѱ"; it used to repeat the capital "Ѱ"
	["Ѯ"]='Ks', ["ѯ"]='ks', ["Ѱ"]='Ps', ["ѱ"]='ps', ["Ѳ"]='F', ["ѳ"]='f', ["Ѵ"]='I', ["ѵ"]='i',
	["Ѡ"]='O', ["ѡ"]='o',
}
-- Combining acute accent (U+0301).
local AC = u(0x0301)
-- Lookup table from a precomposed acute-accented Latin vowel to the plain
-- vowel followed by the combining acute; export.reverse_tr() feeds it to rsub().
local acute_decomposer = {}
for precomposed, plain in pairs({
	["á"] = "a", ["Á"] = "A",
	["é"] = "e", ["É"] = "E",
	["í"] = "i", ["Í"] = "I",
	["ó"] = "o", ["Ó"] = "O",
	["ú"] = "u", ["Ú"] = "U",
	["ý"] = "y", ["Ý"] = "Y",
}) do
	acute_decomposer[precomposed] = plain .. AC
end
-- Runs every { pattern, replacement } entry of `passes` over `str`, strictly
-- in list order, and returns the final string.
local function apply_passes(str, passes)
	for _, pass in ipairs(passes) do
		str = rsub(str, pass[1], pass[2])
	end
	return str
end

-- Passes that run first. The order is significant: each pass sees the output
-- of the one before it.
local tr_setup_passes = {
	-- Remove word-final hard sign, either utterance-finally or followed by
	-- a non-letter character such as space, comma, period, hyphen, etc.
	{ "[Ъъ]$", "" },
	{ "[Ъъ]([%A])", "%1" },
	{ "'+", { ["'"] = 'ʺ' } }, -- neutral apostrophe
	-- character-by-character table lookups
	{ '.', regular_tt },
	{ '.', obsolete_tt },
	-- Acute has no use here
	{ AC, "" },
	-- Put อ in front of a vowel sign (or the ↶ placeholder) that stands at the
	-- start of the text, after whitespace/punctuation, or after one of ั ิ ุ.
	{ "^([ัิึุ↶])", "อ%1" },
	{ "([%s%p])([ัิึุ↶])", "%1อ%2" },
	{ "([ัิุ])([ัิึุ↶])", "%1อ%2" },
	-- Same three positions for อ̂.
	{ "^(อ̂)", "อ%1" },
	{ "([%s%p])(อ̂)", "%1อ%2" },
	{ "([ัิุ])(อ̂)", "%1อ%2" },
	-- Swap consonant (with optional ฺ) and the vowel sign behind the ↶ placeholder.
	{ "([ก-ฮ]ฺ?)↶([เแ])", "%2%1" },
}

-- Passes that turn a short vowel sign into its long counterpart, generated
-- per vowel pair so that every group keeps the order ั, ิ, ึ, ุ.
local tr_final_passes, tr_before_vowel, tr_before_o, tr_before_lead = {}, {}, {}, {}
for i, pair in ipairs({ { "ั", "า" }, { "ิ", "ี" }, { "ึ", "ื" }, { "ุ", "ู" } }) do
	local short, long = pair[1], pair[2]
	-- doubled sign, sign at the very end, sign before whitespace/punctuation
	tr_final_passes[#tr_final_passes + 1] = { short .. short, long }
	tr_final_passes[#tr_final_passes + 1] = { short .. "$", long }
	tr_final_passes[#tr_final_passes + 1] = { short .. "([%s%p])", long .. "%1" }
	-- sign followed by one consonant (optional ฺ) and another vowel sign
	tr_before_vowel[i] = { short .. "([ก-ฮ]ฺ?)([ัาิีึืุู])", long .. "%1%2" }
	-- sign followed by one consonant (optional ฺ) and อ̂
	tr_before_o[i] = { short .. "([ก-ฮ]ฺ?)(อ̂)", long .. "%1%2" }
	-- sign directly followed by เ or แ
	tr_before_lead[i] = { short .. "([เแ])", long .. "%1" }
end

function export.tr(text)--translit any words or phrases
	text = apply_passes(text, tr_setup_passes)
	text = apply_passes(text, tr_final_passes)
	-- These two groups run twice: a single gsub sweep cannot rewrite matches
	-- that overlap, so the second round picks up what the first one skipped.
	for _ = 1, 2 do
		text = apply_passes(text, tr_before_vowel)
		text = apply_passes(text, tr_before_o)
	end
	return apply_passes(text, tr_before_lead)
end
--- Reverse-transliterate a word or phrase back to Cyrillic.
-- The lookup table is the inverse of `regular_tt` plus a few fixed entries.
-- Only values that are a single character can be hit by the final '.' pass,
-- so multi-character outputs of `regular_tt` (for example "ย↶แ") are not
-- reversed here.
-- @param text string to convert
-- @return the converted string
function export.reverse_tr(text)--reverse-translit any words or phrases
	local lower = mw.ustring.lower
	local reverse_tt = {}
	for k, v in pairs(regular_tt) do
		-- Capital and small Cyrillic letters share one value in `regular_tt`,
		-- and pairs() visits keys in no specified order. Always let the small
		-- letter win so the result does not depend on traversal order.
		if reverse_tt[v] == nil or k == lower(k) then
			reverse_tt[v] = k
		end
	end
	-- Fixed entries; these override whatever the loop stored for the same key.
	reverse_tt['ʺ'] = "'"
	reverse_tt['ʹ'] = "ь"
	reverse_tt['y'] = "и"
	reverse_tt['Y'] = "И"
	-- Split precomposed acute-accented Latin vowels into vowel + combining acute.
	text = rsub(text, '.', acute_decomposer)
	-- NOTE(review): reverse_tt, as built above, has no two-letter Latin keys,
	-- so these two passes look like no-ops at present; they appear to be left
	-- over from a Latin-based transliteration. Confirm before removing.
	text = rsub(text, '[Jj][aeiu]', reverse_tt)
	text = rsub(text, '[Šš]č', reverse_tt)
	text = rsub(text, '.', reverse_tt)
	return text
end
return export