-- Table returned at the end of the file as the module's public interface.
local export = {}

-- Short aliases for the MediaWiki Unicode-aware string functions.
local U = mw.ustring.char
local rsubn = mw.ustring.gsub

-- Arabic-script combining marks, built from their code points.
local zabar = U(0x64E)   -- fatha; written for short "a"
local pesh = U(0x64F)    -- damma; written for short "u"
local zer = U(0x650)     -- kasra; written for short "i"
local tashdid = U(0x651) -- also called shadda
local jazm = U(0x652)    -- also called sukoon
local highhmz = U(0x654) -- hamza above

-- Other characters referenced by the rewrite rules in export.tr.
local he = "ه"
local zwnj = U(0x200C)       -- zero-width non-joiner
local pitchaccent = U(0x301) -- combining acute accent; stripped from the input
-- Final one-character lookup from a romanised consonant to its Arabic-script
-- letter. export.tr applies it after all context-sensitive rewrites are done.
--
-- STOP: do not add entries here to work around stray characters. fa-IPA is
-- expected to remove incorrect characters; if one shows up, check there.
local convert_consonants = {
	-- unaccented Latin letters
	["b"] = "ب",
	["d"] = "د",
	["f"] = "ف",
	["g"] = "گ",
	["h"] = he,
	["j"] = "ج",
	["k"] = "ک",
	["l"] = "ل",
	["m"] = "م",
	["n"] = "ن",
	["p"] = "پ",
	["q"] = "ق",
	["r"] = "ر",
	["s"] = "س",
	["t"] = "ت",
	["w"] = "و",
	["x"] = "خ",
	["y"] = "ی",
	["z"] = "ز",

	-- letters carrying a diacritic
	["ḇ"] = "ڤ",
	["č"] = "چ",
	["ḏ"] = "ذ",
	["ğ"] = "غ",
	["š"] = "ش",
	["ž"] = "ژ",

	-- only for Hazaragi
	["ḍ"] = "ڈ",
	["ṭ"] = "ٹ",

	-- any apostrophe that export.tr has not already rewritten
	["'"] = "ئ",
}
-- Final one-character lookup for the vowels that are still romanised once
-- export.tr has finished its context-sensitive rewrites.
local convert_vowels = {
	["a"] = zabar,
	["i"] = zer,
	["u"] = pesh,
	["ā"] = "ا",
}

-- The three strings below are spliced into "[...]" character classes by
-- export.tr, so they must not contain pattern-magic characters.
local vowels = "aiuēīōū" --including ā causes issues
local consonants = "bḇβptṭjčxdðḏḍrzžsšğ'fqkglmnhwy"
-- Characters after which export.tr turns a hyphen into jazm rather than zwnj.
-- NOTE(review): presumably the letters that do not join to the following
-- letter in Arabic script -- confirm.
local dc_consonants = "ādḍðrzžw" .. jazm
-- Capture-group fragments shared by several of the patterns below.
local tr_hyphen = "([-])"
local tr_cons = "([" .. consonants .. "])"
local tr_vowel = "([" .. vowels .. "])"
local tr_non_vowel = "([^" .. vowels .. "])"
local tr_dc = "([" .. dc_consonants .. "])"
local tr_non_dc = "([^" .. dc_consonants .. "])"

-- Rules run on the raw input, before word boundaries are marked with "#".
-- Each entry is { pattern, replacement } for mw.ustring.gsub.
local tr_raw_rules = {
	{ "([%(%)])", "" },
	{ pitchaccent, "" },
	{ " | ", "# | #" },
}

-- Rules run after every word has been fenced with "#".
-- ORDER MATTERS: later rules rely on the output of earlier ones.
local tr_word_rules = {
	{ "`", "" },
	-- drop spaces around commas, then turn each comma into "] ,["
	{ ", ", "," },
	{ " ,", "," },
	{ ",", "] ,[" },
	-- square brackets also count as word boundaries
	{ "%]", "#]#" },
	{ "%[", "#[#" },
	{ "v", "w" },
	-- a word consisting only of "u" is parked as a placeholder until the end
	{ "#u#", "#ؤ#" },
	{ "e#", "i-yi#" },
	{ "e([h'])", "i%1" },
	{ "o([h'])", "u%1" },
	{ "([aiu]h)" .. tr_non_vowel .. "#", "%1" .. jazm .. "%2#" },
	--prevent izafa from being processed
	{ tr_cons .. tr_hyphen .. "i#", "%1_i_#" },
	{ "([āōū])" .. tr_hyphen .. "yi#", "%1_yi_#" },
	{ "([ēīy])" .. tr_hyphen .. "yi#", "%1yi_#" },
	{ "īy", "iy" },
	-- doubled consonant -> consonant + tashdid
	{ tr_cons .. "%1", "%1" .. tashdid },
	-- consonant cluster -> jazm on the first consonant; listed twice on
	-- purpose, because one pass cannot match overlapping pairs
	{ tr_cons .. tr_cons, "%1" .. jazm .. "%2" },
	{ tr_cons .. tr_cons, "%1" .. jazm .. "%2" },
	{ "#ā", "#آ" },
	{ "u'", "uؤ" },
	{ "i'", "iئ" },
	{ "'ā", "آ" },
	{ "'u", "ؤu" },
	{ "'i", "ئi" },
	{ "([aiu])" .. tr_hyphen, "%1h-" },
	{ tr_dc .. tr_hyphen .. "ā", "%1" .. jazm .. "آ" },
	{ tr_non_dc .. tr_hyphen .. "ā", "%1" .. zwnj .. "آ" },
	-- pair lonely vowels to an alif
	{ tr_dc .. tr_hyphen .. tr_vowel, "%1" .. jazm .. "ā%3" },
	{ tr_non_dc .. tr_hyphen .. tr_vowel, "%1" .. zwnj .. "ā%3" },
	{ tr_dc .. tr_hyphen .. tr_cons, "%1" .. jazm .. "%3" },
	{ tr_non_dc .. tr_hyphen .. tr_cons, "%1" .. zwnj .. "%3" },
	{ "#" .. tr_vowel, "#ā%1" },
	{ "[-]", "" },
	{ "([aiu])#", "%1h#" },
	-- try to find izafa marks and unprocess them
	{ "([aiu]h)(" .. zwnj .. "yi)h#", "%1" .. highhmz .. "#" },
	{ "([aiu]h)(" .. zwnj .. "āi)h#", "%1" .. highhmz .. "#" },
	{ "([āōū])_yi_#", "%1yi#" },
	{ "(['])#", "ء#" },
	-- these need to happen last so they are not marked as unpaired consonants
	{ "ū", "uw" },
	{ "ī", "iy" },
	{ "ē", "y" },
	{ "ō", "w" },
	-- resolve the lone-"u" placeholder introduced above
	{ "#ؤ#", "ؤu" },
	{ "%_", "" },
	-- final per-character lookups (the replacement is a table here)
	{ ".", convert_consonants },
	{ ".", convert_vowels },
	-- remove the remaining helper characters
	{ "[.]", "" },
	{ "#", "" },
	{ "%[ ", "[" }, --this prevents weird spacing
}

-- Runs every { pattern, replacement } rule over str, in list order.
local function tr_apply(str, rules)
	for i = 1, #rules do
		local rule = rules[i]
		-- plain assignment keeps only gsub's first result (the new string)
		str = rsubn(str, rule[1], rule[2])
	end
	return str
end

-- Converts romanised text to Arabic script by running the ordered rule lists
-- above and then the convert_consonants / convert_vowels lookups.
--
-- text: the romanised string to convert.
-- lang, sc: not used by this function.
--   NOTE(review): presumably kept so the signature matches what callers of
--   transliteration modules pass -- confirm.
-- Returns the converted string.
function export.tr(text, lang, sc)
	local marked = tr_apply(text, tr_raw_rules)
	-- fence every space-separated word with "#" so rules can anchor on edges
	marked = "##" .. rsubn(marked, " ", "# #") .. "##"
	return tr_apply(marked, tr_word_rules)
end
return export