local export = {}
local langcode = "pl"
local lang = require("Module:languages").getByCode(langcode)
local m_IPA = require("Module:IPA")
local m_pl_IPA = require("Module:pl-IPA")
-- Letters treated as vowels when a word is partitioned; `vowel` is the
-- corresponding single-character pattern class.
local vowels = "aeiouyąęó"
local vowel = "[" .. vowels .. "]"
-- Letters treated as consonants; `consonant` is the corresponding pattern
-- class. "u" and "y" also appear in `vowels`; partition() tests the vowel
-- class first, so those letters reach the consonant branch only if that
-- ordering changes.
local consonants = "bcćdfghjklłmnńpqrsśtuvwxyzźż"
local consonant = "[" .. consonants .. "]"
-- vowel digraphs, not necessarily actual phonetic diphthongs
-- (key = first letter, value = pattern for the possible second letters)
-- `diphthong_i_v2` is also used by is_respelling_close_enough() to treat
-- "j" + one of these letters the same as "i" + that letter.
local diphthong_i_v2 = "[aąoeęuói]"
local diphthongs = {
["a"] = "u",
["e"] = "u",
["i"] = diphthong_i_v2
}
-- consonant digraphs (key = first letter, value = possible second letters)
local digraphs = {
["c"] = "[hz]",
["d"] = "[zźż]",
["q"] = "u",
["r"] = "z",
["s"] = "z",
}
-- Word endings for which get_word_suffix() returns 1.
local past_tense_suffixes = {
"liśmy", "liście", "łyśmy", "łyście",
}
-- Word endings for which get_word_suffix() returns 2.
-- NOTE(review): "ika"/"yka" and "ice"/"yce" are listed twice; this is harmless
-- because the list is only used for suffix membership tests.
local latin_borrowing_suffixes = {
"ika", "yka",
"iki", "yki",
"ika", "yka",
"ice", "yce",
"ikom", "ykom",
"ikę", "ykę",
"iką", "yką",
"ice", "yce",
"ikach", "ykach",
"iko", "yko",
}
-- If this is changed, the next two functions also need to be changed.
--- Decide whether a respelling is the same as the page title up to syllable
-- marks and the j/i spelling difference.
-- Apostrophes and dots are removed from the respelling, then in both strings
-- every "j" followed by one of the `diphthong_i_v2` letters is rewritten as
-- "i" before the two are compared.
-- @param respelling  respelling supplied to the template
-- @param word        the actual title to compare against
-- @return true when the normalised strings are equal, false otherwise
local function is_respelling_close_enough(respelling, word)
	local function fold_j(text)
		-- parentheses keep only the first result of gsub (the string)
		return (mw.ustring.gsub(text, "j(" .. diphthong_i_v2 .. ")", "i%1"))
	end

	local without_marks = mw.ustring.gsub(respelling, "['.]", "")
	return fold_j(word) == fold_j(without_marks)
end
--- Split a respelling into a list of vowel, consonant and break parts.
-- Apostrophes and hyphens in `word` are first turned into dots. The word is
-- then scanned left to right; each step emits one entry:
--   { "v", text }  a vowel or vowel digraph (see `diphthongs`)
--   { "c", text }  a consonant or consonant digraph (see `digraphs`)
--   { "b", nil }   an explicit syllable break (a dot)
-- Matching is done on the lower-cased respelling, but `text` is cut from
-- `oword`, so the original letters are what ends up in the parts.
-- @param word   respelling, possibly containing "'", "-" or "." marks
-- @param oword  original title the letters are copied from
-- @return list of parts, or nil if the word contains a space or any
--         character that is not a known letter or a break mark
local function partition(word, oword)
	local parts = {}
	local total = mw.ustring.len(word)
	word = mw.ustring.gsub(word, "['-]", ".")
	local lowered = mw.ustring.lower(word)
	local cursor = 1
	-- number of break marks seen so far that have no counterpart in `oword`
	local shift = 0

	-- Emit one letter (or a two-letter digraph listed in `pairings`) of the
	-- given kind, starting at the cursor, and advance the cursor past it.
	local function take(kind, pairings)
		local first = mw.ustring.sub(lowered, cursor, cursor)
		local second = pairings[first]
		local width = 1
		if second and mw.ustring.find(lowered, "^" .. first .. second, cursor) then
			width = 2
		end
		local from = cursor - shift
		table.insert(parts, { kind, mw.ustring.sub(oword, from, from + width - 1) })
		cursor = cursor + width
	end

	while cursor <= total do
		if mw.ustring.find(lowered, "^" .. vowel, cursor) then
			take("v", diphthongs)
		elseif mw.ustring.find(lowered, "^" .. consonant, cursor) then
			take("c", digraphs)
		elseif mw.ustring.find(word, "^% ", cursor) then
			-- multiword, do not hyphenate
			return nil
		elseif mw.ustring.find(word, "^%.", cursor) then
			-- syllable break; if the original has no "'" or "-" here, the mark
			-- exists only in the respelling and later indices must be shifted
			if not mw.ustring.find(oword, "^['-]", cursor - shift) then
				shift = shift + 1
			end
			table.insert(parts, { "b", nil })
			cursor = cursor + 1
		else
			-- unrecognized symbol
			return nil
		end
	end

	return parts
end
--- Classify a word by its ending.
-- Stress marks, apostrophes, dots and commas are stripped first. The strip
-- uses mw.ustring.gsub because the class contains the multi-byte characters
-- "ˈ" and "ˌ"; the byte-oriented string.gsub would match their individual
-- bytes and could damage unrelated UTF-8 letters that share those bytes.
-- @param word  the word (respelling) to inspect
-- @return 2 if the word ends in one of `latin_borrowing_suffixes`,
--         1 if it ends in one of `past_tense_suffixes`,
--         0 otherwise
local function get_word_suffix(word)
	word = mw.ustring.gsub(word, "[ˈ'.,ˌ]", "")

	-- Byte-wise comparison is safe here: both sides are whole UTF-8 strings.
	local function ends_with_any(suffixes)
		for _, suffix in ipairs(suffixes) do
			if word:sub(-#suffix) == suffix then
				return true
			end
		end
		return false
	end

	-- The Latin-borrowing class takes precedence, as in the original
	-- sequential checks where it was tested last and overwrote the result.
	if ends_with_any(latin_borrowing_suffixes) then
		return 2
	end
	if ends_with_any(past_tense_suffixes) then
		return 1
	end
	return 0
end
--- Build a syllable-by-syllable hyphenation from a respelling.
-- The word is partitioned with partition() and the parts are then grouped:
--   * a consonant directly after a vowel provisionally closes the syllable;
--     if a vowel follows immediately, that consonant is moved to the start
--     of the next syllable instead;
--   * two adjacent vowel parts start separate syllables;
--   * an explicit break part ends the current syllable;
--   * trailing consonants without a vowel are attached to the last syllable.
-- @param word    respelling, possibly with "'", "-" or "." marks
-- @param otitle  original title from which the letters are taken
-- @return list of syllable strings, or nil if the word cannot be partitioned
function export.generate_hyphenation(word, otitle)
	local parts = partition(word, otitle)
	if not parts then
		return nil
	end

	local syllables = {}
	local pending = ""          -- syllable currently being assembled
	local has_nucleus = false   -- whether `pending` already contains a vowel part
	local coda_len = nil        -- length of the consonant that closed the last syllable

	-- Store `pending`: as a new syllable if it has a vowel (or is the very
	-- first chunk), otherwise glued onto the previous syllable.
	local function flush_pending()
		if #pending == 0 then
			return
		end
		if has_nucleus or #syllables < 1 then
			table.insert(syllables, pending)
		else
			syllables[#syllables] = syllables[#syllables] .. pending
		end
	end

	for _, entry in ipairs(parts) do
		local kind, text = entry[1], entry[2]
		if kind == "v" then
			if coda_len then
				-- pull the closing consonant back out of the previous syllable
				local previous = syllables[#syllables]
				pending = pending .. mw.ustring.sub(previous, -coda_len)
				syllables[#syllables] = mw.ustring.sub(previous, 1, -coda_len - 1)
			end
			if has_nucleus then
				table.insert(syllables, pending)
				pending = ""
			end
			has_nucleus = true
			coda_len = nil
			pending = pending .. text
		elseif kind == "c" then
			pending = pending .. text
			if has_nucleus then
				table.insert(syllables, pending)
				pending = ""
				has_nucleus = false
				coda_len = mw.ustring.len(text)
			else
				coda_len = nil
			end
		elseif kind == "b" then
			-- explicit syllable break
			flush_pending()
			pending = ""
			has_nucleus = false
			coda_len = nil
		else
			-- unrecognized kind
			return nil
		end
	end

	flush_pending()
	return syllables
end
-- Pattern class of the IPA vowel symbols searched for by generate_rhyme().
local ipavowel = "[aɛiɨɔu]"

--- Derive a rhyme from an IPA transcription.
-- The rhyme is the part of the transcription starting at the second-to-last
-- vowel symbol (or at the only vowel, if there is just one), with stress
-- marks and syllable dots removed.
-- @param ipa  IPA transcription string
-- @return the rhyme string, or nil if there is no vowel or the result
--         contains a space
function export.generate_rhyme(ipa)
	-- positions of the second-to-last and the last vowel found so far
	local before_last, last
	local search_from = 1
	repeat
		local hit = mw.ustring.find(ipa, ipavowel, search_from)
		if hit then
			before_last, last = last, hit
			search_from = hit + 1
		end
	until not hit

	if not last then
		return nil
	end

	local rhyme = mw.ustring.sub(ipa, before_last or last)
	rhyme = mw.ustring.gsub(rhyme, "[ˈˌ.]", "")
	if mw.ustring.find(rhyme, " ") then
		return nil -- copout, something must be wrong
	end
	return rhyme
end
--- Template entry point: builds the pronunciation lines for an entry.
-- Arguments are read from the parent frame. Depending on them, the output
-- contains IPA transcriptions (given via `ipa` or generated from the
-- respellings in the numbered parameters / the page title), an optional
-- Middle Polish line (`mp`), audio files (`a`/`audio` with captions
-- `ac`/`caption`), rhymes (`r`/`rhymes`), hyphenations (`h`/`hyphen`) and
-- homophones (`hh`/`homophones`).
-- @param frame  Scribunto frame object
-- @return wikitext: the bullet lines joined by newlines, followed by the
--         formatted tracking categories
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = { list = true },
		["ipa"] = { list = true, default = nil, allow_holes = true },
		["qual"] = { list = true, allow_holes = true },
		["n"] = { list = true, allow_holes = true },
		["h"] = { list = true, allow_holes = true }, ["hyphen"] = {},
		["r"] = { list = true, allow_holes = true }, ["rhymes"] = {},
		["a"] = { list = true, default = nil }, ["audio"] = {},
		["ac"] = { list = true, default = nil }, ["caption"] = {},
		["hh"] = { default = "" }, ["homophones"] = {},
		["mp"] = { list = true, allow_holes = true },
		["q"] = { list = true, default = nil, allow_holes = true },
		["hp"] = { list = true, default = nil, allow_holes = true },
		["rp"] = { list = true, default = nil, allow_holes = true },
		["hhp"] = { list = true, default = nil, allow_holes = true },
		["nohyphen"] = { type = "boolean", default = false },
		["norhymes"] = { type = "boolean", default = false },
		["fs"] = { type = "boolean" }, ["fixstress"] = {},
		["title"] = { default = nil }, -- for debugging or demonstration only
	})

	local words, transcriptions, transcriptions_raw
	local lines = {}
	local categories = {}
	local actual = args["title"] or mw.title.getCurrentTitle().text

	-- Respellings default to the page title when none are given.
	if next(args[1]) ~= nil then
		words = args[1]
	else
		words = { actual }
	end
	local multiword = mw.ustring.find(words[1], " ")

	local hyphenations = args["h"]
	local rhymes = args["r"]
	local ipa = args["ipa"]
	if #ipa < 1 then ipa = nil end
	local qualifiers = args["q"]
	if not qualifiers or qualifiers.maxindex < 1 then qualifiers = args["qual"] end
	local mp = args["mp"]
	local hyphlabels = args["hp"]
	local rhymlabels = args["rp"]
	local nohyphen = args["nohyphen"]
	local norhymes = args["norhymes"]
	local fixstress = args["fs"]
	if args["fixstress"] then fixstress = args["fixstress"] end
	local homophones = mw.text.split(args["hh"], ",")
	local homophonelabels = args["hhp"]
	if #homophones == 1 and homophones[1] == "" then homophones = {} end
	local audio = {}
	local audios = args["a"]
	local captions = args["ac"]

	local word_suffix = 0
	if not ipa and #words == 1 then
		-- 0 - normal word
		-- 1 - past tense verb stressed antepenultimately
		-- 2 - Latin borrowing stressed antepenultimately
		word_suffix = get_word_suffix(words[1])
	end
	-- The stress fix applies when requested explicitly, or by default
	-- (fixstress unset) for past tense forms only.
	if not (fixstress or (fixstress == nil and word_suffix == 1)) then
		word_suffix = 0
	end

	-- Long-form aliases override the first entry of the list parameters.
	-- (Fixed: this used to index the undefined global `hyphenation`.)
	if args["hyphen"] then hyphenations[1] = args["hyphen"] end
	if args["rhymes"] then rhymes[1] = args["rhymes"] end
	if args["homophones"] then homophones = mw.text.split(args["homophones"], ",") end
	if args["audio"] then audios[1] = args["audio"] end
	-- (Fixed: this used to test the undeclared key "captions", so the
	-- `caption` alias never took effect.)
	if args["caption"] then captions[1] = args["caption"] end

	-- Automatic hyphenation is only attempted when every respelling matches
	-- the title closely enough.
	local respelling_ok = true
	for _, w in ipairs(words) do
		if not is_respelling_close_enough(w, actual) then
			respelling_ok = false
			break
		end
	end

	for i, audiofile in ipairs(audios) do
		if audiofile then
			table.insert(audio, {file = audiofile, caption = captions[i]})
		end
	end

	-- A single "-" disables the corresponding section.
	if #hyphenations == 1 and hyphenations[1] == "-" then
		nohyphen = true
	end
	if #rhymes == 1 and rhymes[1] == "-" then
		norhymes = true
	end

	if word_suffix == 0 then
		if ipa then
			-- Transcriptions supplied by hand.
			transcriptions = {}
			transcriptions_raw = {}
			for i = 1, #ipa do
				local qual = qualifiers[i]
				table.insert(transcriptions, {
					pron = ipa[i],
					qualifiers = qual and { qual } or nil,
					note = args.n[i]
				})
			end
		else
			-- Transcriptions generated from the respellings.
			transcriptions = {}
			transcriptions_raw = {}
			for i = 1, #words do
				local qual = qualifiers[i]
				local ipaconv = m_pl_IPA.convert_to_IPA(words[i])
				table.insert(transcriptions_raw, ipaconv)
				table.insert(transcriptions, {
					pron = "/" .. ipaconv .. "/",
					qualifiers = qual and { qual } or nil,
					note = args.n[i]
				})
			end
		end
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions })
	else
		-- Two lines are produced: one with the stress moved to the third
		-- syllable from the end, one with the transcription as generated.
		transcriptions = {}
		transcriptions_raw = {}
		local transcriptions2 = {}
		local qualifier1, qualifier2
		if word_suffix == 1 then
			qualifier1 = {"prescriptive standard; rarely used"}
			qualifier2 = {"colloquial; overall more common"}
		elseif word_suffix == 2 then
			qualifier1 = {"standard"}
			qualifier2 = {"colloquial; common in casual speech"}
		end
		local ipaconv = m_pl_IPA.convert_to_IPA(words[1])
		table.insert(transcriptions_raw, ipaconv)
		-- Split at stress marks and dots, then re-join with the stress mark
		-- in front of the third-from-last piece and dots elsewhere.
		local ipaconv_syllables = mw.text.split(ipaconv, "([ˈ.])")
		for j, syl in ipairs(ipaconv_syllables) do
			if j == (#ipaconv_syllables - 2) then
				ipaconv_syllables[j] = "ˈ" .. syl
			elseif j ~= 1 then
				ipaconv_syllables[j] = "." .. syl
			end
		end
		local ipaconv_fixed_stress = table.concat(ipaconv_syllables)
		table.insert(transcriptions, {
			pron = "/" .. ipaconv_fixed_stress .. "/",
			qualifiers = qualifier1,
		})
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions })
		table.insert(transcriptions2, {
			pron = "/" .. ipaconv .. "/",
			qualifiers = qualifier2,
		})
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions2 })
	end

	-- Middle Polish line; "+" stands for the page title.
	if #mp > 0 then
		transcriptions = {}
		for i = 1, #mp do
			if mp[i] == "+" then
				mp[i] = actual
			end
			local ipaconv = require("Module:zlw-mpl-IPA").convert_to_IPA_tables({mp[i]});
			for _, v in ipairs(ipaconv) do
				table.insert(transcriptions, v);
			end
		end
		table.insert(lines, "* " .. require("Module:accent qualifier").format_qualifiers(lang, {"Middle Polish"}) .. " " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions })
	end

	for _, a in ipairs(audio) do
		table.insert(lines, "* " .. require("Module:audio").format_audio { lang = lang, file = a["file"], caption = a["caption"] })
	end

	-- After this step every hyphenation is a list of syllables: either the
	-- generated one, or the manual ones split at dots.
	if not ipa and #hyphenations < 1 and respelling_ok and not multiword then
		local autohyph = export.generate_hyphenation(words[1], actual)
		if autohyph then
			table.insert(hyphenations, autohyph)
		end
	elseif #hyphenations >= 1 then
		local newhyphenations = {}
		for i, h in ipairs(hyphenations) do
			local t = {}
			for x in mw.text.gsplit(h, "[.]") do
				table.insert(t, x)
			end
			newhyphenations[i] = t
		end
		hyphenations = newhyphenations
	end

	if not norhymes then
		if not ipa and #rhymes < 1 and #transcriptions_raw > 0 then
			local autorhyme = export.generate_rhyme(transcriptions_raw[1])
			if autorhyme then
				table.insert(rhymes, autorhyme)
			end
		end
		if #rhymes > 0 then
			-- merge rhymes if they have identical labels
			local last_label = false
			local new_rhymes = {}
			local new_labels = {}
			local current_list = {}
			for i, r in ipairs(rhymes) do
				local label = rhymlabels[i]
				if last_label == label then
					table.insert(current_list, r)
				else
					if #current_list > 0 then
						table.insert(new_rhymes, current_list)
					end
					if last_label ~= false then
						table.insert(new_labels, last_label)
					end
					current_list = { r }
					last_label = label
				end
			end
			table.insert(new_rhymes, current_list)
			table.insert(new_labels, last_label)
			rhymes = new_rhymes
			rhymlabels = new_labels
		end
		for i, r in ipairs(rhymes) do
			local label = ""
			if rhymlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(rhymlabels[i])
			end
			if #r >= 1 then
				local sylkeys = {}
				local sylcounts = {}
				-- get all possible syllable counts from syllabifications
				for _, h in ipairs(hyphenations) do
					local hl = #h
					if hl > 0 and not sylkeys[hl] then
						table.insert(sylcounts, hl)
						sylkeys[hl] = true
					end
				end
				local rhymeobjs = {}
				for _, rhyme in ipairs(r) do
					table.insert(rhymeobjs, {rhyme = rhyme})
				end
				table.insert(lines, "* " .. require("Module:rhymes").format_rhymes(
					{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }) .. label)
			end
		end
	end

	if not nohyphen then
		-- Track entries whose IPA vowel count disagrees with the number of
		-- syllables in the first hyphenation.
		if #transcriptions > 0 and #hyphenations > 0 then
			local syl_IPA = require("Module:syllables").getVowels(transcriptions[1].pron, lang)
			local syl_hyph = #hyphenations[1]
			if syl_IPA and syl_IPA ~= syl_hyph then
				table.insert(categories, "pl-pronunciation syllable count mismatch")
			end
		end
		if not actual:find("[ %.]") and #hyphenations < 1 then
			table.insert(categories, "pl-pronunciation without hyphenation")
		end
		for i, h in ipairs(hyphenations) do
			local label = ""
			if hyphlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(hyphlabels[i])
			end
			table.insert(lines, "* การแบ่งพยางค์: " .. require("Module:links").full_link({lang = lang, alt = table.concat(h, "‧"), tr = "-"}) .. label)
		end
	end

	if #homophones > 0 then
		local homophone_objs = {}
		for i, h in ipairs(homophones) do
			table.insert(homophone_objs, {term = h, qq = homophonelabels[i] and {homophonelabels[i]} or nil})
		end
		table.insert(lines, "* " .. require("Module:homophones").format_homophones { lang = lang, homophones = homophone_objs })
	end

	return table.concat(lines, "\n") .. require("Module:utilities").format_categories(categories, lang)
end
return export