--[=[
Common utilities and definitions used by various Old English modules.
Author: Benwing
]=]
local m_table = require("Module:table")
local u = mw.ustring.char
local rsubn = mw.ustring.gsub
-- Thin wrapper over rsubn(): performs the same substitution but yields only
-- the resulting string, dropping the replacement count.
local function rsub(term, foo, bar, n)
	-- The extra parentheses truncate the call's results to the first value.
	return (rsubn(term, foo, bar, n))
end
-- Module table. It starts out holding the combining diacritics used by the
-- Old English modules, each stored as a one-character string.
local export = {
	ACUTE = u(0x0301), -- U+0301 COMBINING ACUTE ACCENT
	GRAVE = u(0x0300), -- U+0300 COMBINING GRAVE ACCENT
	CFLEX = u(0x0302), -- U+0302 COMBINING CIRCUMFLEX ACCENT
	MACRON = u(0x0304), -- U+0304 COMBINING MACRON
	DOTABOVE = u(0x0307), -- U+0307 COMBINING DOT ABOVE
	SYLLABIC = u(0x0329), -- U+0329 COMBINING VERTICAL LINE BELOW
	CEDILLA = u(0x0327), -- U+0327 COMBINING CEDILLA
	DOUBLE_BREVE_BELOW = u(0x035C), -- U+035C COMBINING DOUBLE BREVE BELOW
}
-- Macron, acute, grave and circumflex joined into one string, so that it can
-- be spliced into a pattern character class (see the `restriction` fields in
-- export.prefixes).
local accent = table.concat({export.MACRON, export.ACUTE, export.GRAVE, export.CFLEX})
-- Lookup table keyed by "base letter + combining mark"; the value is the
-- precomposed character that the pair is turned back into by decompose().
local recomposer = {}
do
	local dot, cedilla = export.DOTABOVE, export.CEDILLA
	recomposer["g" .. dot] = "ġ"
	recomposer["G" .. dot] = "Ġ"
	recomposer["c" .. dot] = "ċ"
	recomposer["C" .. dot] = "Ċ"
	-- used in "explicit allophone" notation in [[Module:ang-pron]]
	recomposer["c" .. cedilla] = "ç"
	recomposer["C" .. cedilla] = "Ç"
end
-- Decompose macron, acute, grave, circumflex, but leave alone ġ, ċ, ç and
-- their uppercase equivalents.
-- @param text string to normalize
-- @return the NFD form of `text`, except that every base+mark sequence that
--   is a key of `recomposer` is put back into its precomposed form
function export.decompose(text)
	text = mw.ustring.toNFD(text)
	-- The character class has to name every combining mark that occurs in a
	-- `recomposer` key (dot above AND cedilla); with the cedilla missing, the
	-- ç/Ç entries of the table could never match. Sequences that match the
	-- pattern but have no entry in the table are left as they are.
	text = rsub(text, ".[" .. export.DOTABOVE .. export.CEDILLA .. "]", recomposer)
	return text
end
-- We use the following syllable-splitting algorithm.
-- (1) A single consonant goes with the following syllable.
-- (2) Two consonants are split down the middle.
-- (3) For three or more consonants, check for clusters ending in
-- onsets_3 then onsets_2, with at least one preceding consonant.
-- If so, split between the onset and the preceding consonant(s).
-- (4) Check similarly for secondary_onsets_2. If seen, then check
-- the preceding consonant; if it's not an l or r, split before
-- the onset.
-- (5) Otherwise, split before the last consonant (i.e. the last
-- consonant goes with the following syllable, and all preceding
-- consonants go with the preceding syllable).
-- Set of two-consonant onsets consulted in step (3) of the syllable-splitting
-- algorithm. The clusters cn/kn, gn, fn and hn are deliberately skipped here;
-- they are listed in secondary_onsets_2 instead.
do
	local clusters = {
		-- p-, b-, t-, d-
		"pr", "pl", "br", "bl", "tr", "tw", "dr", "dw",
		-- c-, k-, g-
		"cr", "cl", "cw", "kr", "kl", "kw", "gr", "gl",
		-- s-
		"sm", "sn", "sl", "sw", "sp", "st", "sc", "sk", "sċ",
		-- f-, þ-, ð-
		"fr", "fl", "þr", "þw", "ðr", "ðw",
		-- h-, w-
		"hr", "hl", "hw", "wr", "wl",
	}
	export.onsets_2 = m_table.listToSet(clusters)
end
-- Set of consonant + n onsets, consulted in step (4) of the syllable-splitting
-- algorithm rather than in step (3).
do
	local clusters = {"cn", "kn", "gn", "fn", "hn"}
	export.secondary_onsets_2 = m_table.listToSet(clusters)
end
-- Set of three-consonant onsets; per step (3) of the syllable-splitting
-- algorithm these are checked before onsets_2.
do
	local clusters = {"spr", "spl", "str", "scr", "skr", "sċr"}
	export.onsets_3 = m_table.listToSet(clusters)
end
-- Set of diphthong spellings: each vowel pair plain, with a macron on the
-- first vowel, and with a macron on the second vowel. Every spelling goes
-- through export.decompose() so the macron is stored as a separate combining
-- character; the unaccented spellings come out of decompose() unchanged.
do
	local spellings = {
		"ea", "ēa", "eā",
		"eo", "ēo", "eō",
		"io", "īo", "iō",
		"ie", "īe", "iē",
	}
	local decomposed = {}
	for index, spelling in ipairs(spellings) do
		decomposed[index] = export.decompose(spelling)
	end
	export.diphthongs = m_table.listToSet(decomposed)
end
-- Prefix table. Each entry is a two-element list:
--   [1] the prefix spelling, written as a pattern fragment (several entries
--       use character classes such as [þð] or [wƿ]). Spellings that contain a
--       macron are passed through export.decompose() so that the macron is a
--       separate combining character.
--   [2] a property table keyed by "verb" and/or "noun" (plus "verbal" on the
--       "un" entry) whose values are "unstressed", "stressed" or
--       "secstressed"; three entries also carry a `restriction` pattern.
-- NOTE(review): the code that consumes this table is not in this file. The
-- entries are ordered so that a longer prefix precedes a shorter one that it
-- begins with (and/an, for[þð]/for, ġeond/ġe, ofer/of, under/un,
-- [wƿ]i[þð]er/[wƿ]i[þð]), which suggests first-match-wins -- confirm before
-- reordering.
-- NOTE(review): `restriction` presumably constrains what may follow the
-- prefix (as written: not one of the accent marks and not "a" or "o") --
-- confirm against the consumer.
export.prefixes = {
{export.decompose("ā"), {verb = "unstressed", noun = "stressed"}},
{"æt", {verb = "unstressed"}},
{"æfter", {verb = "secstressed", noun = "stressed"}}, -- not very common
{"and", {verb = "unstressed", noun = "stressed"}},
{"an", {verb = "unstressed", noun = "stressed"}},
{"be", {verb = "unstressed", noun = "unstressed", restriction = "^[^" .. accent .. "ao]"}},
{export.decompose("bī"), {noun = "stressed"}},
{"ed", {verb = "unstressed", noun = "stressed"}}, -- not very common
{"fore", {verb = "unstressed", noun = "stressed", restriction = "^[^" .. accent .. "ao]"}},
{"for[þð]", {verb = "unstressed", noun = "stressed"}},
{"for", {verb = "unstressed", noun = "unstressed"}},
{"fram", {verb = "unstressed", noun = "stressed"}}, -- not very common
-- following is rare as a noun, mostly from verbal forms
{"ġeond", {verb = "unstressed"}},
{"ġe", {verb = "unstressed", noun = "unstressed", restriction = "^[^" .. accent .. "ao]"}},
{"in", {verb = "unstressed", noun = "stressed"}}, -- not very common
{"mis", {verb = "unstressed"}},
{"ofer", {verb = "secstressed", noun = "stressed"}},
{"of", {verb = "unstressed", noun = "stressed"}},
{"on", {verb = "unstressed", noun = "stressed"}},
{"or", {noun = "stressed"}},
{"o[þð]", {verb = "unstressed"}},
{export.decompose("stēop"), {noun = "stressed"}},
{export.decompose("tō"), {verb = "unstressed", noun = "stressed"}},
{"under", {verb = "secstressed", noun = "stressed"}},
{"un", {verb = "unstressed", noun = "stressed", verbal = "stressed"}}, -- uncommon as verb
{"up", {verb = "unstressed", noun = "stressed"}},
{export.decompose("ūt"), {verb = "unstressed", noun = "stressed"}},
{export.decompose("ū[þð]"), {noun = "stressed"}},
{"[wƿ]i[þð]er", {verb = "secstressed", noun = "stressed"}},
{"[wƿ]i[þð]", {verb = "unstressed"}},
{"ymb", {verb = "unstressed", noun = "stressed"}},
{"[þð]urh", {verb = "unstressed", noun = "stressed"}},
}
-- Suffix table, laid out like export.prefixes: each entry is
-- {pattern fragment, property table}. The property table is keyed by "noun"
-- and/or "verb" with the value "secstressed" or "unstressed". Several
-- fragments use pattern syntax ("full?", "n[eiy]ss?", "[ċc]"), and fragments
-- containing a macron go through export.decompose() so that the macron is a
-- separate combining character.
-- NOTE(review): the consumer is not in this file; līċe is listed before the
-- shorter li[ċc] / lī[ċc] entries, so entry order may matter -- confirm
-- before reordering.
export.suffixes = {
{export.decompose("bǣre"), {noun = "secstressed"}},
{"fæst", {noun = "secstressed"}},
{"feald", {noun = "secstressed"}},
{"full?", {noun = "unstressed"}},
{export.decompose("lēas"), {noun = "secstressed"}},
-- These can be "verbal" if following a verbal past participle or similar
{export.decompose("līċe"), {noun = "secstressed", verb = "secstressed"}},
-- ī is decomposed into two chars so can't combine into [īi]
{export.decompose("li[ċc]"), {noun = "unstressed", verb = "unstressed"}},
{export.decompose("lī[ċc]"), {noun = "unstressed", verb = "unstressed"}},
{"n[eiy]ss?", {noun = "unstressed", verb = "unstressed"}},
{"sum", {noun = "unstressed"}},
}
-- Hand the populated module table back to the code that loaded this module.
return export