-- This data submodule defines part of Wiktionary's category structure.
--
-- For an introduction to the poscatboiler system and a description of how to add or modify categories, see [[Module:category tree/poscatboiler/data/documentation]].


-- The four containers this data submodule fills in and returns at the end of
-- the file: label definitions, raw category definitions, label handlers and
-- raw-category handlers.
local labels, raw_categories, handlers, raw_handlers = {}, {}, {}, {}



-----------------------------------------------------------------------------
--                                                                         --
--                                  LABELS                                 --
--                                                                         --
-----------------------------------------------------------------------------


-- Root label of the names hierarchy. It is the only label in this file that
-- sets its own 'umbrella_parents'; all others receive the default assigned by
-- the loop after the label definitions.
labels["ชื่อ"] = {
	description = "{{{langname}}} terms that are used to refer to specific individuals or groups. Place names, demonyms and other kinds of names can be found in [[:Category:Names]].",
	umbrella_parents = {name = "ศัพท์แบ่งตามหน้าที่เชิงความหมาย", is_label = true, sort = " "},
	parents = {"ศัพท์แบ่งตามหน้าที่เชิงความหมาย", "คำวิสามานยนาม"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["names"] = labels["ชื่อ"]

-- Augmentative forms of given names, one label per gender class. Each is
-- parented both to the matching given-name label and to "augmentative nouns".
labels["augmentatives of female given names"] = {
	description = "{{{langname}}} augmentative names given to female individuals.",
	breadcrumb = "augmentative",
	parents = {"ชื่อบุคคลหญิง", "augmentative nouns"},
}

labels["augmentatives of male given names"] = {
	description = "{{{langname}}} augmentative names given to male individuals.",
	breadcrumb = "augmentative",
	parents = {"ชื่อบุคคลชาย", "augmentative nouns"},
}

labels["augmentatives of unisex given names"] = {
	description = "{{{langname}}} augmentative names given either to male or to female individuals.",
	breadcrumb = "augmentative",
	parents = {"ชื่อบุคคลชาย-หญิง", "augmentative nouns"},
}

-- Surnames used by both sexes, for languages that otherwise distinguish male
-- and female surnames.
labels["นามสกุลรวม"] = {
	description = "{{{langname}}} names shared by both male and female family members, in languages that distinguish male and female surnames.",
	breadcrumb = "รวม",
	parents = {"นามสกุล"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["common-gender surnames"] = labels["นามสกุลรวม"]

-- Diminutive forms of given names, one label per gender class. Each is
-- parented both to the matching given-name label and to "diminutive nouns".
labels["diminutives of female given names"] = {
	description = "{{{langname}}} diminutive names given to female individuals.",
	breadcrumb = "diminutives",
	parents = {"ชื่อบุคคลหญิง", "diminutive nouns"},
}

labels["diminutives of male given names"] = {
	description = "{{{langname}}} diminutive names given to male individuals.",
	breadcrumb = "diminutives",
	parents = {"ชื่อบุคคลชาย", "diminutive nouns"},
}

labels["diminutives of unisex given names"] = {
	description = "{{{langname}}} diminutive names given either to male or to female individuals.",
	breadcrumb = "diminutives",
	parents = {"ชื่อบุคคลชาย-หญิง", "diminutive nouns"},
}

-- Given names for female individuals; child of the general given-names label.
labels["ชื่อบุคคลหญิง"] = {
	description = "ชื่อ{{{langcat}}} given to female individuals.",
	breadcrumb = "หญิง",
	parents = {"ชื่อบุคคล"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["female given names"] = labels["ชื่อบุคคลหญิง"]

-- Skin names for female individuals; child of "skin names".
labels["female skin names"] = {
	description = "{{{langname}}} skin names given to female individuals.",
	breadcrumb = "หญิง",
	parents = {"skin names"},
}

-- Surnames borne by female family members; child of the surnames label.
labels["นามสกุลหญิง"] = {
	description = "{{{langname}}} names shared by female family members.",
	breadcrumb = "หญิง",
	parents = {"นามสกุล"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["female surnames"] = labels["นามสกุลหญิง"]

-- General given-names label; parent of the female, male and unisex
-- given-name labels.
labels["ชื่อบุคคล"] = {
	description = "ชื่อ{{{langcat}}} given to individuals.",
	parents = {"ชื่อ"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["given names"] = labels["ชื่อบุคคล"]

-- Given names for male individuals; child of the general given-names label.
labels["ชื่อบุคคลชาย"] = {
	description = "ชื่อ{{{langcat}}} given to male individuals.",
	breadcrumb = "ชาย",
	parents = {"ชื่อบุคคล"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["male given names"] = labels["ชื่อบุคคลชาย"]

-- Skin names for male individuals; child of "skin names".
labels["male skin names"] = {
	description = "{{{langname}}} skin names given to male individuals.",
	breadcrumb = "ชาย",
	parents = {"skin names"},
}

-- Surnames borne by male family members; child of the surnames label.
labels["นามสกุลชาย"] = {
	description = "{{{langname}}} names shared by male family members.",
	breadcrumb = "ชาย",
	parents = {"นามสกุล"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["male surnames"] = labels["นามสกุลชาย"]

-- Names derived from the name of a female ancestor.
labels["ชื่อมาตุวงศ์"] = {
	description = "ชื่อ{{{langcat}}} indicating a person's mother, grandmother or earlier female ancestor.",
	parents = {"ชื่อ"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["matronymics"] = labels["ชื่อมาตุวงศ์"]

-- Names derived from the name of a male ancestor.
labels["ชื่อปิตุวงศ์"] = {
	description = "ชื่อ{{{langcat}}} indicating a person's father, grandfather or earlier male ancestor.",
	parents = {"ชื่อ"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["patronymics"] = labels["ชื่อปิตุวงศ์"]

-- Parent of the female, male and unisex skin-name labels.
labels["skin names"] = {
	description = "{{{langname}}} terms given at birth that are used to refer to individuals from specific marital classes.",
	parents = {"คำวิสามานยนาม", "ชื่อ"},
}

-- General surnames label; parent of the female, male and common-gender
-- surname labels.
labels["นามสกุล"] = {
	description = "ชื่อ{{{langcat}}} shared by family members.",
	parents = {"ชื่อ"},
}
-- English-keyed alias: both keys refer to the very same table.
labels["surnames"] = labels["นามสกุล"]

-- Given names used for either sex; child of the general given-names label and
-- parent of the unisex augmentative and diminutive labels.
labels["ชื่อบุคคลชาย-หญิง"] = {
	description = "ชื่อ{{{langcat}}} given either to male or to female individuals.",
	breadcrumb = "ชาย-หญิง",
	parents = {"ชื่อบุคคล"},
}
-- English-keyed alias. It must point at the Thai-keyed table defined just
-- above (as every other alias in this file does); assigning the English key to
-- itself would store nil and leave the alias undefined.
labels["unisex given names"] = labels["ชื่อบุคคลชาย-หญิง"]

-- Skin names used for either sex; child of "skin names".
labels["unisex skin names"] = {
	description = "{{{langname}}} skin names given either to male or to female individuals.",
	breadcrumb = "ชาย-หญิง",
	parents = {"skin names"},
}


-- Give every label that does not define its own 'umbrella_parents' the shared
-- default. Alias keys point at the same table as their primary key, so a table
-- reached twice simply keeps the value it was given the first time.
for _, label_data in pairs(labels) do
	label_data.umbrella_parents = label_data.umbrella_parents or "หมวดหมู่ย่อยของชื่อแบ่งตามภาษา"
end



-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Umbrella category that collects the per-label "by language" categories of
-- this module. Its name is the default 'umbrella_parents' value assigned to
-- labels above. The second parent refers to the "names" label.
raw_categories["หมวดหมู่ย่อยของชื่อแบ่งตามภาษา"] = {
	description = "Umbrella categories covering topics related to names.",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {
		"หมวดหมู่ใหญ่รวมหัวข้อ",
		{name = "names", is_label = true, sort = " "},
	},
}


-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------


-- Resolve the SOURCE part of a "NAMETYPE from SOURCE" label.
--
-- Given names and surnames each accept a fixed list of special, non-language
-- sources; when `source_name` is one of those for the given `nametype`, the
-- string itself is returned. Otherwise the result is whatever
-- [[Module:languages]]' getByCanonicalName() returns when asked to resolve the
-- name with etymology languages and families allowed.
local function source_name_to_source(nametype, source_name)
	local special_source_list
	if nametype:find("ชื่อบุคคล") then
		special_source_list = {"นามสกุล", "ชื่อสถานที่", "coinages", "ไบเบิล"}
	elseif nametype:find("นามสกุล") then
		special_source_list = {
			"ชื่อบุคคล", "ชื่อสถานที่", "อาชีพ", "ชื่อปิตุวงศ์", "ชื่อมาตุวงศ์",
			"common nouns", "ชื่อเล่น", "ชื่อชาติพันธุ์"
		}
	end

	-- Other name types have no special sources at all.
	local is_special = special_source_list
		and require("Module:table").listToSet(special_source_list)[source_name]
	if not is_special then
		return require("Module:languages").getByCanonicalName(source_name, nil,
			"allow etym langs", "allow families")
	end
	return source_name
end

-- Return the text used to name a source: a string source (one of the special
-- sources) is returned unchanged, a table source is asked for its display
-- form.
local function get_source_text(source)
	if type(source) ~= "table" then
		return source
	end
	return source:getDisplayForm()
end

-- Build the description of a "NAMETYPE from SOURCE" category.
--
-- `lang` is only tested for truthiness (and asked for its code when `source`
-- is an object): when it is set, the text describes a per-language category,
-- otherwise an umbrella category. `source` is either one of the special
-- string sources or an object providing makeCategoryLink() and getCode().
-- An unknown string source raises an error.
local function get_description(lang, nametype, source)
	-- Wording for each special (string) source.
	local special_origins = {
		["นามสกุล"] = "transferred from surnames",
		["ชื่อบุคคล"] = "transferred from given names",
		["ชื่อเล่น"] = "transferred from nicknames",
		["ชื่อสถานที่"] = "transferred from place names",
		["common nouns"] = "transferred from common nouns",
		["coinages"] = "originating as coinages",
		["อาชีพ"] = "originating as occupations",
		["ชื่อปิตุวงศ์"] = "originating as patronymics",
		["ชื่อมาตุวงศ์"] = "originating as matronymics",
		["ชื่อชาติพันธุ์"] = "originating as ethnonyms",
		-- Hack esp. for Hawaiian names. We should consider changing them to
		-- have the source as Biblical Hebrew and mention the derivation from
		-- the Bible some other way.
		["ไบเบิล"] = "originating from the Bible",
	}

	local origin, extra
	if type(source) == "string" then
		origin = special_origins[source]
		if not origin then
			error("Internal error: Unrecognized string source \"" .. source .. "\", should be special-cased")
		end
		if source == "ชื่อสถานที่" then
			-- Cross-reference to the sibling "from surnames" category.
			local surname_category
			if lang then
				surname_category = "[[:Category:{{{langname}}} " .. nametype .. " from surnames]]"
			else
				surname_category = "[[:Category:" .. mw.getContentLanguage():ucfirst(nametype) .. " from surnames by language]]"
			end
			extra = " For place names that are also surnames, see " .. surname_category .. "."
		elseif source == "coinages" then
			extra = " These are names of artificial origin, names based on fictional characters, combinations of two words or names or backward spellings. Names of uncertain origin can also be placed here if there is a strong suspicion that they are coinages."
		end
	else
		origin = "of " .. source:makeCategoryLink() .. " origin"
		-- Names "from" the category's own language get an explanatory note.
		if lang and source:getCode() == lang:getCode() then
			extra = " These are names derived from common nouns, local mythology, etc."
		end
	end

	local intro = lang and "{{{langname}}} " or "Categories with "
	return intro .. nametype .. " " .. origin ..
		". (This includes names derived at an older stage of the language.)" .. (extra or "")
end

-- Family codes that count as "high-level". If one of them occurs among the
-- ancestral families of a given language, it is used instead of the
-- three-letter parent (or the immediate parent if there is no three-letter
-- parent).
local high_level_family_codes = {
	-- Indo-European
	"gem", -- Germanic (for gme, gmq, gmw)
	"inc", -- Indic (for e.g. pra = Prakrit)
	"ine-ana", -- Anatolian (don't keep going to ine)
	"ine-toc", -- Tocharian (don't keep going to ine)
	"ira", -- Iranian (for e.g. xme = Median, xsc = Scythian)
	"sla", -- Slavic (for zle, zls, zlw)
	-- Other
	"ath", -- Athabaskan (for e.g. apa = Apachean)
	"poz", -- Malayo-Polynesian (for e.g. pqe = Eastern Malayo-Polynesian)
	"cau-nwc", -- Northwest Caucasian
	"cau-nec", -- Northeast Caucasian
}
-- Converted to a set so membership can be tested by indexing with a code.
local high_level_families = require("Module:table").listToSet(high_level_family_codes)

-- Pick the family under which names from `lang` are grouped.
--
-- Returns, in order of preference:
--   * nil when `lang` has no family (e.g. Pictish);
--   * the nearest ancestor family listed in `high_level_families`;
--   * for a family code of the form 'FOO-BAR', the family 'FOO' if
--     [[Module:families]] knows it;
--   * the immediate family of `lang`.
local function find_high_level_family(lang)
	local own_family = lang:getFamily()
	if not own_family then
		return nil
	end

	-- Climb the chain of ancestor families, starting with the family itself.
	local ancestor = own_family
	repeat
		local code = ancestor:getCode()
		if code == "qfa-not" then
			-- "not a family" is its own parent, so climbing further would
			-- never end. Stop climbing rather than return, so that the
			-- fallbacks below still produce categories like
			-- [[Category:English female given names from sign languages]] and
			-- [[Category:English female given names from constructed languages]].
			break
		end
		if high_level_families[code] then
			return ancestor
		end
		ancestor = ancestor:getFamily()
	until not ancestor

	-- Try the part of the family code before its first hyphen.
	local prefix = own_family:getCode():match("^(.-)%-.*$")
	local prefix_family = prefix and require("Module:families").getByCode(prefix)
	if prefix_family then
		return prefix_family
	end

	return own_family
end

-- Split a gendered given-name type into its gender suffix and its base label.
--
-- A name type consisting of "ชื่อบุคคล" followed by at least one more
-- character yields `gender, label`, where `label` is "ชื่อบุคคล" and `gender`
-- is everything after it (the labels in this file use ชาย, หญิง and
-- ชาย-หญิง). Any other name type yields no values.
local function match_gendered_nametype(nametype)
	local label, gender = nametype:match("^(ชื่อบุคคล)(.+)$")
	if not gender then
		return
	end
	return gender, label
end

-- Build the parent list of a "NAMETYPE from SOURCE" category.
--
-- With `lang` set, the parents describe a per-language category:
--   * the plain NAMETYPE label, sorted by the source text;
--   * for an object source, the category of terms taken from that source,
--     plus one broader "NAMETYPE from ..." category (see below);
--   * for a gendered given-name type, the ungendered "given names from SOURCE"
--     category, sorted by the gender.
-- Without `lang`, the parents describe the umbrella category: either the
-- ungendered umbrella label or (for an object source) the source's own
-- category, followed by the umbrella category defined in RAW CATEGORIES.
--
-- `source` is a special-source string or an object providing getType(),
-- getDisplayForm(), getCanonicalName(), getCategoryName() and, for families,
-- getFamily().
local function get_parents(lang, nametype, source)
	local parents = {}

	if lang then
		table.insert(parents, {name = nametype, sort = get_source_text(source)})
		if type(source) == "table" then
			table.insert(parents, {name = "ศัพท์ที่รับมาจาก" .. source:getDisplayForm(), sort = " "})
			-- If the source is a regular language, put it in a parent category for the high-level language family, e.g. for
			-- "Russian female given names from German", put it in a parent category "Russian female given names from Germanic languages"
			-- (skipping over West Germanic languages).
			--
			-- If the source is an etymology language, put it in a parent category for the parent non-etymology language, e.g. for
			-- "French male given names from Gascon", put it in a parent category "French male given names from Occitan".
			--
			-- If the source is a family, put it in a parent category for the parent family.
			local broader_source
			local sourcetype = source:getType()
			if sourcetype == "family" then
				local parent_family = source:getFamily()
				if parent_family and parent_family:getCode() ~= "qfa-not" then
					broader_source = parent_family
				end
			elseif sourcetype == "etymology language" then
				local source_parent = require("Module:languages").getNonEtymological(source)
				if source_parent and source_parent:getCode() ~= "und" then
					broader_source = source_parent
				end
			else
				-- May not exist, e.g. for Pictish.
				broader_source = find_high_level_family(source)
			end
			if broader_source then
				table.insert(parents, {
					name = nametype .. "จาก" .. broader_source:getDisplayForm(),
					sort = source:getCanonicalName()
				})
			end
		end

		local gender, label = match_gendered_nametype(nametype)
		if gender then
			table.insert(parents, {name = label .. "จาก" .. get_source_text(source), sort = gender})
		end
	else
		local gender, label = match_gendered_nametype(nametype)
		if gender then
			table.insert(parents, {name = label .. "จาก" .. get_source_text(source), is_label = true, sort = " "})
		elseif type(source) == "table" then
			-- FIXME! This is duplicated in [[Module:category tree/poscatboiler/data/terms by etymology]] in the
			-- handler for umbrella categories 'Terms derived from SOURCE'.
			local sourcetype = source:getType()
			local first_umbrella_parent
			if sourcetype == "family" then
				first_umbrella_parent = {name = source:getCategoryName(), raw = true, sort = " "}
			elseif sourcetype == "etymology language" then
				first_umbrella_parent = {name = "Category:" .. source:getCategoryName(), sort = nametype}
			else
				first_umbrella_parent = {name = source:getCategoryName(), raw = true, sort = nametype}
			end
			table.insert(parents, first_umbrella_parent)
		end
		-- Use the name under which this module defines the umbrella category
		-- (RAW CATEGORIES section), which is also the default
		-- 'umbrella_parents' of every label; the untranslated English name is
		-- not defined here.
		table.insert(parents, "หมวดหมู่ย่อยของชื่อแบ่งตามภาษา")
	end

	return parents
end

-- Handler for labels of the form "NAMETYPE from SOURCE".
--
-- The label is split on " from "; NAMETYPE must end in "names" and be listed
-- in [[Module:names]]' personal_name_types, and SOURCE must be resolvable by
-- source_name_to_source(). When either check fails the handler declines by
-- returning nil.
--
-- NOTE(review): the label pattern here is English ("... names from ..."),
-- whereas source_name_to_source() and match_gendered_nametype() compare the
-- name type against Thai strings and get_parents() builds names with "จาก".
-- Confirm against [[Module:names]] that the name types reaching this handler
-- are the ones those helpers expect.
table.insert(handlers, function(data)
	local nametype, source_name = data.label:match("^(.*names) from (.+)$")
	if not nametype then
		return
	end

	local known_name_types = require("Module:table").listToSet(
		require("Module:names").personal_name_types)
	if not known_name_types[nametype] then
		return nil
	end

	local source = source_name_to_source(nametype, source_name)
	if not source then
		return nil
	end

	return {
		description = get_description(data.lang, nametype, source),
		breadcrumb = "from " .. get_source_text(source),
		parents = get_parents(data.lang, nametype, source),
		-- The umbrella variants are built by the same helpers with no language.
		umbrella = {
			description = get_description(nil, nametype, source),
			parents = get_parents(nil, nametype, source),
		},
	}
end)



-----------------------------------------------------------------------------
--                                                                         --
--                                RAW HANDLERS                             --
--                                                                         --
-----------------------------------------------------------------------------


-- If `label` consists of a non-empty source name, one space and then exactly
-- `nametype`, return the source name; otherwise return nil.
--
-- The comparison is done on plain substrings rather than by splicing
-- `nametype` into a Lua pattern: a name type containing a pattern magic
-- character (such as the hyphen in "ชื่อบุคคลชาย-หญิง") would otherwise be
-- interpreted as pattern syntax and never match its own literal text.
local function strip_nametype_suffix(label, nametype)
	local suffix = " " .. nametype
	-- Strictly longer: at least one character must precede the suffix.
	if #label > #suffix and label:sub(-#suffix) == suffix then
		return label:sub(1, #label - #suffix)
	end
	return nil
end

-- Handler for e.g. 'en:Russian male given names'.
--
-- The category is split into a language-code prefix and a label. Each name
-- type listed in [[Module:names]]' personal_name_types is tried in turn as the
-- label's ending; the first one whose remaining prefix is a language name
-- known to [[Module:languages]] (etymology languages allowed) decides the
-- result. Returns nothing when the category does not have this shape.
table.insert(raw_handlers, function(data)
	local langcode, label = data.category:match("^([a-z-]+):(.*)$")
	if not langcode then
		return
	end

	local personal_name_types = require("Module:names").personal_name_types
	for _, nametype in ipairs(personal_name_types) do
		local sourcename = strip_nametype_suffix(label, nametype)

		if sourcename then
			local source = require("Module:languages").getByCanonicalName(sourcename, nil, "allow etym")
			if source then
				return {
					description = "Transliterations, respellings or other renderings of " .. source:makeCategoryLink() .. " " .. nametype .. " into {{{langlink}}}.",
					lang = langcode,
					breadcrumb = sourcename .. " " .. nametype,
					parents = {
						{ module = "topic cat", args = {code = "{{{langcode}}}", label = "Foreign personal names"}, sort = sourcename },
						{ name = nametype, is_label = true, lang = source:getCode(), sort = "{{{langname}}}" },
					},
				}
			end
		end
	end
end)


-- Export the four containers filled in above under the keys LABELS,
-- RAW_CATEGORIES, HANDLERS and RAW_HANDLERS.
return {LABELS = labels, RAW_CATEGORIES = raw_categories, HANDLERS = handlers, RAW_HANDLERS = raw_handlers}