มอดูล:category tree/poscatboiler/data/อักขระ

This data submodule defines part of Wiktionary's category structure.

For an introduction to the poscatboiler system and a description of how to add or modify categories, see Module:category tree/poscatboiler/data/documentation.


-- Raw category definitions, keyed by category name; exported as RAW_CATEGORIES.
local raw_categories = {}
-- List of label handler functions (appended to with insert); exported as HANDLERS.
local handlers = {} 
-- List of raw category handler functions (appended to with insert); exported as RAW_HANDLERS.
local raw_handlers = {}

local m_str_utils = require("Module:string utilities")

local codepoint = m_str_utils.codepoint
local insert = table.insert
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local rfind = m_str_utils.find
local rmatch = m_str_utils.match
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD


-- Record a tracking entry for this module. `page` is appended to the
-- "poscatboiler-characters/" prefix and handed to [[Module:debug/track]].
-- Always returns true.
local function track(page)
	local record = require("Module:debug/track")
	local key = "poscatboiler-characters/" .. page
	record(key)
	return true
end


-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Umbrella category gathering the per-character umbrella categories for terms
-- categorized by unusual characters (see the description string below).
raw_categories["หมวดหมู่ย่อยของศัพท์แบ่งตามตัวอักษรเฉพาะตัวแบ่งตามภาษา"] = {
	description = "Umbrella categories covering terms categorized by unusual characters contained in them.",
	-- Template-style placeholder; NOTE(review): presumably expanded by the
	-- poscatboiler core when the category text is generated -- confirm.
	additional = "{{{umbrella_meta_msg}}}",
	parents = {
		"หมวดหมู่ใหญ่รวมหัวข้อ",
		-- The second parent is given as a label with a single space as its sort key.
		-- NOTE(review): the space presumably sorts this entry ahead of the others -- confirm.
		{name = "ศัพท์แบ่งตามตัวอักษรเฉพาะตัว", is_label = true, sort = " "},
	},
}

-- FIXME! This should probably be deleted.
-- Parent category for the per-letter categories produced by the raw handler at
-- the bottom of this module, which lists "Letters" as its parent.
raw_categories["Letters"] = {
	description = "Categories specifying individual letters, containing the languages that use those letters.",
	-- NOTE(review): the other raw category in this module uses
	-- "{{{umbrella_meta_msg}}}"; confirm that "{{{umbrella_meta}}}" is a
	-- placeholder the poscatboiler core actually recognizes.
	additional = "{{{umbrella_meta}}}",
	parents = {
		"มูลฐาน",
	},
}



-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------


-- Return `char` prefixed with a dotted circle ("◌") when `combining` is truthy,
-- so that a combining character has a visible base to attach to; otherwise
-- return `char` unchanged.
--
-- Declared `local`: the helper is only used within this module, and a bare
-- `function name()` would assign to a global, which leaks into the shared
-- environment and raises an error when strict-globals checking is active.
--
-- char:      string holding the character to display.
-- combining: truthy if `char` is a combining character.
local function add_dotted_circle(char, combining)
	if combining then
		return "◌" .. char
	end
	return char
end


insert(handlers, function(data)
	-- Handles labels of the form "ศัพท์ที่สะกดด้วยCHAR" ("terms spelled with CHAR").
	-- CHAR is normally the character itself, but it may also be a description of
	-- the character (such as "gershayim"); in that case the actual character is
	-- passed in through the char= parameter.
	local label_char = data.label:match("^ศัพท์ที่สะกดด้วย(.+)$")
	if not label_char then
		return nil
	end
	-- Drop one leading space between the prefix and the character, if present.
	if label_char:find("^ ") then
		label_char = label_char:sub(2)
	end

	-- For languages whose family code is "jpx" or "jpx-ryu" (Japanese/Ryukyuan),
	-- only fire when the label is a single character containing no ASCII letters.
	local is_japonic = data.lang
	if is_japonic then
		is_japonic = data.lang:getFamilyCode() == "jpx" or data.lang:getFamilyCode() == "jpx-ryu"
	end
	if is_japonic then
		if label_char:find("[A-Za-z]") or ulen(label_char) > 1 then
			return nil
		end
	end

	local args = require("Module:parameters").process(data.args, {
		["char"] = {},
		["sort"] = {},
		-- It is unclear what these two were once used for; their use is only tracked.
		["context"] = {},
		["context2"] = {},
	})
	if args.context or args.context2 then
		track("terms-spelled-with-context")
	end

	-- Labels naming a whole group of characters rather than one character. Each
	-- entry supplies the default sort key and, optionally, the text to show in
	-- the description in place of the label.
	local group_labels = {
		-- numerals
		["ตัวเลข"] = {
			sort = "#",
			desc = "ตัวเลข",
		},
		-- emoji
		["อีโมจิ"] = {
			sort = "⌚", -- the first emoji in our list in [[Module:headword/data]]
		},
		-- parentheses
		["วงเล็บ"] = {
			sort = "(",
		},
		-- square brackets
		["วงเล็บเหลี่ยม"] = {
			sort = "[",
		},
		-- angle brackets
		["วงเล็บมุม"] = {
			sort = "<",
		},
		-- curly brackets
		["วงเล็บปีกกา"] = {
			sort = "{",
		},
	}
	local group = group_labels[label_char]
	if group then
		local group_sortkey = args.sort or group.sort
		local shown_name = group.desc or label_char
		return {
			description = "ศัพท์{{{langcat}}}ที่สะกดด้วย " .. shown_name .. " หนึ่งตัวขึ้นไป",
			parents = {{name = "ศัพท์แบ่งตามตัวอักษรเฉพาะตัว", sort = group_sortkey }},
			breadcrumb = label_char,
			umbrella = {
				breadcrumb = label_char,
				parents = {{name = "หมวดหมู่ย่อยของศัพท์แบ่งตามตัวอักษรเฉพาะตัวแบ่งตามภาษา", sort = " " .. group_sortkey }}
			},
		}, true
	end

	-- From here on we are dealing with a single character (or a description of one).
	local char = args.char or label_char
	local label_is_desc = args.char and args.char ~= label_char
	if label_is_desc then
		track("titlechar_is_desc")
	end

	local lang = data.lang or require("Module:languages").getByCode("mul")
	local combining = ulen(char) == 1 and require("Module:Unicode_data").is_combining(codepoint(char))

	-- Uppercase the character one UTF-8 sequence at a time on its decomposed
	-- form, using the override table for the listed characters, then recompose.
	-- The extra parentheses discard gsub's second return value (the match count).
	local case_overrides = {["ß"] = "ẞ", ["ͅ"] = "ͅ"}
	local decomposed = toNFD(char)
	local upper = toNFC((decomposed:gsub("[%z\1-\127\194-\244][\128-\191]*", function(ch)
		return case_overrides[ch] or ch:uupper()
	end)))

	if char ~= upper and ulen(char) == 1 then
		-- Category titles are supposed to use the uppercase character. Care is
		-- needed, though, because of cases such as
		-- [[Category:English terms spelled with ı]]: the uppercase equivalent of
		-- that character is the ordinary I, which is a standard character.
		local standard_chars = lang:getStandardCharacters()
		if data.lang and standard_chars then
			-- Kept as a separate function so that error level 2 refers to the
			-- place in this handler from which it is called.
			local function reject()
				error("Category titles should use uppercase characters: '" .. data.label .. "'", 2)
			end
			local upper_is_standard
			if data.lang:getCode() ~= "hi" and data.lang:getCode() ~= "lo" then
				upper_is_standard = rfind(standard_chars, upper)
			else
				upper_is_standard = rfind(upper, "[" .. standard_chars .. "]")
			end
			if not upper_is_standard then
				reject()
			end
		end
	end

	-- Build the description. The character is linked; a combining character is
	-- displayed on a dotted circle and has its transliteration suppressed.
	local full_link = require("Module:links").full_link
	local link_spec = {
		term = char,
		lang = lang,
	}
	if combining then
		link_spec.alt = add_dotted_circle(char, true)
		link_spec.tr = "-"
	end
	local character = full_link(link_spec, "term")

	-- If lowercasing changes the character, mention both case forms.
	local lower = ulower(char)
	if lower ~= char then
		local lower_link = full_link({term = lower, lang = lang}, "term")
		character = "อักษรตัวใหญ่ " .. character .. " หรืออักษรตัวเล็ก " .. lower_link
	end

	if label_is_desc then
		character = character .. " (" .. label_char .. ")"
	end

	local description = "ศัพท์{{{langcat}}}ที่สะกดด้วย " .. character

	-- Script-tagged forms for the display title and breadcrumb. When the label
	-- is a description, it is used verbatim as the breadcrumb and no display
	-- title is generated.
	local tagged_label, crumb
	if label_is_desc then
		crumb = label_char
	else
		local tag_text = require("Module:script utilities").tag_text
		tagged_label = tag_text(label_char, lang, nil, "term")
		crumb = tag_text(add_dotted_circle(char, combining), lang, nil, "term")
	end

	-- Sort key: an explicit sort= wins; otherwise ask the language, falling back
	-- to the character itself if that yields an empty string.
	local sortkey = args.sort
	if not sortkey then
		sortkey = (lang:makeSortKey(char))
	end
	if sortkey == "" then
		sortkey = char
	end

	local displaytitle = nil
	if not label_is_desc then
		displaytitle = "ศัพท์{{{langcat}}}ที่สะกดด้วย " .. tagged_label
	end

	-- No umbrella category is produced in the Japanese/Ryukyuan case.
	local umbrella = false
	if not is_japonic then
		local umbrella_title = nil
		if not label_is_desc then
			umbrella_title = "ศัพท์ที่สะกดด้วย " .. tagged_label .. " แบ่งตามภาษา"
		end
		umbrella = {
			displaytitle = umbrella_title,
			breadcrumb = crumb,
			parents = {{name = "หมวดหมู่ย่อยของศัพท์แบ่งตามตัวอักษรเฉพาะตัวแบ่งตามภาษา", sort = " " .. sortkey }}
		}
	end

	return {
		description = description,
		additional = "Note that categories of the form '''''LANG terms spelled with CHAR''''' are intended for characters not "
			.. "part of the standard repertoire of a language (e.g. Cyrillic characters in English or Latin characters in Russian).",
		displaytitle = displaytitle,
		parents = {{name = "ศัพท์แบ่งตามตัวอักษรเฉพาะตัว", sort = sortkey }},
		breadcrumb = crumb,
		umbrella = umbrella,
	}, true
end)



-----------------------------------------------------------------------------
--                                                                         --
--                               RAW HANDLERS                              --
--                                                                         --
-----------------------------------------------------------------------------


-- Category names that are accepted even though lowercase-to-uppercase
-- conversion of the second letter does not give the first (Turkish letters).
local letter_cat_allow_list = require("Module:table/listToSet")({
	"İi",
})

insert(raw_handlers, function(data)
	-- Fire only on category names that begin with an uppercase letter followed
	-- by a lowercase letter, where that pair is either the whole name or is
	-- followed by a colon (as in [[Category:Gg: ⠛]]). Unless the name is on the
	-- allow list above, the lowercase letter must also uppercase to the first
	-- letter. Names that do not fit (e.g. Turkish [[Category:Iı]]) have to
	-- call {{letter cat}} directly. An earlier, looser version of this handler
	-- also fired on names such as [[Category:zh:]] and [[Category:RFQ]].
	local category = data.category
	local upper, lower = rmatch(category, "^(%u)(%l)%f[:%z]")
	if not upper then
		return nil
	end
	if not letter_cat_allow_list[category] and lower:uupper() ~= upper then
		return nil
	end

	local description =
		('Languages that use the uppercase letter "%s" (lowercase equivalent "%s").'):format(upper, lower)
	return {
		description = description,
		parents = {"Letters"},
	}
end)


-- Export the three tables built above under the keys RAW_CATEGORIES, HANDLERS
-- and RAW_HANDLERS.
return {RAW_CATEGORIES = raw_categories, HANDLERS = handlers, RAW_HANDLERS = raw_handlers}