local string = string

local gsub = string.gsub
local upper = string.upper

local data = {}

-- Populates `data.end_tags`: for every key of "Module:data/parser extension
-- tags", plus "includeonly" and "noinclude", the value is the Lua pattern that
-- matches that tag's end tag. "onlyinclude" maps to `true` instead.
do
	local find = string.find
	local lower = string.lower
	
	local tags = mw.loadData("Module:data/parser extension tags")
	local data_end_tags = {}
	
	-- Returns the pattern fragment matching a single ASCII character of a tag
	-- name. The preprocessor uses the regex "/<\/TAG\s*>/i", so only ASCII
	-- characters are case-insensitive.
	local function char_pattern(ch)
		local upper_ch, lower_ch = upper(ch), lower(ch)
		if upper_ch ~= lower_ch then
			-- A letter: accept both cases, whichever case the name was given in.
			return "[" .. upper_ch .. lower_ch .. "]"
		elseif find(ch, "%w") then
			-- A digit: it has no special meaning in a pattern.
			return ch
		end
		-- Anything else may be a magic character (e.g. "-" or "."), so escape
		-- it: "%" followed by a non-alphanumeric character matches it literally.
		return "%" .. ch
	end
	
	-- Generates the string pattern for the end tag and stores it under the
	-- tag's name.
	local function end_tag_pattern(tag)
		-- Only bytes below \128 are passed to char_pattern; bytes \128-\255
		-- are copied through unchanged. The intermediate local also discards
		-- gsub's second return value (the substitution count).
		local name_pattern = gsub(tag, "[^\128-\255]", char_pattern)
		data_end_tags[tag] = "</" .. name_pattern .. "%s*>"
	end
	
	for tag in pairs(tags) do
		end_tag_pattern(tag)
	end
	end_tag_pattern("includeonly")
	end_tag_pattern("noinclude")
	data_end_tags["onlyinclude"] = true -- Pattern is not required, but a key is needed for tag validity checks.
	
	data.end_tags = data_end_tags
end

-- Replacements made by PHP's htmlspecialchars: each key is a raw character and
-- its value is the escape that stands in for it.
data.php_htmlspecialchars = {
	["&"] = "&amp;",
	["<"] = "&lt;",
	[">"] = "&gt;",
	['"'] = "&quot;",
	["'"] = "&#039;",
}

-- Attribute names are validated by the parser's HTML sanitizer using the regex
-- "/^([:_\p{L}\p{N}][:_\.\-\p{L}\p{N}]*)$/sxu". Ustring's "%w" is defined as
-- "[\p{L}\p{Nd}]", which leaves out every character that is in \p{N} but not
-- in \p{Nd}; those characters are listed here (as single characters or as
-- "first-last" ranges) so that they can be added to a character class.
-- NOTE: \p{N} *MUST* be defined according to the same version of Unicode that
-- the sanitizer uses in order to remain in sync. As of September 2024, this is
-- version 11.0.
local N_not_Nd = table.concat({
	"\194\178", -- U+00B2
	"\194\179", -- U+00B3
	"\194\185", -- U+00B9
	"\194\188-\194\190", -- U+00BC-U+00BE
	"\224\167\180-\224\167\185", -- U+09F4-U+09F9
	"\224\173\178-\224\173\183", -- U+0B72-U+0B77
	"\224\175\176-\224\175\178", -- U+0BF0-U+0BF2
	"\224\177\184-\224\177\190", -- U+0C78-U+0C7E
	"\224\181\152-\224\181\158", -- U+0D58-U+0D5E
	"\224\181\176-\224\181\184", -- U+0D70-U+0D78
	"\224\188\170-\224\188\179", -- U+0F2A-U+0F33
	"\225\141\169-\225\141\188", -- U+1369-U+137C
	"\225\155\174-\225\155\176", -- U+16EE-U+16F0
	"\225\159\176-\225\159\185", -- U+17F0-U+17F9
	"\225\167\154", -- U+19DA
	"\226\129\176", -- U+2070
	"\226\129\180-\226\129\185", -- U+2074-U+2079
	"\226\130\128-\226\130\137", -- U+2080-U+2089
	"\226\133\144-\226\134\130", -- U+2150-U+2182
	"\226\134\133-\226\134\137", -- U+2185-U+2189
	"\226\145\160-\226\146\155", -- U+2460-U+249B
	"\226\147\170-\226\147\191", -- U+24EA-U+24FF
	"\226\157\182-\226\158\147", -- U+2776-U+2793
	"\226\179\189", -- U+2CFD
	"\227\128\135", -- U+3007
	"\227\128\161-\227\128\169", -- U+3021-U+3029
	"\227\128\184-\227\128\186", -- U+3038-U+303A
	"\227\134\146-\227\134\149", -- U+3192-U+3195
	"\227\136\160-\227\136\169", -- U+3220-U+3229
	"\227\137\136-\227\137\143", -- U+3248-U+324F
	"\227\137\145-\227\137\159", -- U+3251-U+325F
	"\227\138\128-\227\138\137", -- U+3280-U+3289
	"\227\138\177-\227\138\191", -- U+32B1-U+32BF
	"\234\155\166-\234\155\175", -- U+A6E6-U+A6EF
	"\234\160\176-\234\160\181", -- U+A830-U+A835
	"\240\144\132\135-\240\144\132\179", -- U+10107-U+10133
	"\240\144\133\128-\240\144\133\184", -- U+10140-U+10178
	"\240\144\134\138", -- U+1018A
	"\240\144\134\139", -- U+1018B
	"\240\144\139\161-\240\144\139\187", -- U+102E1-U+102FB
	"\240\144\140\160-\240\144\140\163", -- U+10320-U+10323
	"\240\144\141\129", -- U+10341
	"\240\144\141\138", -- U+1034A
	"\240\144\143\145-\240\144\143\149", -- U+103D1-U+103D5
	"\240\144\161\152-\240\144\161\159", -- U+10858-U+1085F
	"\240\144\161\185-\240\144\161\191", -- U+10879-U+1087F
	"\240\144\162\167-\240\144\162\175", -- U+108A7-U+108AF
	"\240\144\163\187-\240\144\163\191", -- U+108FB-U+108FF
	"\240\144\164\150-\240\144\164\155", -- U+10916-U+1091B
	"\240\144\166\188", -- U+109BC
	"\240\144\166\189", -- U+109BD
	"\240\144\167\128-\240\144\167\143", -- U+109C0-U+109CF
	"\240\144\167\146-\240\144\167\191", -- U+109D2-U+109FF
	"\240\144\169\128-\240\144\169\136", -- U+10A40-U+10A48
	"\240\144\169\189", -- U+10A7D
	"\240\144\169\190", -- U+10A7E
	"\240\144\170\157-\240\144\170\159", -- U+10A9D-U+10A9F
	"\240\144\171\171-\240\144\171\175", -- U+10AEB-U+10AEF
	"\240\144\173\152-\240\144\173\159", -- U+10B58-U+10B5F
	"\240\144\173\184-\240\144\173\191", -- U+10B78-U+10B7F
	"\240\144\174\169-\240\144\174\175", -- U+10BA9-U+10BAF
	"\240\144\179\186-\240\144\179\191", -- U+10CFA-U+10CFF
	"\240\144\185\160-\240\144\185\190", -- U+10E60-U+10E7E
	"\240\144\188\157-\240\144\188\166", -- U+10F1D-U+10F26
	"\240\144\189\145-\240\144\189\148", -- U+10F51-U+10F54
	"\240\145\129\146-\240\145\129\165", -- U+11052-U+11065
	"\240\145\135\161-\240\145\135\180", -- U+111E1-U+111F4
	"\240\145\156\186", -- U+1173A
	"\240\145\156\187", -- U+1173B
	"\240\145\163\170-\240\145\163\178", -- U+118EA-U+118F2
	"\240\145\177\154-\240\145\177\172", -- U+11C5A-U+11C6C
	"\240\146\144\128-\240\146\145\174", -- U+12400-U+1246E
	"\240\150\173\155-\240\150\173\161", -- U+16B5B-U+16B61
	"\240\150\186\128-\240\150\186\150", -- U+16E80-U+16E96
	"\240\157\139\160-\240\157\139\179", -- U+1D2E0-U+1D2F3
	"\240\157\141\160-\240\157\141\184", -- U+1D360-U+1D378
	"\240\158\163\135-\240\158\163\143", -- U+1E8C7-U+1E8CF
	"\240\158\177\177-\240\158\178\171", -- U+1EC71-U+1ECAB
	"\240\158\178\173-\240\158\178\175", -- U+1ECAD-U+1ECAF
	"\240\158\178\177-\240\158\178\180", -- U+1ECB1-U+1ECB4
	"\240\159\132\128-\240\159\132\140", -- U+1F100-U+1F10C
})

do
	-- Contents of the two character classes: the first applies to the leading
	-- character only, the second (which additionally allows "." and "-") to
	-- every character after it.
	local first_char_set = ":_%w" .. N_not_Nd
	local later_char_set = ":_.%-%w" .. N_not_Nd
	
	data.valid_attribute_name = "^[" .. first_char_set .. "][" .. later_char_set .. "]*$"
end

-- Value is the namespace number of the linked page at parameter 0, where 0 is mainspace.
-- If the namespace is the mainspace, it can be overridden by an explicitly specified namespace prefix (e.g. {{PAGENAME:Category:Foo}} refers to "Category:Foo"). This does not apply to any other namespace (e.g. {{#SPECIAL:Category:Foo}} refers to "Special:Category:Foo").
do
	-- Entries tied to a namespace other than the mainspace.
	local link_namespaces = {
		["#CATEGORYTREE:"] = 14, -- Category:
		["#INVOKE:"] = 828, -- Module:
		["#SPECIAL:"] = -1, -- Special:
		["#SPECIALE:"] = -1, -- Special:
		["FILEPATH:"] = 6, -- File:
		["INT:"] = 8, -- MediaWiki:
		["PAGESINCATEGORY:"] = 14, -- Category:
	}
	
	-- Every remaining entry uses the mainspace (0).
	local mainspace_names = {
		"#IFEXIST:",
		"#LST:",
		"#LSTH:",
		"#LSTX:",
		"#TITLEPARTS:",
		"BASEPAGENAME:",
		"BASEPAGENAMEE:",
		"CANONICALURL:",
		"CANONICALURLE:",
		"CASCADINGSOURCES:",
		"FULLPAGENAME:",
		"FULLPAGENAMEE:",
		"FULLURL:",
		"FULLURLE:",
		"LOCALURL:",
		"LOCALURLE:",
		"NAMESPACE:",
		"NAMESPACEE:",
		"NAMESPACENUMBER:",
		"PAGEID:",
		"PAGENAME:",
		"PAGENAMEE:",
		"PAGESIZE:",
		"REVISIONDAY:",
		"REVISIONDAY2:",
		"REVISIONID:",
		"REVISIONMONTH:",
		"REVISIONMONTH1:",
		"REVISIONTIMESTAMP:",
		"REVISIONUSER:",
		"REVISIONYEAR:",
		"ROOTPAGENAME:",
		"ROOTPAGENAMEE:",
		"SUBJECTPAGENAME:",
		"SUBJECTPAGENAMEE:",
		"SUBJECTSPACE:",
		"SUBJECTSPACEE:",
		"SUBPAGENAME:",
		"SUBPAGENAMEE:",
		"TALKPAGENAME:",
		"TALKPAGENAMEE:",
		"TALKSPACE:",
		"TALKSPACEE:",
	}
	for i = 1, #mainspace_names do
		link_namespaces[mainspace_names[i]] = 0
	end
	
	data.template_link_param_1 = link_namespaces
end

-- Value is the namespace number of the linked page at parameter 1.
do
	local MAINSPACE = 0
	
	data.template_link_param_2 = {
		["PROTECTIONLEVEL:"] = MAINSPACE,
		["PROTECTIONEXPIRY:"] = MAINSPACE,
	}
end

return data