local language_codes = require "Module:languages/code to canonical name"

-- Chooses which of two codes that refer to the same etymology-language data
-- table should be treated as the preferred one.
--
-- The checks run in this order; the first one that applies decides:
--   1. A code containing anything other than letters and hyphens loses.
--   2. A code containing an uppercase letter directly followed by a lowercase
--      letter loses.
--   3. If code2 is strictly shorter than code1, code2 wins.
--   4. Otherwise code1 wins only when both codes start with a run of
--      lowercase ASCII letters and code1's run is a key of `language_codes`;
--      in every other case code2 wins.
--
-- NOTE(review): step 4 is also reached when code1 is the shorter code, so the
-- result can depend on the order of the arguments -- confirm this is intended.
--
-- @param code1 string: the code currently held as preferred.
-- @param code2 string: the newly encountered code.
-- @return string: either code1 or code2.
local function determine_preferred_etymology_language_code(code1, code2)
	local plain_code = "^[%a-]+$"
	local upper_then_lower = "%u%l"

	-- Codes with characters other than letters and hyphens are never preferred
	-- over the other candidate.
	if not code2:find(plain_code) then
		return code1
	end
	if not code1:find(plain_code) then
		return code2
	end

	-- Neither is a code with an uppercase letter followed by a lowercase one.
	if code2:find(upper_then_lower) then
		return code1
	end
	if code1:find(upper_then_lower) then
		return code2
	end

	if #code2 < #code1 then
		return code2
	end

	-- Prefer nrf-grn and nrf-jer over roa-grn and roa-jer
	-- (Guernsey and Jersey).
	local prefix1 = code1:match "^[a-z]+"
	local prefix2 = code2:match "^[a-z]+"
	if prefix1 and prefix2 and language_codes[prefix1] then
		return code1
	end
	return code2
end

-- Reduces table `t` to a single value.
--
-- `func` is called once per entry visited by `pairs(t)` as
-- `func(key, value, accumulated)`, and whatever it returns becomes the
-- accumulated value passed to the next call. Because `pairs` is used, the
-- visiting order is unspecified.
--
-- @param t table: the table to traverse.
-- @param accum any: the initial accumulated value.
-- @param func function: the reducer, `(key, value, accumulated) -> accumulated`.
-- @return any: the final accumulated value (`accum` itself if `t` is empty).
local function fold(t, accum, func)
	local result = accum
	for key, value in pairs(t) do
		result = func(key, value, result)
	end
	return result
end

-- Builds a new table in which every value of `t` becomes a key mapped to the
-- key it was stored under in `t`. `t` itself is not modified.
--
-- If several keys of `t` share the same value, only one of them survives in
-- the result; which one depends on the traversal order of `pairs`.
--
-- @param t table: the table to invert.
-- @return table: a fresh value-to-key table.
local function invert(t)
	local value_to_key = {}
	for key, value in pairs(t) do
		value_to_key[value] = key
	end
	return value_to_key
end

-- Fold step: registers `code` for its `data` table.
--
-- `data_to_code` maps each data table to the code currently preferred for it.
-- As a side effect, every code seen for a data table is appended to that
-- table's `codes` list, which is created on first sight.
local function record_code(code, data, data_to_code)
	local current_code = data_to_code[data]
	if not current_code then
		-- First code encountered for this data table.
		data_to_code[data] = code
		data.codes = { code }
	else
		-- The same data table is reachable under another code: keep whichever
		-- code is preferred, but remember both.
		data_to_code[data] =
			determine_preferred_etymology_language_code(current_code, code)
		table.insert(data.codes, code)
	end
	return data_to_code
end

local etymology_language_data = require "Module:etymology languages/data"

-- The module's value maps each preferred code to its data table: the fold
-- produces data -> preferred code, and inverting that gives code -> data.
return invert(fold(etymology_language_data, {}, record_code))