local language_codes = require "Module:languages/code to canonical name"
--- Choose which of two codes that share one data table should be preferred.
--
-- The checks run in order; the first one that applies decides the result:
--   1. A code made up only of letters and hyphens beats one that is not
--      (if `code2` fails this check, `code1` is returned without being
--      checked itself).
--   2. A code without an uppercase letter directly followed by a lowercase
--      letter beats one that has such a pair (`code2` is checked first).
--   3. If `code2` is strictly shorter than `code1`, `code2` wins.
--   4. Otherwise `code1` wins only when both codes begin with lowercase
--      ASCII letters and the leading lowercase run of `code1` is a key of
--      the table loaded from "Module:languages/code to canonical name";
--      in every other case `code2` wins.
--
-- @param code1 string: the code currently recorded for the data table
-- @param code2 string: the newly encountered code for the same data table
-- @return string: either `code1` or `code2`
local function determine_preferred_etymology_language_code(code1, code2)
	local letters_and_hyphens = "^[%a-]+$"
	local upper_then_lower = "%u%l"

	if not code2:find(letters_and_hyphens) then
		return code1
	end
	if not code1:find(letters_and_hyphens) then
		return code2
	end
	if code2:find(upper_then_lower) then
		return code1
	end
	if code1:find(upper_then_lower) then
		return code2
	end
	if #code2 < #code1 then
		return code2
	end

	-- Prefer nrf-grn and nrf-jer over roa-grn and roa-jer
	-- (Guernsey and Jersey).
	local prefix1 = code1:match("^[a-z]+")
	local prefix2 = code2:match("^[a-z]+")
	if prefix1 and prefix2 and language_codes[prefix1] then
		return code1
	end
	return code2
end
--- Reduce a table to a single value.
--
-- Visits every key/value pair of `t` via `pairs` (so the visiting order is
-- unspecified) and threads an accumulator through `func`.
--
-- @param t table: the table to traverse
-- @param accum any: the initial accumulator value
-- @param func function: called as `func(key, value, accumulator)`; its
--   return value becomes the accumulator for the next pair
-- @return the accumulator after the last pair (or `accum` unchanged when
--   `t` has no entries)
local function fold(t, accum, func)
	local result = accum
	for key, value in pairs(t) do
		result = func(key, value, result)
	end
	return result
end
--- Build a new table with the keys and values of `t` swapped.
--
-- If several keys of `t` share the same value, only one of them survives
-- in the result; which one depends on the traversal order of `pairs`.
--
-- @param t table: the table to invert (left unmodified)
-- @return table: a fresh table mapping each value of `t` to a key of `t`
local function invert(t)
	local value_to_key = {}
	for key, value in pairs(t) do
		value_to_key[value] = key
	end
	return value_to_key
end
-- Module result: a table mapping one preferred code to each distinct data
-- table found in "Module:etymology languages/data".
--
-- Several codes in that module may refer to the very same data table. The
-- fold below groups codes by data table (tables are compared by identity),
-- keeps the preferred code for each, and records every code seen for a data
-- table in that table's `codes` field. The resulting data -> code mapping is
-- then inverted into code -> data.
local etymology_language_data = require "Module:etymology languages/data"

--- Fold step: register `code` as one of the codes of `data`.
-- Note that this writes the `codes` field on the data table itself.
local function register_code(code, data, data_to_code)
	local current_code = data_to_code[data]
	if current_code then
		-- The data table was seen before under another code: keep whichever
		-- code is preferred and append this one to the list of known codes.
		data_to_code[data] = determine_preferred_etymology_language_code(current_code, code)
		table.insert(data.codes, code)
	else
		-- First time this data table is encountered.
		data_to_code[data] = code
		data.codes = { code }
	end
	return data_to_code
end

return invert(fold(etymology_language_data, {}, register_code))