local export = {}
-- match_to_array stops collecting once this many matches have been accepted.
local word_limit = 2000
local m_fun = require "Module:fun"
local m_table = require "Module:table"
local decompose = mw.ustring.toNFD
local U = mw.ustring.char
local acute = U(0x301) -- U+0301 COMBINING ACUTE ACCENT
local grave = U(0x300) -- U+0300 COMBINING GRAVE ACCENT
local circumflex = U(0x342) -- U+0342 COMBINING GREEK PERISPOMENI
-- matches U+0300-U+037F
-- Byte-level pattern for use with the plain string library: in UTF-8 those
-- code points are exactly the lead byte 0xCC or 0xCD followed by a single
-- continuation byte. (The pattern used to be empty and matched nothing useful.)
local diacritic = "[\204\205][\128-\191]"
-- Byte-level pattern matching one whole UTF-8 encoded character: an ASCII byte
-- or a multi-byte lead byte, followed by any continuation bytes. %z stands for
-- the NUL byte, which cannot appear literally in a Lua 5.1 pattern.
-- (The pattern used to be "*", which only matches a literal asterisk.)
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
local semicolon = "·"
--- Collect processed pattern matches from a string into an array.
-- @param str          string to scan
-- @param patt         Lua pattern passed to str:gmatch
-- @param filter_func  called with each match; the match is kept only when
--                     this returns a true value
-- @param process_func called with each kept match; its first return value is
--                     what gets stored
-- @return array of processed matches in order of appearance, holding at most
--         word_limit entries
local function match_to_array(str, patt, filter_func, process_func)
	local array = {}
	local i = 0
	for match in str:gmatch(patt) do
		if filter_func(match) then
			i = i + 1
			-- Store at index i; assigning to `array` itself would throw away
			-- everything collected so far and return a non-table.
			array[i] = process_func(match)
			-- Toggle: turn "---[[" into "--[[" to comment out the limit check.
			---[[
			if i == word_limit then
				break
			end
			--]]
		end
	end
	return array
end
-- Table referenced from the character callback in process_word; presumably it
-- maps a single character (key) to the text that should replace it (value).
-- NOTE(review): every entry below has lost its key (the part before "="), so
-- this constructor does not parse as Lua. The keys cannot be recovered from
-- the values alone; restore them from the last known-good revision before
-- using this module.
-- Values visible here: one entry maps to the combining acute accent, one to a
-- right single quotation mark, and the rest to the empty string (deletion).
local replacements = {
= acute,
= "",
"] = "",
= "’",
= "",
= "",
= "",
= "",
= "",
= "",
= "",
}
--- Normalize one word before it is counted.
-- The word is decomposed with mw.ustring.toNFD, then every character matched
-- by UTF8_char is visited:
--   * of the acute, grave and circumflex accents only the first one in the
--     word is kept, and a kept grave is turned into an acute;
--   * any other character is looked up in the replacements table.
-- The function is wrapped in m_fun.memoize (presumably so repeated words are
-- only processed once -- confirm against Module:fun).
-- @param word string
-- @return the results of the gsub call (normalized word, substitution count)
local process_word = m_fun.memoize(function (word)
	-- Fresh for every word: becomes true once the first accent has been kept.
	local found_accent = false
	return decompose(word)
		-- Remove all but first accent in word.
		-- Use replacements table.
		:gsub(
			UTF8_char,
			function (char)
				if char == acute or char == grave or char == circumflex then
					if found_accent then
						return ""
					end
					found_accent = true
					if char == grave then
						return acute
					else
						return nil -- no change
					end
				end
				-- Look the character up; returning the table itself is an
				-- invalid gsub replacement value. A nil result (no entry for
				-- this character) leaves the character unchanged.
				return replacements[char]
			end)
end)
-- No macrons or breves in Odyssey text.
--- Turn a displayed word into a page name by replacing every right single
-- quotation mark (’) with a plain ASCII apostrophe (').
-- @param word string
-- @return the converted string, followed by the number of replacements made
local function make_entry_name(word)
	local entry_name, replaced = word:gsub("’", "'")
	return entry_name, replaced
end
--- Wrap a word in a wikilink inside a polytonic Ancient Greek span.
-- The link target is make_entry_name(text) and the link label is the word as
-- given. Previously the function ignored its argument and returned a span
-- containing only a stray "]".
-- NOTE(review): the exact original link target (e.g. whether it carried a
-- language section anchor) is not recoverable from this file -- confirm.
-- @param text string word to display
-- @return string wikitext
local function link(text)
	-- make_entry_name returns two values; used inside a concatenation only the
	-- first (the string) is taken.
	return '<span class="polytonic" lang="grc">[['
		.. make_entry_name(text) .. '|' .. text
		.. ']]</span>'
end
--- Count how many times each item occurs in an array.
-- @param array sequence of values (iterated with ipairs, so it stops at the
--              first nil)
-- @return table mapping each distinct item to its number of occurrences
local function count(array)
	local count_map = {}
	for _, item in ipairs(array) do
		-- Index by item: doing arithmetic on count_map itself raises an error.
		count_map[item] = (count_map[item] or 0) + 1
	end
	return count_map
end
--- Format one output line: a bullet, the linked word, and its count in
-- parentheses.
-- @param count number of occurrences of the word
-- @param word  the word itself
-- @return string one wikitext list item
local function process_count(count, word)
	local linked_word = link(word)
	local tally = " (" .. count .. ")"
	return "* " .. linked_word .. tally
end
local ugsub = mw.ustring.gsub
local ulower = mw.ustring.lower
--- Build the sort key for a word: lowercase it with mw.ustring.lower, then
-- delete everything matched by the `diacritic` pattern using the byte-based
-- string gsub. Wrapped in m_fun.memoize.
-- @param word string
-- @return the results of the gsub call (stripped string, substitution count)
local remove_diacritics = m_fun.memoize(function (word)
	local lowered = ulower(word)
	return lowered:gsub(diacritic, "")
end)
--- Build a comparison function that orders words by their counts.
-- @param count table mapping word -> number of occurrences
-- @return function(word1, word2) that is true when word1 should sort first:
--         higher counts come first, and words with equal counts are ordered
--         by their diacritic-free lowercase form
local function count_comp_gen(count)
	return function(word1, word2)
		-- Look up each word's own count; comparing the table with itself made
		-- the two values always equal, so counts never influenced the order.
		local count1, count2 = count[word1], count[word2]
		if count1 == count2 then
			return remove_diacritics(word1) < remove_diacritics(word2)
		else
			return count1 > count2
		end
	end
end
--- Entry point: list every word of the text with its number of occurrences.
-- Reads the page Module:User:Erutuon/07/documentation, takes the contents of
-- its first HTML comment, splits that on whitespace, normalizes each token
-- with process_word, counts the results, and returns one "* word (n)" line
-- per distinct word, sorted with count_comp_gen.
-- @param frame frame object passed by the caller (not used)
-- @return string wikitext list
-- Raises an error when the page cannot be read or contains no HTML comment.
function export.show(frame)
	local page_name = "Module:User:Erutuon/07/documentation"
	local title = mw.title.new(page_name)
	-- getContent() yields nil for a nonexistent page; fail with a clear
	-- message instead of an "attempt to index a nil value" error.
	local content = title and title:getContent()
	if not content then
		error("Could not read the content of " .. page_name)
	end
	local Odyssey1 = content:match"<!%-%-(.-)%-%->"
	if not Odyssey1 then
		error("No HTML comment found in " .. page_name)
	end
	-- NOTE(review): the filter pattern is empty, and find with an empty
	-- pattern succeeds on every string, so every token is currently accepted.
	-- It presumably held a character class at some point -- confirm.
	local words = match_to_array(Odyssey1, "%S+", function(word) return word:find "" end, process_word)
	-- Named `counts` so the function `count` is not shadowed.
	local counts = count(words)
	return table.concat(m_fun.mapIter(process_count, m_table.sortedPairs(counts, count_comp_gen(counts))), "\n")
end
return export