模組:沙盒/Artoria2e5/unicode-other
模块文档[创建]
您可能想要创建本Scribunto模块的文档。 编者可以在本模块的沙盒 (创建 | 镜像)和测试样例 (创建)页面进行实验。 请在/doc子页面中添加分类。本模块的子页面。 |
-- [[Special:滥用过滤器/180|不可见字符]]诊断工具
-- {{see|Template:uw-unicode-other}}
-- {{#invoke:沙盒/Artoria2e5/unicode-other|main|}}
-- Released under CC0.
-- Adapts a function that expects a plain argument table so that it can also
-- be used as an {{#invoke:}} entry point.
-- @param func  function taking a single argument
-- @return a wrapper that hands `func` the `.args` field of its argument when
--         that argument is the frame returned by mw.getCurrentFrame(), and
--         forwards any other value to `func` unchanged.
local function __unframe(func)
	local function wrapper(maybe_frame)
		local payload = maybe_frame
		-- Only the live frame object is unwrapped; ordinary tables pass through.
		if maybe_frame == mw.getCurrentFrame() then
			payload = maybe_frame.args
		end
		return func(payload)
	end
	return wrapper
end
-- ret: <function():iter => <matched: ustr, start_idx: num, end_idx: num>@i,j>
-- Iterates over the matches of `pattern` in `us`, also reporting where each
-- match was found.
-- @param us       subject string, searched with mw.ustring.find
-- @param pattern  pattern passed to mw.ustring.find
-- @return an iterator function; each call returns the matched substring
--         followed by its start and end indices as reported by
--         mw.ustring.find, or nil once there are no more matches.
-- The cursor is private to each iterator, so several iterators can be
-- advanced in an interleaved fashion without disturbing one another.
-- Note: a pattern that can match the empty string is not supported; the
-- search position would not advance past such a match.
local function gmatch_with_idx(us, pattern)
	-- Position at which the next search starts; nil once exhausted.
	local next_start = 1
	return function()
		if next_start == nil then
			return nil
		end
		local i, j = mw.ustring.find(us, pattern, next_start)
		if i == nil then
			-- Remember exhaustion so further calls keep returning nil
			-- instead of doing arithmetic on a nil index.
			next_start = nil
			return nil
		end
		next_start = j + 1
		return mw.ustring.sub(us, i, j), i, j
	end
end
-- Character-set patterns (mw.ustring pattern syntax) for the Unicode "Other"
-- general categories. Keys are the category labels printed in the report;
-- each value is a single bracketed set built from code points.
-- NOTE(review): "Cp" is the label used here for private-use characters; the
-- Unicode abbreviation for that category is "Co". The key is left unchanged
-- because it is emitted verbatim in the output.
local uni_gc_others = {
	-- Control characters: U+0000-U+001F and U+007F-U+009F.
	-- NUL is expressed with the %z class, so the explicit range starts at
	-- U+0001: a class cannot serve as the endpoint of a range inside [...]
	-- (the "-" would be taken as a literal hyphen).
	["Cc"] =
		'[%z' ..
		mw.ustring.char(0x0001) .. '-' .. mw.ustring.char(0x001f) ..
		mw.ustring.char(0x007f) .. '-' .. mw.ustring.char(0x009f) ..
		']',
	-- Surrogate code points.
	-- NOTE(review): whether mw.ustring accepts surrogate code points in
	-- char() and in subject strings is not visible here - confirm.
	["Cs"] = '[' ..
		mw.ustring.char(0xD800) .. '-' .. mw.ustring.char(0xDFFF) ..
		']',
	-- Private use: the BMP private use area plus the two supplementary
	-- private use planes.
	["Cp"] = '[' ..
		mw.ustring.char(0xE000) .. '-' .. mw.ustring.char(0xF8FF) ..
		-- Planes 15 and 16 in full; the last two code points of each plane
		-- are noncharacters and are listed under "Cn" instead.
		mw.ustring.char(0xF0000) .. '-' .. mw.ustring.char(0xFFFFD) ..
		mw.ustring.char(0x100000) .. '-' .. mw.ustring.char(0x10FFFD) ..
		']',
	-- Noncharacters: U+FDD0-U+FDEF and the last two code points of every
	-- plane (0 through 16).
	["Cn"] = '[' ..
		mw.ustring.char(0xFDD0) .. '-' .. mw.ustring.char(0xFDEF) ..
		mw.ustring.char(0xFFFE, 0xFFFF) ..
		mw.ustring.char(0x1FFFE, 0x1FFFF) ..
		mw.ustring.char(0x2FFFE, 0x2FFFF) ..
		mw.ustring.char(0x3FFFE, 0x3FFFF) ..
		mw.ustring.char(0x4FFFE, 0x4FFFF) ..
		mw.ustring.char(0x5FFFE, 0x5FFFF) ..
		mw.ustring.char(0x6FFFE, 0x6FFFF) ..
		mw.ustring.char(0x7FFFE, 0x7FFFF) ..
		mw.ustring.char(0x8FFFE, 0x8FFFF) ..
		mw.ustring.char(0x9FFFE, 0x9FFFF) ..
		mw.ustring.char(0xAFFFE, 0xAFFFF) ..
		mw.ustring.char(0xBFFFE, 0xBFFFF) ..
		mw.ustring.char(0xCFFFE, 0xCFFFF) ..
		mw.ustring.char(0xDFFFE, 0xDFFFF) ..
		mw.ustring.char(0xEFFFE, 0xEFFFF) ..
		mw.ustring.char(0xFFFFE, 0xFFFFF) ..
		mw.ustring.char(0x10FFFE, 0x10FFFF) ..
		']',
	-- Format characters.
	-- NOTE(review): this list is hand-maintained; U+2066-U+2069 are not
	-- covered and the tag range runs to U+E0096 rather than U+E007F -
	-- confirm against the current Unicode data before relying on it.
	["Cf"] = '[' ..
		mw.ustring.char(0x00AD, 0x070F, 0x17B4, 0x17B5) ..
		mw.ustring.char(0x200B) .. '-' .. mw.ustring.char(0x200F) ..
		mw.ustring.char(0x202A) .. '-' .. mw.ustring.char(0x202E) ..
		mw.ustring.char(0x2060) .. '-' .. mw.ustring.char(0x2064) ..
		mw.ustring.char(0x206A) .. '-' .. mw.ustring.char(0x206F) ..
		mw.ustring.char(0xFEFF) ..
		-- U+0600-U+0603 and U+06DD: five visible Cf characters.
		mw.ustring.char(0x0600) .. '-' .. mw.ustring.char(0x0603) ..
		mw.ustring.char(0x06DD) ..
		mw.ustring.char(0x110BD) ..
		mw.ustring.char(0x1D173) .. '-' .. mw.ustring.char(0x1D17A) ..
		mw.ustring.char(0xE0001) ..
		mw.ustring.char(0xE0020) .. '-' .. mw.ustring.char(0xE0096) ..
		']'
}
-- ret: wikitext<ustr>
-- Scans a text for characters matching any pattern in `uni_gc_others` and
-- reports each hit as one wikitext bullet.
-- @param args  argument table whose first positional entry (args[1]) is the
--              text to scan; a plain string is accepted as the text itself.
-- @return wikitext string with one line per hit of the form
--         "* '''<category>''': <tt>U+XXXX</tt> at line L, char C.",
--         joined with newlines; the empty string when no text is given or
--         nothing matches.
-- Newline and tab characters are never reported. Line and character
-- numbers are 1-based; a newline starts a new line and resets the
-- character counter.
local function main(args)
	local text = args
	if type(args) == "table" then
		text = args[1]
	end
	if text == nil then
		return ""
	end
	text = tostring(text)

	local report = {}
	local lineno, charno = 1, 1
	for chr in mw.ustring.gmatch(text, ".") do
		if chr ~= "\n" and chr ~= "\t" then
			for cat, patt in pairs(uni_gc_others) do
				if mw.ustring.find(chr, patt) then
					report[#report + 1] = string.format(
						"* '''%s''': <tt>U+%04X</tt> at line %d, char %d.",
						cat, mw.ustring.codepoint(chr), lineno, charno)
					-- A character is reported under one category only.
					break
				end
			end
		end
		if chr == "\n" then
			lineno = lineno + 1
			charno = 1
		else
			charno = charno + 1
		end
	end
	return table.concat(report, '\n')
end
-- Module export table. `main` is wrapped with __unframe so the same entry
-- point can be handed either the current frame or a plain argument value.
local exports = {}
exports.main = __unframe(main)
return exports