-------------------------------------------------------------------------------
--- String-related tools
-- @module lua-nucleo.string
-- This file is a part of lua-nucleo library
-- @copyright lua-nucleo authors (see file `COPYRIGHT` for the license)
--------------------------------------------------------------------------------
local table_concat, table_insert = table.concat, table.insert
local math_floor = math.floor
local string_find, string_sub, string_format = string.find, string.sub, string.format
local string_byte, string_char = string.byte, string.char
local assert, pairs, type = assert, pairs, type
local tidentityset = go('#157').tidentityset
--[[
local arguments
= import 'lua-nucleo/args.lua'
{
'arguments'
}
]]
--- Create a string accumulator backed by a private buffer.
-- Returns two closures sharing that buffer:
-- `cat(v)` appends `v` and returns itself, so calls can be chained
-- (`cat "a" "b" (42)`); `concat(glue)` joins everything appended so far,
-- separated by `glue` (default: "").
-- @treturn function Chainable appender `cat`
-- @treturn function Joiner `concat`
local function make_concatter() -- TODO: rename, is not factory
  local pieces = { }

  local append
  append = function(value)
    pieces[#pieces + 1] = value
    return append
  end

  local function join(glue)
    if not glue then
      glue = ""
    end
    return table_concat(pieces, glue)
  end

  return append, join
end
--- Strip leading and trailing whitespace from a string.
-- Pattern taken from Programming in Lua 2, section 20.4.
-- @tparam string s Input string
-- @treturn string `s` without surrounding whitespace
local trim = function(s)
  -- The anchored pattern always matches the whole string; the lazy capture
  -- is whatever remains between the two whitespace runs.
  local core = s:match("^%s*(.-)%s*$")
  return core
end
--- Build a lazily-filled character substitution table.
-- The table starts out as `tidentityset(ignore)` (presumably mapping each
-- ignored character to itself; verify against `tidentityset`). Looking up any
-- other character formats its byte code with `string_subst` and caches the
-- result in the table.
-- @tparam string string_subst Format string applied to the character's byte
-- @tparam[opt] table ignore Characters to pass to `tidentityset`
-- (default: `{ "\n", "\t" }`)
-- @treturn table Substitution table, usable as a `gsub` replacement
local create_escape_subst = function(string_subst, ignore)
  if not ignore then
    ignore = { "\n", "\t" }
  end

  -- Invoked only for characters not yet present in the table.
  local function escape_and_remember(cache, char)
    local escaped = (string_subst):format(char:byte())
    cache[char] = escaped
    return escaped
  end

  return setmetatable(
      tidentityset(ignore),
      {
        __metatable = "escape.char";
        __index = escape_and_remember;
      }
    )
end
--- Percent-escape control characters, NUL and bytes 128..255 in a string.
-- Each matched character is replaced through a substitution table created
-- with the "%%%02X" format (a percent sign followed by two hex digits).
-- WARNING: This is not a suitable replacement for urlencode
-- @tparam string str Input string
-- @treturn string Escaped string
local escape_string
do
  local percent_hex = create_escape_subst("%%%02X")

  escape_string = function(str)
    local escaped = str:gsub("[%c%z\128-\255]", percent_hex)
    return escaped
  end
end
--- Encode a string for use in a URL query.
-- Every character other than alphanumerics (`%w`), `-`, `_` and space is
-- replaced through a "%XX" substitution table; spaces are then turned
-- into `+`.
-- NOTE(review): the substitution table is built by `create_escape_subst` with
-- its default ignore list, so "\n" and "\t" presumably pass through
-- unencoded; confirm whether that is intended.
-- @tparam string str Input string
-- @treturn string Encoded string (exactly one value is returned)
local url_encode
do
  local escape_subst = create_escape_subst("%%%02X")
  url_encode = function(str)
    local encoded = str:gsub("([^%w-_ ])", escape_subst)
    -- Parentheses drop gsub's match count, so callers that forward the result
    -- into an argument list do not receive a stray second value.
    return (encoded:gsub(" ", "+"))
  end
end
--- Escape HTML special characters.
-- Replaces `&`, `"`, `'`, `<` and `>` with character entity references.
-- Numbers are returned unchanged (still as numbers); any other value is
-- converted with `tostring` before escaping.
-- @param value Value to escape
-- @return Escaped string, or `value` itself when it is a number
local htmlspecialchars = nil
do
  local subst =
  {
    ["&"] = "&amp;";
    ['"'] = "&quot;";
    ["'"] = "&apos;";
    ["<"] = "&lt;";
    [">"] = "&gt;";
  }
  htmlspecialchars = function(value)
    if type(value) == "number" then
      return value
    end
    value = tostring(value)
    -- Parentheses drop gsub's second result (the replacement count).
    return (value:gsub("[&\"'<>]", subst))
  end
end
--- Wrap a string into an XML CDATA section.
-- Any "]]>" inside the value is split across two CDATA sections, i.e. it is
-- written as ("]]" + "]]><![CDATA[" + ">").
-- @tparam string value Text to wrap
-- @treturn string CDATA-wrapped text
local cdata_wrap = function(value)
  local body = value:gsub("]]>", ']]]]><![CDATA[>')
  return '<![CDATA[' .. body .. ']]>'
end
--- Feed a CDATA-wrapped string into a chainable concatter.
-- Emits three pieces through `cat`: the CDATA opener, the value with every
-- "]]>" rewritten as ("]]" + "]]><![CDATA[" + ">"), and the CDATA closer.
-- @tparam function cat Chainable appender (each call must return the next
-- appender to call)
-- @tparam string value Text to wrap
local cdata_cat = function(cat, value)
  local emit = cat('<![CDATA[')
  local body = value:gsub("]]>", ']]]]><![CDATA[>')
  emit = emit(body)
  emit(']]>')
end
--- Split a string on a single-character delimiter.
--
-- Produces the substrings found between consecutive occurrences of the
-- delimiter, in order. Adjacent delimiters yield empty strings; an empty input
-- string yields an empty array.
--
-- @tparam string str Input string
-- @tparam string delimiter Boundary char (exactly one character long)
-- @treturn table Array of the pieces of `str` lying between delimiters
local split_by_char = function(str, delimiter)
  assert(type(str) == "string", "Param str must be a string")
  assert(
      type(delimiter) == "string" and #delimiter == 1,
      "Invalid delimiter"
    )

  local pieces = { }
  if str == "" then
    return pieces
  end

  local from = 1
  while true do
    -- plain search: the delimiter is never treated as a pattern
    local at = str:find(delimiter, from, true)
    if at == nil then
      break
    end
    pieces[#pieces + 1] = str:sub(from, at - 1)
    from = at + 1
  end

  -- whatever follows the last delimiter (possibly an empty string)
  pieces[#pieces + 1] = str:sub(from)

  return pieces
end
--- Count non-overlapping occurrences of a substring.
-- The search is plain (no pattern matching) and resumes right after the end of
-- each match.
-- @tparam string str The string to search in
-- @tparam string substr The substring to search for, must be not empty
-- @treturn number Returns the number of substring occurrences
local count_substrings = function(str, substr)
  -- An empty needle would match at every position forever
  assert(#substr > 0, "substring must be not empty")

  local total = 0
  local _, match_end = str:find(substr, 1, true)
  while match_end do
    total = total + 1
    _, match_end = str:find(substr, match_end + 1, true)
  end

  return total
end
--- Cut a string in two at the given offset.
-- @tparam string str Input string
-- @tparam number offset Number of leading characters that go to the left part;
-- must not exceed the length of `str`
-- @tparam[opt=0] number skip_right Number of characters to drop from the
-- beginning of the right part
-- @treturn string Left part: the first `offset` characters
-- @treturn string Right part: everything after position
-- `offset + skip_right`
local split_by_offset = function(str, offset, skip_right)
  assert(offset <= #str, "offset greater than str length")

  local left = str:sub(1, offset)
  local right_from = offset + 1 + (skip_right or 0)

  return left, str:sub(right_from)
end
--- Substitute placeholders in a string with values from a dictionary.
-- `capture` is a Lua pattern with one capture; whatever it captures is used as
-- the key looked up in `dict`.
-- @tparam string capture Variable matching expression
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables' values
-- @treturn string A result string, where variables are substituted with values
-- @usage Universal value substitution to any placeholder, for example:
-- fill_placeholders_ex("%$%((.-)%)", "a = $(a)", { a = 42 })
-- returns "a = 42"
-- @see fill_placeholders
-- @see fill_curly_placeholders
local fill_placeholders_ex = function(capture, str, dict)
  local expanded = str:gsub(capture, dict)
  return expanded
end
--- Substitute `$(varname)` placeholders with values from a dictionary.
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables' values
-- @treturn string A result string, where variables are substituted with values
-- @usage fill_placeholders("a = $(a)", { a = 42 })
-- returns "a = 42"
-- @see fill_placeholders_ex
local fill_placeholders = function(str, dict)
  -- matches "$(" name ")" and captures the name
  local round_placeholder = "%$%((.-)%)"
  return fill_placeholders_ex(round_placeholder, str, dict)
end
--- Substitute `${varname}` placeholders with values from a dictionary.
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables' values
-- @treturn string A result string, where variables are substituted with values
-- @usage fill_curly_placeholders("a = ${a}", { a = 42 })
-- returns "a = 42"
-- @see fill_placeholders_ex
local fill_curly_placeholders = function(str, dict)
  -- matches "${" name "}" and captures the name
  local curly_placeholder = "%${(.-)}"
  return fill_placeholders_ex(curly_placeholder, str, dict)
end
--- Render a flat table as a single string.
--
-- Each pair is written as key, `kv_glue`, value; consecutive pairs are
-- separated by `pair_glue`.
-- Allowed values for key and value are numbers and strings.
-- The traversal order is whatever `pairs_fn` produces.
-- @tparam table t Non-hierarchical table with [key]=value pairs
-- @tparam string kv_glue Glue between key and value
-- @tparam string pair_glue Glue between pairs (default: "")
-- @tparam function pairs_fn Table iterator (default: pairs)
-- @treturn string A result string
-- @usage kv_concat({a = 1, b = 2}, " => ", "; ", pairs)
local kv_concat = function(t, kv_glue, pair_glue, pairs_fn)
  pair_glue = pair_glue or ""
  pairs_fn = pairs_fn or pairs

  local cat, concat = make_concatter()

  -- Nothing precedes the first pair; every later pair is preceded by the glue
  local separator = ""
  for key, value in pairs_fn(t) do
    cat(separator)
    cat(key)
    cat(kv_glue)
    cat(value)
    separator = pair_glue
  end

  return concat()
end
--- Escape a string so that it can be used literally inside a Lua pattern.
-- Every pattern magic character (`^$()%.[]*+-?`) gets a `%` prefix and
-- the NUL character is written as `%z`; all other characters are kept.
-- @tparam string s String to escape
-- @treturn string Escaped string
local escape_lua_pattern
do
  -- NUL has no "%" + char form, it maps to the %z class instead
  local replacements = { ["\0"] = "%z" }
  for magic in ("^$()%.[]*+-?"):gmatch(".") do
    replacements[magic] = "%" .. magic
  end

  escape_lua_pattern = function(s)
    -- characters without an entry in the table are left untouched by gsub
    local escaped = s:gsub(".", replacements)
    return escaped
  end
end
--- Serialize a string as a JSON string literal (including the quotes).
-- `"` and `\` are backslash-escaped; backspace, form feed, newline, carriage
-- return and tab use their two-character JSON escapes; every other character
-- with code 0..31 is written as `\u00XX`. All remaining bytes, including
-- `/` and bytes >= 128, are copied through unchanged.
-- @tparam string s String to encode
-- @treturn string JSON string literal
local escape_for_json
do
  -- Based on luajson code.
  -- https://github.com/harningt/luajson/blob/master/lua/json/encode/strings.lua
  local matches =
  {
    ['"'] = '\\"';
    ['\\'] = '\\\\';
    -- ['/'] = '\\/'; -- TODO: ?! Do we really need to escape this?
    ['\b'] = '\\b';
    ['\f'] = '\\f';
    ['\n'] = '\\n';
    ['\r'] = '\\r';
    ['\t'] = '\\t';
    -- No entry for '\v': JSON defines no "\v" escape, so vertical tab is
    -- encoded as \u000B by the loop below like any other control character.
  }

  -- Pre-encode the remaining control characters to speed up encoding.
  -- Bytes >= 128 are deliberately not listed: the pattern used below never
  -- matches them, so (possibly multi-byte UTF-8) sequences pass through as is.
  for i = 0, 31 do
    local c = string.char(i)
    if not matches[c] then
      matches[c] = ('\\u%.4X'):format(i)
    end
  end

  escape_for_json = function(s)
    -- '/' is matched by the pattern but has no table entry,
    -- so gsub keeps it unchanged.
    return '"' .. s:gsub('[\\"/%z\1-\031]', matches) .. '"'
  end
end
--- Check whether a string begins with the given prefix.
-- @param str Value to test
-- @param prefix Expected prefix
-- @treturn boolean `true` if both arguments are strings and `str` starts with
-- `prefix`; `false` otherwise (including when either is not a string)
local starts_with = function(str, prefix)
  if not (type(str) == 'string' and type(prefix) == 'string') then
    return false
  end

  if #prefix > #str then
    return false
  end

  return str:sub(1, #prefix) == prefix
end
--- Check whether a string ends with the given suffix.
-- @param str Value to test
-- @param suffix Expected suffix
-- @treturn boolean `true` if both arguments are strings and `str` ends with
-- `suffix`; `false` otherwise (including when either is not a string)
local ends_with = function(str, suffix)
  if not (type(str) == 'string' and type(suffix) == 'string') then
    return false
  end

  local suffix_length = #suffix
  -- An empty suffix needs its own branch: sub(-0) would return the whole string
  if suffix_length == 0 then
    return true
  end

  if suffix_length > #str then
    return false
  end

  return str:sub(-suffix_length) == suffix
end
--- Convert a number to its string representation in the given base.
-- The number is floored first. Digits above 9 are written as upper-case
-- latin letters; negative numbers get a leading "-".
-- @tparam number n Number to convert (must not be NaN or infinite)
-- @tparam[opt=10] number base Integer base, from 2 to 36
-- @treturn string Representation of `floor(n)` in `base`
local integer_to_string_with_base
do
  -- TODO: use arbitrary set of digits
  -- https://github.com/lua-nucleo/lua-nucleo/issues/2
  local digits =
  {
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B";
    "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N";
    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z";
  }
  integer_to_string_with_base = function(n, base)
    base = base or 10
    assert(type(n) == "number", "n must be a number")
    assert(type(base) == "number", "base must be a number")
    -- Base 1 would never reduce n (infinite loop below); base < 2 is
    -- meaningless for positional notation.
    assert(base >= 2 and base <= #digits, "base out of range")
    -- A fractional base would index `digits` with non-integers.
    assert(base == math_floor(base), "base must be an integer")
    assert(n == n, "n is nan")
    assert(n ~= 1 / 0 and n ~= -1 / 0, "n is inf")
    n = math_floor(n)
    if base == 10 or n == 0 then
      return tostring(n)
    end
    local sign = ""
    if n < 0 then
      sign = "-"
      n = -n
    end
    -- Digits are produced least significant first and reversed at the end.
    local r = { }
    while n ~= 0 do
      r[#r + 1] = digits[(n % base) + 1]
      n = math_floor(n / base)
    end
    return sign .. table_concat(r, ""):reverse()
  end
end
--- Shorten a string to at most `max_length` characters.
-- Strings that already fit are returned unchanged. Longer strings are cut so
-- that the result, including a trailing "...", is exactly `max_length`
-- characters long. If `max_length` leaves no room for anything besides the
-- ellipsis (3 or less), the string is simply truncated without it.
-- @tparam string str Input string
-- @tparam[opt=80] number max_length Maximum result length, must be positive
-- @treturn string Possibly shortened string
local cut_with_ellipsis
do
  local ellipsis = "..."
  local ellipsis_length = #ellipsis
  cut_with_ellipsis = function(str, max_length)
    max_length = max_length or 80
    -- Plain asserts are used for validation: the `arguments` helper is not
    -- imported in this file, so calling it would fail at run time.
    assert(type(str) == "string", "str must be a string")
    assert(type(max_length) == "number", "max_length must be a number")
    assert(max_length > 0, "required string length must be positive")
    if #str > max_length then
      if max_length > ellipsis_length then
        str = str:sub(1, max_length - ellipsis_length) .. ellipsis
      else
        str = str:sub(1, max_length)
      end
    end
    return str
  end
end
-- Convert numbers into loadable strings, including inf, -inf and nan.
--
-- number_to_string(number) uses tostring() for regular numbers;
-- serialize_number(number) uses the "%.17g" format.
-- Both return "1/0", "-1/0" or "0/0" for infinities and NaN, so that the
-- result can be fed back to the Lua loader.
local number_to_string
local serialize_number
do
  local t =
  {
    [tostring(1/0)] = "1/0";
    [tostring(-1/0)] = "-1/0";
    -- NaN may be printed with or without a sign ("nan" / "-nan") depending on
    -- the platform and on how the value was produced, so register both
    -- spellings. Where they coincide the second entry overwrites the first.
    [tostring(0/0)] = "0/0";
    [tostring(-(0/0))] = "0/0";
  }
  number_to_string = function(number)
    -- no argument checking - called very often
    local text = tostring(number)
    return t[text] or text
  end
  serialize_number = function(number)
    -- no argument checking - called very often
    local text = ("%.17g"):format(number)
    -- on the same platform tostring() and string.format()
    -- return the same results for 1/0, -1/0, 0/0
    -- so we don't need separate substitution table
    return t[text] or text
  end
end
--- Returns a string of '%'-prefixed characters covering the given ranges.
-- @param ranges Flat array of range bounds: (ranges[1], ranges[2]) is the
-- first range, (ranges[3], ranges[4]) the second one, and so on. For each
-- range every character from the lower to the upper bound (inclusive) is
-- appended to the result, preceded by '%'.
--
-- A bound may be a number (character code) or a string, in which case
-- only its first character is used.
--
-- Bounds of different types may be mixed within one range, for example:
-- get_escaped_chars_in_ranges({"0",50}) returns "%0%1%2".
-- @treturn string Returns '%'-separated character string.
-- @local here
local get_escaped_chars_in_ranges
do
  -- Turns a string bound into the code of its first character;
  -- any other value is passed through untouched.
  local function to_char_code(bound)
    if type(bound) == "string" then
      return string_byte(bound)
    end
    return bound
  end

  get_escaped_chars_in_ranges = function(ranges)
    assert(
        type(ranges) == "table",
        "argument must be a table"
      )
    assert(
        #ranges % 2 == 0,
        "argument must have even number of elements"
      )

    local cat, concat = make_concatter()

    for index = 1, #ranges, 2 do
      local first_code = to_char_code(ranges[index])
      local last_code = to_char_code(ranges[index + 1])

      assert(
          type(first_code) == "number"
            and type(last_code) == "number",
          "argument elements must be numbers or strings"
        )

      for code = first_code, last_code do
        cat("%")
        cat(string_char(code))
      end
    end

    return concat()
  end
end
-- Module exports
return
{
  escape_string = escape_string;
  make_concatter = make_concatter;
  trim = trim;
  create_escape_subst = create_escape_subst;
  htmlspecialchars = htmlspecialchars;
  fill_placeholders_ex = fill_placeholders_ex;
  fill_placeholders = fill_placeholders;
  fill_curly_placeholders = fill_curly_placeholders;
  cdata_wrap = cdata_wrap;
  cdata_cat = cdata_cat;
  split_by_char = split_by_char;
  split_by_offset = split_by_offset;
  count_substrings = count_substrings;
  kv_concat = kv_concat;
  escape_lua_pattern = escape_lua_pattern;
  escape_for_json = escape_for_json;
  starts_with = starts_with;
  ends_with = ends_with;
  url_encode = url_encode;
  integer_to_string_with_base = integer_to_string_with_base;
  cut_with_ellipsis = cut_with_ellipsis;
  number_to_string = number_to_string;
  serialize_number = serialize_number;
  get_escaped_chars_in_ranges = get_escaped_chars_in_ranges;
}
-- Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
-- No comments. add comment
-- | Recognizer | Recognition Result | Visualize | Recalc |
-- |---|---|---|---|
-- | #308 | javax.imageio.IIOException: Can't get input stream from URL! | [visualize] |
-- | Snippet ID: | #156 |
-- | Snippet name: | string.lua (luanucleo) |
-- | Eternal ID of this version: | #156/1 |
-- | Text MD5: | 1f8b1c5fd10779250dd8c24bc8737602 |
-- | Author: | stefan |
-- | Category: | |
-- | Type: | Lua code |
-- | Public (visible to everyone): | Yes |
-- | Archived (hidden from active list): | No |
-- | Created/modified: | 2014-01-13 03:50:10 |
-- | Source code size: | 14538 bytes / 490 lines |
-- | Pitched / IR pitched: | Yes / Yes |
-- | Views / Downloads: | 1537 / 408 |
-- | Referenced in: | #153 - tstr.lua (lua-nucleo) #3000188 - Answer for stefanreich(>> t search) #3000190 - Answer for stefanreich(>> t 20 questions) #3000382 - Answer for ferdie (>> t = 1, f = 0) #3000383 - Answer for funkoverflow (>> t=1, f=0 okay) |