-------------------------------------------------------------------------------
--- String-related tools
-- @module lua-nucleo.string
-- This file is a part of lua-nucleo library
-- @copyright lua-nucleo authors (see file `COPYRIGHT` for the license)
--------------------------------------------------------------------------------
local table_concat, table_insert = table.concat, table.insert
local math_floor = math.floor
local string_find, string_sub, string_format = string.find, string.sub, string.format
local string_byte, string_char = string.byte, string.char
local assert, pairs, type = assert, pairs, type
local tidentityset = go('#157').tidentityset
--[[
local arguments
      = import 'lua-nucleo/args.lua'
      {
        'arguments'
      }
]]
local make_concatter -- TODO: rename, is not factory
do
  make_concatter = function()
    local buf = { }
    local function cat(v)
      buf[#buf + 1] = v
      return cat
    end
    local concat = function(glue)
      return table_concat(buf, glue or "")
    end
    return cat, concat
  end
end
-- Remove trailing and leading whitespace from string.
-- From Programming in Lua 2 20.4
local trim = function(s)
  return (s:gsub("^%s*(.-)%s*$", "%1"))
end
local create_escape_subst = function(string_subst, ignore)
  ignore = ignore or { "\n", "\t" }
  local subst = setmetatable(
      tidentityset(ignore),
      {
        __metatable = "escape.char";
        __index = function(t, k)
          local v = (string_subst):format(k:byte())
          t[k] = v
          return v
        end;
      }
    )
  return subst
end
-- WARNING: This is not a suitable replacement for urlencode
local escape_string
do
  local escape_subst = create_escape_subst("%%%02X")
  escape_string = function(str)
    return (str:gsub("[%c%z\128-\255]", escape_subst))
  end
end
local url_encode
do
  local escape_subst = create_escape_subst("%%%02X")
  url_encode = function(str)
    return str:gsub("([^%w-_ ])", escape_subst):gsub(" ", "+")
  end
end
local htmlspecialchars = nil
do
  local subst =
  {
    ["&"] = "&";
    ['"'] = """;
    ["'"] = "'";
    ["<"] = "<";
    [">"] = ">";
  }
  htmlspecialchars = function(value)
    if type(value) == "number" then
      return value
    end
    value = tostring(value)
    return (value:gsub("[&\"'<>]", subst))
  end
end
local cdata_wrap = function(value)
  -- "]]>" is escaped as ("]]" + "]]><![CDATA[" + ">")
  return '<![CDATA[' .. value:gsub("]]>", ']]]]><![CDATA[>') .. ']]>'
end
local cdata_cat = function(cat, value)
  -- "]]>" is escaped as ("]]" + "]]><![CDATA[" + ">")
  cat '<![CDATA[' (value:gsub("]]>", ']]]]><![CDATA[>')) ']]>'
end
--- Split a string by char.
--
-- Returns an array of strings, each of which is a substring of string formed by
-- splitting it on boundaries formed by the char delimiter.
--
-- @tparam string str Input string
-- @tparam string delimiter Boundary char
-- @treturn table Returns an array of strings created by splitting the string
-- parameter on boundaries formed by the delimiter
local split_by_char = function(str, delimiter)
  assert(type(str) == "string", "Param str must be a string")
  assert(
      type(delimiter) == "string" and #delimiter == 1,
      "Invalid delimiter"
    )
  if str == "" then
    return { }
  end
  
  local sep = delimiter:byte()
  local result = { }
  local pos = 1
  -- lookup delimiter in string
  for i = 1, #str do
    -- delimiter found?
    if str:byte(i) == sep then
      -- store chunk before delimiter
      result[#result + 1] = str:sub(pos, i - 1)
      pos = i + 1
    end
  end
  -- store string remainder
  result[#result + 1] = str:sub(pos)
  return result
end
--- Count the number of substring occurrences.
-- @tparam string str The string to search in
-- @tparam string substr The substring to search for, must be not empty
-- @treturn number Returns the number of substring occurrences
local count_substrings = function(str, substr)
  -- Check substring length to prevent infinite loop
  assert(#substr > 0, "substring must be not empty")
  -- Main calculation loop
  local count = 0
  local s, e = nil, 0
  while true do
    s, e = str:find(substr, e + 1, true)
    if s ~= nil then
      count = count + 1
    else
      break
    end
  end
  return count
end
--- Split a string into two parts at offset.
-- @tparam string str Input string
-- @tparam number offset Offset at which string will be splitted
-- @treturn table Returns two strings, the first one - is to the left from offset
-- and the second one to the right from offset
local split_by_offset = function(str, offset, skip_right)
  assert(offset <= #str, "offset greater than str length")
  return str:sub(1, offset), str:sub(offset + 1 + (skip_right or 0))
end
--- Expands variables in input string matched by capture string with values
-- from dictionary.
-- @tparam string capture Variable matching expression
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables's values
-- @treturn string A result string, where variables substituted with values
-- @usage Universal value substitution to any placeholder, for example:
-- fill_placeholders_ex("%$%((.-)%)", "a = $(a)", { a = 42 })
-- returns "a = 42"
-- @see fill_placeholders
-- @see fill_curly_placeholders
local fill_placeholders_ex = function(capture, str, dict)
  return (str:gsub(capture, dict))
end
--- Expands variables like $(varname) with values from dictionary.
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables's values
-- @treturn string A result string, where variables substituted with values
-- @usage fill_placeholders("a = $(a)", { a = 42 })
-- returns "a = 42"
local fill_placeholders = function(str, dict)
  return fill_placeholders_ex("%$%((.-)%)", str, dict)
end
--- Expands variables like ${varname} with values from dictionary.
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables's values
-- @treturn string A result string, where variables substituted with values
-- @usage fill_placeholders("a = ${a}", { a = 42 })
-- returns "a = 42"
local fill_curly_placeholders = function(str, dict)
  return fill_placeholders_ex("%${(.-)}", str, dict)
end
--- Convert non-hierarchical table into string.
--
-- Values of key and value are concatted using custom glue `kv_glue`.
-- Allowed values for key and value are numbers and strings.
-- Pairs are concatted using custom glue `pair_glue`.
-- Table can be traversed using custom iterator `pairs_fn`.
-- @tparam table t Non-hierarchical table with [key]=value pairs
-- @tparam string kv_glue Glue between key and value
-- @tparam string pair_glue Glue between pairs (defaut: "")
-- @tparam function pairs_fn Table iterator (default: pairs)
-- @treturn string A result string
-- @usage kv_concat({a = 1, b = 2}, " => ", "; ", pairs)
local kv_concat = function(t, kv_glue, pair_glue, pairs_fn)
  pair_glue = pair_glue or ""
  pairs_fn = pairs_fn or pairs
  local cat, concat = make_concatter()
  local glue = ""
  for k, v in pairs_fn(t) do
    cat (glue) (k) (kv_glue) (v)
    glue = pair_glue
  end
  return concat()
end
local escape_lua_pattern
do
  local matches =
  {
    ["^"] = "%^";
    ["$"] = "%$";
    ["("] = "%(";
    [")"] = "%)";
    ["%"] = "%%";
    ["."] = "%.";
    ["["] = "%[";
    ["]"] = "%]";
    ["*"] = "%*";
    ["+"] = "%+";
    ["-"] = "%-";
    ["?"] = "%?";
    ["\0"] = "%z";
  }
  escape_lua_pattern = function(s)
    return (s:gsub(".", matches))
  end
end
local escape_for_json
do
  -- Based on luajson code (comments copied verbatim).
  -- https://github.com/harningt/luajson/blob/master/lua/json/encode/strings.lua
  local matches =
  {
    ['"'] = '\\"';
    ['\\'] = '\\\\';
-- ['/'] = '\\/'; -- TODO: ?! Do we really need to escape this?
    ['\b'] = '\\b';
    ['\f'] = '\\f';
    ['\n'] = '\\n';
    ['\r'] = '\\r';
    ['\t'] = '\\t';
    ['\v'] = '\\v'; -- not in official spec, on report, removing
  }
  -- Pre-encode the control characters to speed up encoding...
  -- NOTE: UTF-8 may not work out right w/ JavaScript
  -- JavaScript uses 2 bytes after a \u... yet UTF-8 is a
  -- byte-stream encoding, not pairs of bytes (it does encode
  -- some letters > 1 byte, but base case is 1)
  for i = 0, 255 do
    local c = string.char(i)
    if c:match('[%z\1-\031\128-\255]') and not matches[c] then
      -- WARN: UTF8 specializes values >= 0x80 as parts of sequences...
      -- without \x encoding, do not allow encoding > 7F
      matches[c] = ('\\u%.4X'):format(i)
    end
  end
  escape_for_json = function(s)
    return '"' .. s:gsub('[\\"/%z\1-\031]', matches) .. '"'
  end
end
local starts_with = function(str, prefix)
  if type(str) ~= 'string' or type(prefix) ~= 'string' then return false end
  local plen = #prefix
  return (#str >= plen) and (str:sub(1, plen) == prefix)
end
local ends_with = function(str, suffix)
  if type(str) ~= 'string' or type(suffix) ~= 'string' then return false end
  local slen = #suffix
  return slen == 0 or ((#str >= slen) and (str:sub(-slen, -1) == suffix))
end
local integer_to_string_with_base
do
  -- TODO: use arbitrary set of digits
  -- https://github.com/lua-nucleo/lua-nucleo/issues/2
  local digits =
  {
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B";
    "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N";
    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z";
  }
  integer_to_string_with_base = function(n, base)
    base = base or 10
    assert(type(n) == "number", "n must be a number")
    assert(type(base) == "number", "base must be a number")
    assert(base > 0 and base <= #digits, "base out of range")
    assert(n == n, "n is nan")
    assert(n ~= 1 / 0 and n ~= -1 / 0, "n is inf")
    n = math_floor(n)
    if base == 10 or n == 0 then
      return tostring(n)
    end
    local sign = ""
    if n < 0 then
      sign = "-"
      n = -n
    end
    local r = { }
    while n ~= 0 do
      r[#r + 1] = digits[(n % base) + 1]
      n = math_floor(n / base)
    end
    return sign .. table_concat(r, ""):reverse()
  end
end
local cut_with_ellipsis
do
  local ellipsis = "..."
  local ellipsis_length = #ellipsis
  cut_with_ellipsis = function(str, max_length)
    max_length = max_length or 80
    arguments(
        "string", str,
        "number", max_length
      )
    assert(max_length > 0, "required string length must be positive")
    if #str > max_length then
      if max_length > ellipsis_length then
        str = str:sub(1, max_length - ellipsis_length) .. ellipsis
      else
        str = str:sub(1, max_length)
      end
   end
    return str
  end
end
-- convert numbers into loadable string, including inf, -inf and nan
local number_to_string
local serialize_number
do
  local t =
  {
    [tostring(1/0)] = "1/0";
    [tostring(-1/0)] = "-1/0";
    [tostring(0/0)] = "0/0";
  }
  number_to_string = function(number)
    -- no argument checking - called very often
    local text = tostring(number)
    return t[text] or text
  end
  serialize_number = function(number)
    -- no argument checking - called very often
    local text = ("%.17g"):format(number)
    -- on the same platform tostring() and string.format()
    -- return the same results for 1/0, -1/0, 0/0
    -- so we don't need separate substitution table
    return t[text] or text
  end
end
local get_escaped_chars_in_ranges
do
  --- Returns '%'-separated character string.
  -- @param ranges If range[i], range[i+1] are numbers, concats all chars ('%'
  -- separated) from char with ranges[1] code to char with ranges[2] code,
  -- concats it to same way to ranges[3] - ranges[4], and so on.
  --
  -- If range[i], range[i+1] are strings,
  -- ignore all string chars but first, and
  -- concats all chars ('%' separated) from ranges[1][1] to ranges[2][1],
  -- concats it to ranges[3][1] - ranges[4][1], and so on.
  --
  -- If range[i], range[i+1] are different types, also works fine, for example:
  -- get_escaped_chars_in_ranges({"0",50}) returns "%0%1%2".
  -- @treturn string Returns '%'-separated character string.
  -- @local here
  get_escaped_chars_in_ranges = function(ranges)
    assert(
        type(ranges) == "table",
        "argument must be a table"
      )
    assert(
        #ranges % 2 == 0,
        "argument must have even number of elements"
      )
    local cat, concat = make_concatter()
    for i = 1, #ranges, 2 do
      local char_code_start = ranges[i]
      local char_code_end = ranges[i + 1]
      if type(char_code_start) == "string" then
        char_code_start = string_byte(char_code_start)
      end
      if type(char_code_end) == "string" then
        char_code_end = string_byte(char_code_end)
      end
      assert(
          type(char_code_start) == "number"
            and type(char_code_end) == "number",
          "argument elements must be numbers or strings"
        )
      for i = char_code_start, char_code_end do
        cat "%" (string_char(i))
      end
    end
    return concat()
  end
end
return
{
  escape_string = escape_string;
  make_concatter = make_concatter;
  trim = trim;
  create_escape_subst = create_escape_subst;
  htmlspecialchars = htmlspecialchars;
  fill_placeholders_ex = fill_placeholders_ex;
  fill_placeholders = fill_placeholders;
  fill_curly_placeholders = fill_curly_placeholders;
  cdata_wrap = cdata_wrap;
  cdata_cat = cdata_cat;
  split_by_char = split_by_char;
  split_by_offset = split_by_offset;
  count_substrings = count_substrings;
  kv_concat = kv_concat;
  escape_lua_pattern = escape_lua_pattern;
  escape_for_json = escape_for_json;
  starts_with = starts_with;
  ends_with = ends_with;
  url_encode = url_encode;
  integer_to_string_with_base = integer_to_string_with_base;
  cut_with_ellipsis = cut_with_ellipsis;
  number_to_string = number_to_string;
  serialize_number = serialize_number;
  get_escaped_chars_in_ranges = get_escaped_chars_in_ranges;
}test run test run with input download show line numbers
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Recognizer | Recognition Result | Visualize | Recalc | 
|---|---|---|---|
| #308 | javax.imageio.IIOException: Can't get input stream from URL! | [visualize] | 
| Snippet ID: | #156 | 
| Snippet name: | string.lua (luanucleo) | 
| Eternal ID of this version: | #156/1 | 
| Text MD5: | 1f8b1c5fd10779250dd8c24bc8737602 | 
| Author: | stefan | 
| Category: | |
| Type: | Lua code | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2014-01-13 03:50:10 | 
| Source code size: | 14538 bytes / 490 lines | 
| Pitched / IR pitched: | Yes / Yes | 
| Views / Downloads: | 1533 / 407 | 
| Referenced in: | [show references] |