Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

490
LINES

< > BotCompany Repo | #156 // string.lua (luanucleo)

Lua code

1  
-------------------------------------------------------------------------------
2  
--- String-related tools
3  
-- @module lua-nucleo.string
4  
-- This file is a part of lua-nucleo library
5  
-- @copyright lua-nucleo authors (see file `COPYRIGHT` for the license)
6  
--------------------------------------------------------------------------------
7  
8  
local table_concat, table_insert = table.concat, table.insert
9  
local math_floor = math.floor
10  
local string_find, string_sub, string_format = string.find, string.sub, string.format
11  
local string_byte, string_char = string.byte, string.char
12  
local assert, pairs, type = assert, pairs, type
13  
14  
local tidentityset = go('#157').tidentityset
15  
16  
--[[
17  
local arguments
18  
      = import 'lua-nucleo/args.lua'
19  
      {
20  
        'arguments'
21  
      }
22  
]]
23  
24  
--- Create a pair of closures for incremental string building.
-- TODO: rename, is not factory
-- @treturn function `cat(v)`: appends `v` to an internal buffer and returns
-- itself, so calls can be chained: `cat "a" "b" (42)`
-- @treturn function `concat(glue)`: joins everything appended so far with
-- `glue` (default: "") and returns the resulting string
local make_concatter = function()
  local pieces = { }

  local cat
  cat = function(v)
    pieces[#pieces + 1] = v
    return cat
  end

  local function concat(glue)
    return table_concat(pieces, glue or "")
  end

  return cat, concat
end
41  
42  
--- Strip leading and trailing whitespace from a string.
-- Idiom taken from Programming in Lua 2, section 20.4.
-- @tparam string s Input string
-- @treturn string `s` without the surrounding whitespace
local trim = function(s)
  -- Assigning to a single local drops gsub's second result (the match count).
  local stripped = s:gsub("^%s*(.-)%s*$", "%1")
  return stripped
end
47  
48  
--- Build a lazily-filled substitution table for character escaping.
-- The table starts out as `tidentityset(ignore)`. Any other key that is looked
-- up is formatted with `string_subst` (which receives the byte code of the
-- key), cached in the table and returned.
-- @tparam string string_subst Format string applied to the character code
-- @tparam table ignore Array passed to `tidentityset`
-- (default: `{ "\n", "\t" }`)
-- @treturn table Substitution table, suitable as a `gsub` replacement
local create_escape_subst = function(string_subst, ignore)
  local passthrough = tidentityset(ignore or { "\n", "\t" })

  -- Called on a cache miss: compute the escape once and remember it.
  local escape_on_miss = function(cache, char)
    local escaped = string_subst:format(char:byte())
    cache[char] = escaped
    return escaped
  end

  return setmetatable(
      passthrough,
      {
        __metatable = "escape.char";
        __index = escape_on_miss;
      }
    )
end
63  
64  
--- Percent-escape control, zero and non-ASCII bytes of a string.
-- Each matched byte is looked up in a table made by
-- `create_escape_subst("%%%02X")` with its default ignore list.
-- WARNING: This is not a suitable replacement for urlencode
-- @tparam string str Input string
-- @treturn string Escaped string
local escape_string
do
  local percent_escapes = create_escape_subst("%%%02X")

  escape_string = function(str)
    local escaped = str:gsub("[%c%z\128-\255]", percent_escapes)
    return escaped
  end
end
72  
73  
--- Encode a string for use in a URL query (form-style encoding).
-- Letters, digits, "-" and "_" are kept as is, spaces become "+", and every
-- other byte (including newline and tab) is replaced with `%XX`.
-- @tparam string str Input string
-- @treturn string Encoded string (a single return value)
local url_encode
do
  -- An empty ignore list is passed explicitly: the default list of
  -- create_escape_subst would let "\n" and "\t" through unencoded.
  local escape_subst = create_escape_subst("%%%02X", { })

  url_encode = function(str)
    -- "-" is escaped inside the set: the interaction between classes (%w)
    -- and ranges is not defined by the Lua manual.
    local encoded = str:gsub("([^%w%-_ ])", escape_subst)
    -- Parentheses drop gsub's substitution count, so callers get one value.
    return (encoded:gsub(" ", "+"))
  end
end
80  
81  
--- Replace HTML special characters with entities.
-- Handles `&`, `"`, `'`, `<` and `>`. Numbers are returned untouched (still
-- as numbers); any other value is converted with `tostring` first.
-- @param value Value to escape
-- @return The number itself, or the escaped string
local htmlspecialchars
do
  local entities =
  {
    ["<"] = "&lt;";
    [">"] = "&gt;";
    ["&"] = "&amp;";
    ["'"] = "&apos;";
    ['"'] = "&quot;";
  }

  htmlspecialchars = function(value)
    if type(value) ~= "number" then
      local escaped = tostring(value):gsub("[&\"'<>]", entities)
      return escaped
    end
    return value
  end
end
100  
101  
--- Wrap a string into an XML CDATA section.
-- Every "]]>" inside the value is split across two sections:
-- "]]>" becomes ("]]" + "]]><![CDATA[" + ">").
-- @tparam string value Text to wrap
-- @treturn string The CDATA-wrapped text
local cdata_wrap = function(value)
  local escaped = value:gsub("]]>", "]]]]><![CDATA[>")
  return "<![CDATA[" .. escaped .. "]]>"
end
105  
106  
--- Feed a CDATA-wrapped string into a chainable concatter.
-- Same escaping as in `cdata_wrap`: "]]>" becomes
-- ("]]" + "]]><![CDATA[" + ">"). Returns nothing.
-- @tparam function cat Chainable appender (each call must return a callable)
-- @tparam string value Text to wrap
local cdata_cat = function(cat, value)
  local after_open = cat("<![CDATA[")
  -- gsub's results are forwarded unadjusted, exactly as a chained call would.
  local after_body = after_open(value:gsub("]]>", "]]]]><![CDATA[>"))
  after_body("]]>")
end
110  
111  
--- Split a string on a single-character delimiter.
--
-- The delimiter is matched literally. Empty chunks are kept, so `"a,,b"`
-- yields `{ "a", "", "b" }`. An empty input string yields an empty table.
--
-- @tparam string str Input string
-- @tparam string delimiter Boundary char (exactly one character long)
-- @treturn table Array of the substrings found between delimiters
local split_by_char = function(str, delimiter)
  assert(type(str) == "string", "Param str must be a string")
  assert(
      type(delimiter) == "string" and #delimiter == 1,
      "Invalid delimiter"
    )

  local chunks = { }
  if str == "" then
    return chunks
  end

  local chunk_start = 1
  while true do
    -- plain search: the delimiter must not be treated as a pattern
    local hit = str:find(delimiter, chunk_start, true)
    if not hit then
      break
    end
    chunks[#chunks + 1] = str:sub(chunk_start, hit - 1)
    chunk_start = hit + 1
  end

  -- whatever follows the last delimiter (possibly an empty string)
  chunks[#chunks + 1] = str:sub(chunk_start)

  return chunks
end
149  
150  
--- Count non-overlapping occurrences of a substring.
-- The substring is matched literally (plain search); each new search starts
-- right after the end of the previous match.
-- @tparam string str The string to search in
-- @tparam string substr The substring to search for, must be not empty
-- @treturn number Number of occurrences found
local count_substrings = function(str, substr)
  -- An empty substring would match forever at the same position
  assert(#substr > 0, "substring must be not empty")

  local total = 0
  local _, match_end = str:find(substr, 1, true)
  while match_end do
    total = total + 1
    _, match_end = str:find(substr, match_end + 1, true)
  end

  return total
end
172  
173  
--- Split a string into two parts at offset.
-- @tparam string str Input string
-- @tparam number offset Length of the left part; must not exceed `#str`
-- @tparam number skip_right Number of characters to drop from the start of
-- the right part (default: 0)
-- @treturn string The first `offset` characters of `str`
-- @treturn string The rest of `str`, minus the `skip_right` skipped characters
local split_by_offset = function(str, offset, skip_right)
  assert(offset <= #str, "offset greater than str length")

  local left = str:sub(1, offset)
  local right_start = offset + 1 + (skip_right or 0)

  return left, str:sub(right_start)
end
182  
183  
--- Substitute placeholders found by a custom pattern with dictionary values.
-- The first capture of `capture` is used as the lookup key in `dict`;
-- placeholders whose key is missing from `dict` are left untouched.
-- @tparam string capture Lua pattern matching a placeholder
-- @tparam string str Input string, containing placeholders to expand
-- @tparam table dict Dictionary with the placeholder values
-- @treturn string The string with placeholders replaced by their values
-- @usage Universal value substitution to any placeholder, for example:
-- fill_placeholders_ex("%$%((.-)%)", "a = $(a)", { a = 42 })
-- returns "a = 42"
-- @see fill_placeholders
-- @see fill_curly_placeholders
local fill_placeholders_ex = function(capture, str, dict)
  local expanded = str:gsub(capture, dict)
  return expanded
end
197  
198  
--- Substitute `$(varname)` placeholders with values from a dictionary.
-- @tparam string str Input string, containing placeholders to expand
-- @tparam table dict Dictionary with the placeholder values
-- @treturn string The string with placeholders replaced by their values
-- @usage fill_placeholders("a = $(a)", { a = 42 })
-- returns "a = 42"
local fill_placeholders = function(str, dict)
  local round_bracket_capture = "%$%((.-)%)"
  return fill_placeholders_ex(round_bracket_capture, str, dict)
end
207  
208  
--- Substitute `${varname}` placeholders with values from a dictionary.
-- @tparam string str Input string, containing placeholders to expand
-- @tparam table dict Dictionary with the placeholder values
-- @treturn string The string with placeholders replaced by their values
-- @usage fill_curly_placeholders("a = ${a}", { a = 42 })
-- returns "a = 42"
local fill_curly_placeholders = function(str, dict)
  local curly_bracket_capture = "%${(.-)}"
  return fill_placeholders_ex(curly_bracket_capture, str, dict)
end
217  
218  
--- Render a flat table as a string of glued key/value pairs.
--
-- Each pair is written as key, `kv_glue`, value; consecutive pairs are
-- separated by `pair_glue`. Keys and values are expected to be numbers or
-- strings. The traversal order is whatever `pairs_fn` produces.
-- @tparam table t Non-hierarchical table with [key]=value pairs
-- @tparam string kv_glue Glue between key and value
-- @tparam string pair_glue Glue between pairs (default: "")
-- @tparam function pairs_fn Table iterator (default: pairs)
-- @treturn string A result string
-- @usage kv_concat({a = 1, b = 2}, " => ", "; ", pairs)
local kv_concat = function(t, kv_glue, pair_glue, pairs_fn)
  local iterate = pairs_fn or pairs
  local between_pairs = pair_glue or ""

  local cat, concat = make_concatter()

  -- Nothing precedes the first pair; later pairs are led by the pair glue.
  local prefix = ""
  for key, value in iterate(t) do
    cat(prefix)(key)(kv_glue)(value)
    prefix = between_pairs
  end

  return concat()
end
242  
243  
--- Escape a string so it can be used literally inside a Lua pattern.
-- Each of the magic characters `^ $ ( ) % . [ ] * + - ?` gets a `%` prefix;
-- a zero byte is replaced with the `%z` class.
-- @tparam string s String to escape
-- @treturn string Escaped string
local escape_lua_pattern
do
  local escapes = { ["\0"] = "%z" }
  for magic in ("^$()%.[]*+-?"):gmatch(".") do
    escapes[magic] = "%" .. magic
  end

  escape_lua_pattern = function(s)
    -- Characters without an entry in the table are kept as is by gsub.
    local escaped = s:gsub(".", escapes)
    return escaped
  end
end
266  
267  
--- Serialize a string as a double-quoted JSON string literal.
-- Escapes `"`, `\` and all control characters with codes 0..31. Bytes with
-- codes of 128 and above and the `/` character are passed through untouched.
-- @tparam string s String to encode
-- @treturn string JSON string literal, including the surrounding quotes
local escape_for_json
do
  -- Based on luajson code.
  -- https://github.com/harningt/luajson/blob/master/lua/json/encode/strings.lua

  -- Two-character escapes known to JSON. There is intentionally no entry for
  -- "\v": JSON has no such escape, so vertical tab falls through to the
  -- generic \u000B form generated below.
  local matches =
  {
    ['"'] = '\\"';
    ['\\'] = '\\\\';
    ['\b'] = '\\b';
    ['\f'] = '\\f';
    ['\n'] = '\\n';
    ['\r'] = '\\r';
    ['\t'] = '\\t';
  }

  -- Pre-encode the remaining control characters to speed up encoding.
  -- Only codes 0..31 are needed: nothing else is matched by the pattern used
  -- in escape_for_json.
  for i = 0, 31 do
    local c = string.char(i)
    if not matches[c] then
      matches[c] = ('\\u%.4X'):format(i)
    end
  end

  escape_for_json = function(s)
    -- gsub's match count is dropped: a call inside a concatenation is
    -- adjusted to a single value.
    return '"' .. s:gsub('[\\"%z\1-\031]', matches) .. '"'
  end
end
303  
304  
--- Check whether a string begins with the given prefix.
-- @param str Value to test; anything but a string yields `false`
-- @param prefix Expected prefix; anything but a string yields `false`
-- @treturn boolean `true` if `str` starts with `prefix`
local starts_with = function(str, prefix)
  if type(str) == 'string' and type(prefix) == 'string' then
    local plen = #prefix
    if #str < plen then
      return false
    end
    return str:sub(1, plen) == prefix
  end
  return false
end
309  
310  
--- Check whether a string ends with the given suffix.
-- @param str Value to test; anything but a string yields `false`
-- @param suffix Expected suffix; anything but a string yields `false`
-- @treturn boolean `true` if `str` ends with `suffix`
local ends_with = function(str, suffix)
  if type(str) ~= 'string' or type(suffix) ~= 'string' then
    return false
  end

  local slen = #suffix
  -- Handled separately: str:sub(-0) would return the whole string.
  if slen == 0 then
    return true
  end
  if #str < slen then
    return false
  end
  return str:sub(-slen) == suffix
end
315  
316  
--- Convert a number to its textual representation in the given base.
-- The number is rounded down with `math.floor` first. Digits above 9 are
-- written as upper-case latin letters; negative numbers get a "-" prefix.
-- @tparam number n Number to convert; must not be nan or infinite
-- @tparam number base Integer base in the range 2..36 (default: 10)
-- @treturn string Representation of `floor(n)` in the requested base
local integer_to_string_with_base
do
  -- TODO: use arbitrary set of digits
  -- https://github.com/lua-nucleo/lua-nucleo/issues/2
  local digits =
  {
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B";
    "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N";
    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z";
  }

  integer_to_string_with_base = function(n, base)
    base = base or 10

    assert(type(n) == "number", "n must be a number")
    assert(type(base) == "number", "base must be a number")
    -- Base 1 must be rejected: dividing by 1 never shrinks n, so the
    -- conversion loop below would not terminate.
    assert(base >= 2 and base <= #digits, "base out of range")
    -- A fractional base would produce fractional (missing) digit indices.
    assert(base == math_floor(base), "base must be an integer")

    assert(n == n, "n is nan")
    assert(n ~= 1 / 0 and n ~= -1 / 0, "n is inf")

    n = math_floor(n)
    if base == 10 or n == 0 then
      return tostring(n)
    end

    local sign = ""
    if n < 0 then
      sign = "-"
      n = -n
    end

    -- Digits are produced least significant first, hence the reverse().
    local r = { }
    while n ~= 0 do
      r[#r + 1] = digits[(n % base) + 1]
      n = math_floor(n / base)
    end
    return sign .. table_concat(r, ""):reverse()
  end
end
356  
357  
--- Shorten a string to at most `max_length` characters.
-- Strings that already fit are returned unchanged. A longer string is cut
-- and terminated with "..." so that the total length equals `max_length`;
-- when `max_length` leaves no room for the ellipsis (3 characters or less),
-- the string is simply truncated.
-- @tparam string str Input string
-- @tparam number max_length Maximum length of the result, must be positive
-- (default: 80)
-- @treturn string The original or the shortened string
local cut_with_ellipsis
do
  local ellipsis = "..."
  local ellipsis_length = #ellipsis

  cut_with_ellipsis = function(str, max_length)
    max_length = max_length or 80

    -- Argument types are checked inline: the `arguments` helper is not
    -- imported in this file, so calling it raised a nil-call error.
    assert(type(str) == "string", "str must be a string")
    assert(type(max_length) == "number", "max_length must be a number")

    assert(max_length > 0, "required string length must be positive")

    if #str > max_length then
      if max_length > ellipsis_length then
        str = str:sub(1, max_length - ellipsis_length) .. ellipsis
      else
        str = str:sub(1, max_length)
      end
    end

    return str
  end
end
383  
384  
--- Number-to-text helpers producing loadable Lua expressions.
-- Both functions return "1/0", "-1/0" and "0/0" for inf, -inf and nan;
-- any other number is returned as plain formatted text.
-- `number_to_string` formats with `tostring`, `serialize_number` formats
-- with "%.17g".
-- @tparam number number Number to convert
-- @treturn string Textual representation of the number
local number_to_string
local serialize_number
do
  local special = { }

  -- Register the spelling of a special value under both formatting methods,
  -- so neither function depends on the other one's output format.
  local register = function(value, code)
    special[tostring(value)] = code
    special[("%.17g"):format(value)] = code
  end

  register(1 / 0, "1/0")
  register(-1 / 0, "-1/0")
  -- Both signs of nan are registered: some platforms print them differently
  -- ("nan" vs "-nan"), and either one may come out of a computation.
  register(0 / 0, "0/0")
  register(-(0 / 0), "0/0")

  number_to_string = function(number)
    -- no argument checking - called very often
    local text = tostring(number)
    return special[text] or text
  end

  serialize_number = function(number)
    -- no argument checking - called very often
    local text = ("%.17g"):format(number)
    return special[text] or text
  end
end
408  
409  
local get_escaped_chars_in_ranges
do
  -- Turn a range bound into a character code: for a string this is the code
  -- of its first character, any other value is passed through unchanged.
  local to_char_code = function(bound)
    if type(bound) == "string" then
      return string_byte(bound)
    end
    return bound
  end

  --- Build a string of '%'-prefixed characters covering the given ranges.
  -- @param ranges Array with an even number of elements, read as consecutive
  -- (start, end) pairs. Each bound is either a character code (number) or a
  -- string, of which only the first character is used. Every character from
  -- start to end (inclusive) is appended with a '%' in front of it.
  --
  -- Bounds of different types may be mixed within a pair, for example:
  -- get_escaped_chars_in_ranges({"0",50}) returns "%0%1%2".
  -- @treturn string Returns '%'-separated character string.
  -- @local here
  get_escaped_chars_in_ranges = function(ranges)
    assert(
        type(ranges) == "table",
        "argument must be a table"
      )

    assert(
        #ranges % 2 == 0,
        "argument must have even number of elements"
      )

    local cat, concat = make_concatter()

    for pos = 1, #ranges, 2 do
      local first_code = to_char_code(ranges[pos])
      local last_code = to_char_code(ranges[pos + 1])

      assert(
          type(first_code) == "number"
            and type(last_code) == "number",
          "argument elements must be numbers or strings"
        )

      for code = first_code, last_code do
        cat("%")(string_char(code))
      end
    end

    return concat()
  end
end
463  
464  
-- Public interface of the module (entries are listed alphabetically).
return
{
  cdata_cat = cdata_cat;
  cdata_wrap = cdata_wrap;
  count_substrings = count_substrings;
  create_escape_subst = create_escape_subst;
  cut_with_ellipsis = cut_with_ellipsis;
  ends_with = ends_with;
  escape_for_json = escape_for_json;
  escape_lua_pattern = escape_lua_pattern;
  escape_string = escape_string;
  fill_curly_placeholders = fill_curly_placeholders;
  fill_placeholders = fill_placeholders;
  fill_placeholders_ex = fill_placeholders_ex;
  get_escaped_chars_in_ranges = get_escaped_chars_in_ranges;
  htmlspecialchars = htmlspecialchars;
  integer_to_string_with_base = integer_to_string_with_base;
  kv_concat = kv_concat;
  make_concatter = make_concatter;
  number_to_string = number_to_string;
  serialize_number = serialize_number;
  split_by_char = split_by_char;
  split_by_offset = split_by_offset;
  starts_with = starts_with;
  trim = trim;
  url_encode = url_encode;
}

test run  test run with input  download  show line numbers   

Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Image recognition results

Recognizer Recognition Result Visualize Recalc
#308 javax.imageio.IIOException: Can't get input stream from URL! [visualize]

Snippet ID: #156
Snippet name: string.lua (luanucleo)
Eternal ID of this version: #156/1
Text MD5: 1f8b1c5fd10779250dd8c24bc8737602
Author: stefan
Category:
Type: Lua code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2014-01-13 03:50:10
Source code size: 14538 bytes / 490 lines
Pitched / IR pitched: Yes / Yes
Views / Downloads: 1092 / 275
Referenced in: [show references]