1 | ------------------------------------------------------------------------------- |
2 | --- String-related tools |
3 | -- @module lua-nucleo.string |
4 | -- This file is a part of lua-nucleo library |
5 | -- @copyright lua-nucleo authors (see file `COPYRIGHT` for the license) |
6 | -------------------------------------------------------------------------------- |
7 | |
8 | local table_concat, table_insert = table.concat, table.insert |
9 | local math_floor = math.floor |
10 | local string_find, string_sub, string_format = string.find, string.sub, string.format |
11 | local string_byte, string_char = string.byte, string.char |
12 | local assert, pairs, type = assert, pairs, type |
13 | |
14 | local tidentityset = go('#157').tidentityset |
15 | |
16 | --[[ |
17 | local arguments |
18 | = import 'lua-nucleo/args.lua' |
19 | { |
20 | 'arguments' |
21 | } |
22 | ]] |
23 | |
--- Create a chainable string accumulator.
-- @treturn function `cat(v)`: appends `v` to an internal buffer and returns
--   itself, so calls can be chained: cat "a" "b" (c)
-- @treturn function `concat(glue)`: joins everything appended so far with
--   `glue` (default: "")
local make_concatter -- TODO: rename, is not factory
do
  make_concatter = function()
    local pieces = { }

    local cat
    cat = function(piece)
      -- Appending nil is a no-op: the buffer length does not grow
      pieces[#pieces + 1] = piece
      return cat
    end

    local function concat(glue)
      if not glue then
        glue = ""
      end
      return table_concat(pieces, glue)
    end

    return cat, concat
  end
end
41 | |
--- Strip leading and trailing whitespace from a string.
-- Based on the idiom from Programming in Lua 2, section 20.4.
-- @tparam string s Input string
-- @treturn string `s` without surrounding whitespace
local trim = function(s)
  -- The anchored pattern always matches; the lazy capture is the trimmed text
  local stripped = s:match("^%s*(.-)%s*$")
  return stripped
end
47 | |
--- Build a lazily filled substitution table for escaping single characters.
-- Looking up a character that is not yet in the table formats its byte code
-- with `string_subst`, stores the result in the table and returns it.
-- The table is seeded with `tidentityset(ignore)`.
-- NOTE(review): presumably tidentityset maps each listed character to itself,
-- so those characters are left unescaped -- confirm against tidentityset.
-- @tparam string string_subst Format string applied to the character byte code
-- @tparam[opt] table ignore List of characters used to seed the table
--   (default: { "\n", "\t" })
-- @treturn table Substitution table, usable as a `gsub` replacement
local create_escape_subst = function(string_subst, ignore)
  if not ignore then
    ignore = { "\n", "\t" }
  end

  -- Invoked only for characters missing from the table
  local compute_and_cache = function(cache, char)
    local escaped = string_subst:format(char:byte())
    cache[char] = escaped
    return escaped
  end

  return setmetatable(
      tidentityset(ignore),
      {
        __metatable = "escape.char";
        __index = compute_and_cache;
      }
    )
end
63 | |
--- Percent-escape control characters, NUL and bytes 128..255 in a string.
-- Matched characters are replaced through a table built by
-- `create_escape_subst("%%%02X")`, i.e. "%" followed by two uppercase hex
-- digits of the byte code, except for the characters in that helper's
-- default ignore list ("\n" and "\t").
-- WARNING: This is not a suitable replacement for urlencode
-- @tparam string str Input string
-- @treturn string Escaped string
local escape_string
do
  local percent_hex = create_escape_subst("%%%02X")
  local unsafe_bytes = "[%c%z\128-\255]"

  escape_string = function(str)
    -- Assigning to a single local drops the replacement count
    local escaped = str:gsub(unsafe_bytes, percent_hex)
    return escaped
  end
end
72 | |
--- Encode a string for use in a URL query component.
-- Every character except alphanumerics (`%w`), "-", "_" and space is replaced
-- with "%XX" (uppercase hex of the byte code); spaces are then replaced
-- with "+".
-- @tparam string str Input string
-- @treturn string Encoded string (a single value, without the gsub count)
local url_encode
do
  -- Empty ignore list: unlike escape_string, "\n" and "\t" must be encoded too.
  -- NOTE(review): assumes tidentityset accepts an empty list -- confirm.
  local escape_subst = create_escape_subst("%%%02X", { })

  url_encode = function(str)
    -- "-" is escaped inside the set so it can never be read as a range
    local encoded = str:gsub("([^%w%-_ ])", escape_subst)
    -- Parentheses drop the replacement count returned by gsub
    return (encoded:gsub(" ", "+"))
  end
end
80 | |
--- Escape the characters &, ", ', < and > as character entity references.
-- Numbers are returned unchanged (still as numbers); any other value is
-- converted with `tostring` before escaping.
-- @param value Value to escape
-- @return The escaped string, or `value` itself if it is a number
local htmlspecialchars = nil
do
  -- Each special character maps to its entity reference
  local subst =
  {
    ["&"] = "&amp;";
    ['"'] = "&quot;";
    ["'"] = "&apos;";
    ["<"] = "&lt;";
    [">"] = "&gt;";
  }

  htmlspecialchars = function(value)
    if type(value) == "number" then
      return value
    end
    value = tostring(value)
    -- Parentheses drop the replacement count returned by gsub
    return (value:gsub("[&\"'<>]", subst))
  end
end
100 | |
--- Wrap a string into an XML CDATA section.
-- @tparam string value Text to wrap
-- @treturn string `value` enclosed in "<![CDATA[" ... "]]>"
local cdata_wrap = function(value)
  -- "]]>" is escaped as ("]]" + "]]><![CDATA[" + ">")
  local body = value:gsub("]]>", ']]]]><![CDATA[>')
  return '<![CDATA[' .. body .. ']]>'
end
105 | |
--- Feed a string, wrapped into an XML CDATA section, to a concatter.
-- `cat` is called three times in a chain: with the opening marker, with the
-- escaped text and with the closing marker.
-- @tparam function cat Chainable accumulator (each call must return a callable)
-- @tparam string value Text to wrap
local cdata_cat = function(cat, value)
  local chain = cat('<![CDATA[')
  -- "]]>" is escaped as ("]]" + "]]><![CDATA[" + ">")
  local body = value:gsub("]]>", ']]]]><![CDATA[>')
  chain = chain(body)
  chain(']]>')
end
110 | |
--- Split a string on a single-character delimiter.
--
-- Returns the substrings found between occurrences of the delimiter, in
-- order. Adjacent, leading and trailing delimiters produce empty strings.
-- An empty input string yields an empty array.
--
-- @tparam string str Input string
-- @tparam string delimiter Boundary char (exactly one character)
-- @treturn table Array of substrings
local split_by_char = function(str, delimiter)
  assert(type(str) == "string", "Param str must be a string")
  assert(
      type(delimiter) == "string" and #delimiter == 1,
      "Invalid delimiter"
    )

  local chunks = { }
  if str == "" then
    return chunks
  end

  local chunk_start = 1
  while true do
    -- Plain search: the delimiter is never treated as a pattern
    local hit = str:find(delimiter, chunk_start, true)
    if not hit then
      break
    end
    chunks[#chunks + 1] = str:sub(chunk_start, hit - 1)
    chunk_start = hit + 1
  end

  -- Whatever follows the last delimiter (possibly an empty string)
  chunks[#chunks + 1] = str:sub(chunk_start)

  return chunks
end
149 | |
--- Count non-overlapping occurrences of a substring.
-- The search is plain (no pattern matching) and resumes right after the end
-- of each match.
-- @tparam string str The string to search in
-- @tparam string substr The substring to search for, must be not empty
-- @treturn number Returns the number of substring occurrences
local count_substrings = function(str, substr)
  -- An empty substring would match forever at the same position
  assert(#substr > 0, "substring must be not empty")

  local total = 0
  local _, match_end = str:find(substr, 1, true)
  while match_end do
    total = total + 1
    _, match_end = str:find(substr, match_end + 1, true)
  end

  return total
end
172 | |
--- Split a string into two parts at offset.
-- @tparam string str Input string
-- @tparam number offset Number of leading characters that go to the left part;
--   must not exceed the length of `str`
-- @tparam[opt=0] number skip_right Number of characters right after the
--   offset that are left out of the right part
-- @treturn string The part of `str` up to and including position `offset`
-- @treturn string The part of `str` starting at `offset + 1 + skip_right`
local split_by_offset = function(str, offset, skip_right)
  assert(offset <= #str, "offset greater than str length")

  local left = str:sub(1, offset)
  local right_start = offset + 1 + (skip_right or 0)
  local right = str:sub(right_start)

  return left, right
end
182 | |
--- Expand variables in a string using a custom matching pattern.
-- Each match of `capture` in `str` is looked up in `dict` (by its first
-- capture) via `gsub`; matches without a value in `dict` are left as is.
-- @tparam string capture Variable matching expression
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables' values
-- @treturn string A result string, where variables are substituted with values
-- @usage Universal value substitution to any placeholder, for example:
-- fill_placeholders_ex("%$%((.-)%)", "a = $(a)", { a = 42 })
-- returns "a = 42"
-- @see fill_placeholders
-- @see fill_curly_placeholders
local fill_placeholders_ex = function(capture, str, dict)
  -- Assigning to a single local drops the replacement count
  local expanded = str:gsub(capture, dict)
  return expanded
end
197 | |
--- Expand variables written as $(varname) with values from a dictionary.
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables' values
-- @treturn string A result string, where variables are substituted with values
-- @usage fill_placeholders("a = $(a)", { a = 42 })
-- returns "a = 42"
local fill_placeholders = function(str, dict)
  -- "$(" <shortest possible name> ")"
  local round_bracket_variable = "%$%((.-)%)"
  return fill_placeholders_ex(round_bracket_variable, str, dict)
end
207 | |
--- Expand variables written as ${varname} with values from a dictionary.
-- @tparam string str Input string, containing variables to expand
-- @tparam table dict Dictionary, containing variables' values
-- @treturn string A result string, where variables are substituted with values
-- @usage fill_curly_placeholders("a = ${a}", { a = 42 })
-- returns "a = 42"
local fill_curly_placeholders = function(str, dict)
  -- "${" <shortest possible name> "}"
  local curly_bracket_variable = "%${(.-)}"
  return fill_placeholders_ex(curly_bracket_variable, str, dict)
end
217 | |
--- Convert a non-hierarchical table into a string.
--
-- Each key is joined to its value with `kv_glue`; the resulting pairs are
-- joined with `pair_glue`. Keys and values must be numbers or strings.
-- The table is traversed with `pairs_fn`, which also determines pair order.
-- @tparam table t Non-hierarchical table with [key]=value pairs
-- @tparam string kv_glue Glue between key and value
-- @tparam string pair_glue Glue between pairs (default: "")
-- @tparam function pairs_fn Table iterator (default: pairs)
-- @treturn string A result string
-- @usage kv_concat({a = 1, b = 2}, " => ", "; ", pairs)
local kv_concat = function(t, kv_glue, pair_glue, pairs_fn)
  if not pair_glue then
    pair_glue = ""
  end
  if not pairs_fn then
    pairs_fn = pairs
  end

  local cat, concat = make_concatter()

  -- Nothing separates the first pair from the start of the string
  local separator = ""
  for key, value in pairs_fn(t) do
    cat(separator)
    cat(key)
    cat(kv_glue)
    cat(value)
    separator = pair_glue
  end

  return concat()
end
242 | |
--- Escape a string so that it matches itself literally as a Lua pattern.
-- Each of the characters ^ $ ( ) % . [ ] * + - ? is prefixed with "%", and a
-- NUL character is replaced with "%z".
-- @tparam string s String to escape
-- @treturn string Escaped string
local escape_lua_pattern
do
  local magic_chars = "^$()%.[]*+-?"

  local escapes = { ["\0"] = "%z" }
  for i = 1, #magic_chars do
    local ch = magic_chars:sub(i, i)
    escapes[ch] = "%" .. ch
  end

  escape_lua_pattern = function(s)
    -- Characters without an entry in the table are kept unchanged by gsub
    local escaped = s:gsub(".", escapes)
    return escaped
  end
end
266 | |
--- Convert a string into a double-quoted JSON string literal.
-- Double quote, backslash and the control characters 0..31 are escaped; all
-- other bytes (including "/" and bytes >= 128) are copied through unchanged.
-- @tparam string s String to encode
-- @treturn string JSON string literal, including the surrounding quotes
local escape_for_json
do
  -- Based on luajson code.
  -- https://github.com/harningt/luajson/blob/master/lua/json/encode/strings.lua

  local matches =
  {
    ['"'] = '\\"';
    ['\\'] = '\\\\';
    -- ['/'] = '\\/'; -- TODO: ?! Do we really need to escape this?
    ['\b'] = '\\b';
    ['\f'] = '\\f';
    ['\n'] = '\\n';
    ['\r'] = '\\r';
    ['\t'] = '\\t';
    -- No entry for '\v': "\v" is not a valid JSON escape sequence, so the
    -- vertical tab gets the generic \u000B form from the loop below.
  }

  -- Pre-encode the remaining control characters to speed up encoding.
  -- Only 0..31 are needed: the pattern below never matches bytes >= 128.
  for i = 0, 31 do
    local c = string.char(i)
    if not matches[c] then
      matches[c] = ('\\u%.4X'):format(i)
    end
  end

  escape_for_json = function(s)
    -- "/" is matched but has no table entry, so gsub leaves it unchanged.
    -- Inside the concatenation gsub's result is truncated to the string only.
    return '"' .. s:gsub('[\\"/%z\1-\031]', matches) .. '"'
  end
end
303 | |
--- Check whether a string begins with the given prefix.
-- @param str Value to check; anything but a string yields false
-- @param prefix Expected prefix; anything but a string yields false
-- @treturn boolean true if `str` starts with `prefix`
local starts_with = function(str, prefix)
  if not (type(str) == 'string' and type(prefix) == 'string') then
    return false
  end

  local prefix_length = #prefix
  if #str < prefix_length then
    return false
  end

  return str:sub(1, prefix_length) == prefix
end
309 | |
--- Check whether a string ends with the given suffix.
-- @param str Value to check; anything but a string yields false
-- @param suffix Expected suffix; anything but a string yields false
-- @treturn boolean true if `str` ends with `suffix`
local ends_with = function(str, suffix)
  if not (type(str) == 'string' and type(suffix) == 'string') then
    return false
  end

  local suffix_length = #suffix
  -- Handled separately: sub(-0) would select the whole string
  if suffix_length == 0 then
    return true
  end
  if #str < suffix_length then
    return false
  end

  return str:sub(-suffix_length) == suffix
end
315 | |
--- Convert a number to its string representation in the given base.
-- The number is rounded down with `math.floor` first. Digits above 9 are
-- written as uppercase letters; negative numbers get a leading "-".
-- @tparam number n Number to convert (must not be nan or infinite)
-- @tparam[opt=10] number base Integer base from 2 to 36
-- @treturn string Representation of floor(n) in `base`
-- Raises an error if an argument has a wrong type, `n` is nan or infinite,
-- or `base` is not an integer in the supported range.
local integer_to_string_with_base
do
  -- TODO: use arbitrary set of digits
  -- https://github.com/lua-nucleo/lua-nucleo/issues/2
  local digits =
  {
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B";
    "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N";
    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z";
  }

  integer_to_string_with_base = function(n, base)
    base = base or 10

    assert(type(n) == "number", "n must be a number")
    assert(type(base) == "number", "base must be a number")
    -- Base 1 would never reduce n (infinite loop); a fractional base would
    -- index `digits` with non-integers.
    assert(
        base >= 2 and base <= #digits and base % 1 == 0,
        "base out of range"
      )

    assert(n == n, "n is nan")
    assert(n ~= 1 / 0 and n ~= -1 / 0, "n is inf")

    n = math_floor(n)
    if base == 10 or n == 0 then
      return tostring(n)
    end

    local sign = ""
    if n < 0 then
      sign = "-"
      n = -n
    end

    -- Digits are produced least significant first and reversed at the end
    local r = { }
    while n ~= 0 do
      r[#r + 1] = digits[(n % base) + 1]
      n = math_floor(n / base)
    end
    return sign .. table_concat(r, ""):reverse()
  end
end
356 | |
--- Shorten a string to a maximum length, marking the cut with "...".
-- Strings that already fit are returned unchanged. If `max_length` is too
-- small to hold any text besides the ellipsis, the string is simply truncated.
-- @tparam string str Input string
-- @tparam[opt=80] number max_length Maximum length of the result, must be
--   positive
-- @treturn string A string of at most `max_length` characters
local cut_with_ellipsis
do
  local ellipsis = "..."
  local ellipsis_length = #ellipsis

  cut_with_ellipsis = function(str, max_length)
    max_length = max_length or 80

    -- Plain asserts: the `arguments` validator is not imported in this file
    assert(type(str) == "string", "str must be a string")
    assert(type(max_length) == "number", "max_length must be a number")
    assert(max_length > 0, "required string length must be positive")

    if #str > max_length then
      if max_length > ellipsis_length then
        str = str:sub(1, max_length - ellipsis_length) .. ellipsis
      else
        str = str:sub(1, max_length)
      end
    end

    return str
  end
end
383 | |
--- Convert numbers into loadable strings, including inf, -inf and nan.
-- `number_to_string` formats ordinary numbers with `tostring`,
-- `serialize_number` with "%.17g". Both return "1/0", "-1/0" and "0/0" for
-- positive infinity, negative infinity and nan.
-- Neither function validates its argument (they are called very often).
-- @tparam number number Number to convert
-- @treturn string Text representation of `number`
local number_to_string
local serialize_number
do
  local inf = 1 / 0

  -- Returns a loadable expression for nan, inf and -inf; nil for anything
  -- else. Values are compared directly instead of looking up their printed
  -- text, which differs between platforms and between nan signs.
  local special_value = function(number)
    if number ~= number then
      return "0/0"
    elseif number == inf then
      return "1/0"
    elseif number == -inf then
      return "-1/0"
    end
    return nil
  end

  number_to_string = function(number)
    -- no argument checking - called very often
    return special_value(number) or tostring(number)
  end

  serialize_number = function(number)
    -- no argument checking - called very often
    return special_value(number) or ("%.17g"):format(number)
  end
end
408 | |
local get_escaped_chars_in_ranges
do
  -- A string bound is reduced to the byte code of its first character;
  -- any other value is passed through untouched.
  local to_char_code = function(bound)
    if type(bound) == "string" then
      return (string_byte(bound))
    end
    return bound
  end

  --- Returns a string of '%'-prefixed characters covering the given ranges.
  -- @param ranges Array with an even number of elements, read as consecutive
  -- (start, end) pairs: ranges[1]..ranges[2], ranges[3]..ranges[4], and so on.
  -- For each pair, every character from the start code to the end code
  -- (inclusive) is appended to the result, each preceded by '%'.
  --
  -- A bound may be a number (the character code) or a string, of which only
  -- the first character is used. Both kinds can be mixed within a pair,
  -- for example:
  -- get_escaped_chars_in_ranges({"0",50}) returns "%0%1%2".
  -- @treturn string Returns the string of '%'-prefixed characters.
  -- @local here
  get_escaped_chars_in_ranges = function(ranges)
    assert(type(ranges) == "table", "argument must be a table")
    assert(#ranges % 2 == 0, "argument must have even number of elements")

    local cat, concat = make_concatter()

    for pair_index = 1, #ranges, 2 do
      local first_code = to_char_code(ranges[pair_index])
      local last_code = to_char_code(ranges[pair_index + 1])

      assert(
          type(first_code) == "number" and type(last_code) == "number",
          "argument elements must be numbers or strings"
        )

      for code = first_code, last_code do
        cat "%" (string_char(code))
      end
    end

    return concat()
  end
end
463 | |
-- Public interface of the module (entries in alphabetical order)
return
{
  cdata_cat = cdata_cat;
  cdata_wrap = cdata_wrap;
  count_substrings = count_substrings;
  create_escape_subst = create_escape_subst;
  cut_with_ellipsis = cut_with_ellipsis;
  ends_with = ends_with;
  escape_for_json = escape_for_json;
  escape_lua_pattern = escape_lua_pattern;
  escape_string = escape_string;
  fill_curly_placeholders = fill_curly_placeholders;
  fill_placeholders = fill_placeholders;
  fill_placeholders_ex = fill_placeholders_ex;
  get_escaped_chars_in_ranges = get_escaped_chars_in_ranges;
  htmlspecialchars = htmlspecialchars;
  integer_to_string_with_base = integer_to_string_with_base;
  kv_concat = kv_concat;
  make_concatter = make_concatter;
  number_to_string = number_to_string;
  serialize_number = serialize_number;
  split_by_char = split_by_char;
  split_by_offset = split_by_offset;
  starts_with = starts_with;
  trim = trim;
  url_encode = url_encode;
}
test run test run with input download show line numbers
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Recognizer | Recognition Result | Visualize | Recalc |
---|---|---|---|
#308 | javax.imageio.IIOException: Can't get input stream from URL! | [visualize] |
Snippet ID: | #156 |
Snippet name: | string.lua (luanucleo) |
Eternal ID of this version: | #156/1 |
Text MD5: | 1f8b1c5fd10779250dd8c24bc8737602 |
Author: | stefan |
Category: | |
Type: | Lua code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2014-01-13 03:50:10 |
Source code size: | 14538 bytes / 490 lines |
Pitched / IR pitched: | Yes / Yes |
Views / Downloads: | 1154 / 295 |
Referenced in: | [show references] |