Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

84
LINES

< > BotCompany Repo | #75 // LZW compressor

Lua code

1  
local math_modf = math.modf
2  
local string_char = string.char
3  
local table_concat = table.concat
4  
5  
--------------------------------------------------------------------------------
6  
-- LZW codec
7  
-- implemented by sheets.jeff@gmail.com
8  
9  
-- encode is used to uniquely encode a number into a sequence of bytes that can be decoded using decode()
10  
-- the bytes returned by this do not contain "\000"
11  
local bytes = {}
12  
13  
---Encode
14  
--@param x
15  
local function encode(x)
16  
        for k = 1, #bytes do bytes[k] = nil end
17  
        local xmod
18  
        x, xmod = math_modf(x/255)
19  
        xmod = xmod * 255
20  
        bytes[#bytes + 1] = xmod
21  
        while x > 0 do
22  
                x, xmod = math_modf(x/255)
23  
                xmod = xmod * 255
24  
                bytes[#bytes + 1] = xmod
25  
        end
26  
        if #bytes == 1 and bytes[1] > 0 and bytes[1] < 250 then
27  
                return string_char(bytes[1])
28  
        else
29  
                for i = 1, #bytes do bytes[i] = bytes[i] + 1 end
30  
                return string_char(256 - #bytes, unpack(bytes))
31  
        end
32  
end
33  
34  
---LibCompress,
35  
-- Compresses the given uncompressed string.
36  
-- Unless the uncompressed string starts with "\002", this is guaranteed to return a string equal to or smaller than
37  
-- the passed string.
38  
-- the returned string will only contain "\000" characters in rare circumstances, and will contain none if the
39  
-- source string has none.
40  
--@param uncompressed
41  
local dict = {}
42  
local function CompressLZW(uncompressed)
43  
        if type(uncompressed) == "string" then
44  
                local dict_size = 256
45  
                for k in pairs(dict) do
46  
                        dict[k] = nil
47  
                end
48  
                local result = {"\002"}
49  
                local w = ''
50  
                local ressize = 1
51  
                for i = 0, 255 do
52  
                        dict[string_char(i)] = i
53  
                end
54  
                for i = 1, #uncompressed do
55  
                        local c = uncompressed:sub(i,i)
56  
                        local wc = w..c
57  
                        if dict[wc] then
58  
                                w = wc
59  
                        else
60  
                                dict[wc] = dict_size
61  
                                dict_size = dict_size +1
62  
                                local r = encode(dict[w])
63  
                                ressize = ressize + #r
64  
                                result[#result + 1] = r
65  
                                w = c
66  
                        end
67  
                end
68  
                if w then
69  
                        local r = encode(dict[w])
70  
                        ressize = ressize + #r
71  
                        result[#result + 1] = r
72  
                end
73  
print("uc="..tostring(#uncompressed)..", ressize="..tostring(ressize))
74  
                if (#uncompressed+1) > ressize then
75  
                        return table_concat(result)
76  
                else
77  
                        return string_char(1)..uncompressed
78  
                end
79  
        else
80  
                return nil, "Can only compress strings"
81  
        end
82  
end
83  
84  
solution = CompressLZW

Author comment

from http://code.google.com/p/eoi/source/browse/trunk/LibCompress.lua

hmm! doesn't seem to compress much.

strings like "hello hello hello ..." stay the same. endless repetitions of one character do get compressed when you put in enough of them.

test run  test run with input  download  show line numbers   

Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Image recognition results

Recognizer Recognition Result Visualize Recalc
#308 3112 [visualize]

Snippet ID: #75
Snippet name: LZW compressor
Eternal ID of this version: #75/1
Text MD5: b47c1c6300cc75cde87eeb38e7a07fe4
Author: stefan
Category: compressors
Type: Lua code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2014-01-02 04:06:07
Source code size: 3112 bytes / 84 lines
Pitched / IR pitched: Yes / Yes
Views / Downloads: 1533 / 317
Referenced in: [show references]