Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

84
LINES

< > BotCompany Repo | #75 // LZW compressor

Lua code

local math_modf = math.modf
local string_char = string.char
local table_concat = table.concat

--------------------------------------------------------------------------------
-- LZW codec
-- implemented by sheets.jeff@gmail.com

-- encode is used to uniquely encode a number into a sequence of bytes that can be decoded using decode()
-- the bytes returned by this do not contain "\000"
local bytes = {}

---Encode
--@param x
local function encode(x)
        for k = 1, #bytes do bytes[k] = nil end
        local xmod
        x, xmod = math_modf(x/255)
        xmod = xmod * 255
        bytes[#bytes + 1] = xmod
        while x > 0 do
                x, xmod = math_modf(x/255)
                xmod = xmod * 255
                bytes[#bytes + 1] = xmod
        end
        if #bytes == 1 and bytes[1] > 0 and bytes[1] < 250 then
                return string_char(bytes[1])
        else
                for i = 1, #bytes do bytes[i] = bytes[i] + 1 end
                return string_char(256 - #bytes, unpack(bytes))
        end
end

---LibCompress,
-- Compresses the given uncompressed string.
-- Unless the uncompressed string starts with "\002", this is guaranteed to return a string equal to or smaller than
-- the passed string.
-- the returned string will only contain "\000" characters in rare circumstances, and will contain none if the
-- source string has none.
--@param uncompressed
local dict = {}
local function CompressLZW(uncompressed)
        if type(uncompressed) == "string" then
                local dict_size = 256
                for k in pairs(dict) do
                        dict[k] = nil
                end
                local result = {"\002"}
                local w = ''
                local ressize = 1
                for i = 0, 255 do
                        dict[string_char(i)] = i
                end
                for i = 1, #uncompressed do
                        local c = uncompressed:sub(i,i)
                        local wc = w..c
                        if dict[wc] then
                                w = wc
                        else
                                dict[wc] = dict_size
                                dict_size = dict_size +1
                                local r = encode(dict[w])
                                ressize = ressize + #r
                                result[#result + 1] = r
                                w = c
                        end
                end
                if w then
                        local r = encode(dict[w])
                        ressize = ressize + #r
                        result[#result + 1] = r
                end
print("uc="..tostring(#uncompressed)..", ressize="..tostring(ressize))
                if (#uncompressed+1) > ressize then
                        return table_concat(result)
                else
                        return string_char(1)..uncompressed
                end
        else
                return nil, "Can only compress strings"
        end
end

solution = CompressLZW

Author comment

from http://code.google.com/p/eoi/source/browse/trunk/LibCompress.lua

hmm! doesn't seem to compress much.

strings like "hello hello hello ..." stay the same. endless repetitions of one character do get compressed when you put in enough of them.

test run  test run with input  download  show line numbers   

Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Image recognition results

Recognizer Recognition Result Visualize Recalc
#308 3112 [visualize]

Snippet ID: #75
Snippet name: LZW compressor
Eternal ID of this version: #75/1
Text MD5: b47c1c6300cc75cde87eeb38e7a07fe4
Author: stefan
Category: compressors
Type: Lua code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2014-01-02 04:06:07
Source code size: 3112 bytes / 84 lines
Pitched / IR pitched: Yes / Yes
Views / Downloads: 1613 / 338
Referenced in: [show references]