get("#389") -- mminsert
get("#175") -- tableToString
get("#348") -- rgb functions
get("#349") -- table functions
get("#480") -- horizontalsplit (v2)
get("#388") -- rectangle functions
-- Training samples. Each entry is {image, crop, text}:
--   image: ID of the image to load
--   crop:  rectangle string handed to stringtorect; "" means "use the whole image"
--   text:  the characters shown in that region, left to right. Spaces are
--          stripped before use and "*" marks a glyph that should be ignored.
local languageList = "#1000176" -- the image most samples are cropped from
data = {
  {languageList, "431,275,476,287", "Cestina"},
  {languageList, "431,296,468,306", "Dansk"},
  {"#1000181",   "",                "Deutsch"},
  {languageList, "431,353,479,366", "Espanol"},
  {"#1000182",   "",                "Esperanto"},
  {languageList, "431,392,482,403", "Francais"},
  {languageList, "431,410,479,420", "H*tski"},
  {languageList, "431,429,475,439", "Italiano"},
  {languageList, "431,449,476,460", "Lietuviu"}, -- last u looks like something else...
  {languageList, "431,468,475,480", "Magyar"},
  {languageList, "431,486,501,496", "Nederlands"},
  {languageList, "431,503,514,515", "Norsk Bokmal"},
  --{languageList, "", ""},
}

-- Recursion limit for makeDecisionTree (nil disables the limit).
maxdepth = 5000
--- Quote a string as Lua source text.
-- Same approach as #158 (serpent.lua): start from %q and patch up the two
-- characters it leaves in raw form.
-- @param s the string to quote
-- @return the quoted string, including the surrounding double quotes
function escapeString(s)
  local quoted = string.format("%q", s)
  -- %q writes a newline as backslash + real newline; swapping the real
  -- newline for the letter n turns that into the two-character escape \n
  quoted = quoted:gsub("\010", "n")
  -- a raw byte 26 is replaced by its decimal escape
  quoted = quoted:gsub("\026", "\\026")
  return quoted
end
--- Serialize a decision tree into Lua source fragments.
-- A tree is one of:
--   * a string                  -> written as a quoted string
--   * {x, y, plus, minus}       -> decision node (first element is a number),
--                                  written as {x, y, <plus>, <minus>}
--   * a list with one string    -> written like a plain string
--   * a list of several strings -> written as {"a", "b", ...}
-- @param tree the (sub)tree to write
-- @param tbl  list that the output fragments are appended to; the caller
--             joins them with table.concat
function prettyPrintTree(tree, tbl)
  local function emit(piece)
    tbl[#tbl+1] = piece
  end

  -- leaf (single character)
  if type(tree) == 'string' then
    emit(escapeString(tree))
    return
  end

  -- node (decision point)
  if type(tree[1]) == 'number' then
    emit("{"..tostring(tree[1])..", "..tostring(tree[2])..", ")
    prettyPrintTree(tree[3], tbl)
    emit(", ")
    prettyPrintTree(tree[4], tbl)
    emit("}")
    return
  end

  -- leaf with only one character
  local count = #tree
  if count == 1 then
    emit(escapeString(tree[1]))
    return
  end

  -- leaf (list of characters)
  emit("{")
  for i = 1, count do
    if i > 1 then emit(", ") end
    emit(escapeString(tree[i]))
  end
  emit("}")
end
--- Build the global list `points` of {x, y} test positions.
-- Reads the globals maxw and maxh. For step i = 0, 1, ... the list receives
-- column i (top to bottom, while i < maxw) followed by row i (from x = maxw-2
-- down to 0, while i < maxh). Positions can therefore appear more than once;
-- the order is what makeDecisionTree walks through.
function makePoints()
  points = {}
  local list = points
  local steps = math.max(maxw, maxh)
  for i = 0, steps-1 do
    -- column i
    if i < maxw then
      for y = 0, maxh-1 do
        list[#list+1] = {i, y}
      end
    end
    -- row i, right to left; the loop is empty when maxw < 2
    if i < maxh then
      for xx = maxw-2, 0, -1 do
        list[#list+1] = {xx, i}
      end
    end
  end
end
--- Collect the distinct characters of a list of samples.
-- @param flatchars list of samples; the character is the second element of each
-- @return whatever keystolist returns for the set of characters
--         (presumably a list of the distinct characters - keystolist is
--         defined in another snippet)
function charsonly(flatchars)
  local seen = {}
  for _, sample in ipairs(flatchars) do
    seen[sample[2]] = true
  end
  return keystolist(seen)
end
--- Recursively build a decision tree that tells the sample characters apart
-- by testing single pixels.
-- @param flatchars list of samples {cimg, c, debuginfo}
-- @param pointidx  index into the global `points` list to start testing at
-- @param depth     current recursion depth (compared against global maxdepth)
-- @return one of:
--   * "break N" when depth exceeds maxdepth (N = number of samples left)
--   * a single character when all samples carry the same character
--   * {x, y, plustree, minustree} for a pixel that separates the samples
--   * the list of remaining characters when the points run out
function makeDecisionTree(flatchars, pointidx, depth)
  if maxdepth ~= nil and depth > maxdepth then
    return "break "..tostring(#flatchars)
  end

  local chars = charsonly(flatchars)
  if #chars == 1 then
    return chars[1]
  end

  local total = #flatchars
  local idx = pointidx
  local point = points[idx]
  while point ~= nil do
    local x, y = point[1], point[2]
    local setSide, clearSide = {}, {}
    for _, sample in ipairs(flatchars) do
      local cimg = sample[1]
      if x < cimg.width and y < cimg.height then -- point testable
        -- the pixel counts as "set" when its brightness is at most 0.5
        -- (bright/rgb come from another snippet)
        if bright(rgb(cimg.getInt(x, y))) <= 0.5 then
          setSide[#setSide+1] = sample
        else
          clearSide[#clearSide+1] = sample
        end
      else -- outside of character image, leave in both sets
        setSide[#setSide+1] = sample
        clearSide[#clearSide+1] = sample
      end
    end

    if #setSide ~= total and #clearSide ~= total then
      -- actual decision point: both subtrees continue with the next point
      local plustree = makeDecisionTree(setSide, idx+1, depth+1)
      local minustree = makeDecisionTree(clearSide, idx+1, depth+1)
      return {x, y, plustree, minustree}
    end

    -- all chars are on one side. just move on to next point
    idx = idx+1
    point = points[idx]
  end

  -- no more points: report the samples that could not be told apart,
  -- each with a link to a slightly enlarged crop of its source region
  print("not good: no more points (idx="..tostring(idx).."), chars="..table.concat(chars, " "))
  for _, sample in ipairs(flatchars) do
    local info = sample[3]
    local region = info[2]
    local padded = newRectangle(region.x-10, region.y-10, region.width+20, region.height+20)
    local url = "http://tinybrain.de:8080/tb/crop.php?img="..info[1]:sub(2).."&rect="..recttostring(padded)
    print(" "..sample[2].." "..url)
  end
  return chars
end
-- Load every sample image, split it into glyphs and label each glyph with the
-- character at the same position in the sample text.
chars = {} -- multimap of char to image (not filled: the mminsert call below is commented out)
flatchars = {} -- {{cimg, c, debuginfo}, ...}
maxw, maxh = 0, 0 -- largest glyph image width/height seen so far
images = {} -- image ID -> loaded image
for _, d in ipairs(data) do
  local img, crop, text = unpack(d)
  local theimg = images[img]
  if theimg == nil then
    theimg = loadImage(img)
    images[img] = theimg -- keep them in memory
  end
  text = text:gsub(" ", "") -- no spaces
  local maincrop
  if crop ~= "" then
    maincrop = stringtorect(crop)
    local fullimg = theimg
    -- view onto the cropped region; coordinates are relative to the crop
    theimg = {width=maincrop.width, height=maincrop.height,
      getInt = function(x, y) return fullimg.getInt(x+maincrop.x, y+maincrop.y) end}
  end
  local parts = horizontalsplit(theimg)
  -- Labels are assigned purely by position, so a different number of parts
  -- and label characters means some glyphs get the wrong (or an empty) label.
  -- Report it instead of silently collecting bad data.
  if #parts ~= #text then
    print("warning: "..img.." ("..text.."): "..tostring(#parts)
      .." parts found but "..tostring(#text).." characters expected")
  end
  local cidx = 1
  for _, r in ipairs(parts) do
    local x1, y1, x2, y2 = r.x, r.y, r.x+r.width, r.y+r.height
    -- add one row and col of white pixels to right and bottom
    local cimg = {width=x2-x1+1, height=y2-y1+1,
      getInt = function(x, y)
        return (x < x2-x1 and y < y2-y1) and theimg.getInt(x1+x, y1+y) or 0xFFFFFF
      end}
    maxw = math.max(maxw, cimg.width)
    maxh = math.max(maxh, cimg.height)
    local c = text:sub(cidx, cidx)
    if c ~= "*" then -- "*" in the sample text means: skip this glyph
      --print(c, x1, y1)
      --mminsert(chars, c, cimg)
      -- make debug info: source image ID and the glyph rectangle in
      -- coordinates of the full image
      local actualcrop = r
      if maincrop then
        actualcrop = newRectangle(maincrop.x+r.x, maincrop.y+r.y,
          r.width, r.height)
      end
      -- not named "debug", which would shadow the standard debug library
      local debuginfo = {img, actualcrop}
      flatchars[#flatchars+1] = {cimg, c, debuginfo}
    end
    cidx = cidx+1
  end
end
-- Build the list of test points, grow the decision tree over all collected
-- samples and print it as a Lua assignment.
makePoints()
--print(tableToString(points))
print("maxw", maxw, "maxh", maxh, "points", #points)
tree = makeDecisionTree(flatchars, 1, 1)
tbl = {}
prettyPrintTree(tree, tbl)
result = "tree = "..table.concat(tbl)
print(result)
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #476 |
| Snippet name: | Haiku font recognizer (collecting data) |
| Eternal ID of this version: | #476/1 |
| Text MD5: | 0d380e1dada75d14f51a3b52dfd39a96 |
| Author: | stefan |
| Category: | |
| Type: | Lua code |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-02-26 00:14:41 |
| Source code size: | 6053 bytes / 189 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 1039 / 208 |
| Referenced in: | [show references] |