Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

189
LINES

< > BotCompany Repo | #476 - Haiku font recognizer (collecting data)

Lua code

get("#389") -- mminsert
get("#175") -- tableToString
get("#348") -- rgb functions
get("#349") -- table functions
get("#480") -- horizontalsplit (v2)
get("#388") -- rectangle functions

-- Training data. Format is: {{image, crop, text}, ...}
--   image: image snippet ID (with leading "#"); passed to loadImage.
--   crop:  rectangle string parsed by stringtorect, or "" to use the
--          whole image. (Looks like "x1,y1,x2,y2" - TODO confirm against
--          stringtorect in #388.)
--   text:  the characters shown in the (cropped) image, left to right.
--          Spaces are stripped before use; a "*" marks a segment that
--          should be skipped (not used as a training sample).

data = {
  {"#1000176", "431,275,476,287", "Cestina"},
  {"#1000176", "431,296,468,306", "Dansk"},
  {"#1000181", "", "Deutsch"},
  {"#1000176", "431,353,479,366", "Espanol"},
  {"#1000182", "", "Esperanto"},
  {"#1000176", "431,392,482,403", "Francais"},
  {"#1000176", "431,410,479,420", "H*tski"},
  {"#1000176", "431,429,475,439", "Italiano"},
  {"#1000176", "431,449,476,460", "Lietuviu"}, -- last u looks like something else...
  {"#1000176", "431,468,475,480", "Magyar"},
  {"#1000176", "431,486,501,496", "Nederlands"},
  {"#1000176", "431,503,514,515", "Norsk Bokmal"},
  --{"#1000176", "", ""},
}

-- Recursion depth limit for makeDecisionTree. When the depth exceeds this
-- value, the subtree is replaced by the string "break N" (N = number of
-- samples left at that point). Setting it to nil disables the limit.
maxdepth = 5000

--- Quote a string as a Lua string literal on a single line.
-- Approach taken from #158 (serpent.lua).
-- @param s the string to quote
-- @return the quoted literal (a single value; gsub's match count is dropped)
function escapeString(s)
  local quoted = string.format("%q", s)
  -- %q writes a newline as backslash + real newline; replacing the real
  -- newline with "n" turns that pair into the two characters \n.
  local escaped = quoted:gsub("\010", "n")
  -- make a raw character 26 visible as an escape sequence
  escaped = escaped:gsub("\026", "\\026")
  return escaped
end

--- Serialize a decision tree as Lua source text.
-- The output pieces are appended to tbl; the caller joins them with
-- table.concat.
-- @param tree one of:
--   * a string                     -> written as a quoted string
--   * {x, y, plustree, minustree}  -> decision node (first element is a number)
--   * a list with exactly one item -> written like a plain string
--   * any other list               -> written as {"a", "b", ...}
-- @param tbl list that receives the output fragments
function prettyPrintTree(tree, tbl)
  local function emit(piece)
    tbl[#tbl+1] = piece
  end

  -- leaf (single character)
  if type(tree) == 'string' then
    emit(escapeString(tree))
    return
  end

  -- node (decision point)
  if type(tree[1]) == 'number' then
    local x, y, plus, minus = tree[1], tree[2], tree[3], tree[4]
    emit("{"..tostring(x)..", "..tostring(y)..", ")
    prettyPrintTree(plus, tbl)
    emit(", ")
    prettyPrintTree(minus, tbl)
    emit("}")
    return
  end

  -- leaf given as a list of characters
  local count = #tree
  if count == 1 then
    -- only one character: no braces needed
    emit(escapeString(tree[1]))
    return
  end

  emit("{")
  for i = 1, count do
    if i > 1 then
      emit(", ")
    end
    emit(escapeString(tree[i]))
  end
  emit("}")
end

--- Build the global list `points` of {x, y} pixel coordinates to test.
-- Reads the globals maxw and maxh. For each i = 0, 1, 2, ... it appends
-- column i (top to bottom, while i < maxw) and then row i (from x = maxw-2
-- down to 0, while i < maxh), so low-numbered columns and rows come first.
-- Points may appear more than once in the list.
function makePoints()
  local list = {}
  local function add(px, py)
    list[#list+1] = {px, py}
  end

  local span = math.max(maxw, maxh)
  for i = 0, span-1 do
    if i < maxw then
      -- column i
      for y = 0, maxh-1 do
        add(i, y)
      end
    end
    if i < maxh then
      -- row i, right to left; the loop body never runs when maxw < 2
      for xx = maxw-2, 0, -1 do
        add(xx, i)
      end
    end
  end

  points = list
end

--- Collect the distinct characters that occur in a sample list.
-- @param flatchars list of samples; element [2] of each sample is its character
-- @return whatever keystolist returns for the set of characters seen
function charsonly(flatchars)
  local seen = {}
  local count = #flatchars
  local i = 1
  while i <= count do
    local sample = flatchars[i]
    seen[sample[2]] = true
    i = i + 1
  end
  return keystolist(seen)
end

--- Recursively build a decision tree that separates character samples by
-- testing single pixels.
-- Reads the globals `points` (candidate pixels) and `maxdepth`.
-- @param flatchars list of samples {cimg, c, debuginfo}; cimg has width,
--   height and getInt(x, y); debuginfo = {imageID, rect} is only used for
--   the diagnostic output
-- @param pointidx index into `points` of the first pixel to try
-- @param depth current recursion depth (the top-level call passes 1)
-- @return one of:
--   * "break N" when depth exceeds maxdepth (N = #flatchars)
--   * a single character when only one distinct character is left
--   * {x, y, plustree, minustree} for a decision on pixel (x, y)
--   * the list of remaining characters when the points are exhausted
function makeDecisionTree(flatchars, pointidx, depth)
  if maxdepth ~= nil and depth > maxdepth then
    return "break "..tostring(#flatchars)
  end

  local chars = charsonly(flatchars)
  if #chars == 1 then
    return chars[1]
  end

  local point = points[pointidx]
  while point ~= nil do
    local x, y = point[1], point[2]
    local plus, minus = {}, {}

    for _, fc in ipairs(flatchars) do
      local cimg = fc[1]
      if x >= cimg.width or y >= cimg.height then
        -- outside of character image, leave in both sets
        plus[#plus+1] = fc
        minus[#minus+1] = fc
      elseif bright(rgb(cimg.getInt(x, y))) <= 0.5 then
        -- pixel is set
        plus[#plus+1] = fc
      else
        minus[#minus+1] = fc
      end
    end

    if #plus ~= #flatchars and #minus ~= #flatchars then
      -- actual decision point
      local plustree = makeDecisionTree(plus, pointidx+1, depth+1)
      local minustree = makeDecisionTree(minus, pointidx+1, depth+1)
      return {x, y, plustree, minustree}
    end

    -- all chars are on one side. just move on to next point
    pointidx = pointidx+1
    point = points[pointidx]
  end

  -- no more points: report the samples that could not be told apart
  print("not good: no more points (idx="..tostring(pointidx).."), chars="..table.concat(chars, " "))
  for _, fc in ipairs(flatchars) do
    local info = fc[3]
    local r = info[2]
    -- enlarge the rectangle by 10 pixels on every side
    local padded = newRectangle(r.x-10, r.y-10, r.width+20, r.height+20)
    local url = "http://tinybrain.de:8080/tb/crop.php?img="..info[1]:sub(2).."&rect="..recttostring(padded)
    print("  "..fc[2].." "..url)
  end
  return chars
end

-- Collect one training sample per character segment of every data entry.
-- Globals written here and read by the rest of the script:
--   flatchars   list of samples
--   maxw, maxh  largest sample width / height (used by makePoints)
--   images      cache of loaded images, keyed by image ID
chars = {} -- multimap of char to image (unused: the mminsert call below is disabled)
flatchars = {} -- {{cimg, c, debuginfo}, ...} with debuginfo = {imageID, rect}
maxw, maxh = 0, 0
images = {}

for _, d in ipairs(data) do
  local img, crop, text = d[1], d[2], d[3]
  local theimg = images[img]
  if theimg == nil then
    theimg = loadImage(img)
    images[img] = theimg -- keep them in memory
  end
  text = text:gsub(" ", "") -- no spaces

  local maincrop
  if crop ~= "" then
    maincrop = stringtorect(crop)
    local fullimg = theimg
    -- virtual sub-image: translates coordinates into the full image
    theimg = {width=maincrop.width, height=maincrop.height,
      getInt = function(x, y) return fullimg.getInt(x+maincrop.x, y+maincrop.y) end}
  end

  local parts = horizontalsplit(theimg)

  -- Segments are matched to characters purely by position, so a count
  -- mismatch means wrong labels. Report it instead of failing silently.
  if #parts ~= #text then
    print("warning: "..img.." ["..crop.."] was split into "..tostring(#parts)
      .." parts, but text '"..text.."' has "..tostring(#text).." characters")
  end

  local cidx = 1

  for _, r in ipairs(parts) do
    local x1, y1, x2, y2 = r.x, r.y, r.x+r.width, r.y+r.height
    -- add one row and col of white pixels to right and bottom
    local cimg = {width=x2-x1+1, height=y2-y1+1,
      getInt = function(x, y)
        return (x < x2-x1 and y < y2-y1) and theimg.getInt(x1+x, y1+y) or 0xFFFFFF
      end}
    maxw = math.max(maxw, cimg.width)
    maxh = math.max(maxh, cimg.height)
    local c = text:sub(cidx, cidx)
    -- "*" marks a segment to skip; "" means there are more segments than
    -- characters, and such a segment must not become a sample labelled "".
    if c ~= "*" and c ~= "" then
      --print(c, x1, y1)
      --mminsert(chars, c, cimg)

      -- make debug info (position of the segment in the original image)
      local actualcrop = r
      if maincrop then
        actualcrop = newRectangle(maincrop.x+r.x, maincrop.y+r.y,
          r.width, r.height)
      end
      -- not named "debug", so the standard debug library is not shadowed
      local debuginfo = {img, actualcrop}
      flatchars[#flatchars+1] = {cimg, c, debuginfo}
    end
    cidx = cidx+1
  end
end

-- Build the list of candidate pixels (needs maxw/maxh from the loop above).
makePoints()
--print(tableToString(points))
print("maxw", maxw, "maxh", maxh, "points", #points)
-- Build the decision tree over all samples, starting at the first point
-- and at depth 1.
tree = makeDecisionTree(flatchars, 1, 1)

-- Serialize the tree as a Lua assignment ("tree = ...") and print it.
tbl = {}
prettyPrintTree(tree, tbl)
result = "tree = "..table.concat(tbl)
print(result)

test run  test run with input  download  show line numbers   

Travelled to 11 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz

No comments. add comment

Snippet ID: #476
Snippet name: Haiku font recognizer (collecting data)
Eternal ID of this version: #476/1
Text MD5: 0d380e1dada75d14f51a3b52dfd39a96
Author: stefan
Category:
Type: Lua code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-02-26 00:14:41
Source code size: 6053 bytes / 189 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 522 / 86
Referenced in: [show references]

Formerly at http://tinybrain.de/476 & http://476.tinybrain.de