Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

189
LINES

< > BotCompany Repo | #476 - Haiku font recognizer (collecting data)

Lua code

1  
get("#389") -- mminsert
2  
get("#175") -- tableToString
3  
get("#348") -- rgb functions
4  
get("#349") -- table functions
5  
get("#480") -- horizontalsplit (v2)
6  
get("#388") -- rectangle functions
7  
8  
-- Training samples, one record per line of text: {image, crop, text}
--   image: ID of the source image, handed to loadImage
--   crop:  rectangle string parsed by stringtorect; "" means the whole image
--   text:  the characters visible in that region, left to right; spaces are
--          removed before use and "*" marks a glyph that must be skipped

data = {
  {"#1000176", "431,275,476,287", "Cestina"},
  {"#1000176", "431,296,468,306", "Dansk"},
  {"#1000181", "", "Deutsch"},
  {"#1000176", "431,353,479,366", "Espanol"},
  {"#1000182", "", "Esperanto"},
  {"#1000176", "431,392,482,403", "Francais"},
  {"#1000176", "431,410,479,420", "H*tski"},
  {"#1000176", "431,429,475,439", "Italiano"},
  {"#1000176", "431,449,476,460", "Lietuviu"}, -- last u looks like something else...
  {"#1000176", "431,468,475,480", "Magyar"},
  {"#1000176", "431,486,501,496", "Nederlands"},
  {"#1000176", "431,503,514,515", "Norsk Bokmal"},
  --{"#1000176", "", ""},
}
25  
26  
-- Recursion limit for makeDecisionTree: when depth exceeds this value the
-- branch is cut off and replaced by a "break N" string leaf. nil disables
-- the limit.
maxdepth = 5000
27  
28  
-- Quote a string as a single-line Lua string literal.
-- Taken from #158 (serpent.lua).
-- @param s  the string to quote
-- @return   the quoted literal, including its surrounding double quotes
function escapeString(s)
  -- %q writes a line feed as a backslash followed by a raw line feed; swapping
  -- that raw byte for "n" yields the \n escape. Byte 26 is spelled out as \026.
  local escaped = ("%q"):format(s):gsub("\010", "n"):gsub("\026", "\\026")
  -- gsub also returns a replacement count; the single-name local drops it.
  return escaped
end
33  
34  
-- Serialise a decision tree into Lua source text.
-- @param tree  one of:
--                * a string                      -> leaf holding one value
--                * {x, y, plus, minus} (x number) -> decision node
--                * a list of strings             -> leaf holding several characters
-- @param tbl   output list; text fragments are appended to it and are meant
--              to be joined with table.concat by the caller
function prettyPrintTree(tree, tbl)
  if type(tree) == 'string' then -- leaf (single character)
    table.insert(tbl, escapeString(tree))
  elseif type(tree[1]) == 'number' then -- node (decision point)
    -- Index directly instead of calling the global unpack, which does not
    -- exist in Lua 5.2 and later.
    local x, y, plus, minus = tree[1], tree[2], tree[3], tree[4]
    table.insert(tbl, "{"..tostring(x)..", "..tostring(y)..", ")
    prettyPrintTree(plus, tbl)
    table.insert(tbl, ", ")
    prettyPrintTree(minus, tbl)
    table.insert(tbl, "}")
  elseif #tree == 1 then -- leaf with only one character
    table.insert(tbl, escapeString(tree[1]))
  else -- leaf (list of characters); an empty list prints as {}
    table.insert(tbl, "{")
    for i = 1, #tree do
      if i ~= 1 then table.insert(tbl, ", ") end
      table.insert(tbl, escapeString(tree[i]))
    end
    table.insert(tbl, "}")
  end
end
55  
56  
-- Fill the global list `points` with the pixel coordinates {x, y} that
-- makeDecisionTree probes, in probing order.
-- Reads the globals maxw and maxh. For each step i = 0, 1, ... it emits
-- column i top to bottom (while i < maxw), then row i from x = maxw-2 down
-- to 0 (while i < maxh).
-- Every row point is also produced by some column pass, so the list
-- contains duplicates. The order and the count are kept exactly as they
-- were, because both influence the generated tree and the printed output.
function makePoints()
  points = {}
  for x = 0, math.max(maxw, maxh)-1 do
    if x < maxw then
      for y = 0, maxh-1 do
        points[#points+1] = {x, y}
      end
    end
    -- the maxw-2 >= 0 guard keeps the descending loop from visiting xx = -1
    if x < maxh and maxw-2 >= 0 then
      for xx = maxw-2, 0, -1 do
        points[#points+1] = {xx, x}
      end
    end
  end
end
71  
72  
-- Collect the distinct characters that occur in a list of glyph records.
-- @param flatchars  list of records whose second element is the character
-- @return           whatever keystolist returns for the set of characters
--                   (presumably a plain list of the keys - confirm in #349)
function charsonly(flatchars)
  local t = {}
  for i = 1, #flatchars do
    t[flatchars[i][2]] = true -- used as a set: key = character
  end
  return keystolist(t)
end
79  
80  
-- Recursively build a decision tree that tells glyphs apart by probing
-- single pixels.
-- @param flatchars  list of records {cimg, c, debug}: cimg has width, height
--                   and getInt(x, y); c is the character; debug (optional) is
--                   {imageID, rectangle} and is only used for diagnostics
-- @param pointidx   index into the global `points` of the first pixel to try
-- @param depth      current recursion depth (the top-level call passes 1)
-- @return one of:
--   * a string: the only character left, or "break N" when depth exceeded
--     maxdepth (N = number of records in this branch)
--   * {x, y, plustree, minustree}: decision node for pixel (x, y); plustree
--     covers glyphs where bright(rgb(pixel)) <= 0.5, minustree the others
--   * the list of remaining characters when the points ran out before the
--     glyphs could be separated
function makeDecisionTree(flatchars, pointidx, depth)
  if maxdepth ~= nil and depth > maxdepth then
    return "break "..tostring(#flatchars)
  end
  local chars = charsonly(flatchars)
  if #chars == 1 then
    return chars[1]
  end
  while true do
    local point = points[pointidx]
    if point == nil then -- no more points
      print("not good: no more points (idx="..tostring(pointidx).."), chars="..table.concat(chars, " "))
      for _, fc in ipairs(flatchars) do
        local info = fc[3]
        if info then
          -- link to the glyph with a 10 pixel margin on every side
          local crop = info[2]
          crop = newRectangle(crop.x-10, crop.y-10, crop.width+20, crop.height+20)
          local url = "http://tinybrain.de:8080/tb/crop.php?img="..info[1]:sub(2).."&rect="..recttostring(crop)
          print("  "..fc[2].." "..url)
        else
          -- record carries no debug info: report the character only instead
          -- of failing on a nil index
          print("  "..fc[2])
        end
      end
      return chars
    end

    local x, y = point[1], point[2]
    local plus, minus = {}, {}
    for _, fc in ipairs(flatchars) do
      local cimg = fc[1]
      if x < cimg.width and y < cimg.height then -- point testable
        local set = bright(rgb(cimg.getInt(x, y))) <= 0.5
        table.insert(set and plus or minus, fc)
      else -- outside of character image, leave in both sets
        table.insert(plus, fc)
        table.insert(minus, fc)
      end
    end
    if #plus == #flatchars or #minus == #flatchars then
      -- all chars are on one side: this pixel separates nothing, try the next
      pointidx = pointidx+1
    else -- actual decision point
      local plustree = makeDecisionTree(plus, pointidx+1, depth+1)
      local minustree = makeDecisionTree(minus, pointidx+1, depth+1)
      return {x, y, plustree, minustree}
    end
  end
end
128  
129  
-- Global state filled while the samples are loaded.
chars = {} -- multimap of char to image (never filled in this file: the mminsert call is commented out)
flatchars = {} -- {{cimg, c, debug}, ...} with debug = {imageID, rectangle}
maxw, maxh = 0, 0 -- largest glyph image width and height seen so far
images = {} -- loaded images by image ID
133  
134  
-- Load every sample, cut it into glyphs and append one {cimg, c, debug}
-- record per labelled glyph to flatchars. Also tracks the largest glyph
-- size in maxw/maxh.
for _, d in ipairs(data) do
  local img, crop, text = d[1], d[2], d[3]
  local theimg = images[img]
  if theimg == nil then
    theimg = loadImage(img)
    images[img] = theimg -- keep them in memory
  end
  text = text:gsub(" ", "") -- no spaces

  local maincrop
  if crop ~= "" then
    maincrop = stringtorect(crop)
    local fullimg = theimg
    -- lightweight view onto the cropped region; coordinates become crop-relative
    theimg = {width=maincrop.width, height=maincrop.height,
      getInt = function(x, y) return fullimg.getInt(x+maincrop.x, y+maincrop.y) end}
  end

  local parts = horizontalsplit(theimg)

  -- the i-th segment is labelled with the i-th character of text
  for cidx, r in ipairs(parts) do
    local x1, y1, x2, y2 = r.x, r.y, r.x+r.width, r.y+r.height
    -- add one row and col of white pixels to right and bottom
    local cimg = {width=x2-x1+1, height=y2-y1+1,
      getInt = function(x, y)
        return (x < x2-x1 and y < y2-y1) and theimg.getInt(x1+x, y1+y) or 0xFFFFFF
      end}
    maxw = math.max(maxw, cimg.width)
    maxh = math.max(maxh, cimg.height)
    local c = text:sub(cidx, cidx)
    -- "*" marks a glyph to ignore. c is "" when there are more segments than
    -- label characters; such a segment has no label, so it is skipped rather
    -- than being learned as the empty string.
    if c ~= "*" and c ~= "" then
      --print(c, x1, y1)
      --mminsert(chars, c, cimg)

      -- make debug info: image ID plus the glyph rectangle in full-image coordinates
      local actualcrop = r
      if maincrop then
        actualcrop = newRectangle(maincrop.x+r.x, maincrop.y+r.y,
          r.width, r.height)
      end
      -- not named `debug`, to avoid shadowing the standard debug library
      local debuginfo = {img, actualcrop}
      flatchars[#flatchars+1] = {cimg, c, debuginfo}
    end
  end
end
180  
181  
-- Build the probing order, learn the decision tree from all collected glyphs
-- and print it as Lua source ("tree = ...").
makePoints()
--print(tableToString(points))
print("maxw", maxw, "maxh", maxh, "points", #points)
tree = makeDecisionTree(flatchars, 1, 1)

tbl = {} -- text fragments produced by prettyPrintTree
prettyPrintTree(tree, tbl)
result = "tree = "..table.concat(tbl)
print(result)

test run  test run with input  download  show line numbers   

Travelled to 8 computer(s): aoiabmzegqzx, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, tslmcundralx, tvejysmllsmz

No comments. add comment

Snippet ID: #476
Snippet name: Haiku font recognizer (collecting data)
Eternal ID of this version: #476/1
Text MD5: 0d380e1dada75d14f51a3b52dfd39a96
Author: stefan
Category:
Type: Lua code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-02-26 00:14:41
Source code size: 6053 bytes / 189 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 501 / 76
Referenced in: [show references]

Formerly at http://tinybrain.de/476 & http://476.tinybrain.de