| 1 | get("#389") -- mminsert
 | 
| 2 | get("#175") -- tableToString
 | 
| 3 | get("#348") -- rgb functions
 | 
| 4 | get("#349") -- table functions
 | 
| 5 | get("#480") -- horizontalsplit (v2)
 | 
| 6 | get("#388") -- rectangle functions
 | 
| 7 | |
-- Training samples. Data format is: {{image, crop, text}, ...}
--   image: ID of the image to load (passed to loadImage)
--   crop:  rectangle string (parsed by stringtorect); "" means the whole
--          image is used
--   text:  the characters shown in that region, left to right. Spaces are
--          removed before use, and "*" marks a glyph that is not recorded.
local mainimg = "#1000176"

data = {
  {mainimg,    "431,275,476,287", "Cestina"},
  {mainimg,    "431,296,468,306", "Dansk"},
  {"#1000181", "",                "Deutsch"},
  {mainimg,    "431,353,479,366", "Espanol"},
  {"#1000182", "",                "Esperanto"},
  {mainimg,    "431,392,482,403", "Francais"},
  {mainimg,    "431,410,479,420", "H*tski"},
  {mainimg,    "431,429,475,439", "Italiano"},
  {mainimg,    "431,449,476,460", "Lietuviu"}, -- last u looks like something else...
  {mainimg,    "431,468,475,480", "Magyar"},
  {mainimg,    "431,486,501,496", "Nederlands"},
  {mainimg,    "431,503,514,515", "Norsk Bokmal"},
  --{mainimg, "", ""},
}

-- Recursion limit for makeDecisionTree (nil disables the check).
maxdepth = 5000
| 27 | |
-- Returns s as a quoted, escaped string literal (exactly one return value).
-- taken from #158 (serpent.lua)
function escapeString(s)
  -- %q wraps the text in double quotes and backslash-escapes it.
  local quoted = string.format("%q", s)
  -- %q writes a newline as backslash + raw newline; replacing the raw
  -- newline with the letter n turns that into the two characters \n.
  local singleLine = quoted:gsub("\010", "n")
  -- A raw byte 26 is spelled out as \026.
  local escaped = singleLine:gsub("\026", "\\026")
  return escaped
end
| 33 | |
-- Serializes a decision tree as Lua source text by appending string
-- fragments to the list tbl (join them with table.concat).
--
-- tree is one of:
--   * a string                       -> written as a quoted string
--   * {x, y, plus, minus} (x number) -> "{x, y, <plus>, <minus>}", recursing
--   * a list with exactly one entry  -> written as that single quoted string
--   * any other list of strings      -> "{<s1>, <s2>, ...}"
function prettyPrintTree(tree, tbl)
  -- leaf (single character)
  if type(tree) == 'string' then
    tbl[#tbl + 1] = escapeString(tree)
    return
  end

  -- node (decision point)
  if type(tree[1]) == 'number' then
    local x, y, plus, minus = tree[1], tree[2], tree[3], tree[4]
    tbl[#tbl + 1] = "{"..tostring(x)..", "..tostring(y)..", "
    prettyPrintTree(plus, tbl)
    tbl[#tbl + 1] = ", "
    prettyPrintTree(minus, tbl)
    tbl[#tbl + 1] = "}"
    return
  end

  -- leaf with only one character
  if #tree == 1 then
    tbl[#tbl + 1] = escapeString(tree[1])
    return
  end

  -- leaf (list of characters)
  tbl[#tbl + 1] = "{"
  for i = 1, #tree do
    if i > 1 then
      tbl[#tbl + 1] = ", "
    end
    tbl[#tbl + 1] = escapeString(tree[i])
  end
  tbl[#tbl + 1] = "}"
end
| 55 | |
-- Rebuilds the global list `points` of {x, y} test coordinates from the
-- globals maxw and maxh.
--
-- For each index i = 0 .. max(maxw, maxh)-1 it appends, in this order:
--   1. column i from top to bottom (only while i < maxw)
--   2. row i from x = maxw-2 down to x = 0 (only while i < maxh)
-- The same coordinate can therefore appear more than once in the list.
function makePoints()
  local list = {}
  points = list

  local count = 0
  local rowStartX = maxw - 2
  local last = math.max(maxw, maxh) - 1

  for i = 0, last do
    if i < maxw then
      for y = 0, maxh - 1 do
        count = count + 1
        list[count] = {i, y}
      end
    end

    if i < maxh and rowStartX >= 0 then
      local xx = rowStartX
      while xx >= 0 do
        count = count + 1
        list[count] = {xx, i}
        xx = xx - 1
      end
    end
  end
end
| 71 | |
-- Returns the distinct labels (second field of every entry) found in
-- flatchars, as produced by keystolist from a set of those labels.
function charsonly(flatchars)
  local seen = {}
  for _, fc in ipairs(flatchars) do
    seen[fc[2]] = true
  end
  return keystolist(seen)
end
| 79 | |
-- Prints diagnostics for glyphs that no remaining test point can separate:
-- one summary line, then one line per glyph with a crop URL whose rectangle
-- is the glyph's recorded crop grown by 10 pixels on every side.
local function reportUnresolved(flatchars, chars, pointidx)
  print("not good: no more points (idx="..tostring(pointidx).."), chars="..table.concat(chars, " "))
  for i = 1, #flatchars do
    local fc = flatchars[i]
    local info = fc[3] -- {image id, crop rectangle}
    local r = info[2]
    local padded = newRectangle(r.x-10, r.y-10, r.width+20, r.height+20)
    local url = "http://tinybrain.de:8080/tb/crop.php?img="..info[1]:sub(2).."&rect="..recttostring(padded)
    print("  "..fc[2].." "..url)
  end
end

-- Partitions flatchars by the pixel at (x, y): glyphs whose pixel has
-- bright(rgb(...)) <= 0.5 go to the first list, the others to the second.
-- A glyph whose image does not contain (x, y) is put into both lists.
local function splitAtPoint(flatchars, x, y)
  local plus, minus = {}, {}
  for i = 1, #flatchars do
    local fc = flatchars[i]
    local cimg = fc[1]
    if x >= cimg.width or y >= cimg.height then
      -- outside of character image, leave in both sets
      plus[#plus + 1] = fc
      minus[#minus + 1] = fc
    elseif bright(rgb(cimg.getInt(x, y))) <= 0.5 then
      plus[#plus + 1] = fc
    else
      minus[#minus + 1] = fc
    end
  end
  return plus, minus
end

-- Builds a decision tree that tells the glyphs in flatchars apart by testing
-- single pixels, trying the global `points` in order starting at pointidx.
--
-- flatchars: list of {cimg, c, debuginfo} entries
-- pointidx:  index into `points` of the first point to try
-- depth:     current recursion depth (compared against the global maxdepth)
--
-- Returns one of:
--   * "break <n>"          when depth exceeds maxdepth (n = #flatchars)
--   * a single label       when all entries carry the same label
--   * {x, y, plus, minus}  a decision point with two subtrees
--   * a list of labels     when the points ran out before the labels could
--                          be separated (diagnostics are printed first)
function makeDecisionTree(flatchars, pointidx, depth)
  if maxdepth ~= nil and depth > maxdepth then
    return "break "..tostring(#flatchars)
  end

  local chars = charsonly(flatchars)
  if #chars == 1 then
    return chars[1]
  end

  local total = #flatchars
  local point = points[pointidx]
  while point ~= nil do
    local x, y = point[1], point[2]
    local plus, minus = splitAtPoint(flatchars, x, y)

    if #plus ~= total and #minus ~= total then
      -- actual decision point
      local nextidx = pointidx + 1
      local plustree = makeDecisionTree(plus, nextidx, depth + 1)
      local minustree = makeDecisionTree(minus, nextidx, depth + 1)
      return {x, y, plustree, minustree}
    end

    -- all chars are on one side. just move on to next point
    pointidx = pointidx + 1
    point = points[pointidx]
  end

  -- no more points
  reportUnresolved(flatchars, chars, pointidx)
  return chars
end
| 128 | |
-- Glyph collection: cut every sample in `data` into single-character images,
-- label each one with the matching character of the sample text and record
-- it in `flatchars`. Afterwards build and print the decision tree.
--
-- These are globals on purpose: makePoints reads maxw/maxh and
-- makeDecisionTree is fed flatchars.
chars = {} -- multimap of char to image (not filled at present)
flatchars = {} -- {{cimg, c, debuginfo}, ...}
maxw, maxh = 0, 0 -- largest glyph image size seen so far
images = {} -- image ID -> loaded image

for _, d in ipairs(data) do
  local img, crop, text = d[1], d[2], d[3]

  local theimg = images[img]
  if theimg == nil then
    theimg = loadImage(img)
    images[img] = theimg -- keep them in memory
  end

  local label = text:gsub(" ", "") -- no spaces

  local maincrop
  if crop ~= "" then
    maincrop = stringtorect(crop)
    -- View onto the cropped region; coordinates are relative to the crop.
    local fullimg = theimg
    theimg = {width=maincrop.width, height=maincrop.height,
      getInt = function(x, y) return fullimg.getInt(x+maincrop.x, y+maincrop.y) end}
  end

  local parts = horizontalsplit(theimg)

  -- Glyphs are matched to label characters purely by position, so a count
  -- mismatch means at least some of this sample's labels are wrong.
  if #parts ~= #label then
    print("warning: "..img.." ["..crop.."] was split into "..tostring(#parts)
      .." glyphs but its label \""..label.."\" has "..tostring(#label).." characters")
  end

  for cidx, r in ipairs(parts) do
    local x1, y1, x2, y2 = r.x, r.y, r.x+r.width, r.y+r.height
    -- add one row and col of white pixels to right and bottom
    local cimg = {width=x2-x1+1, height=y2-y1+1,
      getInt = function(x, y)
        return (x < x2-x1 and y < y2-y1) and theimg.getInt(x1+x, y1+y) or 0xFFFFFF
      end}
    maxw = math.max(maxw, cimg.width)
    maxh = math.max(maxh, cimg.height)

    -- string.sub yields "" once cidx runs past the label; such a glyph has
    -- no label and must not be recorded. "*" marks a glyph to skip.
    local c = label:sub(cidx, cidx)
    if c ~= "" and c ~= "*" then
      -- make debug info: source image plus the glyph's rectangle in
      -- full-image coordinates
      local actualcrop = r
      if maincrop then
        actualcrop = newRectangle(maincrop.x+r.x, maincrop.y+r.y,
          r.width, r.height)
      end
      -- (named debuginfo so the standard `debug` library is not shadowed)
      local debuginfo = {img, actualcrop}
      flatchars[#flatchars+1] = {cimg, c, debuginfo}
    end
  end
end

makePoints()
print("maxw", maxw, "maxh", maxh, "points", #points)
tree = makeDecisionTree(flatchars, 1, 1)

-- Serialize the tree as Lua source and print it.
tbl = {}
prettyPrintTree(tree, tbl)
result = "tree = "..table.concat(tbl)
print(result)
test run test run with input download show line numbers
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #476 | 
| Snippet name: | Haiku font recognizer (collecting data) | 
| Eternal ID of this version: | #476/1 | 
| Text MD5: | 0d380e1dada75d14f51a3b52dfd39a96 | 
| Author: | stefan | 
| Category: | |
| Type: | Lua code | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2015-02-26 00:14:41 | 
| Source code size: | 6053 bytes / 189 lines | 
| Pitched / IR pitched: | No / Yes | 
| Views / Downloads: | 1040 / 208 | 
| Referenced in: | [show references] |