1 | get("#389") -- mminsert |
2 | get("#175") -- tableToString |
3 | get("#348") -- rgb functions |
4 | get("#349") -- table functions |
5 | get("#480") -- horizontalsplit (v2) |
6 | get("#388") -- rectangle functions |
7 | |
-- Training samples, one entry per labelled word: {image, crop, text}.
--   image: snippet id handed to loadImage
--   crop:  rectangle string parsed by stringtorect; "" means the image is
--          used without cropping
--   text:  the characters shown in the image; spaces are stripped before use
--          and a "*" marks a segment that is not added to the training set
do
  local sheet = "#1000176" -- image id shared by most of the samples

  data = {
    {sheet, "431,275,476,287", "Cestina"},
    {sheet, "431,296,468,306", "Dansk"},
    {"#1000181", "", "Deutsch"},
    {sheet, "431,353,479,366", "Espanol"},
    {"#1000182", "", "Esperanto"},
    {sheet, "431,392,482,403", "Francais"},
    {sheet, "431,410,479,420", "H*tski"},
    {sheet, "431,429,475,439", "Italiano"},
    {sheet, "431,449,476,460", "Lietuviu"}, -- last u looks like something else...
    {sheet, "431,468,475,480", "Magyar"},
    {sheet, "431,486,501,496", "Nederlands"},
    {sheet, "431,503,514,515", "Norsk Bokmal"},
    --{"#1000176", "", ""},
  }
end

-- Depth limit for makeDecisionTree: past this depth it returns a "break N"
-- marker instead of recursing further. nil switches the limit off.
maxdepth = 5000
27 | |
-- Quotes a string as a Lua string literal (technique taken from #158,
-- serpent.lua).
-- %q writes a line feed as a backslash followed by a raw newline; replacing
-- the raw newline with the letter "n" leaves the two-character escape \n.
-- A raw byte 26 is spelled out as \026.
-- @param s  string to quote
-- @return   exactly one value: the quoted literal including its double quotes
--           (gsub's replacement count is deliberately discarded so the result
--           can be passed straight to table.insert)
function escapeString(s)
  local quoted = string.format("%q", s)
  quoted = quoted:gsub("\010", "n")
  quoted = quoted:gsub("\026", "\\026")
  return quoted
end
33 | |
-- Serialises a decision tree as Lua source text by appending string pieces to
-- tbl; the caller joins them with table.concat.
-- @param tree  one of:
--              * a string                      -> written as a quoted literal
--              * {x, y, plustree, minustree}   -> decision node (first element
--                                                 is a number)
--              * a list holding one string     -> written like a plain string
--              * any other list of strings     -> written as {"a", "b", ...}
-- @param tbl   output list of string pieces (modified in place)
function prettyPrintTree(tree, tbl)
  local function emit(piece)
    table.insert(tbl, piece)
  end

  -- leaf: a single character
  if type(tree) == 'string' then
    emit(escapeString(tree))
    return
  end

  -- decision node: test point followed by the two subtrees
  if type(tree[1]) == 'number' then
    local px, py, onplus, onminus = unpack(tree)
    emit("{"..tostring(px)..", "..tostring(py)..", ")
    prettyPrintTree(onplus, tbl)
    emit(", ")
    prettyPrintTree(onminus, tbl)
    emit("}")
    return
  end

  -- leaf list with exactly one character: collapse to the bare string
  if #tree == 1 then
    emit(escapeString(tree[1]))
    return
  end

  -- leaf list of several (or zero) characters
  emit("{")
  for idx = 1, #tree do
    if idx > 1 then
      emit(", ")
    end
    emit(escapeString(tree[idx]))
  end
  emit("}")
end
55 | |
-- Fills the global `points` with the pixel coordinates {x, y} that
-- makeDecisionTree probes, in probing order.
-- Reads the globals maxw and maxh. For step = 0, 1, 2, ... it emits
--   1. column `step` from top to bottom (while step < maxw), then
--   2. row `step` from x = maxw-2 down to x = 0 (while step < maxh).
-- Coordinates can therefore appear more than once in the list.
function makePoints()
  points = {}
  local out = points

  for step = 0, math.max(maxw, maxh) - 1 do
    if step < maxw then
      for row = 0, maxh - 1 do
        out[#out + 1] = {step, row}
      end
    end

    if step < maxh then
      -- the descending loop simply does not run when maxw - 2 is negative
      for col = maxw - 2, 0, -1 do
        out[#out + 1] = {col, step}
      end
    end
  end
end
71 | |
-- Collects the distinct characters found in a list of training entries.
-- @param flatchars  list of entries whose second field is the character
-- @return           whatever keystolist yields for the set of those
--                   characters (keystolist comes from another snippet;
--                   presumably a plain list of the keys)
function charsonly(flatchars)
  local seen = {}
  for _, entry in ipairs(flatchars) do
    seen[entry[2]] = true
  end
  return keystolist(seen)
end
79 | |
-- Recursively builds a decision tree that tells characters apart by probing
-- single pixels.
--
-- Uses the globals `points` (probe coordinates, see makePoints) and `maxdepth`
-- (optional recursion limit).
--
-- @param flatchars  list of entries {cimg, c [, debuginfo]}:
--                   cimg      image table with width, height and getInt(x, y)
--                   c         the character the image shows
--                   debuginfo optional {imageid, croprect}; only used to print
--                             a crop URL when characters cannot be separated
-- @param pointidx   index into `points` to start probing at (default 1)
-- @param depth      current recursion depth (default 1)
-- @return one of:
--         * a string c                   - only one distinct character is left
--         * "break N"                    - maxdepth exceeded with N entries left
--         * {x, y, plustree, minustree}  - decision node: plustree applies when
--                                          bright(rgb(pixel)) <= 0.5 at (x, y),
--                                          minustree otherwise
--         * a list of characters         - ran out of points before the
--                                          characters could be separated
function makeDecisionTree(flatchars, pointidx, depth)
  -- Defaults keep the older two-argument call form working while maxdepth is set.
  pointidx = pointidx or 1
  depth = depth or 1

  if maxdepth ~= nil and depth > maxdepth then
    return "break "..tostring(#flatchars)
  end
  local chars = charsonly(flatchars)
  if #chars == 1 then
    return chars[1]
  end
  while true do
    local point = points[pointidx]
    if point == nil then -- no more points
      print("not good: no more points (idx="..tostring(pointidx).."), chars="..table.concat(chars, " "))
      for _, fc in ipairs(flatchars) do
        local info = fc[3]
        if info ~= nil then
          -- widen the crop by 10 pixels on every side for the debug link
          local crop = info[2]
          crop = newRectangle(crop.x-10, crop.y-10, crop.width+20, crop.height+20)
          local url = "http://tinybrain.de:8080/tb/crop.php?img="..info[1]:sub(2).."&rect="..recttostring(crop)
          print(" "..fc[2].." "..url)
        else
          -- entry carries no debug record; report the character only
          print(" "..fc[2])
        end
      end
      return chars
    end

    local x, y = point[1], point[2]
    local plus, minus = {}, {}
    for _, fc in ipairs(flatchars) do
      local cimg = fc[1]
      if x < cimg.width and y < cimg.height then -- point testable
        local set = bright(rgb(cimg.getInt(x, y))) <= 0.5
        table.insert(set and plus or minus, fc)
      else -- outside of character image, leave in both sets
        table.insert(plus, fc)
        table.insert(minus, fc)
      end
    end

    if #plus == #flatchars or #minus == #flatchars then
      -- all chars are on one side: this point decides nothing, try the next
      pointidx = pointidx+1
    else -- actual decision point
      local plustree = makeDecisionTree(plus, pointidx+1, depth+1)
      local minustree = makeDecisionTree(minus, pointidx+1, depth+1)
      return {x, y, plustree, minustree}
    end
  end
end
128 | |
-- Main script: cut every labelled sample into single-character images, build
-- the probe point list and the decision tree, then print the tree as Lua code.

chars = {} -- multimap of char to image (not filled at present; mminsert call below is disabled)
flatchars = {} -- {{cimg, c, debuginfo}, ...}
maxw, maxh = 0, 0 -- largest character image width / height seen
images = {} -- cache: image id -> loaded image

for _, d in ipairs(data) do
  local img, crop, text = d[1], d[2], d[3]
  local theimg = images[img]
  if theimg == nil then
    theimg = loadImage(img)
    images[img] = theimg -- keep them in memory
  end
  text = text:gsub(" ", "") -- no spaces

  local maincrop
  if crop ~= "" then
    -- view onto the cropped region, addressed with crop-relative coordinates
    maincrop = stringtorect(crop)
    local fullimg = theimg
    theimg = {width=maincrop.width, height=maincrop.height,
      getInt = function(x, y) return fullimg.getInt(x+maincrop.x, y+maincrop.y) end}
  end

  local parts = horizontalsplit(theimg)

  -- Segments are paired with label characters purely by position, so a count
  -- mismatch means the labels below are unreliable for this sample.
  if #parts ~= #text then
    print("warning: "..img.." ["..crop.."] split into "..tostring(#parts)
      .." segments but label \""..text.."\" has "..tostring(#text).." characters")
  end

  local cidx = 1
  for _, r in ipairs(parts) do
    local x1, y1, x2, y2 = r.x, r.y, r.x+r.width, r.y+r.height
    -- add one row and col of white pixels to right and bottom
    local cimg = {width=x2-x1+1, height=y2-y1+1,
      getInt = function(x, y)
        return (x < x2-x1 and y < y2-y1) and theimg.getInt(x1+x, y1+y) or 0xFFFFFF
      end}
    maxw = math.max(maxw, cimg.width)
    maxh = math.max(maxh, cimg.height)

    local c = text:sub(cidx, cidx)
    -- "" means the label ran out of characters: never train an empty label.
    -- "*" marks a segment that is deliberately left out.
    if c ~= "" and c ~= "*" then
      --mminsert(chars, c, cimg)

      -- make debug info: image id plus the segment rectangle in full-image
      -- coordinates
      local actualcrop = r
      if maincrop then
        actualcrop = newRectangle(maincrop.x+r.x, maincrop.y+r.y,
          r.width, r.height)
      end
      -- named debuginfo so the standard `debug` library is not shadowed
      local debuginfo = {img, actualcrop}
      flatchars[#flatchars+1] = {cimg, c, debuginfo}
    end
    cidx = cidx+1
  end
end

makePoints()
--print(tableToString(points))
print("maxw", maxw, "maxh", maxh, "points", #points)
tree = makeDecisionTree(flatchars, 1, 1)

tbl = {}
prettyPrintTree(tree, tbl)
result = "tree = "..table.concat(tbl)
print(result)
test run test run with input download show line numbers
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #476 |
Snippet name: | Haiku font recognizer (collecting data) |
Eternal ID of this version: | #476/1 |
Text MD5: | 0d380e1dada75d14f51a3b52dfd39a96 |
Author: | stefan |
Category: | |
Type: | Lua code |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2015-02-26 00:14:41 |
Source code size: | 6053 bytes / 189 lines |
Pitched / IR pitched: | No / Yes |
Views / Downloads: | 772 / 153 |
Referenced in: | [show references] |