do not include class IntegralImage.
do not include class IIntegralImage.

// Note: featureSize should not be smaller than maxDescentLevel
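
// Overview: this include implements a minimal recognizer that finds
// "interesting points" in an image. It builds an integral image
// (summed-area table) over the input, then performs a probabilistic,
// quadtree-like descent: each region is scored by its "liveliness"
// (the standard deviation of its quadrants' mean values), livelier regions
// are scheduled for descent with higher probability, and regions where the
// descent produces no further candidates are recorded in liveliestPoints
// and finally marked on the rendered image in show().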

srecord noeq MinimalRecognizer(BufferedImage inputImage) {
  replace Channel with int.

  IntegralImage mainImage;
  bool verbose, verboseLookAt, verboseValues,
    verboseDescentProbabilities, verboseFound, verboseImageSize,
    verboseDecentLevelReached;

  new Map<Rect, IIntegralImage> clipCache;

  static final int grayscale = 3; // channel number for grayscale
  static final int channels = 4;
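
  // Channel layout (see the IntegralImage constructor below):
  // channel 0 = red, 1 = green, 2 = blue, 3 = grayscale (average of R, G and B).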

  abstract class IIntegralImage {
    // width and height of image
    int w, h;

    int liveliness_cachedChannel = -1;
    double liveliness_cache;

    long discoveredInStep;

    abstract double integralValue(int x, int y, Channel channel);

    BufferedImage render() {
      ret imageFromFunction(w, h, (x, y) -> rgbPixel(x, y, x+1, y+1) | fullAlphaMask());
    }

    double getPixel(Rect r, int channel) {
      ret getPixel(r.x, r.y, r.x2(), r.y2(), channel);
    }

    double getPixel(int channel) { ret getPixel(0, 0, w, h, channel); }

    // return value ranges from 0 to 1 (usually)
    double getPixel(int x1, int y1, int x2, int y2, int channel) {
      ret doubleRatio(rectSum(x1, y1, x2, y2, channel), (x2-x1)*(y2-y1)*255.0);
    }

    double rectSum(Rect r, int channel) {
      ret rectSum(r.x, r.y, r.x2(), r.y2(), channel);
    }

    double rectSum(int x1, int y1, int x2, int y2, int channel) {
      double bottomLeft = integralValue(x1-1, y2-1, channel);
      double bottomRight = integralValue(x2-1, y2-1, channel);
      double topLeft = integralValue(x1-1, y1-1, channel);
      double topRight = integralValue(x2-1, y1-1, channel);
      ret bottomRight-topRight-bottomLeft+topLeft;
    }
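
    // rectSum() above is the standard summed-area-table identity: the sum over
    // the half-open rectangle [x1,x2) x [y1,y2) equals
    //   I(x2-1,y2-1) - I(x1-1,y2-1) - I(x2-1,y1-1) + I(x1-1,y1-1),
    // where I(x,y) is the integral value up to and including (x,y) and
    // out-of-range coordinates contribute 0. For example,
    // rectSum(0, 0, w, h, channel) is the sum over the whole image.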

    int rgbPixel(int x1, int y1, int x2, int y2) {
      int r = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 0))*255);
      int g = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 1))*255);
      int b = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 2))*255);
      ret rgbInt(r, g, b);
    }

    double liveliness(int channel) {
      if (liveliness_cachedChannel != channel) {
        // optimization (but no change in semantics):
        // if (w <= 1 && h <= 1) ret 0; // liveliness of single pixel is 0
        liveliness_cache = standardDeviation(map(q -> q.getPixel(channel), quadrants()));
        liveliness_cachedChannel = channel;
      }
      ret liveliness_cache;
    }
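
    // Liveliness is the standard deviation of the four quadrants' mean pixel
    // values (per channel): a uniform region scores 0, a region whose quadrants
    // differ strongly in brightness scores high. The result is cached per
    // channel because it is queried repeatedly during the descent.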

    // no duplicates, without full image
    L<IIntegralImage> descentShapes_cleaned() {
      ret uniquify(listMinus(descentShapes(), this));
    }

    L<IIntegralImage> descentShapes() {
      ret centerPlusQuadrants();
    }

    L<IIntegralImage> centerPlusQuadrants() {
      int midX = w/2, midY = h/2;
      Rect r = rectAround(iround(midX), iround(midY), max(midX, 1), max(midY, 1));
      ret itemPlusList(clip(r), quadrants());
    }

    L<IIntegralImage> quadrants() {
      if (w <= 1 && h <= 1) null; // let's really not have quadrants of a single pixel
      int midX = w/2, midY = h/2;
      ret mapLL clip(
        rect(0, 0, max(midX, 1), max(midY, 1)),
        rect(midX, 0, w-midX, max(midY, 1)),
        rect(0, midY, max(midX, 1), h-midY),
        rect(midX, midY, w-midX, h-midY)
      );
    }
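
    // Note: for a single pixel, quadrants() returns null; the remaining center
    // rect clips back to the pixel itself and is removed as a duplicate of
    // `this`, so descentShapes_cleaned() should come out empty and run() then
    // treats the pixel as a leaf.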

    IIntegralImage liveliestSubshape(int channel) {
      ret highestBy(q -> q.liveliness(channel), quadrants());
    }

    ProbabilisticList<IIntegralImage> liveliestSubshape_probabilistic(int channel) {
      ret new ProbabilisticList<IIntegralImage>(map(descentShapes(), shape ->
        withProbability(shape.liveliness(channel), shape)));
    }

    IIntegralImage clip(Rect r) {
      Rect me = rect(0, 0, w, h);
      r = intersectRects(me, r);
      if (eq(r, me)) this;
      ret actuallyClip(r);
    }

    IIntegralImage actuallyClip(Rect r) {
      ret newClip(this, r);
    }

    IIntegralImage clip(int x1, int y1, int w, int h) { ret clip(rect(x1, y1, w, h)); }

    Rect positionInImage(IIntegralImage mainImage) {
      ret this == mainImage ? positionInImage() : null;
    }

    Rect positionInImage() {
      ret rect(0, 0, w, h);
    }

    double area() { ret w*h; }
    double relativeArea() { ret area()/mainImage.area(); }

    bool singlePixel() { ret w <= 1 && h <= 1; }

    toString { ret w + "*" + h; }
  }

  // virtual clip of an integral image
  class Clip extends IIntegralImage {
    IIntegralImage fullImage;
    int x1, y1;

    *(IIntegralImage *fullImage, Rect r) {
      x1 = r.x; y1 = r.y; w = r.w; h = r.h;
    }

    *(IIntegralImage *fullImage, int *x1, int *y1, int *w, int *h) {}

    public double integralValue(int x, int y, int channel) {
      ret fullImage.integralValue(x+x1, y+y1, channel);
    }

    // don't clip a clip - be smarter than that!
    IIntegralImage actuallyClip(Rect r) {
      ret newClip(fullImage, translateRect(r, x1, y1));
    }

    Rect positionInImage() {
      ret rect(x1, y1, w, h);
    }

    Rect positionInImage(IIntegralImage mainImage) {
      try object Rect r = super.positionInImage(mainImage);
      if (fullImage == mainImage) ret rect(x1, y1, w, h);
      null;
    }

    toString { ret positionInImage() + " in " + fullImage; }

    // no need for these, we have clipCache
    /*
    @Override public bool equals(O o) {
      if (o == this) true;
      if (o cast Clip)
        ret eq(positionInImage(), o.positionInImage());
      false;
    }

    @Override public int hashCode() {
      ret positionInImage().hashCode();
    }
    */
  }

  class IntegralImage extends IIntegralImage {
    int[] data;

    *(IntegralImage img) {
      w = img.w;
      h = img.h;
      data = img.data;
    }

    *(BufferedImage img) {
      w = img.getWidth(); h = img.getHeight();
      if (longMul(w, h) > 8000000) fail("Image too big: " + w + "*" + h);
      int[] pixels = pixelsOfBufferedImage(img);
      data = new int[w*h*channels];
      int i = 0, j = 0;
      int[] sum = new[channels];
      for y to h: {
        for c to channels: sum[c] = 0;
        for x to w: {
          int rgb = pixels[j++] & 0xFFFFFF;
          for c to channels: {
            if (c == grayscale)
              data[i] = iround((sum[0]+sum[1]+sum[2])/3);
            else {
              data[i] = (sum[c] += rgb >> 16);
              rgb = (rgb << 8) & 0xFFFFFF;
            }
            if (y > 0)
              data[i] += data[i-w*channels];
            i++;
          }
        }
      }
    }
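
    // Construction above: for each row, sum[] accumulates running per-channel
    // totals from left to right (channels 0-2 take the R/G/B bytes of the
    // pixel, channel 3 the average of the three running sums); adding the
    // entry one row up (data[i - w*channels]) then turns the row-wise running
    // sums into full summed-area-table entries, i.e.
    // data[(y*w+x)*channels+c] holds the sum of channel c over all pixels
    // (x',y') with x' <= x and y' <= y.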

    public double integralValue(int x, int y, Channel channel) {
      /*if (channel == grayscale)
        ret doubleAvg(countIterator(3, c -> integralValue(x, y, c)));*/

      ret x < 0 || y < 0 ? 0
        : data[(min(y, h-1)*w+min(x, w-1))*channels+channel];
    }
  }

  IIntegralImage newClip(IIntegralImage fullImage, Rect r) {
    assertSame(fullImage, mainImage);
    ret getOrCreate(clipCache, r, () -> new Clip(fullImage, r));
  }
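
  // clipCache guarantees at most one Clip instance per rectangle of the main
  // image. Clip deliberately does not override equals()/hashCode() (see the
  // commented-out code above), so deduplication via the lookedAt set in run()
  // relies on this cache handing back the same instance for the same Rect.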

  IIntegralImage liveliestPointIn(IIntegralImage image) {
    ret applyUntilEqual_goOneBackOnNull(c -> c.liveliestSubshape(grayscale), image);
  }

  int maxPoints = 1000;
  long maxSteps = 1000;
  int maxDescentLevel = 2; // stop descent early

  // probability of a completely un-lively block to be looked at
  double minLiveliness = .1;

  // How likely we are to drill down from an area we actually look at
  double drillDownProbability = 1.0;

  // child must improve liveliness by a factor of this
  // in order to win against the parent in search order
  // (child is assumed to have a quarter the size of the parent)
  double childLivelinessFactor = 1.1;

  double featureSize = 0.1;

  // feature-level liveliness is scaled with this in the end
  // - TODO: calculate from actual values?
  double finalLivelinessFactor = 3.0;

  // discard beneath this value (after factor is applied)
  double finalMinLiveliness = 0;

  double minMarkAlpha = 0.2; // so we see stuff on dark monitors
  double markScale = .5; // make marks smaller by this amount

  long steps;
  double lowestExecutedProbability;

  new ProbabilisticList<IIntegralImage> liveliestPoints;

  // level++ <=> a fourth the area
  double level(IIntegralImage image) {
    ret -log(image.relativeArea(), 4);
  }
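
  // Example: the full image has relativeArea 1, hence level 0; a quadrant has
  // relativeArea 1/4, hence level 1; a quadrant of a quadrant has level 2.
  // Fractional levels occur for clips whose area is not an exact power of 1/4.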

  double descentProbability(IIntegralImage image, int channel) {
    // what depth are we at?
    double level = level(image);

    // descent limit reached?
    if (level >= maxDescentLevel+0.5) {
      if (verboseDecentLevelReached) printVars_str("Descent limit reached", +level, +image);
      ret 0;
    }

    // liveliness of area
    double liveliness = rebaseZeroTo(minLiveliness, image.liveliness(channel));

    // correct liveliness for child-ness (distance from root)
    double levelFactor = pow(1.0/childLivelinessFactor, level-1);
    double corrected = liveliness*levelFactor;

    if (verbose || verboseDescentProbabilities)
      printVars(level := formatDouble(level, 1),
        rawDescentProbability := formatDouble(corrected, 5), +image, +liveliness, +levelFactor);

    //ret scoreToProbability(corrected);
    ret corrected;
  }
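
  // In short: descentProbability
  //   = rebaseZeroTo(minLiveliness, liveliness) * (1/childLivelinessFactor)^(level-1),
  // and 0 once level exceeds maxDescentLevel + 0.5. The area's liveliness is
  // re-based so that a completely un-lively area still gets minLiveliness
  // (see the field comment above), then damped the deeper the area sits below
  // the root.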

  // featureSize = relative to smaller image dimension
  double actualFeatureSize() {
    ret featureSize*min(mainImage.w, mainImage.h);
  }

  Rect featureArea(IIntegralImage image) {
    ret rectAround(center(image.positionInImage()),
      iround(max(actualFeatureSize(), 1)));
  }

  // Keeps 0 liveliness as 0 value (= the point is discarded).
  // Any other liveliness proceeds and may make the point
  // into the "list of interesting points".
  double leafValue(IIntegralImage image, int channel) {
    Rect pos = image.positionInImage();
    Pt center = center(pos);
    int actualFeatureSize = iround(max(actualFeatureSize(), 1));
    Rect r = featureArea(image);
    double value = mainImage.clip(r).liveliness(channel);
    double scaled = value*finalLivelinessFactor;
    if (verbose || verboseValues) printVars(+scaled, +value, +image, +pos, +center, +actualFeatureSize, +r);
    ret scaled;
  }
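
  // leafValue() scores a found point by the liveliness of a feature-sized
  // window (featureArea) around the point's center in the main image, scaled
  // by finalLivelinessFactor; a value of 0 means the point is discarded from
  // the probabilistic list of interesting points.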

  void clearCaches {
    clipCache.clear();
  }

  ProbabilisticList<IIntegralImage> scheduler;
  Set<IIntegralImage> lookedAt;

  run {
    if (mainImage == null)
      mainImage = new IntegralImage(inputImage);
    else
      mainImage = new IntegralImage(mainImage);
    //inputImage = null; // save space

    //print(liveliness := mainImage.liveliness(grayscale));
    if (verbose || verboseImageSize) print("Full image size: " + mainImage.w + "*" + mainImage.h);

    time "Recognition" {
      liveliestPoints = new ProbabilisticList;
      scheduler = new ProbabilisticList;
      lookedAt = new Set;
      lowestExecutedProbability = 1;
      steps = 0;

      scheduler.add(WithProbability(mainImage));

      int channel = grayscale;
      while (nempty(scheduler) && steps++ < maxSteps) {
        WithProbability<IIntegralImage> clip = popFirst(scheduler);
        var cp = clip.probability();
        lowestExecutedProbability = min(lowestExecutedProbability, cp);
        if (!lookedAt.add(clip!))
          continue; // We were here before...

        if (verbose || verboseLookAt)
          print("LEVEL " + formatDouble(level(clip!), 1) + " (p="
            + cp + ") - "
            + clip);

        L<WithProbability<IIntegralImage>> subs1
          = mapToProbabilities(clip->descentShapes_cleaned(),
            shape -> descentProbability(shape, channel));
        var preferredSub = getVar(first(subs1));

        ProbabilisticList<IIntegralImage> subs = new ProbabilisticList<>(subs1);

        if (empty(subs)) {
          if (verbose) print(" Is leaf");
          // leaf (single point) - save with value based on
          // liveliness of surroundings on a certain level (=scale)
          if (!liveliestPoints.containsElement(clip!)) {
            if (verboseFound) print("Found point: " + clip);
            clip->discoveredInStep = steps;
            liveliestPoints.add(withProbability(leafValue(clip!, channel), clip!));
            if (l(liveliestPoints) >= maxPoints) break;
          }
        } else {
          if (verbose) print(" Has " + n2(subs, "sub") + ":");
          if (verbose) pnlIndent(subs);
          for (var sub : subs) {
            // always force at least one descent of every area we actually looked at
            //var p = descentProbability(sub!, channel);
            var p = sub.probability();
            if (p == 0) continue;
            if (sub! == preferredSub) p = drillDownProbability;
            if (verbose) print(" Descending at " + p + " to " + sub!);
            scheduler.at(p, sub!);
          }
        }
      }
    }
  }
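
  // run() is a best-first search over image regions: the scheduler is a
  // ProbabilisticList, so popFirst() yields the most probable pending region.
  // Regions seen before are skipped; regions whose candidate list comes up
  // empty are recorded as points via leafValue(); all other regions enqueue
  // their descent shapes weighted by descentProbability() (zero-probability
  // shapes are skipped, and the preferred sub-shape - the first of the mapped
  // list - is re-weighted to drillDownProbability).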

  void show {
    print("Have " + nPoints(liveliestPoints) + " after " + nSteps(steps) + " (areas looked at: " + n2(lookedAt) + ", cache size=" + n2(clipCache) + ")");
    print("p=" + lowestExecutedProbability);
    pnl(takeFirst(10, liveliestPoints));
    int n = l(liveliestPoints);
    liveliestPoints.truncateBelow(finalMinLiveliness);
    int m = l(liveliestPoints);
    if (m < n)
      print("Truncated to " + nPoints(m));
    L<Long> stepList = map(liveliestPoints, p -> p->discoveredInStep);
    print("Points found in steps: " + sorted(stepList));

    var markedImage = mainImage.render();
    int markSize = max(3, iround(actualFeatureSize()*markScale));
    forEach(liveliestPoints, p ->
      markPointInImageWithAlpha(
        markedImage,
        center(p->positionInImage()),
        Color.red,
        rebaseZeroTo(minMarkAlpha, p.probability()),
        markSize));
    showImage(markedImage);
  }
}
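
// Usage sketch (illustrative, not part of this include; assumes a helper like
// loadImage2() that returns a BufferedImage - the file name is made up):
//
//   MinimalRecognizer r = new MinimalRecognizer(loadImage2("someImage.png"));
//   r.verbose = true;   // optional: trace the descent
//   r.run();            // find the interesting points
//   r.show();           // print statistics and display the marked image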
Began life as a copy of #1032199
Snippet ID: #1032226
Snippet name: Minimal Recognizer v1 [finds "interesting points", backup]
Eternal ID of this version: #1032226/1
Author: stefan
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2021-08-20 21:53:59
Source code size: 14019 bytes / 420 lines