Libraryless. Click here for Pure Java version (15019L/93K).
1 | do not include class IntegralImage. |
2 | do not include class IIntegralImage. |
3 | |
4 | // Note: featureSize should not be smaller than maxDescentLevel |
5 | |
// MinimalRecognizer: finds "interesting points" in an image by
// probabilistic best-first descent over an integral image.
// inputImage (the record parameter) is the mandatory input.
srecord noeq MinimalRecognizer(BufferedImage inputImage) {
  replace Channel with int.

  // integral-image form of inputImage; built by prepareImage()
  IntegralImage mainImage;

  // Be verbose? (switches for the individual debug print sites below)
  bool verbose, verboseLookAt, verboseValues,
    verboseDescentProbabilities, verboseFound, verboseImageSize,
    verboseDecentLevelReached; // NOTE(review): "Decent" looks like a typo for "Descent" — renaming would break external users of this field

  // the big internal cache: clip rectangle -> shared Clip instance
  // (deduplication also lets liveliness caches be reused; see newClip())
  new Map<Rect, IIntegralImage> clipCache;

  // channel definitions (r, g, b and grayscale are channels)

  static final int grayscale = 3; // channel number for grayscale
  static final int channels = 4;  // r, g, b plus grayscale

  // user-settable maximums

  int maxPoints = 1000;    // stop after collecting this many output points
  long maxSteps = 1000;    // hard cap on scheduler iterations in run()
  int maxDescentLevel = 2; // stop descent early (see descentProbability())

  // very important value. see fixFeatureSize()
  // (relative to the smaller image dimension; see actualFeatureSize())

  double featureSize = 0.1;

  // diverse probabilities and factors follow

  // probability of a completely un-lively block to be looked at
  double minLiveliness = .1;

  // How likely we are to drill down from an area we actually look at
  double drillDownProbability = 1.0;

  // child must improve liveliness by a factor of this
  // in order to win against the parent in search order
  // (child is assumed to have a quarter the size of the parent)
  double childLivelinessFactor = 1.1;

  // feature-level liveliness is scaled with this in the end
  // - TODO: calculate from actual values?
  double finalLivelinessFactor = 3.0;

  // discard beneath this value (after factor is applied)
  double finalMinLiveliness = 0;

  // visualization parameters

  double minMarkAlpha = 0.2; // so we see stuff on dark monitors
  double markScale = .5;     // make marks smaller by this amount

  // important stats (step counter, probability level reached)

  long steps;                       // scheduler iterations performed by run()
  double lowestExecutedProbability; // lowest probability actually processed by run()

  // OUTPUT of the algorithm (found points)

  new ProbabilisticList<IIntegralImage> liveliestPoints;

  // INTERNAL (probabilistic scheduling, memory)

  ProbabilisticList<IIntegralImage> scheduler; // areas still to visit, by probability
  Set<IIntegralImage> lookedAt;                // areas already visited (dedup)

// Abstract view onto (a rectangular part of) an integral image.
// Subclasses: IntegralImage (backed by an int[] summed-area table)
// and Clip (a virtual rectangular window into another IIntegralImage).
abstract class IIntegralImage {
  // width and height of image
  int w, h;

  // single-entry cache for liveliness(channel); -1 = nothing cached yet
  int liveliness_cachedChannel = -1;
  double liveliness_cache;

  // value of MinimalRecognizer.steps when this shape was emitted as a point
  long discoveredInStep;

  // summed-area-table lookup for one channel
  // (must return 0 for negative coordinates; see rectSum())
  abstract double integralValue(int x, int y, Channel channel);

  // reconstruct a viewable image pixel-by-pixel from the integral data
  BufferedImage render() {
    ret imageFromFunction(w, h, (x, y) -> rgbPixel(x, y, x+1, y+1) | fullAlphaMask());
  }

  double getPixel(Rect r, int channel) {
    ret getPixel(r.x, r.y, r.x2(), r.y2(), channel);
  }

  // average channel value of the whole image
  double getPixel(int channel) { ret getPixel(0, 0, w, h, channel); }

  // average channel value of a rectangle (x2/y2 exclusive).
  // return value ranges from 0 to 1 (usually)
  double getPixel(int x1, int y1, int x2, int y2, int channel) {
    ret doubleRatio(rectSum(x1, y1, x2, y2, channel), (x2-x1)*(y2-y1)*255.0);
  }

  double rectSum(Rect r, int channel) {
    ret rectSum(r.x, r.y, r.x2(), r.y2(), channel);
  }

  // classic 4-corner summed-area-table rectangle sum (x2/y2 exclusive)
  double rectSum(int x1, int y1, int x2, int y2, int channel) {
    double bottomLeft = integralValue(x1-1, y2-1, channel);
    double bottomRight = integralValue(x2-1, y2-1, channel);
    double topLeft = integralValue(x1-1, y1-1, channel);
    double topRight = integralValue(x2-1, y1-1, channel);
    ret bottomRight-topRight-bottomLeft+topLeft;
  }

  // average color of a rectangle, packed as an RGB int
  int rgbPixel(int x1, int y1, int x2, int y2) {
    int r = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 0))*255);
    int g = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 1))*255);
    int b = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 2))*255);
    ret rgbInt(r, g, b);
  }

  // "liveliness" = standard deviation of the 4 quadrants' average values.
  // Cached per channel (single-entry cache, keyed by channel number).
  double liveliness(int channel) {
    if (liveliness_cachedChannel != channel) {
      // optimization (but no change in semantics):
      // if (w <= 1 && h <= 1) ret 0; // liveliness of single pixel is 0
      liveliness_cache = standardDeviation(map(q -> q.getPixel(channel), quadrants()));
      liveliness_cachedChannel = channel;
    }
    ret liveliness_cache;
  }

  // descent candidates - no duplicates, without full image
  L<IIntegralImage> descentShapes_cleaned() {
    ret uniquify(listMinus(descentShapes(), this));
  }

  L<IIntegralImage> descentShapes() {
    ret centerPlusQuadrants();
  }

  // the center rectangle plus the 4 quadrants (5 overlapping sub-shapes)
  L<IIntegralImage> centerPlusQuadrants() {
    int midX = w/2, midY = h/2;
    Rect r = rectAround(iround(midX), iround(midY), max(midX, 1), max(midY, 1));
    ret itemPlusList(clip(r), quadrants());
  }

  // the 4 quadrants (null for a single pixel)
  L<IIntegralImage> quadrants() {
    if (w <= 1 && h <= 1) null; // let's really not have quadrants of a single pixel
    int midX = w/2, midY = h/2;
    ret mapLL clip(
      rect(0, 0, max(midX, 1), max(midY, 1)),
      rect(midX, 0, w-midX, max(midY, 1)),
      rect(0, midY, max(midX, 1), h-midY),
      rect(midX, midY, w-midX, h-midY)
    );
  }

  IIntegralImage liveliestSubshape(int channel) {
    ret highestBy(q -> q.liveliness(channel), quadrants());
  }

  ProbabilisticList<IIntegralImage> liveliestSubshape_probabilistic(int channel) {
    ret new ProbabilisticList<IIntegralImage>(map(descentShapes(), shape ->
      withProbability(shape.liveliness(channel), shape)));
  }

  // clip to a rectangle (intersected with our own bounds);
  // returns this when the rectangle covers the whole image
  IIntegralImage clip(Rect r) {
    // FIX: dropped unused local "Rect me = rect(0, 0, w, h);"
    r = intersectRects(r, 0, 0, w, h);
    if (r.x == 0 && r.y == 0 && r.w == w && r.h == h) this;
    ret actuallyClip(r);
  }

  IIntegralImage actuallyClip(Rect r) {
    ret newClip(this, r);
  }

  IIntegralImage clip(int x1, int y1, int w, int h) { ret clip(rect(x1, y1, w, h)); }

  // our position relative to a given main image (null if unrelated)
  Rect positionInImage(IIntegralImage mainImage) {
    ret this == mainImage ? positionInImage() : null;
  }

  Rect positionInImage() {
    ret rect(0, 0, w, h);
  }

  Pt center() {
    ret main center(positionInImage());
  }

  double area() { ret w*h; }
  double relativeArea() { ret area()/mainImage.area(); }

  bool singlePixel() { ret w <= 1 && h <= 1; }

  toString { ret w + "*" + h; }
}
199 | |
// virtual clip of an integral image.
// Translates coordinates by (x1, y1) and forwards lookups to fullImage.
class Clip extends IIntegralImage {
  IIntegralImage fullImage; // the image we are a window into
  int x1, y1;               // top-left corner of the window in fullImage

  // construct from a rectangle (JavaX auto-assigns the starred fullImage)
  *(IIntegralImage *fullImage, Rect r) {
    x1 = r.x; y1 = r.y; w = r.w; h = r.h;
  }

  // construct from explicit coordinates (all starred parameters auto-assigned)
  *(IIntegralImage *fullImage, int *x1, int *y1, int *w, int *h) {}

  // forward to the backing image, shifted by our offset
  public double integralValue(int x, int y, int channel) {
    ret fullImage.integralValue(x+x1, y+y1, channel);
  }

  // don't clip a clip - be smarter than that!
  // (clips directly against fullImage instead of stacking Clip objects)
  IIntegralImage actuallyClip(Rect r) {
    ret newClip(fullImage, translateRect(r, x1, y1));
  }

  Rect positionInImage() {
    ret rect(x1, y1, w, h);
  }

  // position relative to a given main image; null when unrelated
  Rect positionInImage(IIntegralImage mainImage) {
    try object Rect r = super.positionInImage(mainImage); // covers this == mainImage
    if (fullImage == mainImage) ret rect(x1, y1, w, h);
    null;
  }

  toString { ret positionInImage() + " in " + fullImage; }

  // no need for these, we have clipCache
  /*
  @Override public bool equals(O o) {
    if (o == this) true;
    if (o cast Clip)
      ret eq(positionInImage(), o.positionInImage());
    false;
  }

  @Override public int hashCode() {
    ret positionInImage().hashCode();
  }
  */
}
246 | |
// Concrete integral image (summed-area table) with 4 channels
// (r, g, b, grayscale) interleaved in a single int[].
class IntegralImage extends IIntegralImage {
  int[] data; // w*h*channels running sums, row-major, channel-interleaved

  // shallow copy (shares the data array with img)
  *(IntegralImage img) {
    w = img.w;
    h = img.h;
    data = img.data;
  }

  // build the summed-area table from a BufferedImage
  *(BufferedImage img) {
    w = img.getWidth(); h = img.getHeight();
    // guard against int overflow / excessive memory in the w*h*channels array
    if (longMul(w, h) > 8000000) fail("Image too big: " + w + "*" + h);
    int[] pixels = pixelsOfBufferedImage(img);
    data = new int[w*h*channels];
    int i = 0, j = 0;
    int[] sum = new[channels]; // running per-row sums, one per channel
    for y to h: {
      for c to channels: sum[c] = 0; // reset row sums at start of each row
      for x to w: {
        int rgb = pixels[j++] & 0xFFFFFF; // drop the alpha byte
        for c to channels: {
          if (c == grayscale)
            // grayscale = average of the three color row sums
            // (sum[0..2] were already updated for this pixel above)
            data[i] = iround((sum[0]+sum[1]+sum[2])/3);
          else {
            // current color channel sits in bits 16-23; add it to the row sum
            data[i] = (sum[c] += rgb >> 16);
            rgb = (rgb << 8) & 0xFFFFFF; // shift the next channel into bits 16-23
          }
          if (y > 0)
            data[i] += data[i-w*channels]; // add cell from row above => 2D running sum
          i++;
        }
      }
    }
  }

  // table lookup: negative coordinates yield 0;
  // coordinates past the edge are clamped to the last row/column
  public double integralValue(int x, int y, Channel channel) {
    /*if (channel == grayscale)
      ret doubleAvg(countIterator(3, c -> integralValue(x, y, c)));*/

    ret x < 0 || y < 0 ? 0
      : data[(min(y, h-1)*w+min(x, w-1))*channels+channel];
  }
}
290 | |
// Create (or fetch from clipCache) a Clip of the main image.
// Clips are deduplicated by rectangle, so per-clip liveliness caches are shared.
IIntegralImage newClip(IIntegralImage fullImage, Rect r) {
  assertSame(fullImage, mainImage); // only clips of the main image are cached
  ret getOrCreate(clipCache, r, () -> new Clip(fullImage, r));
}

// Deterministic greedy descent: repeatedly take the liveliest quadrant
// until quadrants() yields null (single pixel), then back up one step.
IIntegralImage liveliestPointIn(IIntegralImage image) {
  ret applyUntilEqual_goOneBackOnNull(c -> c.liveliestSubshape(grayscale), image);
}

// descent depth of a sub-image: 0 = full image; level++ <=> a fourth the area
double level(IIntegralImage image) {
  ret -log(image.relativeArea(), 4);
}
304 | |
// Probability with which the scheduler should descend into this area:
// 0 when the depth limit is reached, otherwise the area's liveliness
// rebased by minLiveliness and discounted per level of depth.
double descentProbability(IIntegralImage image, int channel) {
  // what depth we at
  double level = level(image);

  // descent limit reached?
  // (+0.5 allows shapes sitting between exact levels — presumably for the
  //  overlapping center shape; TODO confirm intent)
  if (level >= maxDescentLevel+0.5) {
    if (verboseDecentLevelReached) printVars_str("Descent limit reached", +level, +image);
    ret 0;
  }

  // liveliness of area, rebased so even a dead block keeps minLiveliness
  double liveliness = rebaseZeroTo(minLiveliness, image.liveliness(channel));

  // correct liveliness for child-ness (distance from root):
  // each level deeper must beat the parent by childLivelinessFactor
  double levelFactor = pow(1.0/childLivelinessFactor, level-1);
  double corrected = liveliness*levelFactor;

  if (verbose || verboseDescentProbabilities)
    printVars(level := formatDouble(level, 1),
      rawDescentProbability := formatDouble(corrected, 5), +image, +liveliness, +levelFactor);

  //ret scoreToProbability(corrected);
  ret corrected;
}
329 | |
// featureSize = relative to smaller image dimension;
// this converts it to an absolute pixel size
double actualFeatureSize() {
  ret featureSize*min(mainImage.w, mainImage.h);
}

// feature-sized square around the center of the given area (at least 1 pixel)
Rect featureArea(IIntegralImage image) {
  ret rectAround(image.center(),
    iround(max(actualFeatureSize(), 1)));
}

// Final value of a leaf (single point): liveliness of the feature-sized
// area around it in the main image, scaled by finalLivelinessFactor.
// keeps 0 liveliness as 0 value (=the point is discarded)
// Any other liveliness is proceeding to possibly make it
// into the "list of interesting points"
double leafValue(IIntegralImage image, int channel) {
  Pt center = image.center();
  int actualFeatureSize = iround(max(actualFeatureSize(), 1)); // for the debug print only
  Rect r = featureArea(image);
  double value = mainImage.clip(r).liveliness(channel);
  double scaled = value*finalLivelinessFactor;
  if (verbose || verboseValues) printVars(+scaled, +value, +image, pos := image.positionInImage(), +center, +actualFeatureSize, +r);
  ret scaled;
}
352 | |
// Drop all cached Clip instances (and with them their liveliness caches).
void clearCaches {
  clipCache.clear();
}

// this prevents a really small feature area being scanned
// on a low descent level (which would mean we are basically
// scanning a random area)
void fixFeatureSize {
  featureSize = max(featureSize, pow(.5, maxDescentLevel-1));
}

// Ensure mainImage holds an IntegralImage ready for analysis.
void prepareImage {
  if (mainImage == null)
    // make integral image
    mainImage = new IntegralImage(inputImage);
  else
    // not sure why we are doing this one or whether we should do it
    mainImage = new IntegralImage(mainImage);
  //inputImage = null; // save space

  //print(liveliness := mainImage.liveliness(grayscale));
  if (verbose || verboseImageSize) print("Full image size: " + mainImage.w + "*" + mainImage.h);
}
376 | |
// Main algorithm: probabilistic best-first descent over image areas.
// Fills liveliestPoints (the OUTPUT) plus the steps /
// lowestExecutedProbability stats. Resets all state, so it is repeatable.
run {
  prepareImage();

  time "Recognition" {
    liveliestPoints = new ProbabilisticList;
    scheduler = new ProbabilisticList;
    lookedAt = new Set;
    lowestExecutedProbability = 1;
    steps = 0;

    // seed the scheduler with the full image
    scheduler.add(WithProbability(mainImage));

    int channel = grayscale; // all analysis is done on the grayscale channel
    while (nempty(scheduler) && steps++ < maxSteps) {
      // take the most probable unvisited area
      WithProbability<IIntegralImage> clip = popFirst(scheduler);
      var cp = clip.probability();
      lowestExecutedProbability = min(lowestExecutedProbability, cp);
      if (!lookedAt.add(clip!))
        continue; // We were here before...

      if (verbose || verboseLookAt)
        print("LEVEL " + formatDouble(level(clip!), 1) + " (p="
          + cp + ") - "
          + clip);

      // candidate sub-shapes, each paired with its descent probability
      L<WithProbability<IIntegralImage>> subs1
        = mapToProbabilities(clip->descentShapes_cleaned(),
          shape -> descentProbability(shape, channel));
      // first candidate — presumably list order, i.e. the center shape; TODO confirm
      var preferredSub = getVar(first(subs1));

      ProbabilisticList<IIntegralImage> subs = new ProbabilisticList<>(subs1);

      if (empty(subs)) {
        if (verbose) print(" Is leaf");
        // leaf (single point) - save with value based on
        // liveliness of surroundings on a certain level (=scale)
        if (!liveliestPoints.containsElement(clip!)) {
          if (verboseFound) print("Found point: " + clip);
          clip->discoveredInStep = steps;
          liveliestPoints.add(withProbability(leafValue(clip!, channel), clip!));
          if (l(liveliestPoints) >= maxPoints) break;
        }
      } else {
        if (verbose) print(" Has " + n2(subs, "sub") + ":");
        if (verbose) pnlIndent(subs);
        for (var sub : subs) {
          // always force at least one descent of every area we actually looked at
          //var p = descentProbability(sub!, channel);
          var p = sub.probability();
          if (p == 0) continue; // depth limit reached - never descend
          if (sub! == preferredSub) p = drillDownProbability;
          if (verbose) print(" Descending at " + p + " to " + sub!);
          scheduler.at(p, sub!);
        }
      }
    }
  }
}
435 | |
// Render the image with all found points marked in red.
// Also prints statistics and truncates liveliestPoints below
// finalMinLiveliness (note: a side effect on the output list).
BufferedImage markedImage() {
  print("Have " + nPoints(liveliestPoints) + " after " + nSteps(steps) + " (areas looked at: " + n2(lookedAt) + ", cache size=" + n2(clipCache) + ")");
  print("p=" + lowestExecutedProbability);
  pnl(takeFirst(10, liveliestPoints));
  int n = l(liveliestPoints);
  liveliestPoints.truncateBelow(finalMinLiveliness); // drop weak points
  int m = l(liveliestPoints);
  if (m < n)
    print("Truncated to " + nPoints(m));
  L<Long> stepList = map(liveliestPoints, p -> p->discoveredInStep);
  print("Points found in steps: " + sorted(stepList));

  var markedImage = mainImage.render();
  // mark size follows the feature size, but stays at least 3 pixels
  int markSize = max(3, iround(actualFeatureSize()*markScale));
  forEach(liveliestPoints, p ->
    markPointInImageWithAlpha(
      markedImage,
      p->center(),
      Color.red,
      rebaseZeroTo(minMarkAlpha, p.probability()), // keep marks visible at low values
      markSize));
  ret markedImage;
}
459 | |
// Display the marked image in a window.
void show {
  showImage(markedImage());
}

// Set a new input image; invalidates the computed integral image.
void setInputImage aka setImage(BufferedImage image) {
  inputImage = image;
  mainImage = null;
}

// one-stop shop method: set image, run, return the found point centers
Set<Pt> interestingPoints aka points(BufferedImage image) {
  setInputImage(image);
  run();
  ret points();
}

// accessor after run(): centers of the found points
Set<Pt> interestingPoints aka points() {
  ret mapToSet(liveliestPoints, p -> p->center());
}
}
download show line numbers debug dex old transpilations
Travelled to 4 computer(s): bhatertpkbcr, ekrmjmnbrukm, mqqgnosmbjvj, pyentgdyhuwx
No comments. add comment
Snippet ID: | #1032199 |
Snippet name: | Minimal Recognizer v1 [finds "interesting points"] |
Eternal ID of this version: | #1032199/200 |
Text MD5: | d217dd17b98431a38935b5913df34ceb |
Transpilation MD5: | e73841d1f3a2c24ffd41b748fc3af6f1 |
Author: | stefan |
Category: | |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2021-09-02 19:30:08 |
Source code size: | 15477 bytes / 480 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 240 / 739 |
Version history: | 199 change(s) |
Referenced in: | [show references] |