do not include class IntegralImage. do not include class IIntegralImage. // Note: featureSize should not be smaller than maxDescentLevel srecord noeq MinimalRecognizer(BufferedImage inputImage) { replace Channel with int. IntegralImage mainImage; bool verbose, verboseLookAt, verboseValues, verboseDescentProbabilities, verboseFound, verboseImageSize, verboseDecentLevelReached; new Map clipCache; static final int grayscale = 3; // channel number for grayscale static final int channels = 4; abstract class IIntegralImage { // width and height of image int w, h; int liveliness_cachedChannel = -1; double liveliness_cache; long discoveredInStep; abstract double integralValue(int x, int y, Channel channel); BufferedImage render() { ret imageFromFunction(w, h, (x, y) -> rgbPixel(x, y, x+1, y+1) | fullAlphaMask()); } double getPixel(Rect r, int channel) { ret getPixel(r.x, r.y, r.x2(), r.y2(), channel); } double getPixel(int channel) { ret getPixel(0, 0, w, h, channel); } // return value ranges from 0 to 1 (usually) double getPixel(int x1, int y1, int x2, int y2, int channel) { ret doubleRatio(rectSum(x1, y1, x2, y2, channel), (x2-x1)*(y2-y1)*255.0); } double rectSum(Rect r, int channel) { ret rectSum(r.x, r.y, r.x2(), r.y2(), channel); } double rectSum(int x1, int y1, int x2, int y2, int channel) { double bottomLeft = integralValue(x1-1, y2-1, channel); double bottomRight = integralValue(x2-1, y2-1, channel); double topLeft = integralValue(x1-1, y1-1, channel); double topRight = integralValue(x2-1, y1-1, channel); ret bottomRight-topRight-bottomLeft+topLeft; } int rgbPixel(int x1, int y1, int x2, int y2) { int r = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 0))*255); int g = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 1))*255); int b = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 2))*255); ret rgbInt(r, g, b); } double liveliness(int channel) { if (liveliness_cachedChannel != channel) { // optimization (but no change in semantics): // if (w <= 1 && h <= 1) ret 0; // liveliness of single pixel is 0 liveliness_cache = standardDeviation(map(q -> q.getPixel(channel), quadrants())); liveliness_cachedChannel = channel; } ret liveliness_cache; } // no duplicates, without full image L descentShapes_cleaned() { ret uniquify(listMinus(descentShapes(), this)); } L descentShapes() { ret centerPlusQuadrants(); } L centerPlusQuadrants() { int midX = w/2, midY = h/2; Rect r = rectAround(iround(midX), iround(midY), max(midX, 1), max(midY, 1)); ret itemPlusList(clip(r), quadrants()); } L quadrants() { if (w <= 1 && h <= 1) null; // let's really not have quadrants of a single pixel int midX = w/2, midY = h/2; ret mapLL clip( rect(0, 0, max(midX, 1), max(midY, 1)), rect(midX, 0, w-midX, max(midY, 1)), rect(0, midY, max(midX, 1), h-midY), rect(midX, midY, w-midX, h-midY) ); } IIntegralImage liveliestSubshape(int channel) { ret highestBy(q -> q.liveliness(channel), quadrants()); } ProbabilisticList liveliestSubshape_probabilistic(int channel) { ret new ProbabilisticList(map(descentShapes(), shape -> withProbability(shape.liveliness(channel), shape))); } IIntegralImage clip(Rect r) { Rect me = rect(0, 0, w, h); r = intersectRects(me, r); if (eq(r, me)) this; ret actuallyClip(r); } IIntegralImage actuallyClip(Rect r) { ret newClip(this, r); } IIntegralImage clip(int x1, int y1, int w, int h) { ret clip(rect(x1, y1, w, h)); } Rect positionInImage(IIntegralImage mainImage) { ret this == mainImage ? positionInImage() : null; } Rect positionInImage() { ret rect(0, 0, w, h); } double area() { ret w*h; } double relativeArea() { ret area()/mainImage.area(); } bool singlePixel() { ret w <= 1 && h <= 1; } toString { ret w + "*" + h; } } // virtual clip of an integral image class Clip extends IIntegralImage { IIntegralImage fullImage; int x1, y1; *(IIntegralImage *fullImage, Rect r) { x1 = r.x; y1 = r.y; w = r.w; h = r.h; } *(IIntegralImage *fullImage, int *x1, int *y1, int *w, int *h) {} public double integralValue(int x, int y, int channel) { ret fullImage.integralValue(x+x1, y+y1, channel); } // don't clip a clip - be smarter than that! IIntegralImage actuallyClip(Rect r) { ret newClip(fullImage, translateRect(r, x1, y1)); } Rect positionInImage() { ret rect(x1, y1, w, h); } Rect positionInImage(IIntegralImage mainImage) { try object Rect r = super.positionInImage(mainImage); if (fullImage == mainImage) ret rect(x1, y1, w, h); null; } toString { ret positionInImage() + " in " + fullImage; } @Override public bool equals(O o) { if (o == this) true; if (o cast Clip) ret eq(positionInImage(), o.positionInImage()); false; } @Override public int hashCode() { ret positionInImage().hashCode(); } } class IntegralImage extends IIntegralImage { int[] data; *(IntegralImage img) { w = img.w; h = img.h; data = img.data; } *(BufferedImage img) { w = img.getWidth(); h = img.getHeight(); if (longMul(w, h) > 8000000) fail("Image too big: " + w + "*" + h); int[] pixels = pixelsOfBufferedImage(img); data = new int[w*h*channels]; int i = 0, j = 0; int[] sum = new[channels]; for y to h: { for c to channels: sum[c] = 0; for x to w: { int rgb = pixels[j++] & 0xFFFFFF; for c to channels: { if (c == grayscale) data[i] = iround((sum[0]+sum[1]+sum[2])/3); else { data[i] = (sum[c] += rgb >> 16); rgb = (rgb << 8) & 0xFFFFFF; } if (y > 0) data[i] += data[i-w*channels]; i++; } } } } public double integralValue(int x, int y, Channel channel) { /*if (channel == grayscale) ret doubleAvg(countIterator(3, c -> integralValue(x, y, c)));*/ ret x < 0 || y < 0 ? 0 : data[(min(y, h-1)*w+min(x, w-1))*channels+channel]; } } IIntegralImage newClip(IIntegralImage fullImage, Rect r) { assertSame(fullImage, mainImage); ret getOrCreate(clipCache, r, () -> new Clip(fullImage, r)); } IIntegralImage liveliestPointIn(IIntegralImage image) { ret applyUntilEqual_goOneBackOnNull(c -> c.liveliestSubshape(grayscale), image); } int maxPoints = 1000; long maxSteps = 1000; int maxDescentLevel = 2; // stop descent early // probability of a completely un-lively block to be looked at double minLiveliness = .1; // How likely we are to drill down from an area we actually look at double drillDownProbability = 1.0; // child must improve liveliness by a factor of this // in order to win against the parent in search order // (child is assumed to have a quarter the size of the parent) double childLivelinessFactor = 1.1; double featureSize = 0.1; // feature-level liveliness is scaled with this in the end // - TODO: calculate from actual values? double finalLivelinessFactor = 3.0; // discard beneath this value (after factor is applied) double finalMinLiveliness = 0; double minMarkAlpha = 0.2; // so we see stuff on dark monitors double markScale = .5; // make marks smaller by this amount long steps; double lowestExecutedProbability; new ProbabilisticList liveliestPoints; // level++ <=> a fourth the area double level(IIntegralImage image) { ret -log(image.relativeArea(), 4); } double descentProbability(IIntegralImage image, int channel) { // what depth we at double level = level(image); // descent limit reached? if (level >= maxDescentLevel+0.5) { if (verboseDecentLevelReached) printVars_str("Descent limit reached", +level, +image); ret 0; } // liveliness of area double liveliness = rebaseZeroTo(minLiveliness, image.liveliness(channel)); // correct liveliness for child-ness (distance from root) double levelFactor = pow(1.0/childLivelinessFactor, level-1); double corrected = liveliness*levelFactor; if (verbose || verboseDescentProbabilities) printVars(level := formatDouble(level, 1), rawDescentProbability := formatDouble(corrected, 5), +image, +liveliness, +levelFactor); //ret scoreToProbability(corrected); ret corrected; } // featureSize = relative to smaller image dimension double actualFeatureSize() { ret featureSize*min(mainImage.w, mainImage.h); } Rect featureArea(IIntegralImage image) { ret rectAround(center(image.positionInImage()), iround(max(actualFeatureSize(), 1))); } // keeps 0 liveliness as 0 value (=the point is discarded) // Any other liveliness is proceeding to possibly make it // into the "list of interesting points" double leafValue(IIntegralImage image, int channel) { Rect pos = image.positionInImage(); Pt center = center(pos); int actualFeatureSize = iround(max(actualFeatureSize(), 1)); Rect r = featureArea(image); double value = mainImage.clip(r).liveliness(channel); double scaled = value*finalLivelinessFactor; if (verbose || verboseValues) printVars(+scaled, +value, +image, +pos, +center, +actualFeatureSize, +r); ret scaled; } void clearCaches { clipCache.clear(); } ProbabilisticList scheduler; Set lookedAt; run { if (mainImage == null) mainImage = new IntegralImage(inputImage); else mainImage = new IntegralImage(mainImage); //inputImage = null; // save space //print(liveliness := mainImage.liveliness(grayscale)); if (verbose || verboseImageSize) print("Full image size: " + mainImage.w + "*" + mainImage.h); time "Recognition" { liveliestPoints = new ProbabilisticList; scheduler = new ProbabilisticList; lookedAt = new Set; lowestExecutedProbability = 1; steps = 0; scheduler.add(WithProbability(mainImage)); int channel = grayscale; while (nempty(scheduler) && steps++ < maxSteps) { WithProbability clip = popFirst(scheduler); var cp = clip.probability(); lowestExecutedProbability = min(lowestExecutedProbability, cp); if (!lookedAt.add(clip!)) continue; // We were here before... if (verbose || verboseLookAt) print("LEVEL " + formatDouble(level(clip!), 1) + " (p=" + cp + ") - " + clip); L> subs1 = mapToProbabilities(clip->descentShapes_cleaned(), shape -> descentProbability(shape, channel)); var preferredSub = getVar(first(subs1)); ProbabilisticList subs = new ProbabilisticList<>(subs1); if (empty(subs)) { if (verbose) print(" Is leaf"); // leaf (single point) - save with value based on // liveliness of surroundings on a certain level (=scale) if (!liveliestPoints.containsElement(clip!)) { if (verboseFound) print("Found point: " + clip); clip->discoveredInStep = steps; liveliestPoints.add(withProbability(leafValue(clip!, channel), clip!)); if (l(liveliestPoints) >= maxPoints) break; } } else { if (verbose) print(" Has " + n2(subs, "sub") + ":"); if (verbose) pnlIndent(subs); for (var sub : subs) { // always force at least one descent of every area we actually looked at //var p = descentProbability(sub!, channel); var p = sub.probability(); if (p == 0) continue; if (sub! == preferredSub) p = drillDownProbability; if (verbose) print(" Descending at " + p + " to " + sub!); scheduler.at(p, sub!); } } } } } void show { print("Have " + nPoints(liveliestPoints) + " after " + nSteps(steps) + " (areas looked at: " + n2(lookedAt) + ", cache size=" + n2(clipCache) + ")"); print("p=" + lowestExecutedProbability); pnl(takeFirst(10, liveliestPoints)); int n = l(liveliestPoints); liveliestPoints.truncateBelow(finalMinLiveliness); int m = l(liveliestPoints); if (m < n) print("Truncated to " + nPoints(m)); L stepList = map(liveliestPoints, p -> p->discoveredInStep); print("Points found in steps: " + sorted(stepList)); var markedImage = mainImage.render(); int markSize = max(3, iround(actualFeatureSize()*markScale)); forEach(liveliestPoints, p -> markPointInImageWithAlpha( markedImage, center(p->positionInImage()), Color.red, rebaseZeroTo(minMarkAlpha, p.probability()), markSize)); showImage(markedImage); } }