// For audio recognition, we need a higher-precision
// integral image (our "pixels" - the samples - are 16 bit,
// so a sum table entry takes 5 bytes). We simply turn the
// sample data into pixels and have ourselves a verybig*1 image.
srecord noeq AudioRecognizer {
  interface IAudioSample {
    double length();
    double sampleSum(int channel, double start, double end);
  }

  // the 16 bit per channel integral image
  class AudioSample implements IAudioSample {
    int channels;
    int length; // samples per channel

    // 5 bytes are needed to store a sum table entry:
    // low 32 bits in data, high 8 bits in highBytes
    int[] data;
    byte[] highBytes;

    public double length() { ret length; }

    public double sampleSum(int channel, double start, double end) {
      int a = ifloor(start), b = ifloor(end);
      ret getEntry(b*channels+channel)-getEntry(a*channels+channel);
    }

    // average sample value over a range - the audio equivalent of a pixel
    public double getPixel(int channel, double start, double end) {
      ret doubleRatio(sampleSum(channel, start, end), end-start);
    }

    // get an entry of the sum table (40 bits - the high byte
    // sits above the unsigned int, so shift by 32)
    long getEntry(int i) {
      ret (((long) highBytes[i] & 0xFF) << 32) | uintToLong(data[i]);
    }

    *(short[] samples) { this(samples, 1); }

    // Build the sum table from interleaved 16 bit samples.
    // Assumption: we offset the signed samples to 0..65535 so the
    // running sums stay non-negative.
    *(short[] samples, int *channels) {
      length = samples.length/channels;
      if (longMul(length, channels) > 8000000) fail("Sample too long: " + length + "*" + channels);
      data = new int[length*channels];
      highBytes = new byte[length*channels];
      long[] sum = new long[channels];
      int i = 0;
      for x to length:
        for c to channels: {
          sum[c] += samples[i]+32768; // signed -> unsigned offset
          data[i] = (int) sum[c];
          highBytes[i] = (byte) (sum[c] >> 32);
          i++;
        }
    }

    // left over from the 2D image version:
    //*(IntegralImage img) { w = img.w; h = img.h; data = img.data; }
    //public double integralValue(int x, int y, Channel channel) {
    //  ret x < 0 || y < 0 ? 0 : data[(min(y, h-1)*w+min(x, w-1))*channels+channel];
    //}
  }
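  // Quick usage sketch for AudioSample (hypothetical - sample rate and
  // variable names are made up; silence averages to 32768 because of
  // the signed-to-unsigned offset in the constructor):
  //   short[] samples = ...; // 16 bit mono PCM
  //   AudioSample smp = new AudioSample(samples);
  //   double avg = smp.getPixel(0, 0, 44100); // average level of ~the first second at 44.1 kHz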
  IIntegralImage newClip(IIntegralImage fullImage, Rect r) {
    assertSame(fullImage, mainImage);
    ret getOrCreate(clipCache, r, () -> new Clip(fullImage, r));
  }

  IIntegralImage liveliestPointIn(IIntegralImage image) {
    ret applyUntilEqual_goOneBackOnNull(c -> c.liveliestSubshape(grayscale), image);
  }

  // level++ <=> a quarter of the area
  double level(IIntegralImage image) {
    ret -log(image.relativeArea(), 4);
  }

  double descentProbability(IIntegralImage image, int channel) {
    // what depth are we at
    double level = level(image);

    // descent limit reached?
    if (level >= maxDescentLevel+0.5) {
      if (verboseDecentLevelReached)
        printVars_str("Descent limit reached", +level, +image);
      ret 0;
    }

    // liveliness of area
    double liveliness = rebaseZeroTo(minLiveliness, image.liveliness(channel));

    // correct liveliness for child-ness (distance from root)
    double levelFactor = pow(1.0/childLivelinessFactor, level-1);
    double corrected = liveliness*levelFactor;
    if (verbose || verboseDescentProbabilities)
      printVars(level := formatDouble(level, 1),
        rawDescentProbability := formatDouble(corrected, 5),
        +image, +liveliness, +levelFactor);
    //ret scoreToProbability(corrected);
    ret corrected;
  }
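  // Worked example for the formula above: an area with relativeArea 1/64
  // has level = -log(1/64, 4) = 3. With childLivelinessFactor = 2, its
  // levelFactor is (1/2)^(3-1) = 0.25, so it needs 4 times the (rebased)
  // liveliness of a level 1 area to get the same descent probability.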
  // featureSize = relative to smaller image dimension
  double actualFeatureSize() {
    ret featureSize*min(mainImage.w, mainImage.h);
  }

  Rect featureArea(IIntegralImage image) {
    ret rectAround(image.center(), iround(max(actualFeatureSize(), 1)));
  }

  // Keeps 0 liveliness as 0 value (= the point is discarded).
  // Any other liveliness value lets the point proceed and possibly
  // make it into the "list of interesting points".
  double leafValue(IIntegralImage image, int channel) {
    Pt center = image.center();
    int actualFeatureSize = iround(max(actualFeatureSize(), 1));
    Rect r = featureArea(image);
    double value = mainImage.clip(r).liveliness(channel);
    double scaled = value*finalLivelinessFactor;
    if (verbose || verboseValues)
      printVars(+scaled, +value, +image, pos := image.positionInImage(),
        +center, +actualFeatureSize, +r);
    ret scaled;
  }

  void clearCaches {
    clipCache.clear();
  }

  // This prevents a really small feature area being scanned
  // on a low descent level (which would mean we are basically
  // scanning a random area)
  void fixFeatureSize {
    featureSize = max(featureSize, pow(.5, maxDescentLevel-1));
  }

  void prepareImage {
    if (mainImage == null)
      // make integral image
      mainImage = new IntegralImage(inputImage);
    else
      // not sure why we are doing this one or whether we should do it
      mainImage = new IntegralImage(mainImage);
    //inputImage = null; // save space
    //print(liveliness := mainImage.liveliness(grayscale));
    if (verbose || verboseImageSize)
      print("Full image size: " + mainImage.w + "*" + mainImage.h);
  }

  run {
    prepareImage();
    time "Recognition" {
      liveliestPoints = new ProbabilisticList;
      scheduler = new ProbabilisticList;
      lookedAt = new Set;
      lowestExecutedProbability = 1;
      steps = 0;
      scheduler.add(new WithProbability(mainImage));
      int channel = grayscale;

      while (nempty(scheduler) && steps++ < maxSteps) {
        WithProbability clip = popFirst(scheduler);
        var cp = clip.probability();
        lowestExecutedProbability = min(lowestExecutedProbability, cp);
        if (!lookedAt.add(clip!)) continue; // We were here before...
        if (verbose || verboseLookAt)
          print("LEVEL " + formatDouble(level(clip!), 1) + " (p=" + cp + ") - " + clip);

        L<WithProbability> subs1 = mapToProbabilities(clip->descentShapes_cleaned(),
          shape -> descentProbability(shape, channel));
        var preferredSub = getVar(first(subs1));
        ProbabilisticList subs = new ProbabilisticList<>(subs1);

        if (empty(subs)) {
          if (verbose) print("  Is leaf");
          // leaf (single point) - save with value based on
          // liveliness of surroundings on a certain level (=scale)
          if (!liveliestPoints.containsElement(clip!)) {
            if (verboseFound) print("Found point: " + clip);
            clip->discoveredInStep = steps;
            liveliestPoints.add(withProbability(leafValue(clip!, channel), clip!));
            if (l(liveliestPoints) >= maxPoints) break;
          }
        } else {
          if (verbose) print("  Has " + n2(subs, "sub") + ":");
          if (verbose) pnlIndent(subs);
          for (var sub : subs) {
            //var p = descentProbability(sub!, channel);
            var p = sub.probability();
            if (p == 0) continue;
            // always force at least one descent of every area we actually looked at
            if (sub! == preferredSub) p = drillDownProbability;
            if (verbose) print("    Descending at " + p + " to " + sub!);
            scheduler.add(p, sub!);
          }
        }
      }
    }
  }

  BufferedImage markedImage() {
    print("Have " + nPoints(liveliestPoints) + " after " + nSteps(steps)
      + " (areas looked at: " + n2(lookedAt) + ", cache size=" + n2(clipCache) + ")");
    print("p=" + lowestExecutedProbability);
    pnl(takeFirst(10, liveliestPoints));

    int n = l(liveliestPoints);
    liveliestPoints.truncateBelow(finalMinLiveliness);
    int m = l(liveliestPoints);
    if (m < n) print("Truncated to " + nPoints(m));

    L stepList = map(liveliestPoints, p -> p->discoveredInStep);
    print("Points found in steps: " + sorted(stepList));

    var markedImage = mainImage.render();
    int markSize = max(3, iround(actualFeatureSize()*markScale));
    forEach(liveliestPoints, p -> markPointInImageWithAlpha(markedImage, p->center(),
      Color.red, rebaseZeroTo(minMarkAlpha, p.probability()), markSize));
    ret markedImage;
  }

  void show {
    showImage(markedImage());
  }

  void setInputImage aka setImage(BufferedImage image) {
    inputImage = image;
    mainImage = null;
  }

  // one-stop shop method
  Set<Pt> interestingPoints aka points(BufferedImage image) {
    setInputImage(image);
    run();
    ret points();
  }

  // accessor after run()
  Set<Pt> interestingPoints aka points() {
    ret mapToSet(liveliestPoints, p -> p->center());
  }
}
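// Hypothetical end-to-end sketch (assumes the tuning fields - maxSteps,
// maxDescentLevel, featureSize etc. - are declared elsewhere in this
// record, and that the standard loadImage2 helper is available):
//   AudioRecognizer r = new AudioRecognizer();
//   r.fixFeatureSize();
//   Set<Pt> points = r.points(loadImage2("input.png"));
//   r.show(); // renders the image with the liveliest points marked in red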