Minimal Recognizer v1 [finds "interesting points", backup] [1032226]

do not include class IntegralImage.
do not include class IIntegralImage.

// Note: featureSize should not be smaller than maxDescentLevel

srecord noeq MinimalRecognizer(BufferedImage inputImage) {
  replace Channel with int.

  IntegralImage mainImage;
  bool verbose, verboseLookAt, verboseValues,
    verboseDescentProbabilities, verboseFound, verboseImageSize,
    verboseDecentLevelReached;

  new Map<Rect, IIntegralImage> clipCache;
  
  static final int grayscale = 3; // channel number for grayscale
  static final int channels = 4;
  
  abstract class IIntegralImage {
    // width and height of image
    int w, h;

    int liveliness_cachedChannel = -1;
    double liveliness_cache;
    
    long discoveredInStep;
    
    abstract double integralValue(int x, int y, Channel channel);
    
    BufferedImage render() {
      ret imageFromFunction(w, h, (x, y) -> rgbPixel(x, y, x+1, y+1) | fullAlphaMask());
    }

    double getPixel(Rect r, int channel) {
      ret getPixel(r.x, r.y, r.x2(), r.y2(), channel);
    }

    double getPixel(int channel) { ret getPixel(0, 0, w, h, channel); }
    
    // return value ranges from 0 to 1 (usually)
    double getPixel(int x1, int y1, int x2, int y2, int channel) {
      ret doubleRatio(rectSum(x1, y1, x2, y2, channel), (x2-x1)*(y2-y1)*255.0);
    }
    
    double rectSum(Rect r, int channel) {
      ret rectSum(r.x, r.y, r.x2(), r.y2(), channel);
    }
    
    double rectSum(int x1, int y1, int x2, int y2, int channel) {
      double bottomLeft  = integralValue(x1-1, y2-1, channel);
      double bottomRight = integralValue(x2-1, y2-1, channel);
      double topLeft     = integralValue(x1-1, y1-1, channel);
      double topRight    = integralValue(x2-1, y1-1, channel);
      ret bottomRight-topRight-bottomLeft+topLeft;
    }

    int rgbPixel(int x1, int y1, int x2, int y2) {
      int r = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 0))*255);
      int g = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 1))*255);
      int b = iround(clampZeroToOne(getPixel(x1, y1, x2, y2, 2))*255);
      ret rgbInt(r, g, b);
    }
    
    double liveliness(int channel) {
      if (liveliness_cachedChannel != channel) {
        // optimization (but no change in semantics):
        // if (w <= 1 && h <= 1) ret 0; // liveliness of single pixel is 0
        liveliness_cache = standardDeviation(map(q -> q.getPixel(channel), quadrants()));
        liveliness_cachedChannel = channel;
      }
      ret liveliness_cache;
    }

    // no duplicates, without full image
    L<IIntegralImage> descentShapes_cleaned() {
      ret uniquify(listMinus(descentShapes(), this));
    }

    L<IIntegralImage> descentShapes() {
      ret centerPlusQuadrants();
    }
    
    L<IIntegralImage> centerPlusQuadrants() {
      int midX = w/2, midY = h/2;
      Rect r = rectAround(iround(midX), iround(midY), max(midX, 1), max(midY, 1));
      ret itemPlusList(clip(r), quadrants());
    }
    
    L<IIntegralImage> quadrants() {
      if (w <= 1 && h <= 1) null; // let's really not have quadrants of a single pixel
      int midX = w/2, midY = h/2;
      ret mapLL clip(
        rect(0, 0, max(midX, 1), max(midY, 1)),
        rect(midX, 0, w-midX, max(midY, 1)),
        rect(0, midY, max(midX, 1), h-midY),
        rect(midX, midY, w-midX, h-midY)
      );
    }

    IIntegralImage liveliestSubshape(int channel) {
      ret highestBy(q -> q.liveliness(channel), quadrants());
    }
    
    ProbabilisticList<IIntegralImage> liveliestSubshape_probabilistic(int channel) {
      ret new ProbabilisticList<IIntegralImage>(map(descentShapes(), shape ->
        withProbability(shape.liveliness(channel), shape)));
    }

    IIntegralImage clip(Rect r) {
      Rect me = rect(0, 0, w, h);
      r = intersectRects(me, r);
      if (eq(r, me)) this;
      ret actuallyClip(r);
    }

    IIntegralImage actuallyClip(Rect r) {
      ret newClip(this, r);
    }    
    
    IIntegralImage clip(int x1, int y1, int w, int h) { ret clip(rect(x1, y1, w, h)); }
    
    Rect positionInImage(IIntegralImage mainImage) {
      ret this == mainImage ? positionInImage() : null;
    }

    Rect positionInImage() {
      ret rect(0, 0, w, h);
    }
    
    double area() { ret w*h; }
    double relativeArea() { ret area()/mainImage.area(); }

    bool singlePixel() { ret w <= 1 && h <= 1; }

    toString { ret w + "*" + h; }
  }

  // virtual clip of an integral image
  class Clip extends IIntegralImage {
    IIntegralImage fullImage;
    int x1, y1;

    *(IIntegralImage *fullImage, Rect r) {
      x1 = r.x; y1 = r.y; w = r.w; h = r.h;
    }
     
    *(IIntegralImage *fullImage, int *x1, int *y1, int *w, int *h) {}
    
    public double integralValue(int x, int y, int channel) {
      ret fullImage.integralValue(x+x1, y+y1, channel);
    }

    // don't clip a clip - be smarter than that!
    IIntegralImage actuallyClip(Rect r) {
      ret newClip(fullImage, translateRect(r, x1, y1));
    }

    Rect positionInImage() {
      ret rect(x1, y1, w, h);
    }

    Rect positionInImage(IIntegralImage mainImage) {
      try object Rect r = super.positionInImage(mainImage);
      if (fullImage == mainImage) ret rect(x1, y1, w, h);
      null;
    }

    toString { ret positionInImage() + " in " + fullImage; }

    // no need for these, we have clipCache
    /*
    @Override public bool equals(O o) {
      if (o == this) true;
      if (o cast Clip)
        ret eq(positionInImage(), o.positionInImage());
      false;
    }

    @Override public int hashCode() {
      ret positionInImage().hashCode();
    }
    */
  }

  class IntegralImage extends IIntegralImage {
    int[] data;

    *(IntegralImage img) {
      w = img.w;
      h = img.h;
      data = img.data;
    }
     
    *(BufferedImage img) {
      w = img.getWidth(); h = img.getHeight();
      if (longMul(w, h) > 8000000) fail("Image too big: " + w + "*" + h);
      int[] pixels = pixelsOfBufferedImage(img);
      data = new int[w*h*channels];
      int i = 0, j = 0;
      int[] sum = new[channels];
      for y to h: {
        for c to channels: sum[c] = 0;
        for x to w: {
          int rgb = pixels[j++] & 0xFFFFFF;
          for c to channels: {
            if (c == grayscale)
              data[i] = iround((sum[0]+sum[1]+sum[2])/3);
            else {
              data[i] = (sum[c] += rgb >> 16);
              rgb = (rgb << 8) & 0xFFFFFF;
            }
            if (y > 0)
              data[i] += data[i-w*channels];
            i++;
          }
        }
      }
    }
    
    public double integralValue(int x, int y, Channel channel) {
      /*if (channel == grayscale)
        ret doubleAvg(countIterator(3, c -> integralValue(x, y, c)));*/
        
      ret x < 0 || y < 0 ? 0
        : data[(min(y, h-1)*w+min(x, w-1))*channels+channel];
    }
  }

  IIntegralImage newClip(IIntegralImage fullImage, Rect r) {
    assertSame(fullImage, mainImage);
    ret getOrCreate(clipCache, r, () -> new Clip(fullImage, r));
  }

  IIntegralImage liveliestPointIn(IIntegralImage image) {
    ret applyUntilEqual_goOneBackOnNull(c -> c.liveliestSubshape(grayscale), image);
  }

  int maxPoints = 1000;
  long maxSteps = 1000;
  int maxDescentLevel = 2; // stop descent early

  // probability of a completely un-lively block to be looked at
  double minLiveliness = .1;

  // How likely we are to drill down from an area we actually look at
  double drillDownProbability = 1.0;

  // child must improve liveliness by a factor of this
  // in order to win against the parent in search order
  // (child is assumed to have a quarter the size of the parent)
  double childLivelinessFactor = 1.1;

  double featureSize = 0.1;

  // feature-level liveliness is scaled with this in the end
  // - TODO: calculate from actual values?
  double finalLivelinessFactor = 3.0;

  // discard beneath this value (after factor is applied)
  double finalMinLiveliness = 0;

  double minMarkAlpha = 0.2; // so we see stuff on dark monitors
  double markScale = .5; // make marks smaller by this amount

  long steps;
  double lowestExecutedProbability;
  
  new ProbabilisticList<IIntegralImage> liveliestPoints;

  // level++ <=> a fourth the area
  double level(IIntegralImage image) {
    ret -log(image.relativeArea(), 4);
  }
  
  double descentProbability(IIntegralImage image, int channel) {
    // what depth we at
    double level = level(image);

    // descent limit reached?
    if (level >= maxDescentLevel+0.5) {
      if (verboseDecentLevelReached) printVars_str("Descent limit reached", +level, +image);
      ret 0;
    }

    // liveliness of area
    double liveliness = rebaseZeroTo(minLiveliness, image.liveliness(channel));

    // correct liveliness for child-ness (distance from root)
    double levelFactor = pow(1.0/childLivelinessFactor, level-1);
    double corrected = liveliness*levelFactor;

    if (verbose || verboseDescentProbabilities)
      printVars(level := formatDouble(level, 1),
        rawDescentProbability := formatDouble(corrected, 5), +image, +liveliness, +levelFactor);
    
    //ret scoreToProbability(corrected);
    ret corrected;
  }

  // featureSize = relative to smaller image dimension
  double actualFeatureSize() {
    ret featureSize*min(mainImage.w, mainImage.h);
  }

  Rect featureArea(IIntegralImage image) {
   ret rectAround(center(image.positionInImage()),
      iround(max(actualFeatureSize(), 1)));
  }

  // keeps 0 liveliness as 0 value (=the point is discarded)
  // Any other liveliness is proceeding to possibly make it
  // into the "list of interesting points"
  double leafValue(IIntegralImage image, int channel) {
    Rect pos = image.positionInImage();
    Pt center = center(pos);
    int actualFeatureSize = iround(max(actualFeatureSize(), 1));
    Rect r = featureArea(image);
    double value = mainImage.clip(r).liveliness(channel);
    double scaled = value*finalLivelinessFactor;
    if (verbose || verboseValues) printVars(+scaled, +value, +image, +pos, +center, +actualFeatureSize, +r);
    ret scaled;
  }

  void clearCaches {
    clipCache.clear();
  }

  ProbabilisticList<IIntegralImage> scheduler;
  Set<IIntegralImage> lookedAt;
  
  run {
    if (mainImage == null)
      mainImage = new IntegralImage(inputImage);
    else
      mainImage = new IntegralImage(mainImage);
    //inputImage = null; // save space
    
    //print(liveliness := mainImage.liveliness(grayscale));
    if (verbose || verboseImageSize) print("Full image size: " + mainImage.w + "*" + mainImage.h);

    time "Recognition" {
      liveliestPoints = new ProbabilisticList;
      scheduler = new ProbabilisticList;
      lookedAt = new Set;
      lowestExecutedProbability = 1;
      steps = 0;
      
      scheduler.add(WithProbability(mainImage));
  
      int channel = grayscale;
      while (nempty(scheduler) && steps++ < maxSteps) {
        WithProbability<IIntegralImage> clip = popFirst(scheduler);
        var cp = clip.probability();
        lowestExecutedProbability = min(lowestExecutedProbability, cp);
        if (!lookedAt.add(clip!))
          continue; // We were here before...
          
        if (verbose || verboseLookAt)
          print("LEVEL " + formatDouble(level(clip!), 1) + " (p=" 
            + cp + ") - "
            + clip);
  
        L<WithProbability<IIntegralImage>> subs1
          = mapToProbabilities(clip->descentShapes_cleaned(),
            shape -> descentProbability(shape, channel));
        var preferredSub = getVar(first(subs1));
        
        ProbabilisticList<IIntegralImage> subs = new ProbabilisticList<>(subs1);
  
        if (empty(subs)) {
          if (verbose) print("  Is leaf");
          // leaf (single point) - save with value based on
          // liveliness of surroundings on a certain level (=scale)
          if (!liveliestPoints.containsElement(clip!)) {
            if (verboseFound) print("Found point: " + clip);
            clip->discoveredInStep = steps;
            liveliestPoints.add(withProbability(leafValue(clip!, channel), clip!));
            if (l(liveliestPoints) >= maxPoints) break;
          }
        } else {
          if (verbose) print("  Has " + n2(subs, "sub") + ":");
          if (verbose) pnlIndent(subs);
          for (var sub : subs) {
            // always force at least one descent of every area we actually looked at
            //var p = descentProbability(sub!, channel);
            var p = sub.probability();
            if (p == 0) continue;
            if (sub! == preferredSub) p = drillDownProbability;
            if (verbose) print("  Descending at " + p + " to " + sub!);
            scheduler.at(p, sub!);
          }
        }
      }
    }
  }

  void show {    
    print("Have " + nPoints(liveliestPoints) + " after " + nSteps(steps) + " (areas looked at: " + n2(lookedAt) + ", cache size=" + n2(clipCache) + ")");
    print("p=" + lowestExecutedProbability);
    pnl(takeFirst(10, liveliestPoints));
    int n = l(liveliestPoints);
    liveliestPoints.truncateBelow(finalMinLiveliness);
    int m = l(liveliestPoints);
    if (m < n)
      print("Truncated to " + nPoints(m));
    L<Long> stepList = map(liveliestPoints, p -> p->discoveredInStep);
    print("Points found in steps: " + sorted(stepList));

    var markedImage = mainImage.render();
    int markSize = max(3, iround(actualFeatureSize()*markScale));
    forEach(liveliestPoints, p ->
      markPointInImageWithAlpha(
        markedImage,
        center(p->positionInImage()), 
        Color.red,
        rebaseZeroTo(minMarkAlpha, p.probability()),
        markSize));
    showImage(markedImage);
  }
}
Snippet ID:	#1032226
Snippet name:	Minimal Recognizer v1 [finds "interesting points", backup]
Eternal ID of this version:	#1032226/1
Text MD5:	a58a29299ef36e481fe46b4d04f0b938
Transpilation MD5:	5550eeac36ddb20870da32b9faecaf7e
Author:	stefan
Category:
Type:	JavaX fragment (include)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2021-08-20 21:53:59
Source code size:	14019 bytes / 420 lines
Pitched / IR pitched:	No / No
Views / Downloads:	174 / 211
Referenced in:	[show references]
< > BotCompany Repo | #1032226 // Minimal Recognizer v1 [finds "interesting points", backup]

JavaX fragment (include) [tags: use-pretranspiled]

Author comment