import jdk.incubator.vector.*;

// TODO: image w/h not divisible by 8

// Integral-image style structure over the brightness of an image.
// The image is cut into square blocks (8x8 pixels). For every block the
// constructor computes row sums, column sums and a total using 256-bit int
// vectors. Per-pixel values inside a block are meant to be computed lazily
// (see calcData, which is not implemented yet).
final sclass BWIntegralImage_doubleVectorized implements MakesBufferedImage, IBWIntegralImage {
  // dual logarithm of block size & corresponding int vector species
  replace blockShift with 3.
  replace species with IntVector.SPECIES_256.
  replace blockSize with (1 << blockShift).

  int w, h; // actual image size
  int blockW, blockH; // width and height of block array
  Block[] blocks; // row-major: index = by*blockW+bx

  ifdef BWIntegralImage_CountAccesses
    long accesses;
  endifdef

  sclass Block {
    // [0, 8): one sum per row of the block, [8, 16): one sum per column
    int[] rowAndColSums = new[blockSize*2]; // length 16
    int sum;
    int[] data; // length 64 if calculated
  }

  *() {}
  *(File f) { this(loadImage2(f)); }
  *(MakesBufferedImage img) { this(toBufferedImage(img)); }

  // Grabs the pixels of the image and fills the per-block sums.
  // Fails (via alloc) if width or height is not a multiple of the block size.
  *(BufferedImage image) ctex {
    alloc(image.getWidth(), image.getHeight());

    // Grab image
    // TODO: could use grayscale color model here (faster?)
    int[] data = new[w*h];
    PixelGrabber pixelGrabber = new PixelGrabber(image, 0, 0, w, h, data, 0, w);
    if (!pixelGrabber.grabPixels())
      fail("Could not grab pixels");

    // for brightness of pixels,
    // for now we cheat by just using one of the channels

    // calculate sums in each block
    int iBlock = 0;
    for by to blockH: {
      // index in data of the first pixel of this row of blocks
      int iLine = (by << blockShift)*w;
      for bx to blockW: {
        Block block = blocks[iBlock++];
        int[] sums = block.rowAndColSums;

        // column sums continue from the block above (zero at the top edge)
        IntVector vColSums = by == 0
          ? IntVector.zero(species)
          : IntVector.fromArray(species, getBlock(bx, by-1).rowAndColSums, blockSize);

        // row sums continue from the block to the left (none at the left edge)
        int[] leftSums = bx == 0 ? null : getBlock(bx-1, by).rowAndColSums;

        // index in data of the top-left pixel of this block
        int iBlockStart = iLine + (bx << blockShift);

        for (int y = 0; y < blockSize; y++) {
          // FIX: load row y of THIS block using the image stride w.
          // The previous offset (y << blockShift) ignored both the block
          // position and the stride, so every block read the same 64 ints.
          // FIX: keep only the lowest 8 bits of each packed pixel, so that a
          // single channel is summed, as the brightness comment above says.
          IntVector v = IntVector.fromArray(species, data, iBlockStart + y*w).and(0xFF);

          int leftSum = leftSums != null ? leftSums[y] : 0;
          sums[y] = v.reduceLanes(VectorOperators.ADD)+leftSum;

          // NOTE(review): leftSum is added to every lane here, so it enters
          // the column sums and block.sum once per lane - confirm intended.
          v = v.add(leftSum);
          vColSums = vColSums.add(v);
        }

        vColSums.intoArray(sums, blockSize);
        block.sum = vColSums.reduceLanes(VectorOperators.ADD);
      }
    }
  }

  // sum field of the block at block coordinates (bx, by);
  // 0 for negative coordinates
  int blockSum(int bx, int by) {
    ret bx < 0 || by < 0 ? 0 : getBlock(bx, by).sum;
  }

  // no bounds checking
  Block getBlock(int bx, int by) {
    ret blocks[by*blockW+bx];
  }

  // per-pixel data of a block, calculated on first access
  int[] getBlockData(int bx, int by) {
    Block block = getBlock(bx, by);
    if (block.data == null)
      calcData(bx, by, block);
    ret block.data;
  }

  // not implemented yet
  void calcData(int bx, int by, Block block) {
    todo();
  }

  // Stores the image size and allocates one Block per block cell.
  // Fails if w or h is not a multiple of the block size.
  private void alloc(int w, int h) {
    if ((w % blockSize) != 0 || (h % blockSize) != 0)
      fail("Need image dimensions divisible by " + blockSize + ": " + w + "*" + h);
    this.w = w;
    this.h = h;
    blockW = ratioRoundUp(w, blockSize);
    blockH = ratioRoundUp(h, blockSize);
    //int dataLength = blockSize*blockSize;
    blocks = repF_array Block(blockW*blockH, () -> new Block);
  }

  // get sum value at x, y
  // pixels outside of image are considered black
  public int getIIValue(int x, int y) {
    ifdef BWIntegralImage_CountAccesses
      ++accesses;
    endifdef
    if (x < 0 || y < 0 || x >= w || y >= h) ret 0;

    // FIX: combine the in-block coordinates with a shift of blockShift (3).
    // The previous shift by blockSize (8) produced indices up to 1799, far
    // outside the 64-entry data array of a block.
    int idx = ((x & (blockSize-1)) << blockShift) | (y & (blockSize-1));

    // NOTE(review): idx == 0 is the first pixel of a block; returning the
    // sum field of the block for it should be confirmed once calcData exists.
    ret idx == 0
      ? getBlock(x >> blockShift, y >> blockShift).sum
      : getBlockData(x >> blockShift, y >> blockShift)[idx];
  }

  // rectangle sum (x1, y1, x2, y2) divided by (x2-x1)*(y2-y1)
  public double getPixelAverage(int x1, int y1, int x2, int y2) {
    int area = (x2-x1)*(y2-y1);
    ret doubleRatio(bwIntegralImage_sumRect(this, x1, y1, x2, y2), area);
  }

  // single pixel = sum over the 1x1 rectangle at (x, y)
  int getPixel(int x, int y) {
    ret bwIntegralImage_sumRect(this, x, y, x+1, y+1);
  }

  int getPixel(Pt p) { ret getPixel(p.x, p.y); }

  public int getWidth() { ret w; }
  public int getHeight() { ret h; }

  // unoptimized
  public BufferedImage getBufferedImage() {
    ret scaleDownUsingIntegralImageBW(this, w, h).getBufferedImage();
  }
}