// supports 8 different characters sclass SpecialCharsHierarchicalBitMap { S specialChars; int minLevel = 1, maxLevel = 8; byte[] input; byte[] charIndex = new byte[256]; byte[][] tables; int reachedLevel; // how many computation steps it took int steps; *() {} *(byte[] *input) {} *(S specialChars, byte[] *input) {} selfType specialChar(S s, int idx) { int n = l(s); for i to n: specialChar(s.charAt(i), idx); this; } selfType specialChar(char c, int idx) { if (idx < 1 || idx > 8) fail("Char index out of range (must be 1 through 8) - " + idx); charIndex[ubyteToInt(c)] |= (byte) (1 << (idx-1)); this; } // These probably must all be in the first character page. // TODO: check that selfType specialChars(S s) { specialChars = s; // make charIndex table var m = l(specialChars); if (m > 8) fail("Too many special chars (max: 8) - " + m); for i to m: charIndex[ubyteToInt(specialChars.charAt(i))] = (byte) (1 << i); this; } void analyze(S input) { this.input = toUtf8(input); // XXX probably wrong analyze(); } void analyze(byte[] input) { this.input = input; analyze(); } void analyze { // allocate logarithmic tables var input = this.input; // n is actual input length // allLevels is size of n in bits // n2 is n rounded up to power of two int n = l(input), allLevels = ld_iceil(n), n2 = 1 << allLevels; int minLevel = this.minLevel; // reachedLevel is the minimum of allLevels and maxLevel reachedLevel = min(allLevels, maxLevel); // representedLevels is how many array elements we need int representedLevels = max(0, reachedLevel+1-minLevel); // n3 is the table length for the first represented level int n3 = n2 >> minLevel; ifdef SpecialCharsHierarchicalBitMap_debug printVars(+n, +allLevels, +minLevel, +maxLevel, +representedLevels, +n2, +n3); endifdef tables = new byte[representedLevels][]; int n4 = n3; for level to representedLevels: { // TODO: this can probably be up to 50% shorter tables[level] = new byte[n4]; ifdef SpecialCharsHierarchicalBitMap_debug printVars(+level, +n4); endifdef n4 >>= 1; } int steps = n; // for each outer iteration for i to n: { int c = ubyteToInt(input[i]); byte mask = charIndex[c]; ifdef SpecialCharsHierarchicalBitMap_debug printVars(+i, +c, +mask); endifdef if (mask == 0) continue; int j = i >> minLevel; for level to representedLevels: { ++steps; var tbl = tables[level]; ifdef SpecialCharsHierarchicalBitMap_debug printVars(+level, len := l(tables[level]), +j); endifdef var val = tbl[j]; byte val2 = (byte) (val | mask); ifdef SpecialCharsHierarchicalBitMap_debug printVars(+val, +val2); endifdef if (val == val2) break; tbl[j] = val2; j >>= 1; } } this.steps = steps; } S stats() { ret renderVars( inputLength := l(input), +steps, representedLevels := l(tables), cells := lengthLevel2_byteArrays(tables)); } int representedLevels() { ret l(tables); } int minLevel() { ret minLevel; } int reachedLevel() { ret reachedLevel; } byte[] table(int level) { ret tables[level-minLevel]; } }