// supports 8 different characters sclass SpecialCharsHierarchicalBitMap { S specialChars; int minLevel = 1, maxLevel = 8; byte[] input; byte[] charIndex = new byte[256]; byte[][] tables; *() {} *(byte[] *input) {} *(S specialChars, byte[] *input) {} selfType specialChar(S s, int idx) { int n = l(s); for i to n: specialChar(s.charAt(i), idx); this; } selfType specialChar(char c, int idx) { if (idx < 1 || idx > 8) fail("Char index out of range (must be 1 through 8) - " + idx); charIndex[ubyteToInt(c)] |= (byte) (1 << (idx-1)); this; } // These probably must all be in the first character page. // TODO: check that selfType specialChars(S s) { specialChars = s; // make charIndex table var m = l(specialChars); if (m > 8) fail("Too many special chars (max: 8) - " + m); for i to m: charIndex[ubyteToInt(specialChars.charAt(i))] = (byte) (1 << i); this; } void analyze(S input) { this.input = toUtf8(input); // XXX probably wrong analyze(); } void analyze(byte[] input) { this.input = input; analyze(); } void analyze { // allocate logarithmic tables var input = this.input; int n = l(input), allLevels = ld_iceil(n), n2 = 1 << allLevels; int minLevel = this.minLevel; int representedLevels = max(0, maxLevel+1-minLevel); int n3 = n2 >> minLevel; ifdef SpecialCharsHierarchicalBitMap_debug printVars(+n, +allLevels, +minLevel, +maxLevel, +representedLevels, +n2, +n3); endifdef tables = new byte[representedLevels][]; int n4 = n3; for level to representedLevels: { // TODO: this can probably be up to 50% shorter tables[level] = new byte[n4]; ifdef SpecialCharsHierarchicalBitMap_debug printVars(+level, +n4); endifdef n4 >>= 1; } for i to n: { byte mask = charIndex[ubyteToInt(input[i])]; if (mask == 0) continue; int j = i >> minLevel; for level to representedLevels: { var tbl = tables[level]; var val = tbl[j]; byte val2 = (byte) (val | mask); if (val != val2) tbl[j] = val2; else break; } } } }