sclass AdaptiveIdentifierCompression { new SS shortenings; new MultiSet tokenCount; new SS expansions; S escapeWord = "xx"; S codeAlphabet = lowerCaseAlphabet(); ItIt newCodeMaker; S nextCode; int conflicts; settable int minCountToCompress = 1; void init { if (newCodeMaker == null) { newCodeMaker = allWordsOfAlphabet(codeAlphabet); newCode(); } } S encode(S token) { if (!isIdentifier(token)) ret token; init(); ret compressIdentifier(token); } S compressIdentifier(S token) { tokenCount.add(token); S code = shortenings.get(token); if (code != null) ret code; int count = tokenCount.get(token); if (count < minCountToCompress) ret token; if (eq(token, nextCode)) { newCode(); ret escapeWord + " " + token; } // create code for token code = nextCode; if (code != null) { newCode(); S conflict = expansions.get(code); if (conflict != null) { warn("conflict: " + token); ++conflicts; // TODO } shortenings.put(token, code); expansions.put(code, token); // first time, so return original token ret token; } // out of codes (unlikely), return as is ret token; } S newCode() { do { if (!newCodeMaker.hasNext()) null; nextCode = newCodeMaker.next(); } while (shortenings.containsKey(nextCode)); ret nextCode; } /// decoder bool escape; S decode(S token) { if (!isIdentifier(token)) ret escape ? "" : token; init(); if (escape) { escape = false; ret decoded(token); } if (eq(token, escapeWord)) { set escape; ret ""; } S expanded = expansions.get(token); if (expanded != null) ret decoded(expanded); ret decoded(token); } S decoded(S token) { compressIdentifier(token); ret token; } }