Libraryless. Click here for Pure Java version (2563L/16K).
// clever index of tokenized strings // maintaining one CI-sorted list per token position sclass PositionalTokenIndex { L<LLS> sorts = new L; // token index -> entries sorted by token at index bool dirty; *() {} *(Iterable<LS> toks) { addAll(toks); } void addAll(Iterable<LS> toks) { fOr (LS tok : toks) add(tok); } void add(LS tok) { for (int i = 1; i < l(tok); i += 2) { LL<S> idx = listGetOrCreate_List(sorts, i/2); idx.add(tok); } set dirty; } private void sort { for i over sorts: { int j = i*2+1; sortInPlaceByCalculatedFieldIC(sorts.get(i), func(LS tok) -> S { tok.get(j) }); } dirty = false; } // return all tokenizations with token t in position i // may return null LLS byToken(int i, S t) { clean(); LLS list = get(sorts, i); if (list == null) null; int start = binarySearchForStart(list, i, t); if (start < 0) null; int end = binarySearchForEnd(list, i, t, start); ret subList(list, start, end); } private int binarySearchForStart(LLS list, int position, S t) { int low = 0, high = list.size()-1; while (low <= high) { int mid = (low + high) >>> 1; S token = list.get(mid).get(position*2+1); int cmp = compareIC(token, t); if (cmp < 0) low = mid + 1; else if (cmp > 0) high = mid - 1; else // we found the target token, now check if we're at start if (mid == 0 || !eqic(list.get(mid-1).get(position*2+1), t)) ret mid; // yep, it's the start else high = mid - 1; // continue to search higher up } ret -1; } private int binarySearchForEnd(LLS list, int position, S t, int start) { int low = start+1, high = list.size()-1; while (low <= high) { int mid = (low + high) >>> 1; S token = list.get(mid).get(position*2+1); if (eqic(token, t)) low = mid + 1; // still in target range, go lower else // we found a different token, now check if we're at the end if (mid-1 == start || eqic(list.get(mid-1).get(position*2+1), t)) ret mid; // yep, it's the end else high = mid - 1; // continue to search higher up } ret low; } LLS fullList() { ret first(sorts); } // if you want to sort it ahead of time void clean() { if (dirty) sort(); } }
download show line numbers debug dex old transpilations
Travelled to 6 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1025688 |
Snippet name: | PositionalTokenIndex [OK] |
Eternal ID of this version: | #1025688/22 |
Text MD5: | 8f04881744542f85c46fddd354f95789 |
Transpilation MD5: | 0aa7216c098c99abd973bb522ab773d2 |
Author: | stefan |
Category: | javax / agi.blue |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2019-10-13 23:15:53 |
Source code size: | 2496 bytes / 88 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 285 / 749 |
Version history: | 21 change(s) |
Referenced in: | #1025695 - PositionalTokenIndex2 - PositionalTokenIndex grouped by token count [OK] #1034167 - Standard Classes + Interfaces (LIVE, continuation of #1003674) |