Libraryless. Click here for Pure Java version (3730L/23K).
| 1 | transient sclass DeepWordPairIndex<A> {
 | 
| 2 | S regexp = "\\w+"; | 
| 3 | bool useHashMaps = true; // makes it case-sensitive and doesn't allow partial word searches | 
| 4 | new Map<A, Entry<A>> entries; | 
| 5 | new MultiSetMap<PairS, Entry<A>> entriesByPair; | 
| 6 | |
| 7 |   sclass Entry<A> extends Var<A> {
 | 
| 8 | new Map<PairS, int[]> pairPositions; // int array is sorted | 
| 9 | |
| 10 |     *(A id) { super(id); }
 | 
| 11 | } | 
| 12 | |
| 13 |   void useHashMaps() {
 | 
| 14 | set useHashMaps; | 
| 15 | entriesByPair = new MultiSetMap; | 
| 16 | } | 
| 17 | |
| 18 |   L<IntRange> wordRanges(S text) {
 | 
| 19 | ret regexpFindRanges(regexp, text); | 
| 20 | } | 
| 21 | |
| 22 |   void add(A a, S text) {
 | 
| 23 | Entry<A> e = new Entry<A>(a); | 
| 24 |     if (useHashMaps) {
 | 
| 25 | e.pairPositions = new HashMap; | 
| 26 | text = upper(text); | 
| 27 | } | 
| 28 |     if (entries.put(a, e) != null) fail("Double insertion");
 | 
| 29 | new MultiMap<PairS, Int> pairPositions; | 
| 30 |     for (Pair<IntRange> p : overlappingPairs(wordRanges(text))) {
 | 
| 31 | PairS pair = pair(substring(text, p.a), substring(text, p.b)); | 
| 32 | pairPositions.put(pair, p.a.start); | 
| 33 | entriesByPair.put(pair, e); | 
| 34 | } | 
| 35 | for (PairS pair : keys(pairPositions)) | 
| 36 | e.pairPositions.put(pair, toIntArray(pairPositions.get(pair))); | 
| 37 | } | 
| 38 | |
| 39 |   Set<Entry<A>> get(PairS pair) { ret entriesByPair.get(pair); }
 | 
| 40 | |
| 41 |   int numPairs() { ret entriesByPair.keysSize(); }
 | 
| 42 | |
| 43 |   Iterable<Pair<A, Cl<Int>>>lookupString_withPositions(S query, O... _) {
 | 
| 44 | optPar bool debug; | 
| 45 | if (useHashMaps) query = upper(query); | 
| 46 | S _query = query; | 
| 47 | L<IntRange> ranges = wordRanges(query); | 
| 48 | if (empty(ranges)) null; | 
| 49 | int nRanges = l(ranges); | 
| 50 | int iFirstComplete = first(ranges).start == 0 ? 1 : 0; | 
| 51 | int iLastComplete = last(ranges).end == l(query) ? nRanges-1 : nRanges; | 
| 52 | --iLastComplete; // because pairs | 
| 53 | |
| 54 | LS words = map(ranges, r -> substring(_query, r)); | 
| 55 | LPairS pairs = overlappingPairs(words); | 
| 56 | L<Set<Entry<A>>> entriesAtIndex = map(pairs, pair -> entriesByPair.get(pair)); | 
| 57 | |
| 58 |     if (iLastComplete >= iFirstComplete+1) {
 | 
| 59 | int shortest = iFirstComplete, nBest = l(entriesAtIndex.get(shortest)); | 
| 60 | if (nBest == 0) ret emptyList(); | 
| 61 |       for (int iWord = iFirstComplete+1; iWord < iLastComplete; iWord++) {
 | 
| 62 | int n = l(entriesAtIndex.get(iWord)); | 
| 63 | if (n == 0) ret emptyList(); | 
| 64 |         if (n < nBest) {
 | 
| 65 | shortest = iWord; | 
| 66 | nBest = n; | 
| 67 | } | 
| 68 | } | 
| 69 | |
| 70 |       /*if (debug)*/ print("pairs: " + zipTwoLists(pairs, lmap l(entriesAtIndex)));
 | 
| 71 | |
| 72 | Set<Entry<A>> entries = entriesAtIndex.get(shortest); | 
| 73 | int startShortest = ranges.get(shortest).start; | 
| 74 | PairS shortestPair = pairs.get(shortest); // not the shortest pair, but the pair with the shortest result list | 
| 75 | new IntBuffer intBuffer; | 
| 76 | |
| 77 | new LPair<A, Cl<Int>> out; | 
| 78 |       entrySearch: for (Entry<A> entry : entries) {
 | 
| 79 | int[] positions = entry.pairPositions.get(shortestPair); | 
| 80 | |
| 81 |         for (int iWord = iFirstComplete; iWord < iLastComplete; iWord++) {
 | 
| 82 | continue if iWord == shortest; | 
| 83 | IntRange r2 = ranges.get(iWord); | 
| 84 | PairS pair2 = pairs.get(iWord); | 
| 85 | int[] positions2 = entry.pairPositions.get(pair2); | 
| 86 | if (positions2 == null) continue entrySearch; | 
| 87 | int ofs = startShortest-r2.start; | 
| 88 |           //if (debug) print("Intersecting " + asList(positions) + "/" + asList(positions2) + " with ofs " + ofs);
 | 
| 89 | positions = intersectSortedIntArrays_ofs_optimized2(positions, positions2, ofs, intBuffer); | 
| 90 |           //if (debug) print("Got " + asList(positions));
 | 
| 91 | if (empty(positions)) continue entrySearch; | 
| 92 | } | 
| 93 | |
| 94 | out.add(pair(entry!, wrapIntArrayAsImmutableList_ofs(positions, -startShortest))); | 
| 95 | } | 
| 96 | ret out; | 
| 97 | } | 
| 98 | |
| 99 | null; | 
| 100 | } | 
| 101 | } | 
Began life as a copy of #1029024
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
| Snippet ID: | #1029042 | 
| Snippet name: | DeepWordPairIndex [dev.] | 
| Eternal ID of this version: | #1029042/13 | 
| Text MD5: | 031dd215db5e9dc11c14d88ea68dca35 | 
| Transpilation MD5: | 2cd245965f98520999330cd9f397c38a | 
| Author: | stefan | 
| Category: | javax | 
| Type: | JavaX fragment (include) | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2020-07-17 16:29:02 | 
| Source code size: | 3776 bytes / 101 lines | 
| Pitched / IR pitched: | No / No | 
| Views / Downloads: | 474 / 828 | 
| Version history: | 12 change(s) | 
| Referenced in: | [show references] |