// A = element (e.g. a word) // B = instance (e.g. a text snippet) // should support concurrent lookups transient sclass ElementInstanceMatrix { new UniqueList instances; new Map index; swappable int instanceToNumber(B b) { ret instances.addOrGetIndex(b); } swappable B numberToInstance(int i) { ret instances.get(i); } swappable Set extractElements(B instance) { fail("implement me"); } bool doneAdding; // each word can be represented as either an int array, a bit set or both // for now, the int array is always there sclass Entry { new Set intSet; // while building the index int[] intArray; BitSet bitSet; BitSet bitSet() { if (bitSet == null) bitSet = intArrayToBitSet(intArray); ret bitSet; } int count() { ret intArray.length; } } *() {} void add(B instance, Cl elements default extractElements(instance)) { assertFalse(doneAdding); int idx = instanceToNumber(instance); fOr (A word : elements) addMatrixDot(idx, word); } void addMatrixDot(int idx, A word) { Entry e = index.get(word); if (e == null) index.put(word, e = new Entry); e.intSet.add(idx); } // Call this exactly once before doing queries on the index void doneAdding() { if (doneAdding) ret; set doneAdding; for (Entry e : values(index)) { e.intArray = sortIntArrayInPlace(toIntArray(e.intSet)); e.intSet = null; } } L instancesContainingElement(A element) { ret instanceListFromIntArray(instancesContainingElement_intArray(element)); } int[] instancesContainingElement_intArray(A element) { doneAdding(); Entry e = index.get(element); ret e == null ? null : e.intArray; } L instanceListFromIntArray(int[] array) { ret array == null ? null : listFromFunction(array.length, i -> numberToInstance(array[i])); } // may return null (=empty list) L instancesContainingAllElements(Cl elements) { ret instanceListFromIntArray(instancesContainingAllElements_intArray(elements)); } // may return null (=empty list). also returns null for 0 elements int[] instancesContainingAllElements_intArray(Cl elements) { doneAdding(); int n = l(elements); if (n == 0) null; if (n == 1) ret instancesContainingElement_intArray(first(elements)); // get entries for elements, exit when an element is unknown Entry[] entries = new[n]; Iterator itElements = iterator(elements); for i to n: { Entry e = index.get(itElements.next()); if (e == null) null; entries[i] = e; } // go through elements again, AND-combine all bit sets BitSet bs = cloneBitSet(entries[0].bitSet()); for (int i = 1; i < n; i++) bs.and(entries[i].bitSet()); ret bitSetToIntArray(bs); } L instancesThatAreSuperSetsOf(B instance, O... _) { ret instancesContainingAllElements(extractElements(instance)); } NavigableSet elements() { ret (NavigableSet) keys(index); } int numElements() { ret l(index); } }