Uses 911K of libraries. Click here for Pure Java version (18263L/98K).
!7

// TheoryMaker: an interactive message classifier built on top of DynConvo.
// The user labels messages; the module generates simple logical "theories"
// connecting message features to labels, scores each theory against the
// known messages and uses each label's best theory to show predictions.
cmodule TheoryMaker > DynConvo {
  /*
  1. measurable features (fields of object)
  2. labels (words the user throws in)
  3. make theories (random connectors between features and labels)
  4. check theories

  1. show a random line
  2. user types keyword
  3. assign keyword to line
  4. check if prediction was correct

  Basic theory making
  -------------------

  For any label X:
    test theory (for every M: M has label X)
    test theory (for every M: M doesn't have label X)

  For any feature F:
    for every seen value V of F:
      for every label X:
        test theory (for every M: msg M's feature F has value V => msg has label X)
        test theory (for every M: msg M's feature F has value V => msg doesn't have label X)
  */

  // A theory is a logic rule (lhs => rhs, or lhs <=> rhs in bidiMode)
  // plus the messages that confirmed/refuted it the last time it was checked.
  srecord Theory(BasicLogicRule statement) {
    new PosNeg<Msg> examples;
    //bool iff; // <=> instead of only =>

    toString {
      ret str(statement.lhs instanceof MPTrue ? "Every message is " + statement.rhs
        : bidiMode ? statement.lhs + " <=> " + statement.rhs
        : statement);
    }
  }

  // propositions about a message. check returns null if unknown
  asclass MsgProp {
    abstract Bool check(Msg msg);
  }

  // The proposition that holds for every message.
  srecord MPTrue() > MsgProp {
    Bool check(Msg msg) { true; }
    toString { ret "always"; }
  }

  // "msg carries this label". Returns whatever is stored in msg2label_new
  // for (msg, label), i.e. null when the user never judged that pair.
  record HasLabel(S label) > MsgProp {
    Bool check(Msg msg) { ret msg2label_new.get(msg, label); }
    toString { ret label; }
  }

  // "msg does not carry this label". Unknown (null) stays unknown instead
  // of being fed into a boolean negation.
  record DoesntHaveLabel(S label) > MsgProp {
    Bool check(Msg msg) {
      Bool has = msg2label_new.get(msg, label);
      ret has == null ? null : !has;
    }
    toString { ret "not " + label; }
  }

  // "feature F of msg equals value V" (compared with eq).
  record FeatureValueIs(S feature, O value) > MsgProp {
    Bool check(Msg msg) { ret eq(getMsgFeature(msg, feature), value); }
    toString { ret feature + "=" + value; }
  }

  // Per-label collector: keeps the theories targeting this label ordered
  // by theoryScore (reversed comparator, so first() is the best one).
  class Label {
    S name;

    *() {}
    *(S *name) {}

    TreeSetWithDuplicates<Theory> bestTheories = new(reverseComparatorFromCalculatedField theoryScore());

    int score() { ret theoryScore(first(bestTheories)); }
    Theory bestTheory() { ret first(bestTheories); }
  }

  // predictions with an adjusted confidence below this are hidden
  switchable double minAdjustedScoreToDisplay = 50;
  // jump to another random message after each label input
  switchable bool autoNext = false;
  static bool bidiMode = true; // treat all theories as bidirectional

  L<Msg> msgs; // full dialog
  L<Msg> shownMsgs;
  // lazily computed feature maps, one per message (see calcMsgFeatures)
  transient Map<Msg, Map<S, O>> msg2features = AutoMap<>(lambda1 calcMsgFeatures);
  new LinkedHashSet<Theory> theories;
  S analysisText;
  transient JTable theoryTable, labelsTable, trainedExamplesTable;
  transient JTabbedPane tabs;
  transient SingleComponentPanel scpPredictions;
  transient new Map<S, Label> labelsByName;
  new Set<S> allLabels;
  // listeners invoked once for every label (see start and msg2labelUpdated)
  transient new L<IVF1<S>> onNewLabel;
  // (message, label) -> true / false as entered by the user
  new DoubleKeyedMap<Msg, S, Bool> msg2label_new;
  transient new Map<S, FeatureExtractor<Msg>> featureExtractors;
  transient Q thinkQ;

  // What a feature extractor gets to see: the object itself plus
  // access to other (possibly derived) features by name.
  sinterface FeatureEnv<A> {
    A mainObject();
    O getFeature(S name);
  }

  sinterface FeatureExtractor<A> {
    O get(FeatureEnv<A> env);
  }

  // Module start-up. Everything runs inside the "Thought Queue" so that
  // initialisation and later user-triggered work are serialised there.
  start {
    thinkQ = dm_startQ("Thought Queue");
    thinkQ.add(r {
      // legacy + after deletion cleaning
      setField(allLabels := asTreeSet(msg2label_new.bKeys()));

      updateLabelsByName();

      onNewLabel.add(lbl -> change());
      makeTheoriesAboutLabels();
      makeTheoriesAboutFeaturesAndLabels();

      // one extractor per field of Msg, then derived extractors on "text"
      for (S field : fields(Msg))
        featureExtractors.put(field, env -> getOpt(env.mainObject(), field));
      makeTextExtractors("text");

      // replay the listeners for all labels already known
      callFAllOnAll(onNewLabel, allLabels);
      msg2labelUpdated();

      if (empty(msgs)) setField(msgs := mainCruddieLog());
      showRandomMsg();
    });
  }

  // Registers the listener that creates the two unconditional theories
  // ("every message is X" / "every message is not X") for a label.
  void makeTheoriesAboutLabels {
    // For any label X:
    onNewLabel.add(lbl -> {
      // test theory (for every M: M has label X)
      addTheory(new Theory(BasicLogicRule(new MPTrue, new HasLabel(lbl))));

      // test theory (for every M: M doesn't have label X)
      addTheory(new Theory(BasicLogicRule(new MPTrue, new DoesntHaveLabel(lbl))));
    });
  }

  // Registers the listener that creates feature-based theories for a label.
  void makeTheoriesAboutFeaturesAndLabels {
    // for every label X:
    onNewLabel.add(lbl -> {
      // For any feature F:
      for (S feature : keys(featureExtractors))
        // for every seen value V of F:
        for (O value : possibleValuesOfFeatureRelatedToLabel(feature, lbl))
          for (O rhs : ll(new HasLabel(lbl), new DoesntHaveLabel(lbl)))
            // test theory (for every M: msg M's feature F has value V => msg has/doesn't have label X)
            addTheory(new Theory(BasicLogicRule(
              new FeatureValueIs(feature, value), rhs)));
    });
  }

  // Values a feature can take independent of any data:
  // {false, true} for bool fields of Msg, otherwise an empty set.
  Set possibleValuesOfFeature(S feature) {
    if (isBoolField(Msg, feature)) ret litset(false, true);
    ret litset();
  }

  // possibleValuesOfFeature plus every value actually observed on the
  // messages that have an entry for the given label.
  Set possibleValuesOfFeatureRelatedToLabel(S feature, S label) {
    Set set = possibleValuesOfFeature(feature);
    fOr (Msg msg : getMsgsRelatedToLabel(label))
      set.add(getMsgFeature(msg, feature));
    ret set;
  }

  // returns AutoMap with no realized entries
  // The env is handed to the extractors through a Var because the env's
  // getFeature reads from the very map that is being constructed here.
  Map<S, O> calcMsgFeatures(Msg msg) {
    new Var<FeatureEnv<Msg>> env;
    AutoMap<S, O> map = new(feature -> featureExtractors.get(feature).get(env!));
    env.set(new FeatureEnv<Msg> {
      Msg mainObject() { ret msg; }
      O getFeature(S feature) { ret map.get(feature); }
    });
    ret map;
  }

  // Shows the given messages. When exactly one message is shown, also
  // fills the analysis text (trained labels + features) and the
  // predictions panel with Right/Wrong buttons; otherwise the analysis
  // text is blanked.
  void showMsgs(L<Msg> l) {
    setField(shownMsgs := l);
    setMsgs(l);
    if (l(shownMsgs) == 1) {
      Msg msg = first(shownMsgs);
      setField(analysisText := joinWithEmptyLines(
        "Trained Labels: " + or2(renderBoolMap(getMsgLabels(msg)), "-"),
        "Features:\n" + formatColonProperties_quoteStringValues(
          msg2features.get(msg))
      ));
      setSCPComponent(scpPredictions, scrollableStackWithSpacing(map(predictionsForMsg(msg), p ->
        withSideMargin(jLabelWithButtons(iround(p.adjustedConfidence) + "%: " + p.predictedLabel(),
          "Right", rThread { acceptPrediction(p) },
          "Wrong", rThread { rejectPrediction(p) })))));
    } else
      setField(analysisText := "");
  }

  // Re-renders the currently shown messages, which recomputes predictions.
  void updatePredictions() {
    showMsgs(shownMsgs);
  }

  // One predicted label for a message. plus = false means "not <label>".
  srecord Prediction(S label, bool plus, double adjustedConfidence) {
    toString {
      ret predictedLabel() + " (confidence: " + iround(adjustedConfidence) + "%)";
    }

    S predictedLabel() {
      ret (plus ? "" : "not ") + label;
    }
  }

  // Applies every label's best theory to msg.
  // Labels without a theory, with an unknown LHS or with an adjusted
  // confidence below minAdjustedScoreToDisplay yield no prediction.
  L<Prediction> predictionsForMsg(Msg msg) {
    // positive labels first, then "not"s. sort by score in each group
    new L<Prediction> out;
    for (Label label : values(labelsByName)) {
      Theory t = label.bestTheory(), continue if null;
      Bool lhs = evalTheoryLHS(t, msg), continue if null;
      // a theory concluding "not X" predicts X exactly when its LHS is false
      bool prediction = t.statement.rhs instanceof DoesntHaveLabel ? !lhs : lhs;
      double conf = threeB1BScore(t.examples), adjusted = adjustConfidence(conf);
      if (adjusted < minAdjustedScoreToDisplay) continue;
      out.add(new Prediction(label.name, prediction, adjusted));
    }
    ret sortedByCalculatedFieldDesc(out, p -> pair(p.plus, p.adjustedConfidence));
  }

  // go from range 50-100 to 0-100 (might look better)
  double adjustConfidence(double x) {
    ret max(0, (x-50)*2);
  }

  void showRandomMsg {
    showMsgs(randomElementAsList(msgs));
  }

  // "Right" button: feed the predicted label back in as user input.
  void acceptPrediction(Prediction p) {
    if (p != null) sendInput2(p.predictedLabel());
  }

  // "Wrong" button: feed the opposite of the predicted label back in.
  void rejectPrediction(Prediction p) {
    if (p != null) sendInput2(cloneWithFlippedBoolField plus(p).predictedLabel());
  }

  // User input is interpreted as a label for the single shown message.
  // "not X" stores X = false, anything else stores the input = true.
  // Ignored unless exactly one message is shown.
  @Override
  void sendInput2(S s) {
    // treat input as a label
    if (l(shownMsgs) == 1) {
      Msg shown = first(shownMsgs);
      new Matches m;
      S rawLabel;
      bool positive;
      if "not ..." {
        rawLabel = m.rest();
        positive = false;
      } else {
        rawLabel = s;
        positive = true;
      }
      S label = cleanLabel(rawLabel);
      doubleKeyedMapPutVerbose(+msg2label_new, shown, label, positive);
      msg2labelUpdated(label);
      if (autoNext) showRandomMsg();
      change();
    }
  }

  Map<S, Bool> getMsgLabels(Msg msg) {
    ret msg2label_new.getA(msg);
  }

  Set<Msg> getMsgsRelatedToLabel(S label) {
    ret msg2label_new.asForB(label);
  }

  // Called after the user (re)judged `label` on some message.
  void msg2labelUpdated(S label) {
    // Register new labels first: the onNewLabel listeners create the
    // theories for a brand-new label, and those need to be scored below.
    msg2labelUpdated();

    // Re-score all theories about this label, then refresh the UI once
    // for the whole batch instead of once per theory.
    bool any = false;
    for (Theory t : cloneList(labelByName(label).bestTheories)) {
      checkTheory_noTrigger(t);
      any = true;
    }
    if (any) theoriesChanged();
  }

  // Fires onNewLabel for labels that were not yet in allLabels and
  // refreshes the trained-examples table.
  void msg2labelUpdated() {
    callFAllOnAll(onNewLabel, addAll_returnNew(allLabels, msg2label_new.bKeys()));
    updateTrainedExamplesTable();
  }

  void updateTrainedExamplesTable {
    dataToTable_uneditable(trainedExamplesTable, map(msg2label_new.map1, (msg, map) ->
      litorderedmap(
        "Message" := (msg.fromUser ? "User" : "Bot") + ": " + msg.text,
        "Labels" := renderBoolMap(map))));
  }

  // Layout: left = focused message above analysis text and predictions,
  // right = tabs with the labels, theories and trained-examples tables.
  // Tab titles start empty and are filled in by updateTabs.
  JComponent mainPart() {
    ret jhsplit(jvsplit(
      jCenteredSection("Focused Message", super.mainPart()),
      jhsplit(
        jCenteredSection("Message Analysis", dm_textArea analysisText()),
        jCenteredSection("Predictions", scpPredictions = singleComponentPanel())
      )),
      with(r updateTabs, tabs = jtabs(
        "", with(r updateLabelsTable, labelsTable = sexyTable()),
        "", with(r updateTheoryTable, tableWithSearcher2_returnPanel(theoryTable = sexyTable())),
        "", with(r updateTrainedExamplesTable, tableWithSearcher2_returnPanel(trainedExamplesTable = sexyTable()))
      )));
  }

  void updateTabs {
    setTabTitles(tabs,
      firstLetterToUpper(nLabels(labelsByName)),
      firstLetterToUpper(nTheories(theories)),
      n2(msg2label_new.aKeys(), "Trained Example"));
  }

  void updateTheoryTable {
    L<Theory> sorted = sortedByCalculatedFieldDesc(theories,
      t -> t.examples == null ? null : t.examples.score());
    dataToTable_uneditable(theoryTable, map(sorted, t -> litorderedmap(
      "Score" := renderTheoryScore(t),
      "Theory" := str(t))));
  }

  // Groups all theories by target label and keeps the highest-scoring one.
  Map<S, Theory> labelsToBestTheoryMap() {
    Map<S, L<Theory>> map = multiMapToMap(multiMapIndex targetLabelOfTheory(theories));
    ret mapValues(map, theories -> highestBy theoryScore(theories));
  }

  void updateLabelsTable {
    L<Label> sorted = sortedByCalculatedFieldDesc(values(labelsByName), l -> l.score());
    dataToTable_uneditable(labelsTable, map(sorted, label -> {
      Cl<Theory> bestTheories = label.bestTheories.tiedForFirst();
      ret litorderedmap(
        "Label" := label.name,
        "Prediction Confidence" := renderTheoryScore(first(bestTheories)),
        // "[+n]" = number of further theories tied for first place
        "Best Theory" := empty(bestTheories) ? "" :
          (l(bestTheories) > 1 ? "[+" + (l(bestTheories)-1) + "] " : "")
            + first(bestTheories));
    }));
  }

  // Score column text; empty for a missing theory or one without examples.
  S renderTheoryScore(Theory t) {
    //ret renderPosNegCounts(t.examples);
    ret t == null || t.examples == null || t.examples.isEmpty() ? ""
      : iround(adjustConfidence(threeB1BScore(t.examples))) + "%"
        + " / " + renderPosNegScoreAndCount(t.examples);
  }

  // Ordering key for theories. A missing theory (or one whose examples
  // are missing, same guard as in updateTheoryTable) ranks at -100.
  int theoryScore(Theory t) {
    ret t == null || t.examples == null ? -100 : t.examples.score();
  }

  // Refreshes every view that depends on the theories and marks the
  // module as changed.
  void theoriesChanged {
    updateTheoryTable();
    updateLabelsTable();
    updateTabs();
    updatePredictions();
    change();
  }

  visual withCenteredButtons(super,
    "Show random msg", rInThinkQ(r showRandomMsg),
    jPopDownButton_noText(flattenObjectArray(
      "Check theories", rInThinkQ(r checkAllTheories),
      "Clear theories", rInThinkQ(r clearTheories),
      "Update predictions", rInThinkQ(r updatePredictions),
      dm_importAndExportAllDataMenuItems())));

  // Wraps r so that it is executed in thinkQ.
  Runnable rInThinkQ(Runnable r) {
    ret rInQ(thinkQ, r);
  }

  // Adds a theory unless it is already in the set.
  void addTheory(Theory theory) {
    if (theories.add(theory)) {
      print("New theory: " + theory);
      addTheoryToCollectors(theory);
      theoriesChanged();
    }
  }

  // Removes all theories, including their entries in the per-label
  // collectors (otherwise labels would keep presenting deleted theories
  // as their best theory).
  void clearTheories {
    for (Theory t : theories)
      removeTheoryFromCollectors(t);
    theories.clear();
    theoriesChanged();
  }

  // Evaluates a proposition (And / Not / MsgProp) on a message using
  // three-valued logic: null means unknown.
  //   And: false if either side is false, else unknown if either side
  //        is unknown, else true. The right side is not evaluated when
  //        the left side is false.
  //   Not: unknown stays unknown.
  Bool checkMsgProp(O prop, Msg msg) {
    if (prop instanceof And) {
      Bool a = checkMsgProp(((And) prop).a, msg);
      if (isFalse(a)) ret false;
      Bool b = checkMsgProp(((And) prop).b, msg);
      if (isFalse(b)) ret false;
      ret a == null || b == null ? null : true;
    }
    if (prop instanceof Not) {
      Bool a = checkMsgProp(((Not) prop).a, msg);
      ret a == null ? null : !a;
    }
    ret ((MsgProp) prop).check(msg);
  }

  Bool evalTheoryLHS(Theory theory, Msg msg) {
    ret theory == null ? null : checkMsgProp(theory.statement.lhs, msg);
  }

  // Does msg confirm (true) or refute (false) the theory?
  // null when either side of the rule is unknown for this message.
  Bool testTheoryOnMsg(Theory theory, Msg msg) {
    Bool lhs = evalTheoryLHS(theory, msg);
    Bool rhs = checkMsgProp(theory.statement.rhs, msg);
    if (lhs == null || rhs == null) ret null;
    if (bidiMode)
      ret eq(lhs, rhs);
    else
      ret isTrue(rhs) || isFalse(lhs);
  }

  void checkAllTheories {
    for (Theory theory : theories) checkTheory_noTrigger(theory);
    theoriesChanged();
  }

  void checkTheory(Theory theory) {
    checkTheory_noTrigger(theory);
    theoriesChanged();
  }

  // Re-evaluates the theory on all messages without refreshing the UI.
  // The theory is taken out of its label collector before its examples
  // (and thereby its score, the collector's sort key) change, and is
  // re-inserted afterwards.
  void checkTheory_noTrigger(Theory theory) {
    new PosNeg<Msg> pn;
    for (Msg msg : msgs)
      pn.add(msg, testTheoryOnMsg(theory, msg));
    if (!eq(theory.examples, pn)) {
      removeTheoryFromCollectors(theory);
      theory.examples = pn;
      addTheoryToCollectors(theory);
      change();
    }
  }

  // Labels are normalised to upper case.
  S cleanLabel(S label) { ret upper(label); }

  // The label a theory makes a statement about, or null if its RHS is
  // neither HasLabel nor DoesntHaveLabel.
  S targetLabelOfTheory(Theory theory) {
    O o = theory.statement.rhs;
    if (o cast HasLabel) ret o.label;
    if (o cast DoesntHaveLabel) ret o.label;
    null;
  }

  void addTheoryToCollectors(Theory theory) {
    S lbl = targetLabelOfTheory(theory);
    if (lbl != null)
      labelByName(lbl).bestTheories.add(theory);
  }

  void removeTheoryFromCollectors(Theory theory) {
    S lbl = targetLabelOfTheory(theory);
    if (lbl != null)
      labelByName(lbl).bestTheories.remove(theory);
  }

  // Looks up the Label object for a name, creating it on first use.
  Label labelByName(S name) {
    ret getOrCreate(labelsByName, name, () -> new Label(name));
  }

  // Rebuilds the transient label index from allLabels and theories.
  void updateLabelsByName() {
    for (S lbl : allLabels) labelByName(lbl);
    for (Theory t : theories) addTheoryToCollectors(t);
  }

  O getMsgFeature(Msg msg, S feature) {
    ret msg2features.get(msg).get(feature);
  }

  // Registers every text extractor as a feature derived from the
  // feature named textFeature.
  void makeTextExtractors(S textFeature) {
    for (WithName<IF1<S, O>> f : textExtractors()) {
      IF1<S, O> theFunction = f!;
      featureExtractors.put(f.name, env -> theFunction.get((S) env.getFeature(textFeature)));
    }
  }

  // The derived text features: word count, character count and one
  // "contains <c>" feature for each character in "\"', .-_".
  L<WithName<IF1<S, O>>> textExtractors() {
    new L<WithName<IF1<S, O>>> l;
    l.add(WithName<>("number of words", lambda1 numberOfWords));
    l.add(WithName<>("number of characters", lambda1 l));
    for (char c : characters("\"', .-_"))
      l.add(WithName<>("contains " + quote(c), s -> contains(s, c)));
    ret l;
  }
}
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1027773 |
Snippet name: | Auto Classifier v1[learning message classifier] |
Eternal ID of this version: | #1027773/179 |
Text MD5: | 970ed7539dfbe1b678b0fc42a7f08fda |
Transpilation MD5: | 567e1aa73e99525599ccc46d760825dd |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-05-07 14:04:38 |
Source code size: | 14798 bytes / 463 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 252 / 5692 |
Version history: | 178 change(s) |
Referenced in: | -