!7 cmodule TheoryMaker > DynConvo { /* 1. measurable features (fields of object) 2. labels (words the user throws in) 3. make theories (random connectors between features and labels) 4. check theories 1. show a random line 2. user types keyword 3. assign keyword to line 4. check if prediction was correct Basic theory making ------------------- For any label X: test theory (for every M: M has label X) test theory (for every M: M doesn't have label X) For any feature F: for every seen value V of F: for every label X: test theory (for every M: msg M's feature F has value V => msg has label x)) test theory (for every M: msg M's feature F has value V => msg doesn't have label x)) */ /* Theory: a rule (statement.lhs => statement.rhs) plus a PosNeg "examples" field; how examples is filled is not visible in this section. toString prints "Every message is " + rhs when lhs is MPTrue, lhs + " <=> " + rhs when bidiMode is set, otherwise the statement itself. */ srecord Theory(BasicLogicRule statement) { new PosNeg examples; //bool iff; // <=> instead of only => toString { ret str(statement.lhs instanceof MPTrue ? "Every message is " + statement.rhs : bidiMode ? statement.lhs + " <=> " + statement.rhs : statement); } } // propositions about a message. check returns null if unknown asclass MsgProp { abstract Bool check(Msg msg); } /* MPTrue: proposition whose check yields true for every message. */ srecord MPTrue() > MsgProp { Bool check(Msg msg) { true; } toString { ret "always"; } } /* HasLabel: returns the value stored for (msg, label) in msg2label_new as-is. */ record HasLabel(S label) > MsgProp { Bool check(Msg msg) { ret msg2label_new.get(msg, label); } toString { ret label; } } /* DoesntHaveLabel: negates the value stored for (msg, label) via not(). NOTE(review): what not() does with a missing (null) entry is not visible here - confirm it yields null rather than throwing. */ record DoesntHaveLabel(S label) > MsgProp { Bool check(Msg msg) { ret not(msg2label_new.get(msg, label)); } toString { ret "not " + label; } } /* FeatureValueIs: compares getMsgFeature(msg, feature) with value using eq. */ record FeatureValueIs(S feature, O value) > MsgProp { Bool check(Msg msg) { ret eq(getMsgFeature(msg, feature), value); } toString { ret feature + "=" + value; } } /* Label: a label name plus its theories in a set ordered by reverseComparatorFromCalculatedField theoryScore() - presumably highest score first, TODO confirm. score() and bestTheory() both read first(bestTheories). */ class Label { S name; *() {} *(S *name) {} TreeSetWithDuplicates bestTheories = new(reverseComparatorFromCalculatedField theoryScore()); int score() { ret theoryScore(first(bestTheories)); } Theory bestTheory() { ret first(bestTheories); } } /* predictionsForMsg skips predictions whose adjusted confidence is below this value. */ switchable double minAdjustedScoreToDisplay = 50; /* When set, sendInput2 calls showRandomMsg after storing a label. */ switchable bool autoNext = false; static bool bidiMode = true; // treat all theories as bidirectional L msgs; // full dialog L 
shownMsgs; /* Feature map per message, produced through calcMsgFeatures. */ transient Map> msg2features = AutoMap<>(lambda1 calcMsgFeatures); new LinkedHashSet theories; S analysisText; transient JTable theoryTable, labelsTable, trainedExamplesTable; transient JTabbedPane tabs; transient SingleComponentPanel scpPredictions; transient new Map labelsByName; new Set allLabels; transient new L> onNewLabel; /* Trained labels keyed by (message, label); sendInput2 stores true for a plain label and false for "not ..." input. */ new DoubleKeyedMap msg2label_new; transient new Map> featureExtractors; transient Q thinkQ; /* FeatureEnv: what a feature extractor receives - the main object and by-name access to features. */ sinterface FeatureEnv { A mainObject(); O getFeature(S name); } sinterface FeatureExtractor { O get(FeatureEnv env); } /* start: creates the "Thought Queue" and queues one job that rebuilds allLabels from msg2label_new.bKeys(), registers the onNewLabel listeners, registers one feature extractor per field of Msg plus makeTextExtractors("text"), runs callFAllOnAll(onNewLabel, allLabels), loads msgs from mainCruddieLog() when empty, and shows a random message. */ start { thinkQ = dm_startQ("Thought Queue"); thinkQ.add(r { // legacy + after deletion cleaning setField(allLabels := asTreeSet(msg2label_new.bKeys())); updateLabelsByName(); onNewLabel.add(lbl -> change()); makeTheoriesAboutLabels(); makeTheoriesAboutFeaturesAndLabels(); for (S field : fields(Msg)) featureExtractors.put(field, env -> getOpt(env.mainObject(), field)); makeTextExtractors("text"); callFAllOnAll(onNewLabel, allLabels); msg2labelUpdated(); if (empty(msgs)) setField(msgs := mainCruddieLog()); showRandomMsg(); }); } /* Registers an onNewLabel listener that adds the two unconditional theories for a label: MPTrue => HasLabel and MPTrue => DoesntHaveLabel. */ void makeTheoriesAboutLabels { // For any label X: onNewLabel.add(lbl -> { // test theory (for every M: M has label X) addTheory(new Theory(BasicLogicRule(new MPTrue, new HasLabel(lbl)))); // test theory (for every M: M doesn't have label X) addTheory(new Theory(BasicLogicRule(new MPTrue, new DoesntHaveLabel(lbl)))); }); } /* Registers an onNewLabel listener that adds, for every key of featureExtractors and every value from possibleValuesOfFeatureRelatedToLabel, the theories FeatureValueIs => HasLabel and FeatureValueIs => DoesntHaveLabel. */ void makeTheoriesAboutFeaturesAndLabels { // for every label X: onNewLabel.add(lbl -> { // For any feature F: for (S feature : keys(featureExtractors)) // for every seen value V of F: for (O value : possibleValuesOfFeatureRelatedToLabel(feature, lbl)) for (O rhs : ll(new HasLabel(lbl), new DoesntHaveLabel(lbl))) // test theory (for every M: msg M's feature F has value V => msg has/doesn't have label x)) addTheory(new Theory(BasicLogicRule( new FeatureValueIs(feature, value), rhs))); }); } /* Values known without looking at data: {false, true} when isBoolField(Msg, feature) holds, otherwise an empty set. */ Set possibleValuesOfFeature(S feature) { if (isBoolField(Msg, feature)) ret litset(false, 
true); ret litset(); } /* possibleValuesOfFeature(feature) extended by the feature value of every message returned by getMsgsRelatedToLabel(label). */ Set possibleValuesOfFeatureRelatedToLabel(S feature, S label) { Set set = possibleValuesOfFeature(feature); fOr (Msg msg : getMsgsRelatedToLabel(label)) set.add(getMsgFeature(msg, feature)); ret set; } // returns AutoMap with no realized entries Map calcMsgFeatures(Msg msg) { new Var> env; AutoMap map = new(feature -> featureExtractors.get(feature).get(env!)); env.set(new FeatureEnv { Msg mainObject() { ret msg; } O getFeature(S feature) { ret map.get(feature); } }); ret map; } /* Stores the list in shownMsgs and passes it to setMsgs. When exactly one message is shown, also fills analysisText (trained labels and features) and rebuilds the predictions panel with "Right"/"Wrong" buttons; otherwise analysisText is set to "". NOTE(review): the predictions panel is not reset in the else branch. */ void showMsgs(L l) { setField(shownMsgs := l); setMsgs(l); if (l(shownMsgs) == 1) { Msg msg = first(shownMsgs); setField(analysisText := joinWithEmptyLines( "Trained Labels: " + or2(renderBoolMap(getMsgLabels(msg)), "-"), "Features:\n" + formatColonProperties_quoteStringValues( msg2features.get(msg)) )); setSCPComponent(scpPredictions, scrollableStackWithSpacing(map(predictionsForMsg(msg), p -> withSideMargin(jLabelWithButtons(iround(p.adjustedConfidence) + "%: " + p.predictedLabel(), "Right", rThread { acceptPrediction(p) }, "Wrong", rThread { rejectPrediction(p) }))))); } else setField(analysisText := ""); } /* Re-runs showMsgs on the currently shown messages. */ void updatePredictions() { showMsgs(shownMsgs); } /* Prediction: a label with a sign (plus=false renders as "not " + label) and an adjusted confidence. */ srecord Prediction(S label, bool plus, double adjustedConfidence) { toString { ret predictedLabel() + " (confidence: " + iround(adjustedConfidence) + "%)"; } S predictedLabel() { ret (plus ? "" : "not ") + label; } } /* For each label in labelsByName: takes its best theory, evaluates the theory's left-hand side on msg via evalTheoryLHS, inverts the result when the right-hand side is DoesntHaveLabel, and emits a Prediction unless the adjusted confidence is below minAdjustedScoreToDisplay. Labels with no best theory or a null left-hand side result are skipped. */ L predictionsForMsg(Msg msg) { // positive labels first, then "not"s. sort by score in each group new L out; for (Label label : values(labelsByName)) { Theory t = label.bestTheory(), continue if null; Bool lhs = evalTheoryLHS(t, msg), continue if null; bool prediction = t.statement.rhs instanceof DoesntHaveLabel ? 
!lhs : lhs; double conf = threeB1BScore(t.examples), adjusted = adjustConfidence(conf); if (adjusted < minAdjustedScoreToDisplay) continue; out.add(new Prediction(label.name, prediction, adjusted)); } ret sortedByCalculatedFieldDesc(out, p -> pair(p.plus, p.adjustedConfidence)); } // go from range 50-100 to 0-100 (might look better) double adjustConfidence(double x) { ret max(0, (x-50)*2); } /* Passes randomElementAsList(msgs) to showMsgs. */ void showRandomMsg { showMsgs(randomElementAsList(msgs)); } /* Confirms a prediction by sending its predictedLabel() text through sendInput2; null is ignored. */ void acceptPrediction(Prediction p) { if (p != null) sendInput2(p.predictedLabel()); } /* Rejects a prediction by sending the predictedLabel() of a copy with plus flipped through sendInput2; null is ignored. */ void rejectPrediction(Prediction p) { if (p != null) sendInput2(cloneWithFlippedBoolField plus(p).predictedLabel()); } /* Treats the input as a label for the single shown message: input matching "not ..." stores the cleaned remainder with value false, any other input stores the cleaned input with value true. Does nothing unless exactly one message is shown. */ @Override void sendInput2(S s) { // treat input as a label if (l(shownMsgs) == 1) { Msg shown = first(shownMsgs); new Matches m; if "not ..." { S label = cleanLabel(m.rest()); doubleKeyedMapPutVerbose(+msg2label_new, shown, label, false); msg2labelUpdated(label); if (autoNext) showRandomMsg(); } else { S label = cleanLabel(s); doubleKeyedMapPutVerbose(+msg2label_new, shown, label, true); msg2labelUpdated(label); if (autoNext) showRandomMsg(); } change(); } } Map getMsgLabels(Msg msg) { ret msg2label_new.getA(msg); } Set getMsgsRelatedToLabel(S label) { ret msg2label_new.asForB(label); } /* Runs checkTheory on a copy of the label's bestTheories, then calls msg2labelUpdated(). NOTE(review): labelByName(label) is dereferenced before the no-argument overload registers new labels - confirm it cannot be null for a label that was just introduced. */ void msg2labelUpdated(S label) { for (Theory t : cloneList(labelByName(label).bestTheories)) checkTheory(t); msg2labelUpdated(); } /* Calls the onNewLabel listeners with the result of addAll_returnNew(allLabels, msg2label_new.bKeys()) - presumably only the labels not yet in allLabels, TODO confirm - and refreshes the trained-examples table. */ void msg2labelUpdated() { callFAllOnAll(onNewLabel, addAll_returnNew(allLabels, msg2label_new.bKeys())); updateTrainedExamplesTable(); } /* Fills trainedExamplesTable with one row per entry of msg2label_new.map1: the message text prefixed with "User" or "Bot", and its rendered label map. */ void updateTrainedExamplesTable { dataToTable_uneditable(trainedExamplesTable, map(msg2label_new.map1, (msg, map) -> litorderedmap( "Message" := (msg.fromUser ? 
"User" : "Bot") + ": " + msg.text, "Labels" := renderBoolMap(map)))); } /* Builds the UI: a split pane holding (a) the focused message, the analysis text area and the predictions panel and (b) a tabbed pane with the labels, theories and trained-examples tables. Tab titles start empty and are set by updateTabs. */ JComponent mainPart() { ret jhsplit(jvsplit( jCenteredSection("Focused Message", super.mainPart()), jhsplit( jCenteredSection("Message Analysis", dm_textArea analysisText()), jCenteredSection("Predictions", scpPredictions = singleComponentPanel()) )), with(r updateTabs, tabs = jtabs( "", with(r updateLabelsTable, labelsTable = sexyTable()), "", with(r updateTheoryTable, tableWithSearcher2_returnPanel(theoryTable = sexyTable())), "", with(r updateTrainedExamplesTable, tableWithSearcher2_returnPanel(trainedExamplesTable = sexyTable())) ))); } /* Sets the three tab titles from labelsByName, theories and the message keys of msg2label_new. */ void updateTabs { setTabTitles(tabs, firstLetterToUpper(nLabels(labelsByName)), firstLetterToUpper(nTheories(theories)), n2(msg2label_new.aKeys(), "Trained Example")); } /* Shows all theories sorted descending by examples.score() (the sort key is null when a theory's examples field is null), with columns "Score" and "Theory". */ void updateTheoryTable { L sorted = sortedByCalculatedFieldDesc(theories, t -> t.examples == null ? null : t.examples.score()); dataToTable_uneditable(theoryTable, map(sorted, t -> litorderedmap( "Score" := renderTheoryScore(t), "Theory" := str(t)))); } /* Groups theories by targetLabelOfTheory and keeps, per label, the one selected by highestBy theoryScore. */ Map labelsToBestTheoryMap() { Map> map = multiMapToMap(multiMapIndex targetLabelOfTheory(theories)); ret mapValues(map, theories -> highestBy theoryScore(theories)); } void updateLabelsTable { L