Uses 911K of libraries. Click here for Pure Java version (19135L/103K).
!7

// AutoClassifier: a learning message classifier module.
// The user labels messages (e.g. "QUESTION" / "not QUESTION"); the module
// generates simple theories of the form "lhs => label" (or "lhs <=> label"
// when bidiMode is on), scores each theory against the trained examples and
// uses the best theory per label to predict labels for the shown message.
cmodule AutoClassifier > DynConvo {

  // THEORY BUILDING BLOCKS (Theory + MsgProp + subclasses)

  // A theory is a logic rule plus the messages that confirm/refute it.
  srecord Theory(BasicLogicRule statement) {
    new PosNeg<Msg> examples;
    //bool iff; // <=> instead of only =>

    toString {
      ret str(statement.lhs instanceof MPTrue
        ? "Every message is " + statement.rhs
        : bidiMode ? statement.lhs + " <=> " + statement.rhs
        : statement);
    }
  }

  // propositions about a message. check returns null if unknown
  asclass MsgProp {
    abstract Bool check(Msg msg);
  }

  // Proposition that holds for every message.
  srecord MPTrue() > MsgProp {
    Bool check(Msg msg) { true; }
    toString { ret "always"; }
  }

  // True/false as trained; null if the message has no entry for this label.
  record HasLabel(S label) > MsgProp {
    Bool check(Msg msg) { ret msg2label_new.get(msg, label); }
    toString { ret label; }
  }

  // Negation of HasLabel; null (unknown) if the message has no entry for this label.
  record DoesntHaveLabel(S label) > MsgProp {
    Bool check(Msg msg) {
      // Negate explicitly so an untrained (null) label stays "unknown"
      // and we never depend on not() accepting a null Bool.
      Bool has = msg2label_new.get(msg, label);
      ret has == null ? null : !has;
    }
    toString { ret "not " + label; }
  }

  // True iff the named feature of the message equals the given value.
  record FeatureValueIs(S feature, O value) > MsgProp {
    Bool check(Msg msg) { ret eq(getMsgFeature(msg, feature), value); }
    toString { ret feature + "=" + value; }
  }

  // LABEL class (with best theories)

  class Label {
    S name;

    *() {}
    *(S *name) {}

    // theories targeting this label, ordered by descending theoryScore
    TreeSetWithDuplicates<Theory> bestTheories = new(reverseComparatorFromCalculatedField theoryScore());

    double score() { ret theoryScore(first(bestTheories)); }
    Theory bestTheory() { ret first(bestTheories); }
  }

  // FEATURE base classes (FeatureEnv + FeatureExtractor)

  // Environment handed to a feature extractor: the object itself
  // plus access to its other features by name.
  sinterface FeatureEnv<A> {
    A mainObject();
    O getFeature(S name);
  }

  sinterface FeatureExtractor<A> {
    O get(FeatureEnv<A> env);
  }

  // PREDICTION class (output of classifier)

  srecord Prediction(S label, bool plus, double adjustedConfidence) {
    toString {
      ret predictedLabel() + " (confidence: " + iround(adjustedConfidence) + "%)";
    }

    // label name, prefixed with "not " for a negative prediction
    S predictedLabel() { ret (plus ? "" : "not ") + label; }
  }

  // DATA (backend)

  sbool bidiMode = true; // treat all theories as bidirectional
  L<Msg> msgs; // all messages (order not used yet); may be null before the first import
  transient Map<Msg, Map<S, O>> msg2features = AutoMap<>(lambda1 calcMsgFeatures);
  new Set<S> allLabels;
  transient new Map<S, Label> labelsByName;
  new LinkedHashSet<Theory> theories;
  transient Q thinkQ;
  transient new L<IVF1<S>> onNewLabel;
  new DoubleKeyedMap<Msg, S, Bool> msg2label_new;
  transient new Map<S, FeatureExtractor<Msg>> featureExtractors;

  // DATA (GUI)

  switchable double minAdjustedScoreToDisplay = 50;
  switchable bool autoNext = false;
  L<Msg> shownMsgs;
  S analysisText;
  transient JTable theoryTable, labelsTable, trainedExamplesTable, objectsTable;
  transient JTabbedPane tabs;
  transient SingleComponentPanel scpPredictions;

  // START CODE

  // Starts the thought queue and, inside it, rebuilds all transient state
  // (labels index, label listeners, feature extractors) and re-checks all theories.
  start {
    thinkQ = dm_startQ("Thought Queue");
    thinkQ.add(r {
      // legacy + after deletion cleaning
      setField(allLabels := asTreeSet(msg2label_new.bKeys()));

      updateLabelsByName();

      onNewLabel.add(lbl -> change());
      makeTheoriesAboutLabels();
      makeTheoriesAboutFeaturesAndLabels();

      // one extractor per field of Msg, then the derived text extractors
      for (S field : fields(Msg))
        featureExtractors.put(field, env -> getOpt(env.mainObject(), field));
      makeTextExtractors("text");

      callFAllOnAll(onNewLabel, allLabels);
      msg2labelUpdated();
      checkAllTheories();
      //showRandomMsg();
    });
  }

  // THEORY MAKING

  // Registers a label listener that adds the two unconditional theories per label.
  void makeTheoriesAboutLabels {
    // For any label X:
    onNewLabel.add(lbl -> {
      // test theory (for every M: M has label X)
      addTheory(new Theory(BasicLogicRule(new MPTrue, new HasLabel(lbl))));
      // test theory (for every M: M doesn't have label X)
      addTheory(new Theory(BasicLogicRule(new MPTrue, new DoesntHaveLabel(lbl))));
    });
  }

  // Registers a label listener that adds "feature=value => (not) label" theories.
  void makeTheoriesAboutFeaturesAndLabels {
    // for every label X:
    onNewLabel.add(lbl -> {
      // For any feature F:
      for (S feature : keys(featureExtractors))
        // for every seen value V of F:
        for (O value : possibleValuesOfFeatureRelatedToLabel(feature, lbl))
          for (O rhs : ll(new HasLabel(lbl), new DoesntHaveLabel(lbl)))
            // test theory (for every M: msg M's feature F has value V => msg has/doesn't have label x))
            addTheory(new Theory(BasicLogicRule(
              new FeatureValueIs(feature, value), rhs)));
    });
  }

  // THEORY MAKING (helper functions)

  // Values known a priori for a feature: {false, true} for boolean Msg fields, else empty.
  Set possibleValuesOfFeature(S feature) {
    if (isBoolField(Msg, feature)) ret litset(false, true);
    ret litset();
  }

  // A-priori values plus every value observed on messages trained for the label.
  Set possibleValuesOfFeatureRelatedToLabel(S feature, S label) {
    Set set = possibleValuesOfFeature(feature);
    fOr (Msg msg : getMsgsRelatedToLabel(label))
      set.add(getMsgFeature(msg, feature));
    ret set;
  }

  // CALCULATE FEATURES

  O getMsgFeature(Msg msg, S feature) {
    ret msg2features.get(msg).get(feature);
  }

  // returns AutoMap with no realized entries
  Map<S, O> calcMsgFeatures(Msg msg) {
    // env and map refer to each other, hence the Var indirection
    new Var<FeatureEnv<Msg>> env;
    AutoMap<S, O> map = new(feature -> featureExtractors.get(feature).get(env!));
    env.set(new FeatureEnv<Msg> {
      Msg mainObject() { ret msg; }
      O getFeature(S feature) { ret map.get(feature); }
    });
    ret map;
  }

  // GUI: Show messages

  // Shows the given messages; for exactly one message also fills the
  // analysis text and the predictions panel.
  void showMsgs(L<Msg> l) {
    setField(shownMsgs := l);
    setMsgs(l);
    if (l(shownMsgs) == 1) {
      Msg msg = first(shownMsgs);
      setField(analysisText := joinWithEmptyLines(
        "Trained Labels: " + or2(renderBoolMap(getMsgLabels(msg)), "-"),
        "Features:\n" + formatColonProperties_quoteStringValues(
          msg2features.get(msg))
      ));
      setSCPComponent(scpPredictions, scrollableStackWithSpacing(map(predictionsForMsg(msg), p ->
        withSideMargin(jLabelWithButtons(iround(p.adjustedConfidence) + "%: " + p.predictedLabel(),
          "Right", rThread { acceptPrediction(p) },
          "Wrong", rThread { rejectPrediction(p) })))));
    } else
      setField(analysisText := "");
  }

  void updatePredictions() {
    showMsgs(shownMsgs);
  }

  void showRandomMsg {
    showMsgs(randomElementAsList(msgs));
  }

  void showPrevMsg {
    showMsgs(llNonNulls(prevInCyclicList(msgs, first(shownMsgs))));
  }

  void showNextMsg {
    showMsgs(llNonNulls(nextInCyclicList(msgs, first(shownMsgs))));
  }

  // CALCULATE PREDICTIONS FOR MESSAGE

  // One prediction per label whose best theory has a known LHS for this message,
  // sorted by descending adjusted confidence (grouping by plus/minus is disabled).
  L<Prediction> predictionsForMsg(Msg msg) {
    new L<Prediction> out;
    for (Label label : values(labelsByName)) {
      Theory t = label.bestTheory(), continue if null;
      Bool lhs = evalTheoryLHS(t, msg), continue if null;
      // a "doesn't have label" theory predicts the opposite of its LHS
      bool prediction = t.statement.rhs instanceof DoesntHaveLabel ? !lhs : lhs;
      double conf = threeB1BScore(t.examples), adjusted = adjustConfidence(conf);
      //if (adjusted < minAdjustedScoreToDisplay) continue;
      out.add(new Prediction(label.name, prediction, adjusted));
    }
    ret sortedByCalculatedFieldDesc(out, p -> /*pair(p.plus,*/ p.adjustedConfidence/*)*/);
  }

  // go from range 50-100 to 0-100 (looks better/more intuitive)
  double adjustConfidence(double x) {
    ret max(0, (x-50)*2);
  }

  // rough reverse function of adjustConfidence
  double unadjustConfidence(double x) {
    ret x/2+50;
  }

  // GUI: Enter labels

  void acceptPrediction(Prediction p) {
    if (p != null) sendInput2(p.predictedLabel());
  }

  void rejectPrediction(Prediction p) {
    if (p != null) sendInput2(cloneWithFlippedBoolField plus(p).predictedLabel());
  }

  // Treats the input as a label for the single shown message.
  // "not X" trains X as false, anything else trains the input as true.
  @Override
  void sendInput2(S s) {
    if (l(shownMsgs) != 1) ret;
    Msg shown = first(shownMsgs);
    new Matches m;

    bool positive = true;
    S rawLabel = s;
    if "not ..." {
      positive = false;
      rawLabel = m.rest();
    }

    S label = cleanLabel(rawLabel);
    doubleKeyedMapPutVerbose(+msg2label_new, shown, label, positive);
    msg2labelUpdated(label);
    if (autoNext) showRandomMsg();
    change();
  }

  // MESSAGE LABEL HANDLING

  Map<S, Bool> getMsgLabels(Msg msg) {
    ret msg2label_new.getA(msg);
  }

  Set<Msg> getMsgsRelatedToLabel(S label) {
    ret msg2label_new.asForB(label);
  }

  // Called after a training entry for the label changed:
  // re-checks the theories collected for that label.
  void msg2labelUpdated(S label) {
    L<Theory> affected = cloneList(labelByName(label).bestTheories);
    // Re-check silently and refresh the GUI once, not once per theory.
    for (Theory t : affected) checkTheory_noTrigger(t);
    if (!empty(affected)) theoriesChanged();
    msg2labelUpdated();
  }

  // Notifies the label listeners about labels not seen before
  // and refreshes the trained examples table.
  void msg2labelUpdated() {
    callFAllOnAll(onNewLabel, addAll_returnNew(allLabels, msg2label_new.bKeys()));
    updateTrainedExamplesTable();
  }

  // QUERY: get all labels + best theory each

  Map<S, Theory> labelsToBestTheoryMap() {
    Map<S, L<Theory>> map = multiMapToMap(multiMapIndex targetLabelOfTheory(theories));
    ret mapValues(map, theories -> highestBy theoryScore(theories));
  }

  // GUI: Main layout

  visual withCenteredButtons(super,
    "<", rInThinkQ(r showPrevMsg),
    "Show random msg", rInThinkQ(r showRandomMsg),
    ">", rInThinkQ(r showNextMsg),
    jPopDownButton_noText(flattenObjectArray(
      "Check theories", rInThinkQ(r checkAllTheories),
      "Forget bad theories", rInThinkQ(r { forgetBadTheories(0) }),
      "Forget all theories", rInThinkQ(r clearTheories),
      "Update predictions", rInThinkQ(r updatePredictions),
      dm_importAndExportAllDataMenuItems(),
      "Upgrade to v4", rThreadEnter upgradeMe)));

  JComponent mainPart() {
    ret jhsplit(jvsplit(
      jCenteredSection("Focused Message", super.mainPart()),
      jhsplit(
        jCenteredSection("Message Analysis", dm_textArea analysisText()),
        jCenteredSection("Predictions", scpPredictions = singleComponentPanel())
      )),
      with(r updateTabs, tabs = jtabs(
        "", with(r updateObjectsTable, withRightAlignedButtons(
          objectsTable = sexyTable(),
          "Import messages...", rThreadEnter importMsgs)),
        "", with(r updateLabelsTable, labelsTable = sexyTable()),
        "", with(r updateTheoryTable, tableWithSearcher2_returnPanel(theoryTable = sexyTable())),
        "", with(r updateTrainedExamplesTable, tableWithSearcher2_returnPanel(trainedExamplesTable = sexyTable()))
      )));
  }

  // GUI: Update tables & tabs

  void updateTrainedExamplesTable {
    dataToTable_uneditable(trainedExamplesTable, map(msg2label_new.map1, (msg, map) -> litorderedmap(
      "Message" := (msg.fromUser ? "User" : "Bot") + ": " + msg.text,
      "Labels" := renderBoolMap(map))));
  }

  void updateTabs {
    setTabTitles(tabs,
      firstLetterToUpper(nMessages(msgs)),
      firstLetterToUpper(nLabels(labelsByName)),
      firstLetterToUpper(nTheories(theories)),
      n2(msg2label_new.aKeys(), "Trained Example"));
  }

  void updateTheoryTable {
    L<Theory> sorted = sortedByCalculatedFieldDesc theoryScore(theories);
    dataToTable_uneditable(theoryTable, map(sorted, t -> litorderedmap(
      "Score" := renderTheoryScore(t),
      "Theory" := str(t))));
  }

  void updateObjectsTable enter {
    dataToTable_uneditable_ifHasTable(objectsTable, map(msgs, msg ->
      litorderedmap("Text" := msg.text)
    ));
  }

  void updateLabelsTable enter {
    L<Label> sorted = sortedByCalculatedFieldDesc(values(labelsByName), l -> l.score());
    dataToTable_uneditable_ifHasTable(labelsTable, map(sorted, label -> {
      Cl<Theory> bestTheories = label.bestTheories.tiedForFirst();
      ret litorderedmap(
        "Label" := label.name,
        "Prediction Confidence" := renderTheoryScore(first(bestTheories)),
        // "[+n]" marks n further theories tied for first place
        "Best Theory" := empty(bestTheories) ? "" :
          (l(bestTheories) > 1 ? "[+" + (l(bestTheories)-1) + "] " : "")
            + first(bestTheories));
    }));
  }

  // Refreshes every view that depends on the theory set and marks the module changed.
  void theoriesChanged {
    updateTheoryTable();
    updateLabelsTable();
    updateTabs();
    updatePredictions();
    change();
  }

  // THEORY SCORING

  S renderTheoryScore(Theory t) {
    //ret renderPosNegCounts(t.examples);
    ret t == null || t.examples.isEmpty() ? ""
      : iround(theoryScore(t)) + "%" + " / " + renderPosNegScore2(t.examples);
  }

  // adjusted + 3b1b; -100 for a missing theory
  double theoryScore(Theory t) {
    ret t == null ? -100 : adjustConfidence(threeB1BScore(t.examples));
  }

  // QUEUE HELPER

  Runnable rInThinkQ(Runnable r) {
    ret rInQ(thinkQ, r);
  }

  // ADD + REMOVE + CLEAN UP THEORIES

  void addTheory(Theory theory) {
    if (theories.add(theory)) {
      addTheoryToCollectors(theory);
      theoriesChanged();
    }
  }

  // Forgets all theories, including their entries in the per-label collectors.
  void clearTheories {
    // Detach from Label.bestTheories first; otherwise the labels would keep
    // predicting from theories that no longer exist in the theory set.
    for (Theory t : cloneList(theories))
      removeTheoryFromCollectors(t);
    theories.clear();
    theoriesChanged();
  }

  // theories with exactly minScore will go too
  void forgetBadTheories(double minScore) {
    new L<Theory> bad;
    for (Theory t : theories)
      if (theoryScore(t) <= minScore)
        bad.add(t);
    if (empty(bad)) ret;

    for (Theory t : bad) {
      // keep the per-label collectors in sync with the theory set
      removeTheoryFromCollectors(t);
      theories.remove(t);
    }
    theoriesChanged();
  }

  // CHECK PROPOSITIONS + THEORIES

  // Evaluates a proposition (MsgProp, And or Not) on a message.
  // Returns null when the result is unknown (three-valued logic).
  Bool checkMsgProp(O prop, Msg msg) {
    if (prop instanceof And) {
      And conj = (And) prop;
      Bool a = checkMsgProp(conj.a, msg);
      if (isFalse(a)) ret false; // short-circuit like &&
      Bool b = checkMsgProp(conj.b, msg);
      if (isFalse(b)) ret false;
      // neither side is false; unknown if either side is unknown
      ret (a == null || b == null) ? null : true;
    }
    if (prop instanceof Not) {
      Bool inner = checkMsgProp(((Not) prop).a, msg);
      ret inner == null ? null : !inner;
    }
    ret ((MsgProp) prop).check(msg);
  }

  Bool evalTheoryLHS(Theory theory, Msg msg) {
    ret theory == null ? null : checkMsgProp(theory.statement.lhs, msg);
  }

  // true = message confirms the theory, false = refutes it, null = can't tell
  Bool testTheoryOnMsg(Theory theory, Msg msg) {
    Bool lhs = evalTheoryLHS(theory, msg);
    Bool rhs = checkMsgProp(theory.statement.rhs, msg);
    if (lhs == null || rhs == null) null;
    if (bidiMode)
      ret eq(lhs, rhs);
    else
      ret isTrue(rhs) || isFalse(lhs);
  }

  void checkAllTheories {
    for (Theory theory : theories) checkTheory_noTrigger(theory);
    theoriesChanged();
  }

  void checkTheory(Theory theory) {
    checkTheory_noTrigger(theory);
    theoriesChanged();
  }

  // Re-evaluates the theory on all messages; updates its examples
  // (and its position in the label collector) only if they changed.
  void checkTheory_noTrigger(Theory theory) {
    new PosNeg<Msg> pn;
    if (msgs != null) // msgs is unset until messages are imported
      for (Msg msg : msgs)
        pn.add(msg, testTheoryOnMsg(theory, msg));
    if (!eq(theory.examples, pn)) {
      // remove before changing examples: the collector is ordered by score
      removeTheoryFromCollectors(theory);
      theory.examples = pn;
      addTheoryToCollectors(theory);
      change();
    }
  }

  // Label a theory makes a statement about, or null if its RHS is not a label proposition.
  S targetLabelOfTheory(Theory theory) {
    O o = theory.statement.rhs;
    if (o cast HasLabel) ret o.label;
    if (o cast DoesntHaveLabel) ret o.label;
    null;
  }

  // CANONICALIZE LABELS

  S cleanLabel(S label) { ret upper(label); }

  // THEORY + LABEL UPDATES

  void addTheoryToCollectors(Theory theory) {
    S lbl = targetLabelOfTheory(theory);
    if (lbl != null)
      labelByName(lbl).bestTheories.add(theory);
  }

  void removeTheoryFromCollectors(Theory theory) {
    S lbl = targetLabelOfTheory(theory);
    if (lbl != null)
      labelByName(lbl).bestTheories.remove(theory);
  }

  // Looks up the Label object for a name, creating it on first use.
  Label labelByName(S name) {
    ret getOrCreate(labelsByName, name, () -> new Label(name));
  }

  // Rebuilds the (transient) label index from allLabels and theories.
  void updateLabelsByName() {
    for (S lbl : allLabels) labelByName(lbl);
    for (Theory t : theories) addTheoryToCollectors(t);
  }

  // MAKE FEATURE EXTRACTORS

  // Registers every text extractor as a feature derived from the given text feature.
  void makeTextExtractors(S textFeature) {
    for (WithName<IF1<S, O>> f : textExtractors()) {
      IF1<S, O> theFunction = f!;
      featureExtractors.put(f.name, env -> theFunction.get((S) env.getFeature(textFeature)));
    }
  }

  L<WithName<IF1<S, O>>> textExtractors() {
    new L<WithName<IF1<S, O>>> l;
    l.add(WithName<>("number of words", lambda1 numberOfWords));
    l.add(WithName<>("number of characters", lambda1 l));
    for (char c : characters("\"', .-_"))
      l.add(WithName<>("contains " + quote(c), s -> contains(s, c)));
    /*for (S word : concatAsCISet(lambdaMap words(collect text(msgs))))
      l.add(WithName<>("contains word " + quote(word), s -> containsWord(s, word)));*/
    ret l;
  }

  // GUI: Import messages dialog

  // Asks for messages (one per line) and adds those whose text is not yet present.
  void importMsgs {
    inputMultiLineText("Messages to import (one per line)", voidfunc(S text) {
      Cl<S> toImport = listMinusSet(asOrderedSet(tlft(text)), collectAsSet text(msgs));
      if (msgs == null) msgs = ll();
      for (S line : toImport)
        msgs.add(new Msg(true, line));
      change();
      infoBox(nMessages(toImport) + " imported");
      updateObjectsTable();
      showRandomMsg();
    });
  }

  void upgradeMe {
    dm_exportStructureToDefaultFile(this);
    dm_changeModuleLibID(this, "#1028063/AutoClassifier");
  }
}
Began life as a copy of #1028055
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv
No comments. add comment
Snippet ID: | #1028058 |
Snippet name: | Auto Classifier v3 [learning message classifier, dev.] |
Eternal ID of this version: | #1028058/14 |
Text MD5: | 82abc889936952c3f24d06a57b99218c |
Transpilation MD5: | 1c8f6f456387cf063caada2da795c7a0 |
Author: | stefan |
Category: | javax / a.i. |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2020-05-15 14:21:33 |
Source code size: | 16718 bytes / 537 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 197 / 692 |
Version history: | 13 change(s) |
Referenced in: | [show references] |