Auto Classifier v4 [learning message classifier] [1028063]

!7

cmodule AutoClassifier > DynConvo {
  // THEORY BUILDING BLOCKS (Theory + MsgProp + subclasses)
  
  // A candidate rule about messages ("lhs => rhs", shown as "lhs <=> rhs" in
  // bidiMode) together with the messages that speak for or against it.
  srecord Theory(BasicLogicRule statement) {
    // positive/negative example messages; replaced by checkTheory_noTrigger
    new PosNeg<Msg> examples;
    //bool iff; // <=> instead of only =>

    toString {
      // unconditional theories read as a plain statement about every message
      if (statement.lhs instanceof MPTrue)
        ret "Every message is " + statement.rhs;
      if (bidiMode)
        ret statement.lhs + " <=> " + statement.rhs;
      ret str(statement);
    }
  }

  // Base class for propositions about a single message.
  // check answers true/false, or null when the answer is unknown.
  asclass MsgProp {
    abstract Bool check(Msg msg);
  }

  // Proposition that holds for every message (used as LHS of unconditional theories).
  srecord MPTrue() > MsgProp {
    Bool check(Msg msg) {
      ret true;
    }

    toString {
      ret "always";
    }
  }
  
  // Proposition "the message is trained as having this label".
  record HasLabel(S label) > MsgProp {
    // Returns the stored value for (msg, label) from msg2label_new as-is
    // (null when nothing is stored for that pair).
    Bool check(Msg msg) {
      Bool trained = msg2label_new.get(msg, label);
      ret trained;
    }

    toString {
      ret label;
    }
  }
  
  // Proposition "the message is trained as NOT having this label".
  record DoesntHaveLabel(S label) > MsgProp {
    // Negates the value stored for (msg, label) in msg2label_new.
    // A missing value (null = unknown) is returned as null explicitly instead
    // of being passed through not(), so the "null if unknown" contract of
    // MsgProp.check holds regardless of how not() treats a null Bool.
    Bool check(Msg msg) {
      Bool trained = msg2label_new.get(msg, label);
      ret trained == null ? null : !trained;
    }

    toString { ret "not " + label; }
  }

  // Proposition "the message's feature <feature> equals <value>".
  record FeatureValueIs(S feature, O value) > MsgProp {
    Bool check(Msg msg) {
      O actual = getMsgFeature(msg, feature);
      ret eq(actual, value);
    }

    toString {
      ret feature + "=" + value;
    }
  }
  
  // LABEL class (with best theories)

  // A label name together with the theories that target it.
  class Label {
    S name;

    *() {}
    *(S *name) {}
    
    // kept in reverse theoryScore order; first() is what bestTheory() returns
    TreeSetWithDuplicates<Theory> bestTheories = new(reverseComparatorFromCalculatedField theoryScore());

    Theory bestTheory() { ret first(bestTheories); }

    // score of the best theory (theoryScore handles the no-theory case)
    double score() { ret theoryScore(bestTheory()); }
  }
  
  // FEATURE base classes (FeatureEnv + FeatureExtractor)
  
  // Environment handed to a FeatureExtractor while it computes a feature.
  sinterface FeatureEnv<A> {
    // the object whose features are being computed
    A mainObject();
    // value of another feature of the same object, looked up by name
    O getFeature(S name);
  }

  // Computes one feature value from a FeatureEnv.
  sinterface FeatureExtractor<A> {
    // returns the feature value for the environment's main object
    O get(FeatureEnv<A> env);
  }
  
  // PREDICTION class (output of classifier)
  
  // One classifier output: a label, whether it is predicted to apply (plus)
  // and the adjusted confidence in percent.
  srecord Prediction(S label, bool plus, double adjustedConfidence) {
    // label text in input form: "X" for plus, "not X" otherwise
    S predictedLabel() {
      S prefix = plus ? "" : "not ";
      ret prefix + label;
    }

    toString {
      int percent = iround(adjustedConfidence);
      ret predictedLabel() + " (confidence: " + percent + "%)";
    }
  }

  // DATA (backend)
  
  sbool bidiMode = true; // treat all theories as bidirectional
  L<Msg> msgs; // all messages (order not used yet)
  // per-message feature maps, filled on demand through calcMsgFeatures (transient)
  transient Map<Msg, Map<S, O>> msg2features = AutoMap<>(lambda1 calcMsgFeatures);
  // names of all labels seen so far; reset from msg2label_new in start
  new Set<S> allLabels;
  // label name -> Label object; transient, rebuilt by updateLabelsByName
  transient new Map<S, Label> labelsByName;
  // every theory currently under consideration
  new LinkedHashSet<Theory> theories;
  // queue created in start; rInThinkQ wraps actions so they run on it
  transient Q thinkQ;
  // listeners called with a label name (see callFAllOnAll uses)
  transient new L<IVF1<S>> onNewLabel;
  // trained data: (message, label) -> true / false
  new DoubleKeyedMap<Msg, S, Bool> msg2label_new;
  // feature name -> extractor; transient, filled in start
  transient new Map<S, FeatureExtractor<Msg>> featureExtractors;
  
  // DATA (GUI)
  
  // NOTE(review): only referenced from a commented-out filter in predictionsForMsg
  switchable double minAdjustedScoreToDisplay = 50;
  // when set, sendInput2 shows a random message after a label was entered
  switchable bool autoNext = false;
  // messages currently shown (set by showMsgs)
  L<Msg> shownMsgs;
  // text for the "Message Analysis" area (set by showMsgs)
  S analysisText;
  // Swing components, assigned in mainPart
  transient JTable theoryTable, labelsTable, trainedExamplesTable, objectsTable;
  transient JTabbedPane tabs;
  transient SingleComponentPanel scpPredictions;
  
  // START CODE

  // Module start: creates the think queue and schedules the initial setup on it.
  start {
    thinkQ = dm_startQ("Thought Queue");
    thinkQ.add(r {
      // legacy + after deletion cleaning
      setField(allLabels := asTreeSet(msg2label_new.bKeys()));
      updateLabelsByName();
      
      // listener 1: a label callback marks the module as changed
      onNewLabel.add(lbl -> change());
  
      // listeners 2+3: theory generators (only registered here, run below)
      makeTheoriesAboutLabels();
      makeTheoriesAboutFeaturesAndLabels();
      
      // one extractor per field of Msg, reading that field from the message
      for (S field : fields(Msg))
        featureExtractors.put(field, env -> getOpt(env.mainObject(), field));
  
      // derived text features, computed from the "text" feature
      makeTextExtractors("text");
  
      // run all listeners for every known label (extractors are registered by now)
      callFAllOnAll(onNewLabel, allLabels);
      
      msg2labelUpdated();
      updatePredictions();
      checkAllTheories();
      //showRandomMsg();
    });
  }
  
  // THEORY MAKING

  // Registers a label listener that proposes the two unconditional theories
  // for a label X: "every message has X" and "every message doesn't have X".
  void makeTheoriesAboutLabels {
    onNewLabel.add(lbl -> {
      for (O rhs : ll(new HasLabel(lbl), new DoesntHaveLabel(lbl)))
        addTheory(new Theory(BasicLogicRule(new MPTrue, rhs)));
    });
  }

  // Registers a label listener that, for a label X, proposes theories of the
  // form "feature F has value V => message has / doesn't have X" for every
  // registered feature F and every candidate value V of F related to X.
  void makeTheoriesAboutFeaturesAndLabels {
    onNewLabel.add(lbl -> {
      for (S feature : keys(featureExtractors)) {
        Set candidateValues = possibleValuesOfFeatureRelatedToLabel(feature, lbl);
        for (O value : candidateValues) {
          // positive variant first, then the negative one
          addTheory(new Theory(BasicLogicRule(
            new FeatureValueIs(feature, value), new HasLabel(lbl))));
          addTheory(new Theory(BasicLogicRule(
            new FeatureValueIs(feature, value), new DoesntHaveLabel(lbl))));
        }
      }
    });
  }
  
  // THEORY MAKING (helper functions)

  // Values a feature can take independent of any data:
  // {false, true} for boolean fields of Msg, otherwise an empty set.
  Set possibleValuesOfFeature(S feature) {
    Set values = litset();
    if (isBoolField(Msg, feature)) {
      values.add(false);
      values.add(true);
    }
    ret values;
  }

  // Candidate values of a feature for theories about a label: the a-priori
  // values plus the values observed on the messages related to that label.
  Set possibleValuesOfFeatureRelatedToLabel(S feature, S label) {
    Set values = possibleValuesOfFeature(feature);
    Set<Msg> related = getMsgsRelatedToLabel(label);
    if (related != null)
      for (Msg m : related)
        values.add(getMsgFeature(m, feature));
    ret values;
  }
  
  // CALCULATE FEATURES

  // Looks up one feature value of a message via the msg2features map.
  O getMsgFeature(Msg msg, S feature) {
    Map<S, O> features = msg2features.get(msg);
    ret features.get(feature);
  }
  
  // Builds the feature map for one message.
  // returns AutoMap with no realized entries; a value is produced by the
  // matching entry of featureExtractors when a feature name is looked up.
  // A feature name without a registered extractor yields null instead of
  // throwing a NullPointerException.
  Map<S, O> calcMsgFeatures(Msg msg) {
    // env is filled in after the map exists because each refers to the other
    new Var<FeatureEnv<Msg>> env;
    AutoMap<S, O> map = new(feature -> {
      FeatureExtractor<Msg> extractor = featureExtractors.get(feature);
      ret extractor == null ? null : extractor.get(env!);
    });
    env.set(new FeatureEnv<Msg> {
      Msg mainObject() { ret msg; }
      // extractors may read other features of the same message through the map
      O getFeature(S feature) { ret map.get(feature); }
    });
    ret map;
  }
  
  // GUI: Show messages

  // Makes l the shown messages. When exactly one message is shown, the
  // analysis text and the prediction panel are rebuilt for it; otherwise the
  // analysis text is cleared.
  void showMsgs(L<Msg> l) {
    setField(shownMsgs := l);
    setMsgs(l);
    if (l(shownMsgs) == 1) {
      Msg msg = first(shownMsgs);
      // trained labels plus the message's feature map
      setField(analysisText := joinWithEmptyLines(
        "Trained Labels: " + or2(renderBoolMap(getMsgLabels(msg)), "-"),
        "Features:\n" + formatColonProperties_quoteStringValues(
msg2features.get(msg))
      ));
      // one row per prediction: left cell for the label, right cell for "not <label>"
      setSCPComponent(scpPredictions,
        scrollableStackWithSpacing(map(predictionsForMsg(msg), p -> {
          S percent = iround(p.adjustedConfidence) + "%";
          S neg = "not " + p.label;
          // what has been trained for this message and label (null = nothing)
          Bool knownValue = msg2label_new.get(msg, p.label);
          embedded S strong(S html) { ret b(html, style := "font-size: 18; color: #008000"); }
          // known: this side is already trained for the message (plain label, no button)
          // predicted: this side is what the classifier predicts (highlighted, with percent)
          embedded JComponent makeButton(bool known, bool predicted, S label) {
            S html = predicted ? jlabel_centerHTML(joinWithBR(
              strong(htmlencode(label)), percent))
              : label;
            S toolTip = predicted ? "Predicted with " + percent + " confidence" + stringIf(!known, ". Click to confirm") 
              : !known ? "Click to set this label for message" : "";
            if (known) ret setTooltip(toolTip, jcenteredlabel(html));
            // clicking sends the label text through sendInput2
            JButton btn = setTooltip(toolTip, jbutton(html, rThread { sendInput2(label) }));
            ret predicted ? btn : jfullcenter(btn);
          }
          
          ret withSideMargin(jhgridWithSpacing(
            makeButton(isTrue(knownValue), p.plus, p.label),
            makeButton(isFalse(knownValue), !p.plus, neg)
          ));
        })));
    } else setField(analysisText := "");
  }

  // Re-renders the currently shown messages, which rebuilds the analysis
  // text and prediction panel in showMsgs.
  void updatePredictions() {
    showMsgs(shownMsgs);
  }
  
  // Shows one randomly picked message from msgs.
  void showRandomMsg {
    L<Msg> pick = randomElementAsList(msgs);
    showMsgs(pick);
  }
  
  // Shows the message before the first shown one (cyclic over msgs).
  void showPrevMsg {
    Msg current = first(shownMsgs);
    Msg previous = prevInCyclicList(msgs, current);
    showMsgs(llNonNulls(previous));
  }

  // Shows the message after the first shown one (cyclic over msgs).
  void showNextMsg {
    Msg current = first(shownMsgs);
    Msg next = nextInCyclicList(msgs, current);
    showMsgs(llNonNulls(next));
  }

  // CALCULATE PREDICTIONS FOR MESSAGE

  // Produces one prediction per label from that label's best theory.
  // Labels without a theory, or whose theory's LHS is unknown for this
  // message, are skipped. The result is sorted by adjusted confidence,
  // highest first (positive and negative predictions are not grouped).
  L<Prediction> predictionsForMsg(Msg msg) {
    new L<Prediction> predictions;
    for (Label label : values(labelsByName)) {
      Theory best = label.bestTheory();
      if (best == null) continue;
      Bool lhs = evalTheoryLHS(best, msg);
      if (lhs == null) continue;
      // a "doesn't have label" theory predicts the opposite of its LHS
      bool negated = best.statement.rhs instanceof DoesntHaveLabel;
      bool plus = negated ? !lhs : lhs;
      double adjusted = adjustConfidence(threeB1BScore(best.examples));
      //if (adjusted < minAdjustedScoreToDisplay) continue;
      predictions.add(new Prediction(label.name, plus, adjusted));
    }
    ret sortedByCalculatedFieldDesc(predictions, p -> p.adjustedConfidence);
  }

  // Stretches a raw score from the 50..100 range onto 0..100
  // (looks better/more intuitive); anything below 50 becomes 0.
  double adjustConfidence(double x) {
    double stretched = (x-50)*2;
    ret Math.max(0, stretched);
  }
  
  // Approximate inverse of adjustConfidence: maps 0..100 back onto 50..100.
  // (Only approximate because adjustConfidence clamps values below 50 to 0.)
  double unadjustConfidence(double x) {
    double half = x/2;
    ret half+50;
  }
  
  // GUI: Enter labels
  
  // Trains the shown message with the prediction as given. No-op for null.
  void acceptPrediction(Prediction p) {
    if (p == null) ret;
    sendInput2(p.predictedLabel());
  }

  // Trains the shown message with the opposite of the prediction
  // (a copy with the plus flag flipped). No-op for null.
  void rejectPrediction(Prediction p) {
    if (p == null) ret;
    sendInput2(cloneWithFlippedBoolField plus(p).predictedLabel());
  }

  // Treats the input as a label for the single shown message:
  // "not X" trains label X as false, anything else trains it as true.
  // Does nothing unless exactly one message is shown.
  @Override
  void sendInput2(S s) {
    if (l(shownMsgs) != 1) ret;
    Msg shown = first(shownMsgs);
    new Matches m;

    // defaults describe the positive case; the pattern match overrides them
    bool value = true;
    S rawLabel = s;
    if "not ..." {
      value = false;
      rawLabel = m.rest();
    }

    S label = cleanLabel(rawLabel);
    doubleKeyedMapPutVerbose(+msg2label_new, shown, label, value);
    msg2labelUpdated(label);
    if (autoNext) showRandomMsg();
    change();
  }
  
  // MESSAGE LABEL HANDLING

  // All trained label values (label -> true/false) of one message.
  Map<S, Bool> getMsgLabels(Msg msg) {
    Map<S, Bool> trained = msg2label_new.getA(msg);
    ret trained;
  }
  
  // Messages that have a trained value (true or false) stored for the label.
  Set<Msg> getMsgsRelatedToLabel(S label) {
    Set<Msg> related = msg2label_new.asForB(label);
    ret related;
  }

  // Called after the trained value of a label changed for some message.
  // Re-checks every theory targeting that label, then runs the general
  // label bookkeeping.
  //
  // The theories are re-checked without triggering, and theoriesChanged()
  // runs once afterwards. Calling checkTheory per theory would rebuild all
  // tables and the predictions once for every single theory.
  void msg2labelUpdated(S label) {
    // copy first: re-checking removes/re-adds theories in bestTheories
    L<Theory> affected = cloneList(labelByName(label).bestTheories);
    for (Theory t : affected)
      checkTheory_noTrigger(t);
    if (!empty(affected)) theoriesChanged();
    msg2labelUpdated();
  }

  // General bookkeeping after msg2label_new changed: adds the label names
  // of msg2label_new to allLabels, calls the onNewLabel listeners on what
  // addAll_returnNew returns (presumably only the newly added names -
  // confirm against the helper), and refreshes the trained examples table.
  void msg2labelUpdated() {
    callFAllOnAll(onNewLabel, addAll_returnNew(allLabels, msg2label_new.bKeys()));
    updateTrainedExamplesTable();
  }
  
  // QUERY: get all labels + best theory each
  
  // Groups all theories by their target label and keeps, per label,
  // the theory with the highest theoryScore.
  Map<S, Theory> labelsToBestTheoryMap() {
    Map<S, L<Theory>> byLabel = multiMapToMap(multiMapIndex targetLabelOfTheory(theories));
    ret mapValues(byLabel, candidates -> highestBy theoryScore(candidates));
  }

  // GUI: Main layout

  // GUI definition: the superclass component plus a centered button row.
  // Navigation and maintenance actions are wrapped with rInThinkQ so they
  // run on the think queue; "Upgrade to v5" uses rThreadEnter instead.
  visual
    withCenteredButtons(super,
      "<", rInThinkQ(r showPrevMsg),
      "Show random msg", rInThinkQ(r showRandomMsg),
      ">", rInThinkQ(r showNextMsg),
      // pop-down menu with maintenance actions
      jPopDownButton_noText(flattenObjectArray(
        "Check theories", rInThinkQ(r checkAllTheories),
        // 0 = drop every theory whose score is <= 0
        "Forget bad theories", rInThinkQ(r { forgetBadTheories(0) }),
        "Forget all theories", rInThinkQ(r clearTheories),
        "Update predictions", rInThinkQ(r updatePredictions),
        dm_importAndExportAllDataMenuItems(),
        "Upgrade to v5", rThreadEnter upgradeMe)));

  // Builds the main area: on one side the focused message with its analysis
  // text and prediction panel, on the other a tabbed pane with four tables
  // (messages, labels, theories, trained examples). Also assigns the
  // component fields (scpPredictions, tabs and the tables).
  JComponent mainPart() {
    ret jhsplit(jvsplit(
        jCenteredSection("Focused Message", super.mainPart()),
        jhsplit(
          jCenteredSection("Message Analysis", dm_textArea analysisText()),
          jCenteredSection("Predictions (green)", scpPredictions = singleComponentPanel())
        )),
      // tab titles start empty; updateTabs sets them
      with(r updateTabs, tabs = jtabs(
        "", with(r updateObjectsTable, withRightAlignedButtons(
          objectsTable = sexyTable(),
          "Import messages...", rThreadEnter importMsgs)),
        "", with(r updateLabelsTable, labelsTable = sexyTable()),
        "", with(r updateTheoryTable, tableWithSearcher2_returnPanel(theoryTable = sexyTable())),
        "", with(r updateTrainedExamplesTable, tableWithSearcher2_returnPanel(trainedExamplesTable = sexyTable()))
      )));
  }
  
  // GUI: Update tables & tabs

  // Fills the trained examples table: one row per message that has trained
  // labels, showing the sender ("User"/"Bot"), the text and the label values.
  // Uses the _ifHasTable variant like updateObjectsTable and
  // updateLabelsTable, because this method is also reached from the start
  // code and from label input, independent of whether the table exists.
  void updateTrainedExamplesTable {
    dataToTable_uneditable_ifHasTable(trainedExamplesTable, map(msg2label_new.map1, (msg, labels) ->
      litorderedmap(
        "Message" := (msg.fromUser ? "User" : "Bot") + ": " + msg.text,
        "Labels" := renderBoolMap(labels))));
  }

  // Sets the four tab titles to the current counts of messages, labels,
  // theories and trained examples.
  void updateTabs {
    S messagesTitle = firstLetterToUpper(nMessages(msgs));
    S labelsTitle = firstLetterToUpper(nLabels(labelsByName));
    S theoriesTitle = firstLetterToUpper(nTheories(theories));
    S trainedTitle = n2(msg2label_new.aKeys(), "Trained Example");
    setTabTitles(tabs, messagesTitle, labelsTitle, theoriesTitle, trainedTitle);
  }

  // Fills the theory table with all theories, highest theoryScore first.
  // Uses the _ifHasTable variant like updateObjectsTable and
  // updateLabelsTable, because theoriesChanged() calls this from the start
  // code as well, independent of whether the table exists.
  void updateTheoryTable {
    L<Theory> sorted = sortedByCalculatedFieldDesc theoryScore(theories);
    dataToTable_uneditable_ifHasTable(theoryTable, map(sorted, t -> litorderedmap(
      "Score" := renderTheoryScore(t),
      "Theory" := str(t))));
  }

  // Fills the messages table (if it exists) with one "Text" row per message.
  void updateObjectsTable enter {
    dataToTable_uneditable_ifHasTable(objectsTable,
      map(msgs, m -> litorderedmap("Text" := m.text)));
  }

  // Fills the labels table (if it exists), best-scoring label first.
  // Each row shows the label, the score of its top theory and that theory;
  // "[+n]" in front means n further theories are tied with it.
  void updateLabelsTable enter {
    L<Label> sorted = sortedByCalculatedFieldDesc(values(labelsByName), lbl -> lbl.score());
    dataToTable_uneditable_ifHasTable(labelsTable, map(sorted, label -> {
      Cl<Theory> tied = label.bestTheories.tiedForFirst();
      Theory top = first(tied);

      S bestText = "";
      if (!empty(tied)) {
        S tiedPrefix = l(tied) > 1 ? "[+" + (l(tied)-1) + "] " : "";
        bestText = tiedPrefix + top;
      }

      ret litorderedmap(
        "Label" := label.name,
        "Prediction Confidence" := renderTheoryScore(top),
        "Best Theory" := bestText);
    }));
  }
  
  // Called whenever the set of theories or their examples changed:
  // refreshes the theory and labels tables, the tab titles and the
  // predictions, then marks the module as changed.
  void theoriesChanged {
    updateTheoryTable();
    updateLabelsTable();
    updateTabs();
    updatePredictions();
    change();
  }

  // THEORY SCORING
  
  // Text for a theory's score: "<adjusted score>% / <pos/neg rendering>".
  // Empty for a null theory or one without examples.
  S renderTheoryScore(Theory t) {
    //ret renderPosNegCounts(t.examples);
    if (t == null || t.examples.isEmpty()) ret "";
    ret iround(theoryScore(t)) + "%" + " / " + renderPosNegScore2(t.examples);
  }

  // Score of a theory: threeB1BScore of its examples, run through
  // adjustConfidence. A missing theory scores -100, below every real score.
  double theoryScore(Theory t) {
    if (t == null) ret -100;
    ret adjustConfidence(threeB1BScore(t.examples));
  }
  
  // QUEUE HELPER

  // Wraps a Runnable via rInQ with the think queue (used for GUI actions).
  Runnable rInThinkQ(Runnable r) { ret rInQ(thinkQ, r); }
  
  // ADD + REMOVE + CLEAN UP THEORIES

  // Adds a theory unless the theories set already contains it; a newly
  // added theory is registered with its label and the GUI is refreshed.
  void addTheory(Theory theory) {
    if (!theories.add(theory)) ret;
    addTheoryToCollectors(theory);
    theoriesChanged();
  }

  // Forgets every theory. The per-label bestTheories collections are
  // emptied too: predictionsForMsg and the labels table read those, so
  // clearing only the theories set would leave the forgotten theories in use.
  void clearTheories {
    theories.clear();
    for (Label label : values(labelsByName))
      // iterate over a copy, since entries are removed while walking
      for (Theory t : cloneList(label.bestTheories))
        label.bestTheories.remove(t);
    theoriesChanged();
  }
  
  // Removes all theories scoring minScore or less
  // (theories with exactly minScore will go too).
  // Each removed theory is also taken out of its label's bestTheories,
  // which predictions and the labels table read; otherwise forgotten
  // theories would stay in use there.
  void forgetBadTheories(double minScore) {
    // collect first: theories must not be modified while iterating it
    new L<Theory> bad;
    for (Theory t : theories)
      if (theoryScore(t) <= minScore)
        bad.add(t);
    if (empty(bad)) ret;

    for (Theory t : bad) {
      removeTheoryFromCollectors(t);
      theories.remove(t);
    }
    theoriesChanged();
  }
  
  // CHECK PROPOSITIONS + THEORIES

  // Evaluates a proposition on a message: true, false, or null if unknown.
  // Supports And / Not combinators and MsgProp leaves.
  //
  // And uses three-valued logic, because operands may be null (unknown) and
  // combining them with && would unbox a null Bool and throw:
  //   false wins; otherwise unknown if either side is unknown; otherwise true.
  // Like &&, the second operand is not evaluated when the first is false.
  Bool checkMsgProp(O prop, Msg msg) {
    if (prop cast And) {
      Bool a = checkMsgProp(prop.a, msg);
      if (isFalse(a)) ret false;
      Bool b = checkMsgProp(prop.b, msg);
      if (isFalse(b)) ret false;
      if (a == null || b == null) ret null;
      ret true;
    }
    if (prop cast Not) {
      // unknown stays unknown
      Bool a = checkMsgProp(prop.a, msg);
      ret a == null ? null : !a;
    }
    ret ((MsgProp) prop).check(msg);
  }

  // Evaluates the left-hand side of a theory on a message.
  // Returns null for a null theory or when the LHS is unknown.
  Bool evalTheoryLHS(Theory theory, Msg msg) {
    if (theory == null) ret null;
    ret checkMsgProp(theory.statement.lhs, msg);
  }

  // Tests whether a message agrees with a theory.
  // Returns null when either side of the rule is unknown for the message.
  // bidiMode: the message agrees iff both sides have the same truth value.
  // Otherwise plain implication: agrees iff rhs is true or lhs is false.
  Bool testTheoryOnMsg(Theory theory, Msg msg) {
    Bool lhs = evalTheoryLHS(theory, msg);
    Bool rhs = checkMsgProp(theory.statement.rhs, msg);
    if (lhs == null || rhs == null) ret null;
    ret bidiMode ? eq(lhs, rhs) : (isTrue(rhs) || isFalse(lhs));
  }

  // Re-evaluates every theory against all messages, then refreshes the GUI once.
  void checkAllTheories {
    for (Theory t : theories) {
      checkTheory_noTrigger(t);
    }
    theoriesChanged();
  }

  // Re-evaluates one theory against all messages and refreshes the GUI
  // (theoriesChanged rebuilds tables, tabs and predictions).
  void checkTheory(Theory theory) {
    checkTheory_noTrigger(theory);
    theoriesChanged();
  }

  // Recomputes a theory's examples from all messages, without GUI refresh.
  // If the result differs from the stored examples, the theory is taken out
  // of its label's collection before the examples are swapped and re-added
  // afterwards (that collection is ordered by a score derived from them).
  void checkTheory_noTrigger(Theory theory) {
    new PosNeg<Msg> fresh;
    for (Msg m : msgs)
      fresh.add(m, testTheoryOnMsg(theory, m));

    if (eq(theory.examples, fresh)) ret;

    removeTheoryFromCollectors(theory);
    theory.examples = fresh;
    addTheoryToCollectors(theory);
    change();
  }
  
  // Name of the label a theory makes a statement about, i.e. the label of
  // its HasLabel / DoesntHaveLabel right-hand side; null for any other RHS.
  S targetLabelOfTheory(Theory theory) {
    O rhs = theory.statement.rhs;
    if (rhs instanceof HasLabel) ret ((HasLabel) rhs).label;
    if (rhs instanceof DoesntHaveLabel) ret ((DoesntHaveLabel) rhs).label;
    ret null;
  }

  // CANONICALIZE LABELS

  // Canonical form of a label as entered by the user: passed through upper().
  S cleanLabel(S label) { ret upper(label); }
  
  // THEORY + LABEL UPDATES
  
  // Registers a theory in the bestTheories collection of its target label.
  // Theories without a target label are ignored.
  void addTheoryToCollectors(Theory theory) {
    S lbl = targetLabelOfTheory(theory);
    if (lbl == null) ret;
    labelByName(lbl).bestTheories.add(theory);
  }

  // Removes a theory from the bestTheories collection of its target label.
  // Theories without a target label are ignored.
  void removeTheoryFromCollectors(Theory theory) {
    S lbl = targetLabelOfTheory(theory);
    if (lbl == null) ret;
    labelByName(lbl).bestTheories.remove(theory);
  }

  // Returns the Label object for a name from labelsByName; the supplier
  // given to getOrCreate makes a new Label(name) when one is needed.
  Label labelByName(S name) {
    ret getOrCreate(labelsByName, name, () -> new Label(name));
  }

  // Rebuilds the label index: makes sure every known label has a Label
  // object, then registers every theory with its target label.
  void updateLabelsByName() {
    for (S name : allLabels) {
      labelByName(name);
    }
    for (Theory theory : theories) {
      addTheoryToCollectors(theory);
    }
  }

  // MAKE FEATURE EXTRACTORS

  // Registers one feature extractor per entry of textExtractors(). Each
  // applies its function to the value of the given text feature, which it
  // reads through the environment.
  void makeTextExtractors(S textFeature) {
    for (WithName<IF1<S, O>> named : textExtractors()) {
      IF1<S, O> fn = named!;
      featureExtractors.put(named.name, env -> {
        S text = (S) env.getFeature(textFeature);
        ret fn.get(text);
      });
    }
  }

  // The named text feature functions: word count, character count, and one
  // "contains <char>" test for each of the characters " ' , space . - _
  L<WithName<IF1<S, O>>> textExtractors() {
    new L<WithName<IF1<S, O>>> extractors;
    extractors.add(WithName<>("number of words", lambda1 numberOfWords));
    // lambda1 l = the length function l(...)
    extractors.add(WithName<>("number of characters", lambda1 l));
    for (char c : characters("\"', .-_")) {
      extractors.add(WithName<>("contains " + quote(c), s -> contains(s, c)));
    }
    /*for (S word : concatAsCISet(lambdaMap words(collect text(msgs))))
      extractors.add(WithName<>("contains word " + quote(word), s -> containsWord(s, word)));*/
    ret extractors;
  }
  
  // GUI: Import messages dialog, warn on delete

  // Opens a multi-line input dialog and imports each entered line as a new
  // message (constructed as new Msg(true, line)). Lines whose text already
  // exists in msgs, and duplicate lines within the input, are skipped.
  // NOTE(review): the callback changes msgs and calls showRandomMsg directly
  // rather than through the think queue - confirm which thread it runs on.
  void importMsgs {
    inputMultiLineText("Messages to import (one per line)", voidfunc(S text) {
      // lines of the input, minus texts that are already present
      Cl<S> toImport = listMinusSet(asOrderedSet(tlft(text)), collectAsSet text(msgs));
      // msgs may still be unset when nothing has been imported yet
      if (msgs == null) msgs = ll();
      for (S line : toImport)
        msgs.add(new Msg(true, line));
      change();
      infoBox(nMessages(toImport) + " imported");
      updateObjectsTable();
      showRandomMsg();
    });
  }
  
  // Always answers true (deleting this module should come with a warning).
  bool warnOnDelete() {
    ret true;
  }
  
  // "Upgrade to v5" action: hands the v5 module library ID to
  // dm_backupStructureAndChangeModuleLibID.
  void upgradeMe {
    dm_backupStructureAndChangeModuleLibID("#1028066/AutoClassifier");
  }
}

Travelled to 7 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt, xrpafgyirdlv

Snippet ID:	#1028063
Snippet name:	Auto Classifier v4 [learning message classifier]
Eternal ID of this version:	#1028063/29
Text MD5:	791dd66fdbc6d9952d9dd3c09c99e6c6
Transpilation MD5:	e54f432d5d0deec7b2f3c4e78c933f4e
Author:	stefan
Category:	javax / a.i.
Type:	JavaX source code (Dynamic Module)
Public (visible to everyone):	Yes
Archived (hidden from active list):	No
Created/modified:	2020-05-18 14:29:18
Source code size:	17669 bytes / 556 lines
Pitched / IR pitched:	No / No
Views / Downloads:	698 / 2224
Version history:	28 change(s)
Referenced in:	[show references]

< > BotCompany Repo | #1028063 // Auto Classifier v4 [learning message classifier]

JavaX source code (Dynamic Module) [tags: use-pretranspiled] - run with: Stefan's OS

Author comment

1	!7
2
3	cmodule AutoClassifier > DynConvo {
4	// THEORY BUILDING BLOCKS (Theory + MsgProp + subclasses)
5
6	srecord Theory(BasicLogicRule statement) {
7	new PosNeg<Msg> examples;
8	//bool iff; // <=> instead of only =>
9	toString { ret str(statement.lhs instanceof MPTrue ? "Every message is " + statement.rhs
10	: bidiMode ? statement.lhs + " <=> " + statement.rhs : statement); }
11	}
12
13	// propositions about a message. check returns null if unknown
14	asclass MsgProp { abstract Bool check(Msg msg); }
15
16	srecord MPTrue() > MsgProp {
17	Bool check(Msg msg) { true; }
18	toString { ret "always"; }
19	}
20
21	record HasLabel(S label) > MsgProp {
22	Bool check(Msg msg) { ret msg2label_new.get(msg, label); }
23	toString { ret label; }
24	}
25
26	record DoesntHaveLabel(S label) > MsgProp {
27	Bool check(Msg msg) { ret not(msg2label_new.get(msg, label)); }
28	toString { ret "not " + label; }
29	}
30
31	record FeatureValueIs(S feature, O value) > MsgProp {
32	Bool check(Msg msg) { ret eq(getMsgFeature(msg, feature), value); }
33	toString { ret feature + "=" + value; }
34	}
35
36	// LABEL class (with best theories)
37
38	class Label {
39	S name;
40
41	*() {}
42	*(S *name) {}
43
44	TreeSetWithDuplicates<Theory> bestTheories = new(reverseComparatorFromCalculatedField theoryScore());
45
46	double score() { ret theoryScore(first(bestTheories)); }
47	Theory bestTheory() { ret first(bestTheories); }
48	}
49
50	// FEATURE base classes (FeatureEnv + FeatureExtractor)
51
52	sinterface FeatureEnv<A> {
53	A mainObject();
54	O getFeature(S name);
55	}
56
57	sinterface FeatureExtractor<A> {
58	O get(FeatureEnv<A> env);
59	}
60
61	// PREDICTION class (output of classifier)
62
63	srecord Prediction(S label, bool plus, double adjustedConfidence) {
64	toString {
65	ret predictedLabel() + " (confidence: " + iround(adjustedConfidence) + "%)";
66	}
67
68	S predictedLabel() {
69	ret (plus ? "" : "not ") + label;
70	}
71	}
72
73	// DATA (backend)
74
75	sbool bidiMode = true; // treat all theories as bidirectional
76	L<Msg> msgs; // all messages (order not used yet)
77	transient Map<Msg, Map<S, O>> msg2features = AutoMap<>(lambda1 calcMsgFeatures);
78	new Set<S> allLabels;
79	transient new Map<S, Label> labelsByName;
80	new LinkedHashSet<Theory> theories;
81	transient Q thinkQ;
82	transient new L<IVF1<S>> onNewLabel;
83	new DoubleKeyedMap<Msg, S, Bool> msg2label_new;
84	transient new Map<S, FeatureExtractor<Msg>> featureExtractors;
85
86	// DATA (GUI)
87
88	switchable double minAdjustedScoreToDisplay = 50;
89	switchable bool autoNext = false;
90	L<Msg> shownMsgs;
91	S analysisText;
92	transient JTable theoryTable, labelsTable, trainedExamplesTable, objectsTable;
93	transient JTabbedPane tabs;
94	transient SingleComponentPanel scpPredictions;
95
96	// START CODE
97
98	start {
99	thinkQ = dm_startQ("Thought Queue");
100	thinkQ.add(r {
101	// legacy + after deletion cleaning
102	setField(allLabels := asTreeSet(msg2label_new.bKeys()));
103	updateLabelsByName();
104
105	onNewLabel.add(lbl -> change());
106
107	makeTheoriesAboutLabels();
108	makeTheoriesAboutFeaturesAndLabels();
109
110	for (S field : fields(Msg))
111	featureExtractors.put(field, env -> getOpt(env.mainObject(), field));
112
113	makeTextExtractors("text");
114
115	callFAllOnAll(onNewLabel, allLabels);
116
117	msg2labelUpdated();
118	updatePredictions();
119	checkAllTheories();
120	//showRandomMsg();
121	});
122	}
123
124	// THEORY MAKING
125
126	void makeTheoriesAboutLabels {
127	// For any label X:
128	onNewLabel.add(lbl -> {
129	// test theory (for every M: M has label X)
130	addTheory(new Theory(BasicLogicRule(new MPTrue, new HasLabel(lbl))));
131	// test theory (for every M: M doesn't have label X)
132	addTheory(new Theory(BasicLogicRule(new MPTrue, new DoesntHaveLabel(lbl))));
133	});
134	}
135
136	void makeTheoriesAboutFeaturesAndLabels {
137	// for every label X:
138	onNewLabel.add(lbl -> {
139	// For any feature F:
140	for (S feature : keys(featureExtractors))
141	// for every seen value V of F:
142	for (O value : possibleValuesOfFeatureRelatedToLabel(feature, lbl))
143	for (O rhs : ll(new HasLabel(lbl), new DoesntHaveLabel(lbl)))
144	// test theory (for every M: msg M's feature F has value V => msg has/doesn't have label x))
145	addTheory(new Theory(BasicLogicRule(
146	new FeatureValueIs(feature, value), rhs)));
147	});
148	}
149
150	// THEORY MAKING (helper functions)
151
152	Set possibleValuesOfFeature(S feature) {
153	if (isBoolField(Msg, feature))
154	ret litset(false, true);
155	ret litset();
156	}
157
158	Set possibleValuesOfFeatureRelatedToLabel(S feature, S label) {
159	Set set = possibleValuesOfFeature(feature);
160	fOr (Msg msg : getMsgsRelatedToLabel(label))
161	set.add(getMsgFeature(msg, feature));
162	ret set;
163	}
164
165	// CALCULATE FEATURES
166
167	O getMsgFeature(Msg msg, S feature) {
168	ret msg2features.get(msg).get(feature);
169	}
170
171	// returns AutoMap with no realized entries
172	Map<S, O> calcMsgFeatures(Msg msg) {
173	new Var<FeatureEnv<Msg>> env;
174	AutoMap<S, O> map = new(feature -> featureExtractors.get(feature).get(env!));
175	env.set(new FeatureEnv<Msg> {
176	Msg mainObject() { ret msg; }
177	O getFeature(S feature) { ret map.get(feature); }
178	});
179	ret map;
180	}
181
182	// GUI: Show messages
183
184	void showMsgs(L<Msg> l) {
185	setField(shownMsgs := l);
186	setMsgs(l);
187	if (l(shownMsgs) == 1) {
188	Msg msg = first(shownMsgs);
189	setField(analysisText := joinWithEmptyLines(
190	"Trained Labels: " + or2(renderBoolMap(getMsgLabels(msg)), "-"),
191	"Features:\n" + formatColonProperties_quoteStringValues(
192	msg2features.get(msg))
193	));
194	setSCPComponent(scpPredictions,
195	scrollableStackWithSpacing(map(predictionsForMsg(msg), p -> {
196	S percent = iround(p.adjustedConfidence) + "%";
197	S neg = "not " + p.label;
198	Bool knownValue = msg2label_new.get(msg, p.label);
199	embedded S strong(S html) { ret b(html, style := "font-size: 18; color: #008000"); }
200	embedded JComponent makeButton(bool known, bool predicted, S label) {
201	S html = predicted ? jlabel_centerHTML(joinWithBR(
202	strong(htmlencode(label)), percent))
203	: label;
204	S toolTip = predicted ? "Predicted with " + percent + " confidence" + stringIf(!known, ". Click to confirm")
205	: !known ? "Click to set this label for message" : "";
206	if (known) ret setTooltip(toolTip, jcenteredlabel(html));
207	JButton btn = setTooltip(toolTip, jbutton(html, rThread { sendInput2(label) }));
208	ret predicted ? btn : jfullcenter(btn);
209	}
210
211	ret withSideMargin(jhgridWithSpacing(
212	makeButton(isTrue(knownValue), p.plus, p.label),
213	makeButton(isFalse(knownValue), !p.plus, neg)
214	));
215	})));
216	} else setField(analysisText := "");
217	}
218
219	void updatePredictions() {
220	showMsgs(shownMsgs);
221	}
222
223	void showRandomMsg {
224	showMsgs(randomElementAsList(msgs));
225	}
226
227	void showPrevMsg {
228	showMsgs(llNonNulls(prevInCyclicList(msgs, first(shownMsgs))));
229	}
230
231	void showNextMsg {
232	showMsgs(llNonNulls(nextInCyclicList(msgs, first(shownMsgs))));
233	}
234
235	// CALCULATE PREDICTIONS FOR MESSAGE
236
237	L<Prediction> predictionsForMsg(Msg msg) {
238	// positive labels first, then "not"s. sort by score in each group
239	new L<Prediction> out;
240	for (Label label : values(labelsByName)) {
241	Theory t = label.bestTheory(), continue if null;
242	Bool lhs = evalTheoryLHS(t, msg), continue if null;
243	bool prediction = t.statement.rhs instanceof DoesntHaveLabel ? !lhs : lhs;
244	double conf = threeB1BScore(t.examples), adjusted = adjustConfidence(conf);
245	//if (adjusted < minAdjustedScoreToDisplay) continue;
246	out.add(new Prediction(label.name, prediction, adjusted));
247	}
248	ret sortedByCalculatedFieldDesc(out, p -> /*pair(p.plus,*/ p.adjustedConfidence/*)*/);
249	}
250
251	// go from range 50-100 to 0-100 (looks better/more intuitive)
252	double adjustConfidence(double x) {
253	ret max(0, (x-50)*2);
254	}
255
256	// rough reverse function of adjustConfidence
257	double unadjustConfidence(double x) {
258	ret x/2+50;
259	}
260
261	// GUI: Enter labels
262
263	void acceptPrediction(Prediction p) {
264	if (p != null) sendInput2(p.predictedLabel());
265	}
266
267	void rejectPrediction(Prediction p) {
268	if (p != null) sendInput2(cloneWithFlippedBoolField plus(p).predictedLabel());
269	}
270
271	@Override
272	void sendInput2(S s) {
273	// treat input as a label
274	if (l(shownMsgs) == 1) {
275	Msg shown = first(shownMsgs);
276	new Matches m;
277	if "not ..." {
278	S label = cleanLabel(m.rest());
279	doubleKeyedMapPutVerbose(+msg2label_new, shown, label, false);
280	msg2labelUpdated(label);
281	if (autoNext) showRandomMsg();
282	} else {
283	S label = cleanLabel(s);
284	doubleKeyedMapPutVerbose(+msg2label_new, shown, label, true);
285	msg2labelUpdated(label);
286	if (autoNext) showRandomMsg();
287	}
288	change();
289	}
290	}
291
292	// MESSAGE LABEL HANDLING
293
294	Map<S, Bool> getMsgLabels(Msg msg) {
295	ret msg2label_new.getA(msg);
296	}
297
298	Set<Msg> getMsgsRelatedToLabel(S label) { ret msg2label_new.asForB(label); }
299
300	void msg2labelUpdated(S label) {
301	for (Theory t : cloneList(labelByName(label).bestTheories))
302	checkTheory(t);
303	msg2labelUpdated();
304	}
305
306	void msg2labelUpdated() {
307	callFAllOnAll(onNewLabel, addAll_returnNew(allLabels, msg2label_new.bKeys()));
308	updateTrainedExamplesTable();
309	}
310
311	// QUERY: get all labels + best theory each
312
313	Map<S, Theory> labelsToBestTheoryMap() {
314	Map<S, L<Theory>> map = multiMapToMap(multiMapIndex targetLabelOfTheory(theories));
315	ret mapValues(map, theories -> highestBy theoryScore(theories));
316	}
317
318	// GUI: Main layout
319
320	visual
321	withCenteredButtons(super,
322	"<", rInThinkQ(r showPrevMsg),
323	"Show random msg", rInThinkQ(r showRandomMsg),
324	">", rInThinkQ(r showNextMsg),
325	jPopDownButton_noText(flattenObjectArray(
326	"Check theories", rInThinkQ(r checkAllTheories),
327	"Forget bad theories", rInThinkQ(r { forgetBadTheories(0) }),
328	"Forget all theories", rInThinkQ(r clearTheories),
329	"Update predictions", rInThinkQ(r updatePredictions),
330	dm_importAndExportAllDataMenuItems(),
331	"Upgrade to v5", rThreadEnter upgradeMe)));
332
333	JComponent mainPart() {
334	ret jhsplit(jvsplit(
335	jCenteredSection("Focused Message", super.mainPart()),
336	jhsplit(
337	jCenteredSection("Message Analysis", dm_textArea analysisText()),
338	jCenteredSection("Predictions (green)", scpPredictions = singleComponentPanel())
339	)),
340	with(r updateTabs, tabs = jtabs(
341	"", with(r updateObjectsTable, withRightAlignedButtons(
342	objectsTable = sexyTable(),
343	"Import messages...", rThreadEnter importMsgs)),
344	"", with(r updateLabelsTable, labelsTable = sexyTable()),
345	"", with(r updateTheoryTable, tableWithSearcher2_returnPanel(theoryTable = sexyTable())),
346	"", with(r updateTrainedExamplesTable, tableWithSearcher2_returnPanel(trainedExamplesTable = sexyTable()))
347	)));
348	}
349
350	// GUI: Update tables & tabs
351
352	void updateTrainedExamplesTable {
353	dataToTable_uneditable(trainedExamplesTable, map(msg2label_new.map1, (msg, map) ->
354	litorderedmap(
355	"Message" := (msg.fromUser ? "User" : "Bot") + ": " + msg.text,
356	"Labels" := renderBoolMap(map))));
357	}
358
359	void updateTabs {
360	setTabTitles(tabs,
361	firstLetterToUpper(nMessages(msgs)),
362	firstLetterToUpper(nLabels(labelsByName)),
363	firstLetterToUpper(nTheories(theories)),
364	n2(msg2label_new.aKeys(), "Trained Example"));
365	}
366
367	void updateTheoryTable {
368	L<Theory> sorted = sortedByCalculatedFieldDesc theoryScore(theories);
369	dataToTable_uneditable(theoryTable, map(sorted, t -> litorderedmap(
370	"Score" := renderTheoryScore(t),
371	"Theory" := str(t))));
372	}
373
374	void updateObjectsTable enter {
375	dataToTable_uneditable_ifHasTable(objectsTable, map(msgs, msg ->
376	litorderedmap("Text" := msg.text)
377	));
378	}
379
380	void updateLabelsTable enter {
381	L<Label> sorted = sortedByCalculatedFieldDesc(values(labelsByName), l -> l.score());
382	dataToTable_uneditable_ifHasTable(labelsTable, map(sorted, label -> {
383	Cl<Theory> bestTheories = label.bestTheories.tiedForFirst();
384	ret litorderedmap(
385	"Label" := label.name,
386	"Prediction Confidence" := renderTheoryScore(first(bestTheories)),
387	"Best Theory" := empty(bestTheories) ? "" :
388	(l(bestTheories) > 1 ? "[+" + (l(bestTheories)-1) + "] " : "") + first(bestTheories));
389	}));
390	}
391
392	void theoriesChanged {
393	updateTheoryTable();
394	updateLabelsTable();
395	updateTabs();
396	updatePredictions();
397	change();
398	}
399
400	// THEORY SCORING
401
402	S renderTheoryScore(Theory t) {
403	//ret renderPosNegCounts(t.examples);
404	ret t == null || t.examples.isEmpty() ? "" : iround(theoryScore(t)) + "%"
405	+ " / " + renderPosNegScore2(t.examples);
406	}
407
408	// adjusted + 3b1b
409	double theoryScore(Theory t) {
410	ret t == null ? -100 : adjustConfidence(threeB1BScore(t.examples));
411	}
412
413	// QUEUE HELPER
414
415	Runnable rInThinkQ(Runnable r) { ret rInQ(thinkQ, r); }
416
417	// ADD + REMOVE + CLEAN UP THEORIES
418
419	void addTheory(Theory theory) {
420	if (theories.add(theory)) {
421	addTheoryToCollectors(theory);
422	theoriesChanged();
423	}
424	}
425
426	void clearTheories { theories.clear(); theoriesChanged(); }
427
428	// theories with exaclty minScore will go too
429	void forgetBadTheories(double minScore) {
430	if (removeElementsThat(theories, t -> theoryScore(t) <= minScore))
431	theoriesChanged();
432	}
433
434	// CHECK PROPOSITIONS + THEORIES
435
436	Bool checkMsgProp(O prop, Msg msg) {
437	if (prop cast And) ret checkMsgProp(prop.a, msg) && checkMsgProp(prop.b, msg);
438	if (prop cast Not) ret not(checkMsgProp(prop.a, msg));
439	ret ((MsgProp) prop).check(msg);
440	}
441
442	Bool evalTheoryLHS(Theory theory, Msg msg) {
443	ret theory == null ? null
444	: checkMsgProp(theory.statement.lhs, msg);
445	}
446
447	Bool testTheoryOnMsg(Theory theory, Msg msg) {
448	Bool lhs = evalTheoryLHS(theory, msg);
449	Bool rhs = checkMsgProp(theory.statement.rhs, msg);
450	if (lhs == null || rhs == null) null;
451	if (bidiMode)
452	ret eq(lhs, rhs);
453	else
454	ret isTrue(rhs) || isFalse(lhs);
455	}
456
457	void checkAllTheories {
458	for (Theory theory : theories)
459	checkTheory_noTrigger(theory);
460	theoriesChanged();
461	}
462
463	void checkTheory(Theory theory) {
464	checkTheory_noTrigger(theory);
465	theoriesChanged();
466	}
467
468	void checkTheory_noTrigger(Theory theory) {
469	new PosNeg<Msg> pn;
470	for (Msg msg : msgs)
471	pn.add(msg, testTheoryOnMsg(theory, msg));
472	if (!eq(theory.examples, pn)) {
473	removeTheoryFromCollectors(theory);
474	theory.examples = pn;
475	addTheoryToCollectors(theory);
476	change();
477	}
478	}
479
480	S targetLabelOfTheory(Theory theory) {
481	O o = theory.statement.rhs;
482	if (o cast HasLabel) ret o.label;
483	if (o cast DoesntHaveLabel) ret o.label;
484	null;
485	}
486
487	// CANONICALIZE LABELS
488
489	S cleanLabel(S label) { ret upper(label); }
490
491	// THEORY + LABEL UPDATES
492
493	void addTheoryToCollectors(Theory theory) {
494	S lbl = targetLabelOfTheory(theory);
495	if (lbl != null)
496	labelByName(lbl).bestTheories.add(theory);
497	}
498
499	void removeTheoryFromCollectors(Theory theory) {
500	S lbl = targetLabelOfTheory(theory);
501	if (lbl != null)
502	labelByName(lbl).bestTheories.remove(theory);
503	}
504
505	Label labelByName(S name) {
506	ret getOrCreate(labelsByName, name, () -> new Label(name));
507	}
508
509	void updateLabelsByName() {
510	for (S lbl : allLabels)
511	labelByName(lbl);
512	for (Theory t : theories)
513	addTheoryToCollectors(t);
514	}
515
516	// MAKE FEATURE EXTRACTORS
517
518	void makeTextExtractors(S textFeature) {
519	for (WithName<IF1<S, O>> f : textExtractors()) {
520	IF1<S, O> theFunction = f!;
521	featureExtractors.put(f.name, env -> theFunction.get((S) env.getFeature(textFeature)));
522	}
523	}
524
525	L<WithName<IF1<S, O>>> textExtractors() {
526	new L<WithName<IF1<S, O>>> l;
527	l.add(WithName<>("number of words", lambda1 numberOfWords));
528	l.add(WithName<>("number of characters", lambda1 l));
529	for (char c : characters("\"', .-_"))
530	l.add(WithName<>("contains " + quote(c), s -> contains(s, c)));
531	/*for (S word : concatAsCISet(lambdaMap words(collect text(msgs))))
532	l.add(WithName<>("contains word " + quote(word), s -> containsWord(s, word)));*/
533	ret l;
534	}
535
536	// GUI: Import messages dialog, warn on delete
537
538	void importMsgs {
539	inputMultiLineText("Messages to import (one per line)", voidfunc(S text) {
540	Cl<S> toImport = listMinusSet(asOrderedSet(tlft(text)), collectAsSet text(msgs));
541	if (msgs == null) msgs = ll();
542	for (S line : toImport)
543	msgs.add(new Msg(true, line));
544	change();
545	infoBox(nMessages(toImport) + " imported");
546	updateObjectsTable();
547	showRandomMsg();
548	});
549	}
550
551	bool warnOnDelete() { true; }
552
553	void upgradeMe {
554	dm_backupStructureAndChangeModuleLibID("#1028066/AutoClassifier");
555	}
556	}