-- Annotates the lines of a new text using annotations learned from a
-- training text, then prints the merged result.
--
-- Two strategies are applied:
--   1. tryExactLookup      - a new line identical to a training line gets
--                            that training line's annotation.
--   2. tryCopyFromParents  - an un-annotated line inherits the annotation of
--                            the line whose second numeric column equals this
--                            line's third numeric column (the locals are named
--                            pid/ppid, so the input is presumably ps-style
--                            process output -- verify against the snippets).
-- Strategy 2 is repeated until the score stops changing.

-- Presumably these load the snippets that define the helpers named in the
-- trailing comments; `get` itself is provided by the host environment.
get("#517") -- mergeTexts
get("#515") -- splitLines

trainingData = getSnippet("#2000015")
newData = getSnippet("#2000016")

-- Annotates every new line whose text exactly matches the text part of a
-- training pair. Existing annotations are never overwritten.
function tryExactLookup()
  local annmap = {}
  for _, pair in ipairs(trainingPairs) do
    -- Plain indexing instead of unpack(): the global `unpack` only exists in
    -- Lua 5.1 / LuaJIT, while indexing works on every Lua version.
    local ann, s = pair[1], pair[2]
    annmap[s] = ann
  end
  for i = 1, n do
    annotations[i] = annotations[i] or annmap[newLines[i]]
  end
end

-- Copies annotations from "parent" lines to their still un-annotated
-- "children". A line matches when it starts with a non-space token followed by
-- two numbers; the first number is used as the line's own id (pid) and the
-- second as the id of its parent (ppid).
-- One call propagates annotations by a single level only; the caller repeats
-- it until nothing changes.
function tryCopyFromParents()
  local pat = "^ *([^ ]+) +(%d+) +(%d+)"

  -- Left global, as in the original script, so that anything else reading
  -- `pid2ann` keeps working.
  pid2ann = {}

  -- Pass 1: remember the annotation of every annotated line by its pid.
  for i = 1, n do
    if annotations[i] ~= nil then
      local _, _, _, pid = newLines[i]:find(pat)
      if pid then
        pid2ann[pid] = annotations[i]
        --print("pid2ann", pid, annotations[i])
      end
    end
  end

  -- Pass 2: give each un-annotated line its parent's annotation, if known.
  for i = 1, n do
    if annotations[i] == nil then
      local _, _, _, _, ppid = newLines[i]:find(pat)
      if ppid then
        annotations[i] = pid2ann[ppid]
      end
    end
  end
end

-- Returns the percentage (0..100) of new lines that currently have an
-- annotation. Returns 0 when there are no lines at all: the naive 0/0 would
-- yield NaN, and since NaN ~= NaN the fixpoint loop below would never end.
function getscore()
  if n == 0 then
    return 0
  end
  local score = 0
  for i = 1, n do
    if annotations[i] then
      score = score + 1
    end
  end
  return score / n * 100
end

-- Parse the training text into {annotation, text} pairs. Each usable line has
-- the form "<annotation> | <text>"; because the first capture is greedy, the
-- split happens at the last " | " on the line.
trainingLines = splitLines(trainingData)
trainingPairs = {}
for _, l in ipairs(trainingLines) do
  local _, _, ann, s = l:find("^ *(.*) | (.*)$")
  if ann then
    table.insert(trainingPairs, {ann, s})
  else
    print("Warning - unparsable line: "..l)
  end
end

newLines = splitLines(newData)
n = #newLines
annotations = {}

tryExactLookup()
score = getscore()

-- Propagate annotations from parents to children until a pass adds nothing.
repeat
  print("score: "..score)
  tryCopyFromParents()
  lastscore, score = score, getscore()
until score == lastscore

-- Replace missing annotations with empty strings before merging.
for i = 1, n do
  annotations[i] = annotations[i] or ''
end

print(mergeTexts(annotations, newLines))