-- Library snippets. The trailing comment names the function each one is
-- loaded for (presumably `get` defines it globally -- confirm).
get("#517") -- mergeTexts
get("#515") -- splitLines

-- Training data; parsed further down as "<annotation> | <text>" lines.
trainingData = getSnippet("#2000015")
-- New data; split into lines further down and annotated line by line.
newData = getSnippet("#2000016")
| 6 | |
--- Annotate new lines whose text exactly matches a training line.
-- Reads the globals `trainingPairs` (list of {annotation, text} pairs),
-- `newLines` and `n`, and fills the unset slots of the global
-- `annotations`. Annotations that are already set are kept. When the same
-- text occurs more than once in the training pairs, the last one wins.
function tryExactLookup()
  -- text -> annotation
  local annmap = {}
  for _, pair in ipairs(trainingPairs) do
    -- Index the pair directly instead of calling the global `unpack`,
    -- which only exists in Lua 5.1/LuaJIT (it is `table.unpack` in 5.2+).
    local ann, s = pair[1], pair[2]
    annmap[s] = ann
  end
  for i = 1, n do
    annotations[i] = annotations[i] or annmap[newLines[i]]
  end
end
| 17 | |
--- Copy annotations from parent lines to their not-yet-annotated children.
-- Every line of the global `newLines` is matched against a
-- "<word> <number> <number>" prefix; the two numbers are used as the
-- line's pid and ppid (presumably `ps`-style output -- confirm against
-- the data). A line without an annotation receives the annotation of the
-- line whose pid equals its ppid, if that line is annotated.
-- Only one generation is copied per call, because the pid map is built
-- before any annotation is copied; the main loop of this script repeats
-- the call until the score stops changing.
function tryCopyFromParents()
  local pat = "^ *([^ ]+) +(%d+) +(%d+)"
  -- pid (as a string) -> annotation. Kept local, like `annmap` in
  -- tryExactLookup, so it does not leak into the global environment.
  local pid2ann = {}

  -- Pass 1: remember the annotation of every annotated line by its pid.
  for i = 1, n do
    local ann = annotations[i]
    if ann ~= nil then
      local _, pid = newLines[i]:match(pat)
      if pid then
        pid2ann[pid] = ann
        --print("pid2ann", pid, ann)
      end
    end
  end

  -- Pass 2: give each unannotated line its parent's annotation (may be nil).
  for i = 1, n do
    if annotations[i] == nil then
      local _, _, ppid = newLines[i]:match(pat)
      if ppid then
        annotations[i] = pid2ann[ppid]
      end
    end
  end
end
| 40 | |
--- Percentage of new lines that currently carry an annotation.
-- Reads the globals `n` (number of lines) and `annotations`.
-- @return a number from 0 to 100; 0 when there are no lines at all
function getscore()
  -- Without this guard 0/0 yields NaN. NaN never compares equal to
  -- itself, so the main loop's `until score == lastscore` would never
  -- become true on empty input.
  if n == 0 then
    return 0
  end
  local score = 0
  for i = 1, n do
    if annotations[i] then score = score + 1 end
  end
  return score / n * 100
end
| 48 | |
-- Turn the training data into {annotation, text} pairs. A line is expected
-- to look like "<annotation> | <text>"; anything else is reported and
-- skipped.
trainingLines = splitLines(trainingData)
trainingPairs = {}
for _, line in ipairs(trainingLines) do
  local label, text = line:match("^ *(.*) | (.*)$")
  if label == nil then
    print("Warning - unparsable line: "..line)
  else
    trainingPairs[#trainingPairs + 1] = { label, text }
  end
end

-- The lines to annotate; annotations[i] belongs to newLines[i].
newLines = splitLines(newData)
annotations = {}
n = #newLines
| 63 | |
-- Seed the annotations with exact matches, then keep copying annotations
-- from parents to children until a round no longer changes the score.
tryExactLookup()
score = getscore()
repeat
  print("score: "..score)
  tryCopyFromParents()
  lastscore = score
  score = getscore()
until lastscore == score

-- Lines that are still unannotated get an empty annotation before merging.
for i = 1, n do
  if not annotations[i] then
    annotations[i] = ''
  end
end
print(mergeTexts(annotations, newLines))
Began life as a copy of #518
test run | test run with input | download | show line numbers
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #519 | 
| Snippet name: | Automated annotations (v2 / testing) | 
| Eternal ID of this version: | #519/1 | 
| Text MD5: | 209b6958af2467248dc51d3337af12bc | 
| Author: | stefan | 
| Category: | |
| Type: | Lua code | 
| Public (visible to everyone): | Yes | 
| Archived (hidden from active list): | No | 
| Created/modified: | 2015-03-11 17:29:27 | 
| Source code size: | 1658 bytes / 73 lines | 
| Pitched / IR pitched: | No / Yes | 
| Views / Downloads: | 914 / 223 | 
| Referenced in: | [show references] |