-- Pull in helper snippets. Going by the trailing notes, #517 supplies
-- mergeTexts and #515 supplies splitLines (both are called further down).
get("#517") -- mergeTexts
get("#515") -- splitLines
-- Raw text of the two input snippets: the annotated training data and the
-- new, not yet annotated data. NOTE(review): getSnippet presumably returns
-- the snippet body as one string -- confirm against its definition.
trainingData = getSnippet("#2000015")
newData = getSnippet("#2000016")
--- Annotate new lines whose text exactly matches a training line.
-- Reads the globals `trainingPairs` (a list of {annotation, line} pairs),
-- `newLines` and `n`, and fills the empty slots of the global `annotations`.
-- Slots that already hold an annotation are left untouched. When the same
-- line text appears in several training pairs, the last pair wins.
function tryExactLookup()
  -- line text -> annotation
  local annmap = {}
  for _, pair in ipairs(trainingPairs) do
    -- Index the pair directly instead of calling the global `unpack`, which
    -- only exists in Lua 5.1/LuaJIT (it became `table.unpack` in 5.2).
    local ann, s = pair[1], pair[2]
    annmap[s] = ann
  end
  for i = 1, n do
    annotations[i] = annotations[i] or annmap[newLines[i]]
  end
end
--- Give unannotated lines the annotation of their parent line.
-- Every entry of the global `newLines` is matched against
-- "<word> <digits> <digits>"; the first number is used as the line's own id
-- (pid) and the second as the id of its parent (ppid). Lines that do not
-- match the pattern are skipped.
--
-- Works on the globals `n`, `newLines` and `annotations`. The lookup table is
-- built from the annotations present when the call starts, so one call
-- propagates annotations by a single generation only; call it repeatedly to
-- reach deeper descendants.
function tryCopyFromParents()
  local pat = "^ *([^ ]+) +(%d+) +(%d+)"
  -- pid (as string) -> annotation. Kept local: it is scratch state for this
  -- call and should not leak into the global table.
  local pid2ann = {}
  for i = 1, n do
    local ann = annotations[i]
    if ann ~= nil then
      local _, pid = newLines[i]:match(pat)
      if pid then
        pid2ann[pid] = ann
      end
    end
  end
  for i = 1, n do
    if annotations[i] == nil then
      local _, _, ppid = newLines[i]:match(pat)
      if ppid then
        -- Stays nil when the parent is unknown or not annotated yet.
        annotations[i] = pid2ann[ppid]
      end
    end
  end
end
--- Percentage of new lines that currently carry an annotation.
-- Reads the globals `n` (number of lines) and `annotations`.
-- @return number in the range 0..100; 0 when there are no lines at all.
function getscore()
  -- Without this guard the result would be 0/0 (NaN). NaN never compares
  -- equal to itself, so a caller looping "until score == lastscore" would
  -- never terminate on empty input.
  if n == 0 then return 0 end
  local annotated = 0
  for i = 1, n do
    if annotations[i] then annotated = annotated + 1 end
  end
  return annotated / n * 100
end
-- Parse the training snippet into {annotation, line} pairs.
-- Each training line is expected to look like "<annotation> | <line text>";
-- spaces in front of the annotation are skipped. Lines that do not fit are
-- reported and ignored.
trainingLines = splitLines(trainingData)
trainingPairs = {}
for _, l in ipairs(trainingLines) do
  -- `(.-)` is lazy, so the split happens at the FIRST " | ". A greedy `(.*)`
  -- would split at the last one and cut any line text that itself contains
  -- " | " in the wrong place, so it could never match in the exact lookup.
  -- NOTE(review): assumes annotations never contain " | " -- confirm.
  local ann, s = l:match("^ *(.-) | (.*)$")
  if ann then
    trainingPairs[#trainingPairs + 1] = {ann, s}
  else
    print("Warning - unparsable line: "..l)
  end
end
-- Main driver: split the new data into lines, annotate what can be matched
-- exactly, then keep copying annotations from parent lines until a pass no
-- longer improves the score. Finally print the annotations merged with the
-- lines.
newLines = splitLines(newData)
n = #newLines
annotations = {}
tryExactLookup()
score = getscore()
repeat
  print("score: "..score)
  tryCopyFromParents()
  lastscore, score = score, getscore()
-- Annotations are only ever added, so the score cannot drop; stop as soon as
-- it fails to rise. Written as `not (score > lastscore)` rather than
-- `score == lastscore` so the loop also ends should getscore() ever yield NaN
-- (0/0 when there are no lines), which never compares equal to itself.
until not (score > lastscore)
-- mergeTexts gets an entry for every line; unannotated ones become ''.
for i=1, n do annotations[i] = annotations[i] or '' end
print(mergeTexts(annotations, newLines))
-- Began life as a copy of #518
--[=[ Snippet page text (not part of the program):
test run test run with input download show line numbers
Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
| Snippet ID: | #519 |
| Snippet name: | Automated annotations (v2 / testing) |
| Eternal ID of this version: | #519/1 |
| Text MD5: | 209b6958af2467248dc51d3337af12bc |
| Author: | stefan |
| Category: | |
| Type: | Lua code |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2015-03-11 17:29:27 |
| Source code size: | 1658 bytes / 73 lines |
| Pitched / IR pitched: | No / Yes |
| Views / Downloads: | 911 / 223 |
| Referenced in: | [show references] |
]=]