Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

73
LINES

< > BotCompany Repo | #519 // Automated annotations (v2 / testing)

Lua code

-- Pull in helper snippets by ID.
-- NOTE(review): get() is provided by the host environment; presumably it
-- loads the snippet and defines the function named in the trailing comment
-- as a global -- confirm against the host API.
get("#517") -- mergeTexts
get("#515") -- splitLines

-- Raw input texts fetched by snippet ID (getSnippet is host-provided).
-- trainingData is parsed further down as lines of the form
-- "annotation | text"; newData is split into the lines to be annotated.
trainingData = getSnippet("#2000015")
newData = getSnippet("#2000016")

--- Fill in missing annotations by exact text match against the training data.
-- Builds a lookup table from each training line's text to its annotation and
-- assigns that annotation to every new line with identical text that has no
-- annotation yet. If the same text occurs several times in the training
-- data, the last occurrence wins.
-- Reads the globals trainingPairs (list of {annotation, text} pairs),
-- newLines and n; writes the global annotations.
function tryExactLookup()
  local annmap = {}
  for _, pair in ipairs(trainingPairs) do
    -- Index the pair directly instead of calling the global `unpack`, which
    -- only exists in Lua 5.1/LuaJIT (5.2+ moved it to table.unpack).
    local ann, s = pair[1], pair[2]
    annmap[s] = ann
  end
  for i = 1, n do
    -- Never overwrite an annotation that is already present.
    annotations[i] = annotations[i] or annmap[newLines[i]]
  end
end

--- Copy annotations from already-annotated lines to their unannotated children.
-- Each line is matched against a pattern capturing its first three
-- space-separated columns: a non-space token followed by two numbers.
-- NOTE(review): the original capture names (user, pid, ppid) suggest
-- ps-style process listings -- confirm against the input data.
--
-- Pass 1 maps the second column (pid) of every annotated line to its
-- annotation; pass 2 gives every unannotated line the annotation registered
-- for its third column (ppid), if any. Annotations assigned in pass 2 are not
-- visible to other lines within the same call, so the caller has to invoke
-- this function repeatedly until nothing changes.
-- Reads the globals newLines and n; reads and writes the global annotations.
function tryCopyFromParents()
  local pat = "^ *([^ ]+) +(%d+) +(%d+)"
  -- Scratch table; kept local so it does not leak into the global environment.
  local pid2ann = {}

  for i = 1, n do
    local ann = annotations[i]
    if ann ~= nil then
      local _, pid = newLines[i]:match(pat)
      if pid then
        pid2ann[pid] = ann
      end
    end
  end

  for i = 1, n do
    if annotations[i] == nil then
      local _, _, ppid = newLines[i]:match(pat)
      if ppid then
        annotations[i] = pid2ann[ppid]
      end
    end
  end
end

--- Percentage (0..100) of new lines that currently carry an annotation.
-- Reads the globals n (number of lines) and annotations.
-- @return number share of annotated lines in percent; 0 when there are no lines
function getscore()
  -- Without this guard 0/0 yields NaN, and since NaN never equals itself the
  -- driver's `until score == lastscore` loop would never terminate on empty
  -- input.
  if n == 0 then
    return 0
  end
  local score = 0
  for i = 1, n do
    if annotations[i] then
      score = score + 1
    end
  end
  return score / n * 100
end

-- Split the training text into lines and parse each one as
-- "<annotation> | <text>". Leading spaces are skipped, and because the first
-- capture is greedy the split happens at the last " | " on the line.
-- Lines that do not match are reported and skipped.
trainingLines = splitLines(trainingData)
trainingPairs = {}
for _, line in ipairs(trainingLines) do
  local label, text = line:match("^ *(.*) | (.*)$")
  if label == nil then
    print("Warning - unparsable line: "..line)
  else
    trainingPairs[#trainingPairs + 1] = { label, text }
  end
end

-- Driver: annotate the new lines and print the merged result.
newLines = splitLines(newData)
n = #newLines
annotations = {}

-- Seed with exact matches from the training data.
tryExactLookup()
score = getscore()

-- Keep propagating annotations until a pass no longer changes the score.
while true do
  print("score: "..score)
  tryCopyFromParents()
  lastscore = score
  score = getscore()
  if score == lastscore then
    break
  end
end

-- Lines that are still unannotated get an empty annotation before merging.
for i = 1, n do
  if not annotations[i] then
    annotations[i] = ''
  end
end
print(mergeTexts(annotations, newLines))

Author comment

Began life as a copy of #518

test run  test run with input  download  show line numbers   

Travelled to 12 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #519
Snippet name: Automated annotations (v2 / testing)
Eternal ID of this version: #519/1
Text MD5: 209b6958af2467248dc51d3337af12bc
Author: stefan
Category:
Type: Lua code
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2015-03-11 17:29:27
Source code size: 1658 bytes / 73 lines
Pitched / IR pitched: No / Yes
Views / Downloads: 614 / 145
Referenced in: [show references]