Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

115
LINES

< > BotCompany Repo | #1026083 // Test Text Recognizers [v2, with rects, OK but can't show details yet]

JavaX source code (Dynamic Module) [tags: use-pretranspiled] - run with: Stefan's OS

Uses 911K of libraries. Click here for Pure Java version (10672L/58K).

!7

// Module that scores text recognizers against a set of test images with
// known expected text lines. Each recognizer's overall score and per-image
// scores are kept in `results` and also forwarded to a separate score module
// (see scoreModule).
cmodule TestTextRecognizers > DynSingleFunctionWithPrintLog {
  // A Recognizer is a function from an image to the list of texts found in it.
  replace Recognizer with IF1<BufferedImage, L<RecognizedText>>.
  
  // Handle of the score module; obtained in prepare() from
  // dm_loadOrActivateScoreMatrixModule and used as the target of dm_rcall.
  // NOTE(review): presumably a module ID string - confirm.
  S scoreModule;
  // Upper limit for the loaded image data, compared against toMB(dataSize)
  // in spaceToSpare(); image loading stops once the limit is reached.
  long maxMBs = 256; // save dat memory
  // One entry per recognizer tested; cleared in doIt(), appended to in
  // _testRecognizer().
  transient L<Result> results = syncList();
  // Receives (recognizer name, score) pairs in _testRecognizer(); when put()
  // returns true, "NEW BEST!" is printed.
  new Best<S> best;
  
  // Running sum of bufferedImageDataSize(img) over all loaded images.
  transient long dataSize;
  // Test cases loaded by loadImages().
  transient new L<TestImage> images;
  // Created in the `visual` declaration below. Not referenced anywhere else
  // in this module, so nothing here updates it.
  transient JProgressBar progressBar;
  
  // result for recognizer: its name, the recognizer itself, the overall score
  // and the score per individual test result
  srecord noeq Result(
    S recognizerName, Recognizer recognizer,
    double score, Map<TestResult, Double> individualScores) {}
    
  // individual test result: the test image plus the texts the recognizer
  // returned for it
  srecord noeq TestResult(
    TestImage test,
    L<RecognizedText> linesFound
  ) {
    // Placeholder: only shows an info box with fixed text, no actual details.
    void showDetails { infoBox("Detaillls"); }
  }
  
  // A test case: a name (loadImages() uses the image's file name), the image
  // and the set of text lines expected to be recognized in it.
  srecord noeq TestImage(S caseName, BufferedImage img, TreeSet<S> expectedLines) {
    toString { ret caseName; }
  }
  
  // True while the loaded image data is still below the maxMBs limit.
  bool spaceToSpare() { ret toMB(dataSize) < maxMBs; }

  // Main action: prepares the module, clears the previous results (locally
  // and in the score module), then tests the ocr_recognizeMultiLine_scored
  // recognizer.
  void doIt {
    prepare();
    results.clear();
    dm_rcall clear(scoreModule);

    _testRecognizer('ocr_recognizeMultiLine_scored, lambda1 ocr_recognizeMultiLine_scored);
  }
  
  // Loads the test images if none are loaded yet and makes sure the score
  // module is available, storing its handle in scoreModule.
  // NOTE(review): the runInQAndWait modifier presumably runs the body in the
  // module's queue and blocks until it has finished - confirm.
  void prepare runInQAndWait {
    if (empty(images)) loadImages();
    setField(scoreModule := dm_loadOrActivateScoreMatrixModule(scoreModule));
  }

  // Runs recognizer `seg` on every loaded test image and returns a Result
  // named `name`.
  // Per image, the score is doubleRatio(number of expected lines that were
  // found, number of expected lines); the overall score comes from a Scorer
  // fed with all per-image scores.
  // A single try block wraps the whole loop: if anything throws, the
  // remaining images are not scored, "RECOGNIZER TOTAL FAIL" is printed and
  // the result keeps its initial score of 0 plus whatever individual scores
  // were recorded up to that point.
  Result scoreRecognizer(S name, Recognizer seg) {
    Result result = new(name, seg, 0, new LinkedHashMap);
    
    try {
      new Scorer scorer;
      for (TestImage img : images) {
        L<RecognizedText> out = seg.get(img.img);
        // Set of recognized texts (the `text` of each RecognizedText)
        TreeSet<S> outSet = new(map(methodLambda0 text, out));
        // Expected lines that the recognizer actually produced
        Set<S> found = setIntersection(outSet, img.expectedLines);
        //Set<S> extra = setMinusSet(outSet, img.expectedLines);
        double score = doubleRatio(l(found), l(img.expectedLines));
        print("Score: " + score);
        scorer.addZeroToOne(score);
        result.individualScores.put(new TestResult(img, out), score);
      }
    
      print(scorer);
      result.score = scorer.score();
    // NOTE(review): `catch print e` presumably prints the exception before
    // running the handler body - confirm.
    } catch print e {
      print("RECOGNIZER TOTAL FAIL");
    }
    
    ret result;
  }
  
  // Loads test cases from the "Screen shots for text recognition" data
  // directory. Each test case is a .expectedlines file plus an image file
  // with the same base name. Stops early once spaceToSpare() is false.
  void loadImages {
    File dir = javaxDataDir("Screen shots for text recognition");
    L<File> in = asLinkedList(listFilesWithExtension(".expectedlines", dir));
    while (nempty(in)) {
      if (!spaceToSpare())
        break with print("Out of space (" + toM(dataSize) + " MB used), skipping " + nImages(in));
      
      File f = popFirst(in);
      // NOTE(review): presumably keeps only the quoted lines of the file and
      // unquotes them - confirm against quotedOnly_unquote / tlft.
      LS lines = quotedOnly_unquote(tlft(loadTextFile(f)));
      // Skip cases without expected lines or without a matching image file
      if (empty(lines)) continue;
      File fImg = imageFileWithSameBaseName(f);
      if (fImg == null) continue;
      // NOTE(review): pcall presumably catches and reports exceptions so that
      // one unreadable image does not abort the loading loop - confirm.
      pcall {
        BufferedImage img = loadImage2(fImg);
        images.add(new TestImage(fileName(fImg), img, new TreeSet(lines)));
        dataSize += bufferedImageDataSize(img);
        print("Have " + nImages(images) + ", data size: " + toM(dataSize) + " MB");
      }
    }
    print("Loading done");
  }
  
  // On module start: sets the function name to "Run tests".
  // NOTE(review): presumably the label of the superclass's run button - confirm.
  start { setFunctionName("Run tests"); }
  
  // Visualization: the superclass's visual combined with a progress bar
  // (with text) that is stored in progressBar.
  visual centerAndSouthWithMargin(super, withMargin(progressBar = jProgressBarWithText()));
  
  // Scores recognizer `rec` under the given name, prints the score, adds the
  // result to `results`, updates `best` and reports the result to the score
  // module (newEntry followed by add).
  void _testRecognizer(S name, Recognizer rec) {
    prepare();
    Result r = scoreRecognizer(name, rec);
    print("Score for " + name + ": " + r.score);
    results.add(r);
    if (best.put(name, r.score))
      print("NEW BEST!");
    // NOTE(review): change() presumably marks the module as modified so that
    // its state gets saved - confirm.
    change();
    dm_rcall add(scoreModule,
      dm_rcall newEntry(scoreModule, r.recognizerName, r.score, r.individualScores));
  }
  
  // API
  
  // Entry point for callers that pass a recognizer as a `virtual` object:
  // the recognizer is invoked through callF and its return value is passed
  // through quickImport before being cast to L<RecognizedText>.
  // NOTE(review): presumably needed because the recognizer may come from
  // another module with its own classes - confirm.
  void testRecognizer(S name, virtual Recognizer _rec) {
    _testRecognizer(name, img -> (L<RecognizedText>) quickImport(callF(_rec, img)));
  }
}

Author comment

Began life as a copy of #1026080

download  show line numbers  debug dex  old transpilations   

Travelled to 6 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt

No comments. add comment

Snippet ID: #1026083
Snippet name: Test Text Recognizers [v2, with rects, OK but can't show details yet]
Eternal ID of this version: #1026083/9
Text MD5: b8d978124cdbd7245388c74234f73706
Transpilation MD5: 664796e271b0dc0f4de74a71b1e9cf0a
Author: stefan
Category: javax / ocr
Type: JavaX source code (Dynamic Module)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2019-11-19 23:50:37
Source code size: 3638 bytes / 115 lines
Pitched / IR pitched: No / No
Views / Downloads: 200 / 339
Version history: 8 change(s)
Referenced in: #1026088 - Test Word Recognizers