Libraryless. Click here for Pure Java version (6241L/41K).
sclass SimpleRecognizer { bool useCache1 = true, useCache2 = true; Lock lock = lock(); transient IF1<BWImage> wordImagePreprocessor; // optional preprocessor for word images (e.g. auto-contast) class GlyphInfo { S meaning; bool multi; // multiple meanings seen toString { ret meaning; } } // key = md5 Map<S, GlyphInfo> glyphInfos = synchroMap(); // optional for full similarity search - character image to MD5 Map<BWImage, S> fullSearchMap; S unknownCharacter = ocr_unknownCharacterPlaceholder(); // "\u2666" - diamond suit symbol; used for unknown characters *() {} void load(S info) { lock lock; recognizeGrouped_cache.clear(); new Matches m; for (S s : toLinesFullTrim(info)) { if (find3("the images * are the characters *", s, m)) { L<S> md5s = splitAtSpace($1); L<S> characters = eachCharAsString(dropSpaces($2)); saveMeanings(md5s, characters); } else if (find3("the images * are the grouped characters *", s, m)) { L<S> md5s = splitAtSpace($1); L<S> characters = ocr_parseGlyphs(dropSpaces($2)); saveMeanings(md5s, characters); } else if (nempty(javaTokC(s))) { print("huh? " + s); } } //print("Have " + n(l(glyphInfos), "glyph info")); //psl(glyphInfos); } void saveMeaning(S md5, S meaning) { GlyphInfo info = getGlyphInfo(md5); if (info.multi) ret; if (hasDifferent(info.meaning, meaning)) { //info.meaning = null; info.meaning = meaning; info.multi = true; //print("multi"); } else info.meaning = meaning; } // gets or creates GlyphInfo GlyphInfo getGlyphInfo(S md5) { synchronized(glyphInfos) { GlyphInfo info = glyphInfos.get(md5); if (info == null) glyphInfos.put(md5, info = new GlyphInfo); ret info; } } void saveMeanings(L<S> md5s, L<S> characters) { if (l(md5s) != l(characters)) { print("huh?"); ret; } for i over md5s: saveMeaning(md5s.get(i), characters.get(i)); } S recognize(BWImage img) { ret ocr_joinGroups(recognizeGrouped(img)); } Scored<S> recognizeScored(BWImage img) { Scored<L<S>> s = recognizeGrouped(img, null); ret scored(ocr_joinGroups(s!), s); } L<S> recognizeGrouped(BWImage img) { ret getVar(recognizeGrouped(img, null)); } // md5 -> recognition result Map<S, Scored<LS>> recognizeGrouped_cache = synchroMap(); int cantCache, cacheHits, cacheMisses; Scored<LS> recognizeGrouped(BWImage img, L<Rect> clips_out) { S md5 = null; if (clips_out != null || !useCache1) ++cantCache; else { img = callFOrKeep(wordImagePreprocessor, img); md5 = md5OfBWImage(img); Scored<LS> result = recognizeGrouped_cache.get(md5); if (result != null) { ++cacheHits; ret result; } else ++cacheMisses; } Scored<LS> result = recognizeGrouped_uncached(img, clips_out); if (md5 != null) recognizeGrouped_cache.put(md5, result); ret result; } Scored<LS> recognizeGrouped_uncached(BWImage img, L<Rect> clips_out) { new LS buf; L<Rect> rects = horizontalAutoSplit2ThenAutoCrop(img); if (empty(rects)) ret scored((L<S>) emptyList(), 0.99); new L<Scored> scores; iLoop: for (int i = 0; i < l(rects); i++) { Rect r = null; for (int j = i; j < l(rects); j++) { r = rectUnion(r, rects.get(j)); BWImage cImg = img.clip(r); Scored<GlyphInfo> scored = recognizeGlyph(cImg, false); GlyphInfo info = getVar(scored); if (info != null && info.meaning != null) { buf.add(info.meaning); buf.addAll(rep("_", j-i)); if (clips_out != null) clips_out.addAll(rep(r, j-i+1)); scores.add(scored); i = j; continue iLoop; } } r = rects.get(i); Scored<GlyphInfo> scored = recognizeGlyph(img.clip(r), true); GlyphInfo info = getVar(scored); if (info != null && info.meaning != null) buf.add(info.meaning); else buf.add(unknownCharacter); if (clips_out != null) clips_out.add(r); scores.add(scored); } ret scored(buf, averageScore(scores)); } // md5 -> recognition result Map<S, Scored<GlyphInfo>> recognizeGlyph_cache = synchroMap(); static int cacheHits2, cacheMisses2; Scored<GlyphInfo> recognizeGlyph(BWImage img) { ret recognizeGlyph(img, true); } Scored<GlyphInfo> recognizeGlyph(BWImage img, bool fullSearch) { S md5 = md5OfBWImage(img); { //lock lock; GlyphInfo info = glyphInfos.get(md5); if (info != null || !fullSearch || fullSearchMap == null) ret fullScored(info); if (useCache2) { Scored<GlyphInfo> result = recognizeGlyph_cache.get(md5); if (result != null) { ++cacheHits2; ret result; } cacheMisses2++; } } new Best<S> best; for (BWImage cImg : /*concurrentlyIterateKeys*/keys(fullSearchMap)) { float sim = bwImageSimilarityResized(img, cImg, (float) best.bestScore()); best.put(fullSearchMap.get(cImg), sim); } Scored<GlyphInfo> result = !best.has() ? null : scored(glyphInfos.get(best!), best.score()); if (useCache2) recognizeGlyph_cache.put(md5, result); ret result; } S cacheStats() { //ret "Cache size: " + l(recognizeGrouped_cache) + ", hits: " + cacheHits + ", misses: " + cacheMisses + ", uncachable: " + cantCache; ret "Cache size: " + l(recognizeGlyph_cache) + ", hits: " + cacheHits2 + ", misses: " + cacheMisses2 + ", full search map: " + l(fullSearchMap); } S sizeStats() { ret l(glyphInfos) + "/" + l(fullSearchMap); } }
Began life as a copy of #1006103
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, onxytkatvevr, pyentgdyhuwx, pzhvpgtvlbxg, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1006108 |
Snippet name: | SimpleRecognizer - recognizes a line of text |
Eternal ID of this version: | #1006108/41 |
Text MD5: | 1015f4609a0f4e17fa05ad438b036d60 |
Transpilation MD5: | 1d2bcf46dbd08a869e56a96ee2f27137 |
Author: | stefan |
Category: | javax / ocr |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2019-11-21 14:21:42 |
Source code size: | 5918 bytes / 182 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 1146 / 1934 |
Version history: | 40 change(s) |
Referenced in: | [show references] |