1 | static int ocr_recognizeTextClips_spaceDist = 7; |
2 | |
3 | static L<S> ocr_recognizeTextClips(BWImage bw, L<Rect> clips) { |
4 | SimpleRecognizer sr = filledSimpleRecognizer_cached(); |
5 | final L<Rect> rects = autoSegment(bw); |
6 | new L<S> lines; |
7 | for (Rect clip: clips) { |
8 | S s = diamond(); |
9 | pcall { |
10 | BWImage clipImg = autoCroppedBWImage(bw.clip(clip)); |
11 | new L<Rect> gclips; // glyph clips |
12 | L<S> groups = sr.recognizeGrouped(clipImg, gclips); |
13 | int n = l(groups); |
14 | if (l(gclips) != n) |
15 | print("huh? " + n + "/" + l(gclips)); |
16 | else { |
17 | //print("Finding spaces. " + n + " characters"); |
18 | for (int i = 0; i < l(groups)-1; i++) { |
19 | Rect a = gclips.get(i); |
20 | Rect b = gclips.get(i+1); |
21 | if (a != null && b != null) { |
22 | int dist = b.x-a.x2(); |
23 | //print("Space dist between " + groups.get(i) + " and " + groups.get(i+1) + ": " + dist); |
24 | if (dist >= ocr_recognizeTextClips_spaceDist) { |
25 | groups.add(i+1, " "); |
26 | gclips.add(i+1, null); |
27 | ++i; |
28 | } |
29 | } |
30 | } |
31 | } |
32 | s = ocr_joinGroups(groups); |
33 | } |
34 | lines.add(s); |
35 | } |
36 | ret lines; |
37 | } |
Began life as a copy of #1006333
download show line numbers debug dex old transpilations
Travelled to 14 computer(s): aoiabmzegqzx, bhatertpkbcr, cbybwowwnfue, cfunsshuasjs, gwrvuhgaqvyk, ishqpsrjomds, lpdgvwnxivlt, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, sawdedvomwva, tslmcundralx, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1006339 |
Snippet name: | ocr_recognizeTextClips |
Eternal ID of this version: | #1006339/1 |
Text MD5: | bd6dcc1999e451f26a4ee3521626e89a |
Author: | stefan |
Category: | javax / ocr |
Type: | JavaX fragment (include) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2016-12-29 00:55:35 |
Source code size: | 1219 bytes / 37 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 543 / 556 |
Referenced in: | [show references] |