Uses 992K of libraries. Click here for Pure Java version (1035L/7K).
1 | !7 |
2 | |
3 | lib 1400180 // github.com/optimaize/language-detector |
4 | lib 1400181 // jsonic |
5 | lib 1011966 // slf4j-api-1.7.25.jar |
6 | lib 1400182 // guava |
7 | |
8 | static LS _stickyLibs_langDetect = ll(#1400180, #1400181, #1011966, #1400182); |
9 | |
10 | import com.optimaize.langdetect.*; |
11 | import com.optimaize.langdetect.i18n.*; |
12 | import com.optimaize.langdetect.ngram.*; |
13 | import com.optimaize.langdetect.profiles.*; |
14 | import com.optimaize.langdetect.text.*; |
15 | |
16 | cmodule LanguageDetectorModule { /* Module that guesses the language of a text, using only the built-in "de" and "en" profiles. */ |
17 | transient LanguageDetector languageDetector; /* created on first use by init(); null until then */ |
18 | /* init(): builds languageDetector from the built-in "de" and "en" profiles; returns immediately if it already exists. */ |
19 | void init() { |
20 | lock lock; /* NOTE(review): presumably holds the module's lock for the rest of this method -- confirm against the JavaX "lock" statement */ |
21 | if (languageDetector != null) ret; |
22 | final new LanguageProfileReader profileReader; |
23 | L<LanguageProfile> languageProfiles = map(ll("de", "en"), func(S lang) -> LanguageProfile { profileReader.readBuiltIn(LdLocale.fromString(lang)) }); |
24 | languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) |
25 | .withProfiles(languageProfiles) |
26 | .build(); |
27 | } |
28 | |
29 | // API |
30 | |
31 | // returns "en" or "de" or null (null when the detector reports no candidate languages) |
32 | S detectLanguage(S text) { |
33 | S lang = dm_findAndCallModule("#1021121/WordToLanguageCRUD", 'languageForText, text); /* ask the WordToLanguageCRUD module first */ |
34 | if (lang != null) ret languageToTwoLetters(lang); /* a non-null answer from that module takes precedence over the detector */ |
35 | init(); /* make sure the detector exists before using it */ |
36 | L<DetectedLanguage> languages = languageDetector.getProbabilities(text); |
37 | ret empty(languages) ? null : first(languages).getLocale().getLanguage(); /* first entry is presumably the most probable language -- confirm getProbabilities() ordering */ |
38 | } |
39 | } |
Began life as a copy of #1021122
download show line numbers debug dex old transpilations
Travelled to 7 computer(s): bhatertpkbcr, cfunsshuasjs, mqqgnosmbjvj, pyentgdyhuwx, pzhvpgtvlbxg, tvejysmllsmz, vouqrxazstgt
No comments. add comment
Snippet ID: | #1021123 |
Snippet name: | Language Detector (German/English) |
Eternal ID of this version: | #1021123/4 |
Text MD5: | 3540a0300976b4b3706e15f670d7a1a2 |
Transpilation MD5: | 5c185b5ae2f07a0933ff81f0969bb997 |
Author: | stefan |
Category: | javax / nlp |
Type: | JavaX source code (Dynamic Module) |
Public (visible to everyone): | Yes |
Archived (hidden from active list): | No |
Created/modified: | 2019-01-23 23:20:52 |
Source code size: | 1346 bytes / 39 lines |
Pitched / IR pitched: | No / No |
Views / Downloads: | 388 / 931 |
Version history: | 3 change(s) |
Referenced in: | [show references] |