Libraryless. Click here for Pure Java version (2625L/17K).
/**
 * Incremental (steppable) CSV parser.
 *
 * Supports values wrapped in double quotes; inside a quoted value, commas and
 * line breaks are part of the value and a literal double quote is written as "".
 * Records are separated by '\r', '\n' or "\r\n".
 *
 * Empty fields are reported as empty strings (so "a,,c" yields three values).
 * Completely blank lines are skipped and do not produce a record.
 *
 * Results are delivered through the swappable callbacks onValueFound and
 * onEndOfRecord.
 */
sclass ExtendedCSVParser implements Steppable {
  S csv;                      // the text being parsed
  int numFields;              // number of values reported so far in the current record
  long numRecords;            // number of records completed so far
  long maxRecordsToLoad = -1; // set to >= 0 to limit
  int i, startOfValue;        // i = current scan position, startOfValue = where the pending value begins
  bool inQuotedValue;         // true while scanning between an opening and a closing double quote

  *() {}
  *(S *csv) {}

  // Parses the whole input by stepping until step() returns false.
  run {
    stepAll(this);
  }

  /**
   * Processes one character of the input (two for an escaped quote).
   *
   * @return true if there is more to do, false when the input is exhausted
   *         or the record limit (maxRecordsToLoad) has been reached
   */
  public bool step() {
    if (i >= l(csv)) {
      // End of input: flush whatever is pending and close the last record.
      foundLastValueOfLine(substring(csv, startOfValue));
      // Mark the tail as consumed so a repeated step() call does not emit it again.
      startOfValue = l(csv);
      endOfRecord();
      false;
    }

    char c = csv.charAt(i);
    if (c == ',') {
      if (!inQuotedValue) {
        // A comma always terminates a field, even an empty one.
        foundValue(substring(csv, startOfValue, i));
        startOfValue = i+1;
      }
    } else if (c == '\r' || c == '\n') {
      if (!inQuotedValue) {
        foundLastValueOfLine(substring(csv, startOfValue, i));
        startOfValue = i+1;
        if (endOfRecord()) false;
      }
    } else if (c == '"') {
      if (inQuotedValue) {
        if (charAt(csv, i+1) == '"') i++; // double quotes inside of a value
        else
          inQuotedValue = false;
      } else
        set inQuotedValue;
    }
    i++;
    true;
  }

  /**
   * Converts a raw field (as it appears in the input) to its actual value:
   * trims surrounding whitespace, removes the enclosing double quotes and
   * turns each escaped quote ("") into a single quote.
   */
  S unquoteValue(S value) {
    // Strip the enclosing quotes BEFORE un-escaping. Doing it the other way
    // round eats escaped quotes at the edges of the value, e.g. the raw
    // field """" (a single quote character) would come out empty.
    S inner = dropPrefix("\"", dropSuffix("\"", trim(value)));
    ret replace(inner, "\"\"", "\"");
  }

  /**
   * Handles the value in front of a line break or the end of input.
   * An empty value with no preceding fields in the record is a blank line
   * (or the '\n' of a "\r\n" pair), not an empty field, so it is ignored.
   */
  void foundLastValueOfLine(S value) {
    if (numFields == 0 && empty(value)) return;
    foundValue(value);
  }

  // Counts the field and reports its unquoted value (which may be empty).
  void foundValue(S value) {
    ++numFields;
    onValueFound(unquoteValue(value));
  }

  // return true if should exit
  bool endOfRecord() {
    // No fields seen since the last record: nothing to close (blank line).
    if (numFields == 0) false;
    onEndOfRecord();
    numFields = 0;
    ret ++numRecords >= maxRecordsToLoad && maxRecordsToLoad >= 0;
  }

  // Called once per field with the unquoted value.
  swappable void onValueFound(S value) {}

  // Called after the last field of each non-empty record.
  swappable void onEndOfRecord() {}
}
Began life as a copy of #1008333
download show line numbers debug dex old transpilations
Travelled to 4 computer(s): bhatertpkbcr, mqqgnosmbjvj, pyentgdyhuwx, vouqrxazstgt
No comments. add comment
| Snippet ID: | #1030417 |
| Snippet name: | ExtendedCSVParser - parse CSV with multiple lines in a value, inner quotes escaped as "" |
| Eternal ID of this version: | #1030417/11 |
| Text MD5: | a30109b0d22a814deab4f4cf8632026f |
| Transpilation MD5: | e12f6a057c8b94cb5ad5a2b4b8005432 |
| Author: | stefan |
| Category: | javax / parsing |
| Type: | JavaX fragment (include) |
| Public (visible to everyone): | Yes |
| Archived (hidden from active list): | No |
| Created/modified: | 2020-12-14 16:35:55 |
| Source code size: | 1659 bytes / 70 lines |
| Pitched / IR pitched: | No / No |
| Views / Downloads: | 464 / 752 |
| Version history: | 10 change(s) |
| Referenced in: | [show references] |