Commit d66e43f4abbd9d7538bf1e9af08e3b02a7ab89ef
1 parent 3a6efd40
Updated data_base/read_csv.anubis to use fast_lexer_5 instead of fast_lexer_4.
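In substance (inferred from the call sites in the diff below; the actual fast_lexer_4/fast_lexer_5 declarations are not part of this commit and may differ in detail): a fast_lexer_4 lexer reported three outcomes per call, end_of_input, error(b,line,col) or token(t), while fast_lexer_5 narrows this to two, error(e) or ok(t). End of input becomes an ordinary token: each token type gains an explicit eof alternative whose constructor is passed as an extra argument to retrieve_lexer, and failures arrive as a structured LexicalError value rendered by the new format helper. A hedged Anubis-style sketch of the two shapes:

// Hypothetical sketch only, reconstructed from this diff; the names
// LexerResult4/LexerResult5 are illustrative, not the library's own.
type LexerResult4($T):                       // fast_lexer_4: three outcomes
    end_of_input,
    error(ByteArray b, Int line, Int col),
    token($T t).

type LexerResult5($T,$A):                    // fast_lexer_5: two outcomes;
    error(LexicalError($A) e),               // end of input is now just
    ok($T t).                                // another token (eof)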
Showing 1 changed file with 48 additions and 31 deletions
anubis_dev/library/data_base/read_csv.anubis
@@ -6,7 +6,7 @@
 
 read tools/basis.anubis
 read tools/time.anubis
-read lexical_analysis/fast_lexer_4.anubis
+read lexical_analysis/fast_lexer_5.anubis
 
 
 public type ReadCsvResult:
@@ -38,7 +38,7 @@ type CellPrefixToken: // reading the beginning of a cell until the firs
 
 This lexer is for reading the beginning of a cell.
 
-define List(LexerItem4(CellPrefixToken,One))
+define List(LexerItem(CellPrefixToken,One))
     begin_cell_description
     (
         String sep
@@ -46,19 +46,19 @@ define List(LexerItem4(CellPrefixToken,One))
 [
     lexer_item("[# #t]*\"",
         return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
-            token(double_quote))),
+            ok(double_quote))),
 
     lexer_item("[^#"+sep+"\"#r#n]*#"+sep,
         return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
-            token(separator(extract(0,l-1))))),
+            ok(separator(extract(0,l-1))))),
 
     lexer_item("[^#"+sep+"\"#r#n]*#n" ,
         return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
-            token(separator(extract(0,l-1))))),
+            ok(separator(extract(0,l-1))))),
 
     lexer_item("[^#"+sep+"\"#r#n]*(#r#n)" ,
         return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
-            token(separator(extract(0,l-2)))))
+            ok(separator(extract(0,l-2)))))
 ].
 
 
@@ -66,52 +66,58 @@ define List(LexerItem4(CellPrefixToken,One))
 
 
 type InToken:
+    eof,
     double_quote,      // can also be the end of file
     two_double_quotes,
     part(ByteArray).   // part of cell
 
 
-define List(LexerItem4(InToken,One))
+define List(LexerItem(InToken,One))
     read_quoted_cell_description
     (
         String sep
     ) =
 [
     lexer_item("[^\"]*" ,
-        return((ByteArray b, LexingTools t, One u) |-> token(part(b)))),
+        return((ByteArray b, LexingTools t, One u) |-> ok(part(b)))),
 
     lexer_item("\"\"" ,
-        return((ByteArray b, LexingTools t, One u) |-> token(two_double_quotes))),
+        return((ByteArray b, LexingTools t, One u) |-> ok(two_double_quotes))),
 
     lexer_item("\"[# #t]*(("+sep+")|(#n)|(#r#n))" ,
-        return((ByteArray b, LexingTools t, One u) |-> token(double_quote)))
+        return((ByteArray b, LexingTools t, One u) |-> ok(double_quote)))
 ].
 
 
 The lexer described below skips a cell (and eats the trailing separator).
 
-define List(LexerItem4(One,One))
+type SkipToken:
+    eof,
+    skiped.
+
+define List(LexerItem(SkipToken,One))
     skip_cell_description
     (
         String sep
     ) =
 [
     lexer_item("(([^\"#n#r#"+sep+"]*)|([# #t]*\"([^\"]|(\"\"))*\"[# #t]*))#"+sep,
-        return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> token(unique)))
+        return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> ok(skiped)))
 ].
 
 The lexer described below skips to end of line (and eats the end of line).
 
 type EOL_Token:
+    eof,
     eol_offset(Int offset).
 
-define List(LexerItem4(EOL_Token,One))
+define List(LexerItem(EOL_Token,One))
     to_eol_description
     =
 [
     lexer_item("([^#r#n]*)((#n)|(#r#n))",
         return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |->
-            token(eol_offset(t.offset(unique)))))
+            ok(eol_offset(t.offset(unique)))))
 ].
 
 
@@ -234,7 +240,17 @@ define List(One -> CB_Result)
     }, r)).
 
 
-
+define String
+    format
+    (
+        LexicalError(One) e
+    ) =
+    if e is
+    {
+        lex_error(b,t,a) then "error: '"+to_string(b)+"' at line "+line(t)(unique),
+        lex_error(message) then message,
+        other_error(a) then should_not_happen("unknown error")
+    }.
 
 public define One -> ReadCsvResult
     make_read_csv_line
@@ -243,28 +259,26 @@ public define One -> ReadCsvResult
     (
         String sep,
         List(Int) cols_to_get
-    with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell)(ls,unique),
-         lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell)(ls,unique),
-         lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell)(ls,unique),
-         lex_eol = retrieve_lexer(to_eol_description, csv_to_eol)(ls,unique),
+    with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell, eof)(ls,unique),
+         lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell, eof)(ls,unique),
+         lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell, eof)(ls,unique),
+         lex_eol = retrieve_lexer(to_eol_description, csv_to_eol, eof)(ls,unique),
         skip_cell = (One u) |-> (CB_Result)if lex_skip(u) is
         {
-            end_of_input then eof,
-            error(b,line,col) then error("skip "+line+":"+col+" :"+to_string(b)),
-            token(t) then skip
+            error(e) then error(format(e)),
+            ok(_) then skip
         },
         begin_cell = (One u) |-> (Result(String,CellPrefixToken))if lex_begin(u) is
         {
-            end_of_input then ok(eof),
-            error(b,line,col) then error("begin "+to_string(b)),
-            token(t) then ok(t)
+            error(e) then error(format(e)),
+            ok(t) then ok(t)
         },
         read_in_aux = (List(ByteArray) so_far) |-aux-> (CB_Result)if lex_in(unique) is
         {
-            end_of_input then eof,
-            error(b,line,col) then error("in "+to_string(b)),
-            token(t) then if t is
+            error(e) then error(format(e)),
+            ok(t) then if t is
             {
+                eof then eof,
                 double_quote then cell(to_string(concat(reverse(so_far)))),
                 two_double_quotes then aux([{0x22} . so_far]),
                 part(p) then aux([p . so_far])
@@ -283,9 +297,12 @@
         },
         to_eol = (One u) |-> if lex_eol(u) is
         {
-            end_of_input then eof,
-            error(b,line,col) then error("eol "+to_string(b)),
-            token(t) then if t is eol_offset(offset) then eol(offset)
+            error(e) then error(format(e)),
+            ok(t) then if t is
+            {
+                eof then eof,
+                eol_offset(offset) then eol(offset)
+            }
         },
         make_read_csv_line(make_cbs(skip_cell,read_cell,to_eol,cols_to_get),
                            (One u) |-> forget(to_eol(u))).
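For comparison, the calling convention after this commit, as a hedged usage sketch: the lexer thunk's type below uses the hypothetical LexerResult5 shape sketched above (not a name from the library), while EOL_Token, format, eof, eol_offset and to_string are taken from the diff itself.

// Hypothetical caller: every fast_lexer_5 lexer call now yields error(e)
// or ok(t), and end of input surfaces as the eof token that was handed
// to retrieve_lexer.
define String
    describe_next
    (
        One -> LexerResult5(EOL_Token,One) lex_eol   // assumed signature
    ) =
    if lex_eol(unique) is
    {
        error(e) then format(e),                     // new centralized error rendering
        ok(t) then if t is
        {
            eof then "end of input",
            eol_offset(offset) then "end of line at offset "+to_string(offset)
        }
    }.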