Commit d66e43f4abbd9d7538bf1e9af08e3b02a7ab89ef
1 parent
3a6efd40
Updated data_base/read_csv.anubis to use fast_lexer_5 instead of fast_lexer_4.
Showing
1 changed file
with
48 additions
and
31 deletions
Show diff stats
anubis_dev/library/data_base/read_csv.anubis
@@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
6 | 6 | ||
7 | read tools/basis.anubis | 7 | read tools/basis.anubis |
8 | read tools/time.anubis | 8 | read tools/time.anubis |
9 | -read lexical_analysis/fast_lexer_4.anubis | 9 | +read lexical_analysis/fast_lexer_5.anubis |
10 | 10 | ||
11 | 11 | ||
12 | public type ReadCsvResult: | 12 | public type ReadCsvResult: |
@@ -38,7 +38,7 @@ type CellPrefixToken: // reading the beginning of a cell until the firs | @@ -38,7 +38,7 @@ type CellPrefixToken: // reading the beginning of a cell until the firs | ||
38 | 38 | ||
39 | This lexer is for reading the beginning of a cell. | 39 | This lexer is for reading the beginning of a cell. |
40 | 40 | ||
41 | -define List(LexerItem4(CellPrefixToken,One)) | 41 | +define List(LexerItem(CellPrefixToken,One)) |
42 | begin_cell_description | 42 | begin_cell_description |
43 | ( | 43 | ( |
44 | String sep | 44 | String sep |
@@ -46,19 +46,19 @@ define List(LexerItem4(CellPrefixToken,One)) | @@ -46,19 +46,19 @@ define List(LexerItem4(CellPrefixToken,One)) | ||
46 | [ | 46 | [ |
47 | lexer_item("[# #t]*\"", | 47 | lexer_item("[# #t]*\"", |
48 | return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | 48 | return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> |
49 | - token(double_quote))), | 49 | + ok(double_quote))), |
50 | 50 | ||
51 | lexer_item("[^#"+sep+"\"#r#n]*#"+sep, | 51 | lexer_item("[^#"+sep+"\"#r#n]*#"+sep, |
52 | return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | 52 | return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> |
53 | - token(separator(extract(0,l-1))))), | 53 | + ok(separator(extract(0,l-1))))), |
54 | 54 | ||
55 | lexer_item("[^#"+sep+"\"#r#n]*#n" , | 55 | lexer_item("[^#"+sep+"\"#r#n]*#n" , |
56 | return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | 56 | return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> |
57 | - token(separator(extract(0,l-1))))), | 57 | + ok(separator(extract(0,l-1))))), |
58 | 58 | ||
59 | lexer_item("[^#"+sep+"\"#r#n]*(#r#n)" , | 59 | lexer_item("[^#"+sep+"\"#r#n]*(#r#n)" , |
60 | return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | 60 | return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> |
61 | - token(separator(extract(0,l-2))))) | 61 | + ok(separator(extract(0,l-2))))) |
62 | ]. | 62 | ]. |
63 | 63 | ||
64 | 64 | ||
@@ -66,52 +66,58 @@ define List(LexerItem4(CellPrefixToken,One)) | @@ -66,52 +66,58 @@ define List(LexerItem4(CellPrefixToken,One)) | ||
66 | 66 | ||
67 | 67 | ||
68 | type InToken: | 68 | type InToken: |
69 | + eof, | ||
69 | double_quote, // can also be the end of file | 70 | double_quote, // can also be the end of file |
70 | two_double_quotes, | 71 | two_double_quotes, |
71 | part(ByteArray). // part of cell | 72 | part(ByteArray). // part of cell |
72 | 73 | ||
73 | 74 | ||
74 | -define List(LexerItem4(InToken,One)) | 75 | +define List(LexerItem(InToken,One)) |
75 | read_quoted_cell_description | 76 | read_quoted_cell_description |
76 | ( | 77 | ( |
77 | String sep | 78 | String sep |
78 | ) = | 79 | ) = |
79 | [ | 80 | [ |
80 | lexer_item("[^\"]*" , | 81 | lexer_item("[^\"]*" , |
81 | - return((ByteArray b, LexingTools t, One u) |-> token(part(b)))), | 82 | + return((ByteArray b, LexingTools t, One u) |-> ok(part(b)))), |
82 | 83 | ||
83 | lexer_item("\"\"" , | 84 | lexer_item("\"\"" , |
84 | - return((ByteArray b, LexingTools t, One u) |-> token(two_double_quotes))), | 85 | + return((ByteArray b, LexingTools t, One u) |-> ok(two_double_quotes))), |
85 | 86 | ||
86 | lexer_item("\"[# #t]*(("+sep+")|(#n)|(#r#n))" , | 87 | lexer_item("\"[# #t]*(("+sep+")|(#n)|(#r#n))" , |
87 | - return((ByteArray b, LexingTools t, One u) |-> token(double_quote))) | 88 | + return((ByteArray b, LexingTools t, One u) |-> ok(double_quote))) |
88 | ]. | 89 | ]. |
89 | 90 | ||
90 | 91 | ||
91 | The lexer described below skips a cell (and eats the trailing separator). | 92 | The lexer described below skips a cell (and eats the trailing separator). |
92 | 93 | ||
93 | -define List(LexerItem4(One,One)) | 94 | +type SkipToken: |
95 | + eof, | ||
96 | + skiped. | ||
97 | + | ||
98 | +define List(LexerItem(SkipToken,One)) | ||
94 | skip_cell_description | 99 | skip_cell_description |
95 | ( | 100 | ( |
96 | String sep | 101 | String sep |
97 | ) = | 102 | ) = |
98 | [ | 103 | [ |
99 | lexer_item("(([^\"#n#r#"+sep+"]*)|([# #t]*\"([^\"]|(\"\"))*\"[# #t]*))#"+sep, | 104 | lexer_item("(([^\"#n#r#"+sep+"]*)|([# #t]*\"([^\"]|(\"\"))*\"[# #t]*))#"+sep, |
100 | - return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> token(unique))) | 105 | + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> ok(skiped))) |
101 | ]. | 106 | ]. |
102 | 107 | ||
103 | The lexer described below skips to end of line (and eats the end of line). | 108 | The lexer described below skips to end of line (and eats the end of line). |
104 | 109 | ||
105 | type EOL_Token: | 110 | type EOL_Token: |
111 | + eof, | ||
106 | eol_offset(Int offset). | 112 | eol_offset(Int offset). |
107 | 113 | ||
108 | -define List(LexerItem4(EOL_Token,One)) | 114 | +define List(LexerItem(EOL_Token,One)) |
109 | to_eol_description | 115 | to_eol_description |
110 | = | 116 | = |
111 | [ | 117 | [ |
112 | lexer_item("([^#r#n]*)((#n)|(#r#n))", | 118 | lexer_item("([^#r#n]*)((#n)|(#r#n))", |
113 | return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> | 119 | return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> |
114 | - token(eol_offset(t.offset(unique))))) | 120 | + ok(eol_offset(t.offset(unique))))) |
115 | ]. | 121 | ]. |
116 | 122 | ||
117 | 123 | ||
@@ -234,7 +240,17 @@ define List(One -> CB_Result) | @@ -234,7 +240,17 @@ define List(One -> CB_Result) | ||
234 | }, r)). | 240 | }, r)). |
235 | 241 | ||
236 | 242 | ||
237 | - | 243 | +define String |
244 | + format | ||
245 | + ( | ||
246 | + LexicalError(One) e | ||
247 | + ) = | ||
248 | + if e is | ||
249 | + { | ||
250 | + lex_error(b,t,a) then "error: '"+to_string(b)+"' at line "+line(t)(unique), | ||
251 | + lex_error(message) then message, | ||
252 | + other_error(a) then should_not_happen("unknown error") | ||
253 | + }. | ||
238 | 254 | ||
239 | public define One -> ReadCsvResult | 255 | public define One -> ReadCsvResult |
240 | make_read_csv_line | 256 | make_read_csv_line |
@@ -243,28 +259,26 @@ public define One -> ReadCsvResult | @@ -243,28 +259,26 @@ public define One -> ReadCsvResult | ||
243 | String sep, | 259 | String sep, |
244 | List(Int) cols_to_get | 260 | List(Int) cols_to_get |
245 | ) = | 261 | ) = |
246 | - with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell)(ls,unique), | ||
247 | - lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell)(ls,unique), | ||
248 | - lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell)(ls,unique), | ||
249 | - lex_eol = retrieve_lexer(to_eol_description, csv_to_eol)(ls,unique), | 262 | + with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell, eof)(ls,unique), |
263 | + lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell, eof)(ls,unique), | ||
264 | + lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell,eof)(ls,unique), | ||
265 | + lex_eol = retrieve_lexer(to_eol_description, csv_to_eol, eof)(ls,unique), | ||
250 | skip_cell = (One u) |-> (CB_Result)if lex_skip(u) is | 266 | skip_cell = (One u) |-> (CB_Result)if lex_skip(u) is |
251 | { | 267 | { |
252 | - end_of_input then eof, | ||
253 | - error(b,line,col) then error("skip "+line+":"+col+" :"+to_string(b)), | ||
254 | - token(t) then skip | 268 | + error(e) then error(format(e)), |
269 | + ok(_) then skip | ||
255 | }, | 270 | }, |
256 | begin_cell = (One u) |-> (Result(String,CellPrefixToken))if lex_begin(u) is | 271 | begin_cell = (One u) |-> (Result(String,CellPrefixToken))if lex_begin(u) is |
257 | { | 272 | { |
258 | - end_of_input then ok(eof), | ||
259 | - error(b,line,col) then error("begin "+to_string(b)), | ||
260 | - token(t) then ok(t) | 273 | + error(e) then error(format(e)), |
274 | + ok(t) then ok(t) | ||
261 | }, | 275 | }, |
262 | read_in_aux = (List(ByteArray) so_far) |-aux-> (CB_Result)if lex_in(unique) is | 276 | read_in_aux = (List(ByteArray) so_far) |-aux-> (CB_Result)if lex_in(unique) is |
263 | { | 277 | { |
264 | - end_of_input then eof, | ||
265 | - error(b,line,col) then error("in "+to_string(b)), | ||
266 | - token(t) then if t is | 278 | + error(e) then error(format(e)), |
279 | + ok(t) then if t is | ||
267 | { | 280 | { |
281 | + eof then eof, | ||
268 | double_quote then cell(to_string(concat(reverse(so_far)))), | 282 | double_quote then cell(to_string(concat(reverse(so_far)))), |
269 | two_double_quotes then aux([{0x22} . so_far]), | 283 | two_double_quotes then aux([{0x22} . so_far]), |
270 | part(p) then aux([p . so_far]) | 284 | part(p) then aux([p . so_far]) |
@@ -283,9 +297,12 @@ public define One -> ReadCsvResult | @@ -283,9 +297,12 @@ public define One -> ReadCsvResult | ||
283 | }, | 297 | }, |
284 | to_eol = (One u) |-> if lex_eol(u) is | 298 | to_eol = (One u) |-> if lex_eol(u) is |
285 | { | 299 | { |
286 | - end_of_input then eof, | ||
287 | - error(b,line,col) then error("eol "+to_string(b)), | ||
288 | - token(t) then if t is eol_offset(offset) then eol(offset) | 300 | + error(e) then error(format(e)), |
301 | + ok(t) then if t is | ||
302 | + { | ||
303 | + eof then eof, | ||
304 | + eol_offset(offset) then eol(offset) | ||
305 | + } | ||
289 | }, | 306 | }, |
290 | make_read_csv_line(make_cbs(skip_cell,read_cell,to_eol,cols_to_get), | 307 | make_read_csv_line(make_cbs(skip_cell,read_cell,to_eol,cols_to_get), |
291 | (One u) |-> forget(to_eol(u))). | 308 | (One u) |-> forget(to_eol(u))). |