Commit d66e43f4abbd9d7538bf1e9af08e3b02a7ab89ef

Authored by Alain Prouté
1 parent 3a6efd40

Updated data_base/read_csv.anubis to use fast_lexer_5 instead of fast_lexer_4.
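The migration is mechanical but touches every lexer description and call site in the file. In outline (an editorial sketch inferred from the diff below, not code from this commit; MyToken, my_token, my_description and my_slot are made-up names, and the fast_lexer_4/fast_lexer_5 signatures are assumed from what the diff shows):

    // fast_lexer_4: lexing results had three cases, and descriptions
    // wrapped tokens with token(...):
    //     define List(LexerItem4(MyToken,One)) ... token(my_token(b)) ...
    //     if lex(u) is { end_of_input then ...,
    //                    error(b,line,col) then ...,
    //                    token(t) then ... }

    // fast_lexer_5: the token type itself carries an explicit eof
    // constructor, retrieve_lexer receives it as an extra argument,
    // and results reduce to ok/error:
    type MyToken:
        eof,
        my_token(ByteArray).

    define List(LexerItem(MyToken,One))
        my_description
        =
        [
            lexer_item("[a-z]+",
                return((ByteArray b, LexingTools t, One u) |-> ok(my_token(b))))
        ].

    // Call sites match two cases, handling eof inside the token match:
    //     with lex = retrieve_lexer(my_description, my_slot, eof)(ls,unique),
    //     if lex(u) is { error(e) then ...,
    //                    ok(t) then if t is { eof then ..., my_token(b) then ... } }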

Showing 1 changed file with 48 additions and 31 deletions
anubis_dev/library/data_base/read_csv.anubis
@@ -6,7 +6,7 @@
 
 read tools/basis.anubis
 read tools/time.anubis
-read lexical_analysis/fast_lexer_4.anubis
+read lexical_analysis/fast_lexer_5.anubis
 
 
 public type ReadCsvResult:
@@ -38,7 +38,7 @@ type CellPrefixToken: // reading the beginning of a cell until the firs
 
 This lexer if for reading the beginning of a cell.
 
-define List(LexerItem4(CellPrefixToken,One))
+define List(LexerItem(CellPrefixToken,One))
 begin_cell_description
 (
     String sep
@@ -46,19 +46,19 @@ define List(LexerItem4(CellPrefixToken,One))
 [
     lexer_item("[# #t]*\"",
         return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
-            token(double_quote))),
+            ok(double_quote))),
 
     lexer_item("[^#"+sep+"\"#r#n]*#"+sep,
         return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
-            token(separator(extract(0,l-1))))),
+            ok(separator(extract(0,l-1))))),
 
     lexer_item("[^#"+sep+"\"#r#n]*#n" ,
         return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
-            token(separator(extract(0,l-1))))),
+            ok(separator(extract(0,l-1))))),
 
     lexer_item("[^#"+sep+"\"#r#n]*(#r#n)" ,
         return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
-            token(separator(extract(0,l-2)))))
+            ok(separator(extract(0,l-2)))))
 ].
@@ -66,52 +66,58 @@ define List(LexerItem4(CellPrefixToken,One))
 
 
 type InToken:
+    eof,
     double_quote, // can also be the end of file
     two_double_quotes,
     part(ByteArray). // part of cell
 
 
-define List(LexerItem4(InToken,One))
+define List(LexerItem(InToken,One))
 read_quoted_cell_description
 (
     String sep
 ) =
 [
     lexer_item("[^\"]*" ,
-        return((ByteArray b, LexingTools t, One u) |-> token(part(b)))),
+        return((ByteArray b, LexingTools t, One u) |-> ok(part(b)))),
 
     lexer_item("\"\"" ,
-        return((ByteArray b, LexingTools t, One u) |-> token(two_double_quotes))),
+        return((ByteArray b, LexingTools t, One u) |-> ok(two_double_quotes))),
 
     lexer_item("\"[# #t]*(("+sep+")|(#n)|(#r#n))" ,
-        return((ByteArray b, LexingTools t, One u) |-> token(double_quote)))
+        return((ByteArray b, LexingTools t, One u) |-> ok(double_quote)))
 ].
 
 
 The lexer described below skips a cell (and eats the trailing separator).
 
-define List(LexerItem4(One,One))
+type SkipToken:
+    eof,
+    skiped.
+
+define List(LexerItem(SkipToken,One))
 skip_cell_description
 (
     String sep
 ) =
 [
     lexer_item("(([^\"#n#r#"+sep+"]*)|([# #t]*\"([^\"]|(\"\"))*\"[# #t]*))#"+sep,
-        return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> token(unique)))
+        return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> ok(skiped)))
 ].
 
 The lexer described below skips to end of line (and eats the end of line).
 
 type EOL_Token:
+    eof,
     eol_offset(Int offset).
 
-define List(LexerItem4(EOL_Token,One))
+define List(LexerItem(EOL_Token,One))
 to_eol_description
 =
 [
     lexer_item("([^#r#n]*)((#n)|(#r#n))",
         return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |->
-            token(eol_offset(t.offset(unique)))))
+            ok(eol_offset(t.offset(unique)))))
 ].
 
 
@@ -234,7 +240,17 @@ define List(One -> CB_Result)
 }, r)).
 
 
-
+define String
+format
+(
+    LexicalError(One) e
+) =
+if e is
+{
+    lex_error(b,t,a) then "error: '"+to_string(b)+"' at line "+line(t)(unique),
+    lex_error(message) then message,
+    other_error(a) then should_not_happen("unknown error")
+}.
 
 public define One -> ReadCsvResult
 make_read_csv_line
@@ -243,28 +259,26 @@ public define One -> ReadCsvResult
     String sep,
     List(Int) cols_to_get
 ) =
-with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell)(ls,unique),
-     lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell)(ls,unique),
-     lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell)(ls,unique),
-     lex_eol = retrieve_lexer(to_eol_description, csv_to_eol)(ls,unique),
+with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell, eof)(ls,unique),
+     lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell, eof)(ls,unique),
+     lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell,eof)(ls,unique),
+     lex_eol = retrieve_lexer(to_eol_description, csv_to_eol, eof)(ls,unique),
      skip_cell = (One u) |-> (CB_Result)if lex_skip(u) is
      {
-        end_of_input then eof,
-        error(b,line,col) then error("skip "+line+":"+col+" :"+to_string(b)),
-        token(t) then skip
+        error(e) then error(format(e)),
+        ok(_) then skip
      },
      begin_cell = (One u) |-> (Result(String,CellPrefixToken))if lex_begin(u) is
      {
-        end_of_input then ok(eof),
-        error(b,line,col) then error("begin "+to_string(b)),
-        token(t) then ok(t)
+        error(e) then error(format(e)),
+        ok(t) then ok(t)
      },
      read_in_aux = (List(ByteArray) so_far) |-aux-> (CB_Result)if lex_in(unique) is
      {
-        end_of_input then eof,
-        error(b,line,col) then error("in "+to_string(b)),
-        token(t) then if t is
+        error(e) then error(format(e)),
+        ok(t) then if t is
         {
+            eof then eof,
             double_quote then cell(to_string(concat(reverse(so_far)))),
             two_double_quotes then aux([{0x22} . so_far]),
             part(p) then aux([p . so_far])
@@ -283,9 +297,12 @@ public define One -> ReadCsvResult
      },
      to_eol = (One u) |-> if lex_eol(u) is
      {
-        end_of_input then eof,
-        error(b,line,col) then error("eol "+to_string(b)),
-        token(t) then if t is eol_offset(offset) then eol(offset)
+        error(e) then error(format(e)),
+        ok(t) then if t is
+        {
+            eof then eof,
+            eol_offset(offset) then eol(offset)
+        }
      },
      make_read_csv_line(make_cbs(skip_cell,read_cell,to_eol,cols_to_get),
          (One u) |-> forget(to_eol(u))).
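
A side effect of the new API, visible in the last three hunks: error reporting is centralized. fast_lexer_4 call sites each assembled their own message from the raw (b,line,col) triple; with fast_lexer_5 every branch delegates to the new format helper, so each call site reduces to the same two-case pattern (a sketch, not code from this commit; lex stands for any of the retrieved lexers):

    if lex(u) is
    {
        error(e) then error(format(e)), // e : LexicalError(One), rendered by format
        ok(t) then ...                  // t now includes the explicit eof constructor
    }

The trade-off of folding end of input into each token type is that every token match must handle eof explicitly, in exchange for a uniform ok/error result at every call site.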