Commit 633d58adaff26e3bc529a9b96dfe3c24a8cfea57

Authored by HerrmannM
2 parents df8d5af6 bcb6ccd9

correction + iterator

anubis_dev/compiler/src/compil.h
@@ -200,7 +200,10 @@ extern void NormalizeFileName(char *pathName);
    of the form mcons3(file,line,col) where file, line and col Lisp integers.
 */
 
+extern char *path_prefix(char *name);
+
 #define file_in(x) (assert(((int)(integer_value(car(x)))) < max_already_included), already_included[integer_value(car(x))])
+#define dir_in(x) path_prefix(file_in(x))
 #define line_in(x) integer_value(second(x))
 #define col_in(x) integer_value(cdr2(x))
 
@@ -631,6 +634,7 @@ extern Expr linecol(void);
   item(unlock_var)\
   item(__line__)\
   item(__file__)\
+  item(__dir__)\
   item(lazy)\
 
 /* true 'dynamic' modules and 'load_adm' (added in version 1.13) */
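
The declaration above is all this commit shows of path_prefix; below is a minimal sketch of the behaviour dir_in seems to rely on, returning everything before the last '/' of an absolute path. The allocation strategy is an assumption, not the real implementation.

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical sketch: path_prefix("/a/b/c.anubis") -> "/a/b".
       Whether the real function allocates, trims a trailing '/', or
       reuses a static buffer is not shown in this commit. */
    char *path_prefix(char *name)
    {
        char  *slash = strrchr(name, '/');
        size_t len   = (slash != NULL) ? (size_t)(slash - name) : 0;
        char  *dir   = malloc(len + 1);

        if (dir != NULL) {
            memcpy(dir, name, len);
            dir[len] = '\0';
        }
        return dir;
    }

Under this reading, for a file compiled as /a/b/c.anubis, file_in gives "/a/b/c.anubis" and the new dir_in gives "/a/b".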
anubis_dev/compiler/src/compile.c
@@ -3439,6 +3439,7 @@ Expr compile_term(Expr head,
     }
     break;
 
+  case __dir__:
   case __file__: /* (__file__ . <lisp string>) */
     {
       code = cons(cons(string,
anubis_dev/compiler/src/expr.cpp
@@ -591,6 +591,7 @@ Expr _symbols_in_interp(Expr head)
   case operation:
   case __line__:
   case __file__:
+  case __dir__:
     return nil;
 
   case macro:
@@ -755,6 +756,7 @@ Expr _symbols_in_term(Expr term)
   case todo: /* (todo <lc> <filename> . <text>) */
   case __line__:
   case __file__:
+  case __dir__:
   case byte_array:
     return nil;
 
anubis_dev/compiler/src/grammar.y
@@ -81,7 +81,7 @@ int is_global = 0;
 %token<expr> yy__config_file yy__verbose yy__stop_after yy__mapsto yy__rec_mapsto yy__language
 %token<expr> yy__mapstoo yy__rec_mapstoo yy__arroww
 %token<expr> yy__conf_int yy__conf_string yy__conf_symbol
-%token<expr> yy__enddot yy__eof yy__LINE__ yy__FILE__ yy__colon_equals
+%token<expr> yy__enddot yy__eof yy__LINE__ yy__FILE__ yy__DIR__ yy__colon_equals
 %token<expr> yy__integer yy__macro_integer yy__dummy yy__end_LBA
 %token<expr> yy__defaults_as yy__lazy
 
@@ -666,6 +666,7 @@ Term: yy__alert { alert_obsolete($1);
 | yy__symbol yy__colon_equals Term { $$ = $3; }
 | yy__LINE__ %prec prec_symbol { $$ = $1; }
 | yy__FILE__ %prec prec_symbol { $$ = $1; }
+| yy__DIR__ %prec prec_symbol { $$ = $1; }
 //| yy__Symbol %prec prec_symbol { $$ = mcons3(symbol,linecol(),$1); }
 | yy__lpar yy__rpar %prec prec_symbol { $$ = mcons3(symbol,linecol(),pdstr_voidpars); }
 | yy__integer %prec prec_symbol { $$ = mcons3(car($1),linecol(), cdr($1)); }
anubis_dev/compiler/src/interp.c
@@ -2316,6 +2316,7 @@ term_interpretations(Expr ttype, /* required type for that term (may contai
 
   case __line__: /* (__line__ . <lisp integer>) */
   case __file__: /* (__file__ . <lisp string>) */
+  case __dir__:
     result = list1(cons(term,env));
     break;
 
anubis_dev/compiler/src/lexer.l
@@ -326,7 +326,7 @@ extern void NormalizeFileName(char *pathName);
 
 
 /* open an Anubis source file. */
-FILE *fopensrc(const char *name, int n)
+FILE *fopensrc(const char *name, int n) // n is for debugging purposes (identifies where the call comes from)
 /* The given name may be either absolute or relative. It
    must be immediately converted into an absolute path,
    because the compiler uses ONLY absolute file paths
@@ -544,7 +544,7 @@ int is_already_included(char *name)
           /* same up to case */
           if (!strcmp(already_included[i],fopensrc_abs_path))
             {
-              /* exactely same: file already compiled */
+              /* exactly same: file already compiled */
               fclose(fp);
               return 1;
             }
@@ -683,6 +683,8 @@ void come_back(void)
   if (verbose) printf("Returning to directory: %s\n",
                       include_dir[include_stack_ptr]);
   current_file_abs_path = abs_file_paths_stack[include_stack_ptr];
+  fopensrc_abs_path = current_file_abs_path; // these two variables seem to be redundant!
+  fopensrc_cur_dir = include_dir[include_stack_ptr];
   current_file_id = get_file_id(current_file_abs_path);
   saturate_visi_table();
   if (show_reads) show_come_back(current_file_abs_path,current_file_id);
@@ -986,6 +988,8 @@ W [\ \t\r\n]
                 return yy__LINE__; }
 <PAR>__FILE__ { yylval.expr = cons(__file__,new_string(current_file_abs_path));
                 return yy__FILE__; }
+<PAR>__DIR__  { yylval.expr = cons(__dir__,new_string(path_prefix(current_file_abs_path)));
+                return yy__DIR__; }
 <PAR>__TIME__ { sprintf(str_buf,"%d",(int)time(NULL));
                 yylval.expr = mcons4(of_type,linecol(),pdstr_Word32,mcons3(integer_10,
                 linecol(),
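
To make the new rule concrete, here is a tiny standalone C illustration of what __FILE__ and __DIR__ would produce for a made-up current file path; the real lexer actions build Lisp cells rather than printing.

    #include <stdio.h>
    #include <string.h>

    /* Toy illustration only: __DIR__ is __FILE__ without its last
       path component.  The path below is hypothetical. */
    int main(void)
    {
        const char *current_file_abs_path = "/home/user/proj/src/main.anubis";
        const char *slash = strrchr(current_file_abs_path, '/');

        printf("__FILE__ -> %s\n", current_file_abs_path);
        printf("__DIR__  -> %.*s\n", (int)(slash - current_file_abs_path),
               current_file_abs_path);
        return 0;
    }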
anubis_dev/compiler/src/msgtexts.c
@@ -1057,7 +1057,7 @@ const char *msgtext_undefined_macro[] =
 const char *msgtext_too_big[] =
 {
   " Your program is too big for this version of the compiler.\n"
-  " Consider spilling it using secondary modules.\n\n"
+  " Consider splitting it using secondary modules.\n\n"
 };
 
 const char *msgtext_similar_paths[] =
anubis_dev/compiler/src/replace.c
@@ -505,6 +505,7 @@ static Expr rename_bound(Expr head, /* the expression within which bound
 
   case __line__:
   case __file__:
+  case __dir__:
     {
       result = head;
     }
@@ -955,6 +956,13 @@ static Expr replace_aux(Expr head, /* where bound variables are already ren
     }
     break;
 
+  case __dir__:
+    /* (__dir__ . <lisp string>) */
+    {
+      return cons(__dir__,new_string(dir_in(second(op)))); /* change the value of __DIR__ */
+    }
+    break;
+
   default: assert(0);
  }
anubis_dev/compiler/src/show.c
@@ -538,6 +538,12 @@ void show_interpretation(FILE *fp,
     }
     break;
 
+  case __dir__:
+    {
+      xpos += fprintf(fp,"__DIR__");
+    }
+    break;
+
 #if 0
   case integer: /* (integer <lc> . <Cint>) */ Obsolete: replaced by 'anb_int_10' and 'anb_int_16'
     {
anubis_dev/compiler/src/templates.c
@@ -255,6 +255,11 @@
     {
     }
 
+  case __dir__:
+    /* (__dir__ . <lisp string>) */
+    {
+    }
+
   default: assert(0);
  }
 
anubis_dev/compiler/src/typetools.c
@@ -413,6 +413,7 @@ Expr _type_from_interpretation(Expr head, Expr env)
     break;
 
   case __file__:
+  case __dir__:
     result = type_String;
     break;
 
anubis_dev/library/data_base/import_csv.anubis
 ﻿
-
+ This file is obsoleted by data_base/read_csv.anubis
+
+
  The Anubis Project
 
  Reading CSV tables.
@@ -54,9 +56,9 @@
 
 
 
-read tools/basis.anubis
+ read tools/basis.anubis
 
-public define Maybe(List(List(String)))
+ public define Maybe(List(List(String)))
  read_table
  (
     String filename,
@@ -97,7 +99,7 @@ public define Maybe(List(List(String)))
     Checking if a list of Word8 'candidate' is a prefix in a line separator.
 
 
-define Bool
+ define Bool
  begins_line_separator
  (
     List(Word8) candidate,
@@ -121,7 +123,7 @@ define Bool
 
     Here is the test.
 
-define Bool
+ define Bool
  begins_line_separator
  (
     List(Word8) candidate,
@@ -141,7 +143,7 @@ define Bool
 
     We have two cross recursive functions 'read_table' and 'read_more_lines'.
 
-define List(List(String))
+ define List(List(String))
  read_table
  (
     RStream file,
@@ -162,7 +164,7 @@ define List(List(String))
     file. If end of file is read, the last line of the table is empty. Otherwise, we
     return to 'read_table', with the correct 'current_line' and 'current_cell'.
 
-define List(List(String))
+ define List(List(String))
  read_more_lines
  (
     RStream file,
@@ -185,7 +187,7 @@ define List(List(String))
 
     Reading a table from an already opened file.
 
-define List(List(String))
+ define List(List(String))
  read_table
  (
     RStream file,
@@ -218,7 +220,7 @@ define List(List(String))
 
     Now, here is our tool.
 
-public define Maybe(List(List(String)))
+ public define Maybe(List(List(String)))
  read_table
  (
     String filename,
anubis_dev/library/data_base/read_csv.anubis 0 → 100644
  1 +
  2 + The Anubis Project
  3 +
  4 + Reading a csv (Comma Separated Values) file.
  5 +
  6 +
  7 +read tools/basis.anubis
  8 +read tools/time.anubis
  9 +read lexical_analysis/fast_lexer_4.anubis
  10 +
  11 +
  12 + The function returned by the function below reads a single record from a CSV input source.
  13 +
  14 +public type ReadCsvResult:
  15 + end_of_input,
  16 + error (String message), // an error message
  17 + ok (Int offset, List(String) record). // a single record and the offset of the end of
  18 + // this record.
  19 +
  20 +public define One -> ReadCsvResult
  21 + make_read_csv_line
  22 + (
  23 + LexingStream ls, // lexing stream to be constructed from the input (see fast_lexer_4.anubis)
  24 + String sep, // cell separator (can be "," or ";")
  25 + List(Int) cols_to_get // list of column numbers you want to get
  26 + ).
  27 +
  28 +
  29 +
  30 + --- That's all for the public part ! ------------------------------------------------------------------
  31 +
  32 +
  33 +type CellPrefixToken: // reading the beginning of a cell until the first double quote or separator
  34 + eof,
  35 + double_quote, // if double quote, ignore the content up to here and switch to another lexer
  36 + separator(ByteArray). // if separator, keep everything before this separator
  37 +
  38 +
  39 + This lexer is for reading the beginning of a cell.
  40 +
  41 +define List(LexerItem(CellPrefixToken,One))
  42 + begin_cell_description
  43 + (
  44 + String sep
  45 + ) =
  46 + [
  47 + lexer_item("[# #t]*\"",
  48 + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
  49 + token(double_quote))),
  50 +
  51 + lexer_item("[^#"+sep+"\"#r#n]*#"+sep,
  52 + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
  53 + token(separator(extract(0,l-1))))),
  54 +
  55 + lexer_item("[^#"+sep+"\"#r#n]*#n" ,
  56 + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
  57 + token(separator(extract(0,l-1))))),
  58 +
  59 + lexer_item("[^#"+sep+"\"#r#n]*(#r#n)" ,
  60 + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
  61 + token(separator(extract(0,l-2)))))
  62 + ].
  63 +
  64 +
  65 + This lexer is for reading within the double quotes (if the cell is double quoted)
  66 +
  67 +
  68 +type InToken:
  69 + double_quote, // can also be the end of file
  70 + two_double_quotes,
  71 + part(ByteArray). // part of cell
  72 +
  73 +
  74 +define List(LexerItem(InToken,One))
  75 + read_quoted_cell_description
  76 + (
  77 + String sep
  78 + ) =
  79 + [
  80 + lexer_item("[^\"]*" ,
  81 + return((ByteArray b, LexingTools t, One u) |-> token(part(b)))),
  82 +
  83 + lexer_item("\"\"" ,
  84 + return((ByteArray b, LexingTools t, One u) |-> token(two_double_quotes))),
  85 +
  86 + lexer_item("\"[# #t]*(("+sep+")|(#n)|(#r#n))" ,
  87 + return((ByteArray b, LexingTools t, One u) |-> token(double_quote)))
  88 + ].
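
The two_double_quotes item above implements the usual CSV convention that a doubled quote inside a quoted cell stands for a single literal quote. A standalone C sketch of that unescaping rule (the function name is mine, not part of the library):

    #include <stdio.h>

    /* Unescape the body of a quoted CSV cell: "" -> ".  'in' points at
       the text between the opening and closing quotes. */
    static void unquote_cell(const char *in, char *out)
    {
        while (*in) {
            if (in[0] == '"' && in[1] == '"') { *out++ = '"'; in += 2; }
            else                              { *out++ = *in++; }
        }
        *out = '\0';
    }

    int main(void)
    {
        char buf[64];
        unquote_cell("say \"\"hi\"\"", buf);  /* cell body: say ""hi"" */
        printf("%s\n", buf);                  /* prints: say "hi" */
        return 0;
    }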
  89 +
  90 +
  91 + The lexer described below skips a cell (and eats the trailing separator).
  92 +
  93 +define List(LexerItem(One,One))
  94 + skip_cell_description
  95 + (
  96 + String sep
  97 + ) =
  98 + [
  99 + lexer_item("(([^\"#n#r#"+sep+"]*)|([# #t]*\"([^\"]|(\"\"))*\"[# #t]*))#"+sep,
  100 + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> token(unique)))
  101 + ].
  102 +
  103 + The lexer described below skips to end of line (and eats the end of line).
  104 +
  105 +type EOL_Token:
  106 + eol_offset(Int offset).
  107 +
  108 +define List(LexerItem(EOL_Token,One))
  109 + to_eol_description
  110 + =
  111 + [
  112 + lexer_item("([^#r#n]*)((#n)|(#r#n))",
  113 + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |->
  114 + token(eol_offset(t.offset(unique)))))
  115 + ].
  116 +
  117 +
  118 +
  119 +global define One
  120 + make_the_lexers
  121 + (
  122 + List(String) _
  123 + ) =
  124 + make_precompiled_lexer("csv_c_begin_cell", begin_cell_description(","), '#');
  125 + make_precompiled_lexer("csv_c_quoted_cell", read_quoted_cell_description(","), '#');
  126 + make_precompiled_lexer("csv_c_skip_cell", skip_cell_description(","), '#');
  127 + make_precompiled_lexer("csv_s_begin_cell", begin_cell_description(";"), '#');
  128 + make_precompiled_lexer("csv_s_quoted_cell", read_quoted_cell_description(";"), '#');
  129 + make_precompiled_lexer("csv_s_skip_cell", skip_cell_description(";"), '#');
  130 + make_precompiled_lexer("csv_to_eol", to_eol_description, '#').
  131 +
  132 +
  133 +execute anbexec make_the_lexers
  134 +read generated/csv_c_begin_cell.anubis
  135 +read generated/csv_c_quoted_cell.anubis
  136 +read generated/csv_c_skip_cell.anubis
  137 +read generated/csv_s_begin_cell.anubis
  138 +read generated/csv_s_quoted_cell.anubis
  139 +read generated/csv_s_skip_cell.anubis
  140 +read generated/csv_to_eol.anubis
  141 +
  142 +
  143 + define One
  144 + repeat
  145 + (
  146 + Int n,
  147 + One -> One f
  148 + ) =
  149 + if n =< 0 then unique else f(unique); repeat(n-1,f).
  150 +
  151 +
  152 + define Int -> Result(String,ByteArray)
  153 + read_next_cell
  154 + (
  155 + One -> One skip_cell,
  156 + One -> Result(String,ByteArray) read_cell
  157 + ) =
  158 + (Int n) |-> repeat(n,skip_cell); read_cell(unique).
  159 +
  160 +
  161 +type CB_Result:
  162 + eof,
  163 + error(String),
  164 + skip,
  165 + cell(String),
  166 + eol(Int offset).
  167 +
  168 +
  169 +define One -> ReadCsvResult
  170 + make_read_csv_line
  171 + (
  172 + List(One -> CB_Result) cbs,
  173 + One -> One to_eol
  174 + ) =
  175 + with f = (List(One -> CB_Result) l, List(String) so_far) |-f1->
  176 + if l is
  177 + {
  178 + [ ] then ok(0, reverse(so_far)),
  179 + [f . g] then
  180 + if f(unique) is
  181 + {
  182 + eof then end_of_input,
  183 + error(e) then to_eol(unique); error(e),
  184 + skip then (ReadCsvResult)f1(g, so_far),
  185 + cell(c) then
  186 + //print("read cell ["+c+"] ");
  187 + (ReadCsvResult)f1(g, [c. so_far]),
  188 + eol(offset) then
  189 + if g is
  190 + {
  191 + [] then ok(offset, reverse(so_far)),
  192 + [_ . _] then error("End Of Line unexpected")
  193 + }
  194 +
  195 + }
  196 + },
  197 + (One u) |-> f(cbs, []).
  198 +
  199 +
  200 +
  201 +define List(One -> CB_Result)
  202 + make_cbs
  203 + (
  204 + One -> CB_Result skip_cell,
  205 + One -> CB_Result read_cell,
  206 + One -> CB_Result to_eol,
  207 + List(Int) cols_to_get // must be strictly increasing
  208 + ) =
  209 + if cols_to_get is
  210 + {
  211 + [ ] then [to_eol],
  212 + [n . l] then
  213 + with rest = make_cbs(skip_cell,read_cell,to_eol, map((Int x) |-> x - n -1,l)),
  214 + constant_list(skip_cell,n) + [read_cell . rest]
  215 + }.
  216 +
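
The renumbering map((Int x) |-> x - n - 1, l) in make_cbs re-expresses the remaining wanted columns relative to the cell just read. The equivalent iterative view, checked by a tiny standalone C program (the column list is the same sample used by the 'gaga' test that follows):

    #include <stdio.h>

    /* For strictly increasing wanted columns, the number of cells to
       skip before each one is the gap to the previously read column
       minus one; this is what the recursion x - n - 1 computes. */
    int main(void)
    {
        int cols[] = { 3, 5, 10 };
        int prev   = -1;
        for (int i = 0; i < 3; i++) {
            printf("skip %d cells, read column %d\n",
                   cols[i] - prev - 1, cols[i]);
            prev = cols[i];
        }
        return 0;
    }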
  217 +
  218 + Just a test for make_cbs:
  219 + global define One
  220 + gaga
  221 + (
  222 + List(String) args
  223 + ) =
  224 + with skip_cell = (One u) |-> skip,
  225 + read_cell = (One u) |-> (CB_Result)cell(to_byte_array("")),
  226 + to_eol = (One u) |-> (CB_Result)error(""),
  227 + l = [3,5,(Int)10],
  228 + with r = make_cbs(skip_cell,read_cell,to_eol,l),
  229 + forget(map((One -> CB_Result f) |-> if f(unique) is
  230 + {
  231 + error(e) then print("eol\n\n"),
  232 + skip then print("skip\n"),
  233 + cell(_) then print("cell\n")
  234 + }, r)).
  235 +
  236 +
  237 +
  238 +
  239 +public define One -> ReadCsvResult
  240 + make_read_csv_line
  241 + (
  242 + LexingStream ls,
  243 + String sep,
  244 + List(Int) cols_to_get
  245 + ) =
  246 + with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell)(ls,unique),
  247 + lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell)(ls,unique),
  248 + lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell)(ls,unique),
  249 + lex_eol = retrieve_lexer(to_eol_description, csv_to_eol)(ls,unique),
  250 + skip_cell = (One u) |-> (CB_Result)if lex_skip(u) is
  251 + {
  252 + end_of_input then eof,
  253 + error(b,line,col) then error("skip "+line+":"+col+" :"+to_string(b)),
  254 + token(t) then skip
  255 + },
  256 + begin_cell = (One u) |-> (Result(String,CellPrefixToken))if lex_begin(u) is
  257 + {
  258 + end_of_input then ok(eof),
  259 + error(b,line,col) then error("begin "+to_string(b)),
  260 + token(t) then ok(t)
  261 + },
  262 + read_in_aux = (List(ByteArray) so_far) |-aux-> (CB_Result)if lex_in(unique) is
  263 + {
  264 + end_of_input then eof,
  265 + error(b,line,col) then error("in "+to_string(b)),
  266 + token(t) then if t is
  267 + {
  268 + double_quote then cell(to_string(concat(reverse(so_far)))),
  269 + two_double_quotes then aux([{0x22} . so_far]),
  270 + part(p) then aux([p . so_far])
  271 + }
  272 + },
  273 + read_in = (One u) |-> read_in_aux([]),
  274 + read_cell = (One u) |-> if begin_cell(u) is
  275 + {
  276 + error(e) then error(e),
  277 + ok(b) then if b is
  278 + {
  279 + eof then eof,
  280 + double_quote then read_in(u),
  281 + separator(c) then cell(to_string(c))
  282 + }
  283 + },
  284 + to_eol = (One u) |-> if lex_eol(u) is
  285 + {
  286 + end_of_input then eof,
  287 + error(b,line,col) then error("eol "+to_string(b)),
  288 + token(t) then if t is eol_offset(offset) then eol(offset)
  289 + },
  290 + make_read_csv_line(make_cbs(skip_cell,read_cell,to_eol,cols_to_get),
  291 + (One u) |-> forget(to_eol(u))).
  292 +
  293 +
  294 +
  295 + *** Command line test.
  296 +
  297 +define Maybe(List(Int))
  298 + map_to_Int
  299 + (
  300 + List(String) l
  301 + ) =
  302 + if l is
  303 + {
  304 + [ ] then success([ ]),
  305 + [h . t] then if decimal_scan(h) is
  306 + {
  307 + failure then failure,
  308 + success(n1) then if map_to_Int(t) is
  309 + {
  310 + failure then failure,
  311 + success(l1) then success([n1 . l1])
  312 + }
  313 + }
  314 + }.
  315 +
  316 +
  317 +define One
  318 + print_csv_line
  319 + (
  320 + List(String) l
  321 + ) =
  322 + print("| ");
  323 + map_forget((String b) |-> print(b+" | "),l).
  324 +
  325 +
  326 +define One syntax = print("Usage: anbexec read_csv_file <csv file path> <sep> <n1> ... <nk>\n"+
  327 + " where <sep> is the (double quoted) separator (can be \",\" or \";\")\n"+
  328 + " and where the integers <n1>...<nk> are the ranks of the columns to keep,\n"+
  329 + " (starting at 0).\n\n").
  330 +
  331 +define One
  332 + print_to_error
  333 + (
  334 + One -> ReadCsvResult f
  335 + ) =
  336 + if f(unique) is
  337 + {
  338 + end_of_input then print("-------- end of file --------------\n"),
  339 + error(e) then print("Error ["+e+"]\n");
  340 + print_to_error(f),
  341 + ok(offset,n) then print_csv_line(n);
  342 + print("[at offset "+offset+"]\n");
  343 + print_to_error(f)
  344 + }.
  345 +
  346 +define One
  347 + show_perf
  348 + (
  349 + One -> ReadCsvResult f,
  350 + Int left,
  351 + Int read_line,
  352 + Int block_size,
  353 + UTime start_time
  354 + ) =
  355 + if f(unique) is
  356 + {
  357 + end_of_input then show_duration("lines read "+read_line, start_time);
  358 + print("----------------------\n"),
  359 + error(e) then print("error ["+e+"]\n"); print_to_error(f),
  360 + ok(o,n) then
  361 + with left1 = if left = 1 then
  362 + show_duration("lines read "+read_line+1, start_time);
  363 + block_size
  364 + else
  365 + left -1,
  366 + show_perf(f, left1, read_line+1, block_size, start_time)
  367 + }.
  368 +
  369 +
  370 +define One
  371 + show_perf
  372 + (
  373 + One -> ReadCsvResult f,
  374 + Int block_size
  375 + )=
  376 + show_perf(f, block_size, 0, block_size, unow)
  377 + .
  378 +
  379 +global define One
  380 + read_csv_file
  381 + (
  382 + List(String) args
  383 + ) =
  384 + if args is
  385 + {
  386 + [ ] then syntax,
  387 + [path . t] then
  388 + println("file "+path);
  389 + if t is
  390 + {
  391 +
  392 + [ ] then syntax,
  393 + [sep . l] then if sep:[",",";"]
  394 + then
  395 + if map_to_Int(l) is
  396 + {
  397 + failure then syntax
  398 + success(List(Int) cols) then
  399 + if file(path,read) is
  400 + {
  401 + failure then print("File '"+path+"' not found.\n"),
  402 + success(f) then
  403 + if make_lexing_stream("",f,10,10) is
  404 + {
  405 + failure then print("Error while reading file '"+path+"'.\n"),
  406 + success(ls) then
  407 + with cs = no_doubles(qsort(cols,(Int x, Int y) |-> x < y)),
  408 + read_line = make_read_csv_line(ls,sep,cs),
  409 +// show_perf(read_line, 10000)
  410 + print_to_error(read_line)
  411 + }
  412 + }
  413 + }
  414 + else syntax
  415 + }
  416 + }.
  417 +
  418 +
anubis_dev/library/data_base/read_csv_table.anubis
 
+ This file is obsoleted by data_base/read_csv.anubis
+
 
  Try it !
 
-read import_csv.anubis
+ read import_csv.anubis
 
 
-define One
+ define One
  table_print
  (
     List(String) l
@@ -18,7 +20,7 @@ define One
        table_print(t)
     }.
 
-define One
+ define One
  table_print
  (
     List(List(String)) t
@@ -29,7 +31,7 @@ define One
        [h . t2] then table_print(h); table_print(t2)
     }.
 
-define One
+ define One
  table_print
  (
     Maybe(List(List(String))) t
@@ -40,7 +42,7 @@ define One
        success(l) then table_print(l)
     }.
 
-global define One
+ global define One
  read_csv_table
  (
     List(String) args
anubis_dev/library/lexical_analysis/fast_lexer_4.anubis
@@ -618,7 +618,7 @@ public type DFA_state($Token,$Aux):
     variant of 'make_lexer':
 
 public define Result(RegExprError,
-                     (LexingStream -> One -> LexerOutput($Token),        // the lexer
+                     ((LexingStream,$Aux) -> One -> LexerOutput($Token), // the lexer
                       List(DFA_state($Token,$Aux))))                     // the automaton
  make_lexer_and_automaton
  (
@@ -651,7 +651,7 @@ public define One
 
 *** (4.3) How to use a lexer.
 
-    Applying the function of type 'LexingStream($Aux) -> One -> LexerOutput($Token)' returned by
+    Applying the function of type 'LexingStream -> One -> LexerOutput($Token)' returned by
     'make_lexer' to a lexing stream is understood as 'plugging' the lexer onto this lexing
     stream. The result is a function of type:
 
@@ -1314,10 +1314,34 @@ define Result(RegExprError,RegExpr)
 
 
 
+    Debugging tools:
+define String
+ format
+ (
+    List(Word8) l
+ ) =
+ concat(map((Word8 c) |-> to_decimal(c) ,l)," ").
 
-
-
+define String
+ format
+ (
+    RegExpr e
+ ) =
+ if e is
+ {
+    char(Word8 _0) then "char("+constant_string(1,_0)+")",
+    choice(List(Word8) _0) then "choice("+format(_0)+")",
+    plus(RegExpr _0) then "plus("+format(_0)+")",
+    star(RegExpr _0) then "star("+format(_0)+")",
+    cat(RegExpr _0,RegExpr _1) then "cat("+format(_0)+","+format(_1)+")",
+    or(RegExpr _0,RegExpr _1) then "or("+format(_0)+","+format(_1)+")",
+    dot then "dot",
+    question_mark(RegExpr _0) then "question_mark("+format(_0)+")"
+ }.
+
+
 *** [1.6.3] The tool for parsing regular expressions.
+
 
 public define Result(RegExprError,RegExpr)
  parse_regular_expression
@@ -1328,13 +1352,12 @@ public define Result(RegExprError,RegExpr)
  if read_regexpr(s,escape_char,[],end_of_regexpr) is
  {
     error(msg) then error(msg),
-    ok(re) then ok(re)
+    ok(re) then //print("["+format(re)+"]\n");
+                ok(re)
  }.
 
 
 
-
-
 *** [1.7] Transforming a regular expression into a basic one.
 
 *** [1.7.1] Expanding a 'choice' of characters.
@@ -1427,7 +1450,7 @@ public define String
 *** [1.1] The type 'LexingStream'.
 
     A lexing stream provides tools which are adhoc for using low level fast lexers as
-    defined in section 13 of predefined.anubis:
+    defined in section 13 of predefined.anubis.
 
     The type below records the information needed to come back to the state just after the
     last or penultimate token was read.
@@ -1440,9 +1463,10 @@ type TokenState:
        Int col
     ).
 
-    There is a ``penultimate token'' when at least one token has been successfully read since the
+    There is a ``penultimate token'' when at least two tokens have been successfully read since the
     creation of the lexing stream. If it is not the case, the value of the ``penultimate state''
-    defaults to the very initial state.
+    defaults to the state after the very first token was read, or to the very initial state if no
+    token was read.
 
     When the buffer is reloaded, part of the current buffer is kept. One reason for this is that
     when we encounter the end of the buffer it can be the case that we are currently reading a token
@@ -1464,7 +1488,6 @@ type TokenState:
     state informations for token1 and token2, the last two tokens successfully read.
 
 
-
 public type LexingStream:
  lexing_stream
  (
@@ -1546,18 +1569,18 @@ public type LexingStream:
        -- the new current buffer "source text."
 
        -- last accepted: (s,3), because 'sou' has been accepted in state 's' and
-          ends at offset 0 within the new buffer,
+          ends at offset 3 within the new buffer,
 
        -- current_v receives the value 3, because 'sou' is already read,
 
        -- token_start_v receives the value 0, because the token we are currently
           reading begins at offset 0.
 
-       -- state s, because we want to try to read the sequel of 'sou'.
+       -- restart in state s, because we want to try to read the sequel of 'sou'.
 
     Notice that if the low level lexer had returned 'rejected(s,at_end_of_input,12,15)'
     instead of 'accepted(s,at_end_of_input,12,15)', the scenario is the same one except
-    that last accepted is 'none'.
+    that last accepted will be 'none'.
 
     The low level lexer will now return 'accepted(s,not_at_end_of_input,0,6)', meaning that
     it has recognized the token 'source' between positions 0 (included) and 6 (not
@@ -1574,6 +1597,7 @@ public type LexingStream:
 define LexingTools
  make_tools
  (
+    Var(Int) token_start_v, // actually not used in this function
     Var(Int) current_v,
     Var(Int) line_v,
     Var(Int) col_v,
@@ -1590,12 +1614,11 @@ define LexingTools
     (One _) |-> *col_v,
 
     // get current offset:
+    // This is the number of bytes which are no longer in the buffer, plus the current position.
     (One _) |-> *past_v + *current_v,
 
     // go back one char:
     // don't go beyond the beginning of the buffer
-    // No need to update line_v and col_v because they
-    // refer to the beginning of the token.
     (Int n) |-> current_v <- max(*current_v - n, 0),
 
     // comming back to the state just after the last token was read
@@ -1603,7 +1626,6 @@ define LexingTools
                 current_v <- cur;
                 line_v <- l;
                 col_v <- c;
-                last_tok_v <- *penult_tok_v;
                 last_accept_v <- none,
 
     // comming back to the state just after the penultimate token was read
@@ -1624,7 +1646,8 @@ public define LexingStream
     String preambule,
     ByteArray b
  ) =
- with b1_v          = var(if length(preambule) = 0 then b else to_byte_array(preambule)+b),
+ with b1_v          = var(if length(preambule) = 0 then b else to_byte_array(preambule)+b),
+      token_start_v = var((Int)0),
       current_v     = var((Int)0),
       line_v        = var((Int)0),
       col_v         = var((Int)0),
@@ -1633,7 +1656,7 @@ public define LexingStream
       penult_tok_v  = var(tstate(0,0,0)),
       last_accept_v = var((FastLexerLastAccepted)none),
  lexing_stream(b1_v,          // buffer
-              var((Int)0),    // starting position
+              token_start_v,  // starting position
               current_v,      // current position
               last_accept_v,  // last accepting position
               last_tok_v,     // last token state
@@ -1641,8 +1664,8 @@ public define LexingStream
               (One u) |-> failure, // buffer is never reloaded
               line_v,              // current line
               col_v,               // current column
-              past_v,              // past bytes
-              make_tools(current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)).
+              past_v,              // past bytes (will always remain 0 in this case)
+              make_tools(token_start_v,current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)).
 
 
 
@@ -1700,7 +1723,7 @@ public define Maybe(LexingStream)
        //print("Keeping this from previous buffer: ["+to_string(extract(old_buffer,dropped,old_length))+"]\n");
        buffer_v <- extract(old_buffer,dropped,old_length)+more;
        //print("New buffer: ["+to_string(*buffer_v)+"] size: "+to_decimal(length(*buffer_v))+"\n");
-       token_start_v <- 0;
+       token_start_v <- *token_start_v - dropped;
        //print("Next token starting position: "+to_decimal(*token_start_v)+"\n");
        current_v <- old_length - dropped;
        //print("New current reading position: "+to_decimal(*current_v)+"\n");
@@ -1726,7 +1749,7 @@ public define Maybe(LexingStream)
                      line_v,
                      col_v,
                      past_bytes_v,
-                     make_tools(current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v)))
+                     make_tools(token_start_v,current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v)))
 }.
 
 
@@ -1785,7 +1808,7 @@ public define Maybe(LexingStream)
            min(min(current(*penult_tok_v),current(*last_tok_v)),*token_start_v),
 
        buffer_v <- extract(old_buffer,dropped,old_length)+more;
-       token_start_v <- 0;
+       token_start_v <- *token_start_v - dropped;
        current_v <- old_length - dropped;
        past_bytes_v <- *past_bytes_v + dropped;
        last_tok_v <- (if *last_tok_v is tstate(cur,l,c) then tstate(cur - dropped,l,c));
@@ -1809,7 +1832,7 @@ public define Maybe(LexingStream)
                      line_v,
                      col_v,
                      past_bytes_v,
-                     make_tools(current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v)))
+                     make_tools(token_start_v,current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v)))
 }.
 
 
@@ -1841,7 +1864,9 @@ define (Int, Int, Int) // returns new (start,line,col)
     Int line, // current line
     Int col   // current column
  ) =
- if old_start >= new_start then (new_start,line,col) else
+ //print("old_start = "+old_start+"\n");
+ if old_start >= new_start then //print("======== new col: "+col+"\n");
+                                (new_start,line,col) else
  with c = force_nth(old_start,buffer),
  if ((c >> 6) = 2)
  /*
@@ -1864,6 +1889,7 @@ define One
     Var(Int) line_v,
     Var(Int) col_v
  ) =
+ //print("new_start = "+new_start+"\n");
  if compute_start_line_col(buffer,*token_start_v,new_start,*line_v,*col_v) is (s,l,c) then
  token_start_v <- s;
  line_v <- l;
@@ -1891,7 +1917,7 @@ public define LexerOutput($Token)
  ) =
  if lstream is lexing_stream(buffer_v,token_start_v,current_v,last_accept_v,last_tok_v,penult_tok_v,reload_buffer,
                              line_v,col_v,offset_v,tools) then
- //print("starting at offset "+to_decimal(*current_v)+" with token start at "+to_decimal(*token_start_v)+"\n");
+ //print("starting at offset "+to_decimal(*current_v)+" with token start at "+to_decimal(*token_start_v)+"\n");
  with lgbuf = length(*buffer_v),
  if low_level_lexer(*buffer_v,*last_accept_v,*current_v,*token_start_v,starting_state) is
  {
@@ -1908,7 +1934,7 @@ public define LexerOutput($Token)
        else
        (
           /* the lexeme may still be accepted after the buffer is reloaded */
-          update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v);
+          //update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v);
           if reload_buffer(unique) is
           {
              failure then
@@ -1928,13 +1954,13 @@ public define LexerOutput($Token)
 
     /* almost the same thing for accepted */
     accepted(s,start,end) then
-       //print("low level accepted start = "+to_decimal(start)+" end = "+to_decimal(end)+"\n");
+       //print("low level accepted start = "+to_decimal(start)+" end = "+to_decimal(end)+"\n");
        last_accept_v <- last(s,end);
        current_v <- end;
        if end /= lgbuf then
        (
           /* the lexeme just read must be accepted: the action is applied */
-          update_start_line_col(*buffer_v,end,token_start_v,line_v,col_v);
+
           last_accept_v <- none;
           if *actions(word32(s,0)) is
           {
@@ -1945,18 +1971,25 @@ public define LexerOutput($Token)
              // We must update some variables
              penult_tok_v <- *last_tok_v;
              last_tok_v <- tstate(end,*line_v,*col_v);
-             f(extract(*buffer_v,start,end),tools,aux),
+             with result = f(extract(*buffer_v,start,end),tools,aux),
+             update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v);
+             result,
+
              return(f) then
              penult_tok_v <- *last_tok_v;
              last_tok_v <- tstate(end,*line_v,*col_v);
-             f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
-               end-start,tools,aux),
+             with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
+                             end-start,tools,aux),
+             //print("*token_start_v = "+*token_start_v+"\n");
+             //print("*current_v = "+*current_v+"\n");
+             update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v);
+             result
           }
        )
        else
        (
           /* the lexeme may still be accepted after the buffer is reloaded */
-          update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v);
+
           if reload_buffer(unique) is
           {
              failure then
@@ -1970,11 +2003,15 @@ public define LexerOutput($Token)
              ignore then should_not_happen(end_of_input),
              return(f) then penult_tok_v <- *last_tok_v;
                             last_tok_v <- tstate(end,*line_v,*col_v);
-                            f(extract(*buffer_v,start,end),tools,aux),
+                            with result = f(extract(*buffer_v,start,end),tools,aux),
+                            update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v);
+                            result,
              return(f) then penult_tok_v <- *last_tok_v;
                             last_tok_v <- tstate(end,*line_v,*col_v);
-                            f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
-                              end-start,tools,aux)
+                            with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
+                                            end-start,tools,aux),
+                            update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v);
+                            result
           },
 
     success(_) then
@@ -1988,7 +2025,7 @@ public define LexerOutput($Token)
     ignored_to_end then
        //print("low level ignored_to_end\n");
        /* we are at end of input buffer */
-       update_start_line_col(*buffer_v,lgbuf,token_start_v,line_v,col_v);
+       //update_start_line_col(*buffer_v,lgbuf,token_start_v,line_v,col_v);
        if reload_buffer(unique) is
        {
           failure then
@@ -2010,7 +2047,7 @@ public define LexerOutput($Token)
 
 *** [3] Constructing the automaton.
 
-    The description of a lexer is given as a list of 'LexerItem($Token)', where the
+    The description of a lexer is given as a list of 'LexerItem($Token,$Aux)', where the
     parameter '$Token' represents the type of tokens. Each lexer item is made of a regular
     expression and an action. If the action is 'ignore', the token just read is ignored and
     the lexer tries to read the next one. Otherwise, the action is applied to the lexeme
@@ -3032,20 +3069,47 @@ define List(Int)
        ignoring(name,transitions) then [-1 . actions_ranks(t)]
     }
  }.
+
+
+    Get a characteristic serializable datum from a lexer description (used to avoid
+    reconstructing the lexer when the description did not change). The signature changes
+    if any regular expression changes, if their order changes, or if the kind of action
+    changes (this last point ensures that the list of action ranks in the generated file
+    remains correct).
+define List(ByteArray)
+ extract_regexprs
+ (
+    List(LexerItem($Token,$Aux)) l
+ ) =
+ with asign = (LexerAction($Token,$Aux) a) |-> if a is
+ {
+    ignore then "(*i)", // something which is illegal as a regular expression
+    return(_0) then "(*r1)",
+    return(_0) then "(*r2)"
+ },
+ map((LexerItem($Token,$Aux) i) |-> if i is
+ {
+    lexer_item(regular_expression,action) then to_byte_array(regular_expression+asign(action)),
+    lexer_item(literal,action) then literal+to_byte_array(asign(action))
+ },l).
+
 
+
 public define One
- make_precompiled_lexer
+ make_precompiled_lexer_aux
  (
+    String signature,
     String directory,
     String lexer_name,
     List(LexerItem($Token,$Aux)) lexer_description,
     Word8 escape_char
- ) =
+ ) =
  with file_name = directory/lexer_name+".anubis",
  if file(file_name,new) is
  {
     failure then print("Cannot create file '"+file_name+"'.\n"),
     success(file) then
+       print(weaken(file)," "+signature+"\n\n");
        if make_DFA(lexer_description,escape_char) is
        {
           error(msg) then print(to_English(msg)+"\n"),
@@ -3060,6 +3124,46 @@ public define One
  }.
 
 
+define Maybe(String)
+ read_signature
+ (
+    String file_name
+ ) =
+ if file(file_name,read) is
+ {
+    failure then failure,
+    success(f) then if read(f,43,10) is // read the first 3 (blanks) + 40 (sha1 hash) characters
+    {
+       error then failure,
+       timeout then failure,
+       ok(ba) then success(to_string(extract(ba,3,43)))
+    }
+ }.
+
+public define One
+ make_precompiled_lexer
+ (
+    String directory,
+    String lexer_name,
+    List(LexerItem($Token,$Aux)) lexer_description,
+    Word8 escape_char
+ ) =
+ // avoid reconstructing the lexer if not needed
+ with signature = to_hexa(sha1(extract_regexprs(lexer_description))),
+      file_name = directory/lexer_name+".anubis",
+      do_it     = (One u) |->
+                  print("Creating '"+file_name+"'. Please wait ... "); forget(flush(stdout));
+                  make_precompiled_lexer_aux(signature,directory,lexer_name,lexer_description,escape_char);
+                  print("Done.\n"); forget(flush(stdout)),
+ if read_signature(file_name) is
+ {
+    failure then do_it(unique),
+    success(s) then if s = signature
+                    then unique
+                    else do_it(unique)
+ }.
+
+
 public define One
  make_precompiled_lexer
  (
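
The regenerate-only-when-needed logic above, restated as a compact standalone C program. FNV-1a stands in for the sha1 digest the commit actually uses, and the file name and description strings are made up for the example:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    /* FNV-1a is only a stand-in for sha1 here, to stay self-contained. */
    static uint64_t fnv1a(const char *s)
    {
        uint64_t h = 1469598103934665603ULL;
        for (; *s; s++) { h ^= (unsigned char)*s; h *= 1099511628211ULL; }
        return h;
    }

    int main(void)
    {
        const char *file_name   = "generated_lexer.txt";  /* made-up */
        const char *description = "[a-z]+ NUMBER [0-9]+"; /* made-up */

        char signature[32], first_line[32] = "";
        snprintf(signature, sizeof signature, "%016llx",
                 (unsigned long long)fnv1a(description));

        FILE *fp = fopen(file_name, "r");
        if (fp) {
            if (fscanf(fp, "%31s", first_line) != 1) first_line[0] = '\0';
            fclose(fp);
        }
        if (strcmp(first_line, signature) == 0) {
            printf("Signature unchanged, skipping regeneration.\n");
            return 0;
        }
        printf("Creating '%s'. Please wait ... ", file_name);
        fp = fopen(file_name, "w");
        if (fp) {
            fprintf(fp, "%s\n...generated tables...\n", signature);
            fclose(fp);
        }
        printf("Done.\n");
        return 0;
    }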
anubis_dev/library/system/files.anubis
@@ -1020,14 +1020,14 @@ define Maybe(Int) @@ -1020,14 +1020,14 @@ define Maybe(Int)
1020 find_the_first 1020 find_the_first
1021 ( 1021 (
1022 Data_IO io, 1022 Data_IO io,
1023 - String looking_for, //String to search 1023 + ByteArray looking_for, //String to search
1024 Int size, //size of the string to search 1024 Int size, //size of the string to search
1025 - String buffer, 1025 + ByteArray buffer,
1026 Int current_pos, 1026 Int current_pos,
1027 Int buf_size, 1027 Int buf_size,
1028 Int buf_pos 1028 Int buf_pos
1029 )= 1029 )=
1030 - //println("buf_size :"+buf_size+ " buf_pos :"+buf_pos + " size : "+size); 1030 + //println("general current pos: "+current_pos+" | buffer size: "+buf_size+ " | buffer pos: "+buf_pos + " | search size: "+size);
1031 if (buf_size - buf_pos) < size then 1031 if (buf_size - buf_pos) < size then
1032 //println("New buffer request current pos "+current_pos+" buffer_pos "+buf_pos); 1032 //println("New buffer request current pos "+current_pos+" buffer_pos "+buf_pos);
1033 if read_bytes(io, 65536) is // <- block size is 64k 1033 if read_bytes(io, 65536) is // <- block size is 64k
@@ -1035,23 +1035,30 @@ define Maybe(Int) @@ -1035,23 +1035,30 @@ define Maybe(Int)
1035 failure then println("read_bytes failure");failure, //finish 1035 failure then println("read_bytes failure");failure, //finish
1036 time_out then println("read_bytes timeout");failure, //finish 1036 time_out then println("read_bytes timeout");failure, //finish
1037 success(ba) then 1037 success(ba) then
1038 - with new_buffer = to_string(extract(to_byte_array(buffer), buf_pos, buf_size) + ba), 1038 + //println("length of ba "+length(ba));
  1039 + with ex_ba = extract(buffer, buf_pos, buf_size),
  1040 + //println("length of ex_ba "+length(ex_ba));
  1041 + with new_ba = ex_ba + ba,
  1042 + //println("length of new_ba "+length(new_ba));
  1043 + with new_buffer = ex_ba + ba,
1039 //println("SUCCESS New buffer length "+length(new_buffer)+" new current_pos "+current_pos); 1044 //println("SUCCESS New buffer length "+length(new_buffer)+" new current_pos "+current_pos);
1040 find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0), 1045 find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0),
1041 truncated(ba) then 1046 truncated(ba) then
1042 if length(ba) = 0 then 1047 if length(ba) = 0 then
  1048 + //println("last buffer current position ["+current_pos+"]");
1043 failure //finish 1049 failure //finish
1044 else 1050 else
1045 - with new_buffer = to_string(extract(to_byte_array(buffer), buf_pos, buf_size) + ba), 1051 + with new_buffer = extract(buffer, buf_pos, buf_size) + ba,
1046 // println("TRUNCATED New buffer length "+length(new_buffer)+" new current_pos "+current_pos); 1052 // println("TRUNCATED New buffer length "+length(new_buffer)+" new current_pos "+current_pos);
1047 find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0) 1053 find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0)
1048 } 1054 }
1049 else 1055 else
1050 - if find_string(buffer, looking_for, buf_pos) is 1056 + if find_byte_array(buffer, looking_for, buf_pos) is
1051 { 1057 {
1052 - failure then find_the_first(io, looking_for, size, buffer, current_pos, buf_size, buf_size - (size-1)), 1058 + failure then
  1059 + find_the_first(io, looking_for, size, buffer, current_pos, buf_size, buf_size - (size-1)),
1053 success(pos) then 1060 success(pos) then
1054 - println("pattern ["+looking_for+"] found at offset "+(current_pos+pos)); 1061 + //println("pattern ["+to_string(looking_for)+"] found at offset "+(current_pos+pos));
1055 success(current_pos + pos) 1062 success(current_pos + pos)
1056 } 1063 }
1057 . 1064 .
@@ -1126,7 +1133,7 @@ public define Maybe(Int) @@ -1126,7 +1133,7 @@ public define Maybe(Int)
1126 Data_IO io, 1133 Data_IO io,
1127 String search_string 1134 String search_string
1128 ) = 1135 ) =
1129 - find_the_first(io, search_string, length(search_string), "", 0, 0, 0). 1136 + find_the_first(io, to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0).
1130 1137
1131 public define Maybe(Int) 1138 public define Maybe(Int)
1132 find_the_first 1139 find_the_first
@@ -1138,7 +1145,7 @@ public define Maybe(Int) @@ -1138,7 +1145,7 @@ public define Maybe(Int)
1138 { 1145 {
1139 failure then failure, 1146 failure then failure,
1140 success(f) then 1147 success(f) then
1141 - find_the_first(make_data_io(f), search_string, length(search_string), "", 0, 0, 0) 1148 + find_the_first(make_data_io(f), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0)
1142 }. 1149 }.
1143 1150
1144 public define Maybe(Int) 1151 public define Maybe(Int)
@@ -1153,7 +1160,8 @@ public define Maybe(Int) @@ -1153,7 +1160,8 @@ public define Maybe(Int)
1153 failure then failure, 1160 failure then failure,
1154 success(f) then 1161 success(f) then
1155 with size = file_size(filename), 1162 with size = file_size(filename),
1156 - find_the_first(make_data_io(f, start_position, size - start_position), search_string, length(search_string), "", 0, 0, 0) 1163 + //println("file size "+size);
  1164 + find_the_first(make_data_io(f, start_position, size - start_position), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0)
1157 }. 1165 }.
1158 1166
1159 public define Maybe(Int) 1167 public define Maybe(Int)
@@ -1168,6 +1176,6 @@ public define Maybe(Int) @@ -1168,6 +1176,6 @@ public define Maybe(Int)
1168 { 1176 {
1169 failure then failure, 1177 failure then failure,
1170 success(f) then 1178 success(f) then
1171 - find_the_first(make_data_io(f, start_position, end_position - start_position), search_string, length(search_string), "", 0, 0, 0) 1179 + find_the_first(make_data_io(f, start_position, end_position - start_position), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0)
1172 }. 1180 }.
1173 1181
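The reworked find_the_first above searches a Data_IO stream in 64k blocks: when fewer than size bytes remain unsearched, the unsearched tail of the buffer is concatenated with a freshly read block; when find_byte_array fails, the window slides forward but keeps size - 1 trailing bytes so that a pattern straddling two blocks is still found; and the public wrappers convert the String pattern to a ByteArray and start the recursion with an empty buffer. A minimal Python sketch of the same sliding-window logic (the read_block callback and all names are illustrative, not part of the library):

    def find_first(read_block, pattern: bytes):
        """Return the absolute offset of pattern in the stream, or None.

        Assumes a nonempty pattern. read_block() yields the next chunk
        (b"" at end of stream), playing the role of read_bytes(io, 65536).
        """
        size = len(pattern)
        buf = b""    # current window of the stream
        base = 0     # absolute offset of buf[0]
        pos = 0      # first unsearched position inside buf
        while True:
            if len(buf) - pos < size:       # too few bytes left: refill
                chunk = read_block()
                if not chunk:
                    return None             # stream exhausted, no match
                base += pos
                buf = buf[pos:] + chunk     # keep only the unsearched tail
                pos = 0
            hit = buf.find(pattern, pos)
            if hit != -1:
                return base + hit
            pos = len(buf) - (size - 1)     # slide, keep size-1 bytes overlap

For example, with an open binary file f, find_first(lambda: f.read(65536), b"needle") returns the offset of the first occurrence, much like find_the_first(make_data_io(f), ...).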
anubis_dev/library/tools/bytearray.anubis
@@ -50,6 +50,16 @@ define One @@ -50,6 +50,16 @@ define One
50 anubis_format_aux(s,b,bpl,i+1,c+1,ind). 50 anubis_format_aux(s,b,bpl,i+1,c+1,ind).
51 51
52 52
53 - 53 +public define ByteArray
  54 + concat
  55 + (
  56 + List(ByteArray) l
  57 + ) =
  58 + if l is
  59 + {
  60 + [ ] then constant_byte_array(0,0),
  61 + [h . t] then h + concat(t)
  62 + }.
  63 +
54 64
55 65
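The new concat is a plain right fold over the list, with constant_byte_array(0,0) as the empty array. An equivalent Python sketch:

    def concat(chunks: list[bytes]) -> bytes:
        # [] -> empty array; [h . t] -> h + concat(t), as above
        return chunks[0] + concat(chunks[1:]) if chunks else b""

Since each + copies both operands, this fold is quadratic in the total length; b"".join(chunks) would be the linear-time Python equivalent, but the recursive form mirrors the Anubis definition exactly.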
anubis_dev/library/tools/int.anubis
@@ -14,15 +14,15 @@ @@ -14,15 +14,15 @@
14 14
15 *** (1) Absolute value. 15 *** (1) Absolute value.
16 16
17 - public define macro Int abs(Int x). returns the absolute value of x 17 + public define inline Int abs(Int x). returns the absolute value of x
18 18
19 19
20 *** (2) Max and min. 20 *** (2) Max and min.
21 21
22 The 'max' and 'min' functions respectively return the greatest and the smallest of their arguments. 22 The 'max' and 'min' functions respectively return the greatest and the smallest of their arguments.
23 23
24 -public define Int max(Int x, Int y). returns the greatest of x and y  
25 -public define Int min(Int x, Int y). returns the smallest of x and y 24 +public define inline Int max(Int x, Int y). returns the greatest of x and y
  25 +public define inline Int min(Int x, Int y). returns the smallest of x and y
26 26
27 public define Int max(NonEmptyList(Int) l). returns the greatest element of the list (which is non empty) 27 public define Int max(NonEmptyList(Int) l). returns the greatest element of the list (which is non empty)
28 public define Int min(NonEmptyList(Int) l). returns the smallest element of the list (which is non empty) 28 public define Int min(NonEmptyList(Int) l). returns the smallest element of the list (which is non empty)
@@ -63,20 +63,25 @@ public define Int Int x ^ Int y. raises x to the p @@ -63,20 +63,25 @@ public define Int Int x ^ Int y. raises x to the p
63 public define Bool odd (Int x). 63 public define Bool odd (Int x).
64 public define Bool even (Int x). 64 public define Bool even (Int x).
65 65
  66 + *** (7) Greatest common divisor and least common multiple.
  67 +
  68 +public define Int gcd (Int x, Int y).
  69 +public define Int lcm (Int x, Int y). least common multiple
  70 +
66 71
67 --- That's all for the public part ! ----------------------------------------------- 72 --- That's all for the public part ! -----------------------------------------------
68 73
69 74
70 75
71 76
72 -public define macro Int 77 +public define inline Int
73 abs 78 abs
74 ( 79 (
75 Int x 80 Int x
76 ) = 81 ) =
77 if x < 0 then -x else x. 82 if x < 0 then -x else x.
78 83
79 -public define Int // must not be macro because x and y are always computed 84 +public define inline Int // must not be macro because x and y are always computed
80 // and one of them would be computed twice 85 // and one of them would be computed twice
81 max 86 max
82 ( 87 (
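The comment kept above records why max and min are inline rather than macro: a macro expands textually, so an argument expression is evaluated wherever it is duplicated in the body, whereas inline still evaluates each argument once. A small Python illustration of the duplicated evaluation (the counter is purely illustrative):

    calls = 0

    def expensive() -> int:
        global calls
        calls += 1
        return 42

    # macro-style expansion of max(expensive(), 7) duplicates the argument:
    result = expensive() if expensive() > 7 else 7
    assert calls == 2        # the argument was evaluated twice

    # inline/function-style: the argument is evaluated once, then reused
    calls = 0
    x = expensive()
    result = x if x > 7 else 7
    assert calls == 1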
@@ -116,7 +121,7 @@ public define Int @@ -116,7 +121,7 @@ public define Int
116 121
117 122
118 123
119 -public define Int 124 +public define inline Int
120 min 125 min
121 ( 126 (
122 Int x, 127 Int x,
@@ -226,6 +231,38 @@ public define Bool @@ -226,6 +231,38 @@ public define Bool
226 }. 231 }.
227 232
228 233
  234 +public define Int
  235 + gcd
  236 + (
  237 + Int x,
  238 + Int y
  239 + ) =
  240 + /* Euclid's algorithm */
  241 + if x > y then gcd(y,x) else
  242 + if x = 0 then abs(y) else
  243 + if x < 0 then gcd(-x,y) else
  244 + if y/x is
  245 + {
  246 + failure then should_not_happen(0),
  247 + success(p) then if p is (q,r) then gcd(r,x)
  248 + }.
  249 +
  250 +
  251 +
  252 +public define Int
  253 + lcm
  254 + (
  255 + Int x,
  256 + Int y
  257 + ) =
  258 + with d = gcd(x,y),
  259 + if d = 0
  260 + then 0
  261 + else if (x*y)/d is
  262 + {
  263 + failure then should_not_happen(0),
  264 + success(p) then if p is (q,_) then abs(q) // lcm is conventionally nonnegative
  265 + }.
229 266
230 267
231 268
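gcd follows Euclid's algorithm on the (quotient, remainder) pair returned by y/x, and lcm uses the identity lcm(x,y) * gcd(x,y) = |x*y|. A quick numeric check, sketched with Python's standard math.gcd:

    from math import gcd

    def lcm(x: int, y: int) -> int:
        d = gcd(x, y)
        return 0 if d == 0 else abs(x * y) // d   # d always divides x*y

    assert gcd(12, 18) == 6 and lcm(12, 18) == 36
    assert lcm(0, 5) == 0
    assert lcm(0, 0) == 0    # the d = 0 guard above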
anubis_dev/library/tools/line_reader.anubis
@@ -38,7 +38,7 @@ @@ -38,7 +38,7 @@
38 38
39 //--------------------------------------------------------------------------- 39 //---------------------------------------------------------------------------
40 40
41 -read lexical_analysis/fast_lexer_3.anubis 41 +read lexical_analysis/fast_lexer_4.anubis
42 42
43 43
44 type Token: 44 type Token:
@@ -46,13 +46,13 @@ type Token: @@ -46,13 +46,13 @@ type Token:
46 eol. 46 eol.
47 47
48 public type LineReaderLexer: 48 public type LineReaderLexer:
49 - line_reader_lexer(LexingStream(One) -> One -> LexerOutput(Token) /*lexer_base*/). 49 + line_reader_lexer((LexingStream, One) -> One -> LexerOutput(Token) /*lexer_base*/).
50 50
51 public type LineReader: 51 public type LineReader:
52 line_reader(One -> LexerOutput(Token) /*lexer*/, 52 line_reader(One -> LexerOutput(Token) /*lexer*/,
53 One -> Int /*offset*/, 53 One -> Int /*offset*/,
54 LineReaderLexer /*lexer_base*/, 54 LineReaderLexer /*lexer_base*/,
55 - LexingStream(One) lexing_stream). 55 + LexingStream lexing_stream).
56 56
57 public define Int 57 public define Int
58 current_offset 58 current_offset
@@ -96,30 +96,30 @@ public define Maybe(String) @@ -96,30 +96,30 @@ public define Maybe(String)
96 public define Maybe(LineReaderLexer) 96 public define Maybe(LineReaderLexer)
97 make_line_reader_lexer 97 make_line_reader_lexer
98 = 98 =
99 - if make_lexer_and_automaton([  
100 - lexer_item("#r?#n", return((ByteArray b, LexingTools t, One aux) |-> token(eol))),  
101 - lexer_item("#r", return((ByteArray b, LexingTools t, One aux) |-> token(eol))),  
102 - lexer_item("[^\r\n]*", return((ByteArray b, LexingTools t, One aux) |-> token(line(to_string(b))))),  
103 - ], 99 + if make_lexer([
  100 + lexer_item("#r?#n", return((ByteArray b, LexingTools t, One aux) |-> token(eol))),
  101 + lexer_item("#r", return((ByteArray b, LexingTools t, One aux) |-> token(eol))),
  102 + lexer_item("[^\r\n]*", return((ByteArray b, LexingTools t, One aux) |-> token(line(to_string(b))))),
  103 + ],
104 '#') is 104 '#') is
105 { 105 {
106 error(msg) then print("Syntax error in regular expression: "+to_English(msg)+"\n"); failure, 106 error(msg) then print("Syntax error in regular expression: "+to_English(msg)+"\n"); failure,
107 - ok(p) then if p is (lexer, automaton) then success(line_reader_lexer(lexer)) 107 + ok(lexer) then success(line_reader_lexer(lexer))
108 }. 108 }.
109 109
110 public define LineReader 110 public define LineReader
111 make_line_reader 111 make_line_reader
112 ( 112 (
113 - LexingStream(One) ls, 113 + LexingStream ls,
114 LineReaderLexer make_lexer 114 LineReaderLexer make_lexer
115 ) = 115 ) =
116 if make_lexer is line_reader_lexer(lexer) then 116 if make_lexer is line_reader_lexer(lexer) then
117 - line_reader(lexer(ls), (One u) |-> offset(ls), make_lexer, ls). 117 + line_reader(lexer(ls, unique), (One u) |-> offset(ls), make_lexer, ls).
118 118
119 public define Maybe(LineReader) 119 public define Maybe(LineReader)
120 make_line_reader 120 make_line_reader
121 ( 121 (
122 - LexingStream(One) ls, 122 + LexingStream ls,
123 ) = 123 ) =
124 if make_line_reader_lexer is 124 if make_line_reader_lexer is
125 { 125 {
@@ -135,7 +135,7 @@ public define Maybe(LineReader) @@ -135,7 +135,7 @@ public define Maybe(LineReader)
135 ( 135 (
136 String s, 136 String s,
137 ) = 137 ) =
138 - make_line_reader(make_lexing_stream("", s, unique)). 138 + make_line_reader(make_lexing_stream("", s)).
139 139
140 public define Maybe(LineReader) 140 public define Maybe(LineReader)
141 make_line_reader 141 make_line_reader
@@ -146,8 +146,7 @@ public define Maybe(LineReader) @@ -146,8 +146,7 @@ public define Maybe(LineReader)
146 if make_lexing_stream("", /* preambule */ 146 if make_lexing_stream("", /* preambule */
147 f, /* the opened file */ 147 f, /* the opened file */
148 65536, /* size of buffer for the lexing stream */ 148 65536, /* size of buffer for the lexing stream */
149 - timeout, /* timeout (seconds) */  
150 - unique) 149 + timeout) /* timeout (seconds) */
151 is 150 is
152 { 151 {
153 failure then print("cannot make lexing stream.\n"); failure, 152 failure then print("cannot make lexing stream.\n"); failure,
@@ -159,7 +158,7 @@ public define LineReader @@ -159,7 +158,7 @@ public define LineReader
159 reset_line_reader 158 reset_line_reader
160 ( 159 (
161 LineReader lr, 160 LineReader lr,
162 - LexingStream(One) ls, 161 + LexingStream ls,
163 ) = 162 ) =
164 if lr is line_reader(lexer, offset, make_lexer, _) then 163 if lr is line_reader(lexer, offset, make_lexer, _) then
165 make_line_reader(ls, make_lexer). 164 make_line_reader(ls, make_lexer).
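The lexer behind LineReader has three rules (here # is the escape character, so #r and #n stand for carriage return and newline): #r?#n and a bare #r produce an eol token, and a maximal run of characters other than #r and #n produces a line token. A Python mirror of that tokenization, using + instead of * so the regex never yields empty tokens (illustrative only; the real lexer is generated by fast_lexer_4):

    import re

    # eol: '\r?\n' or a bare '\r'; line: a maximal run of other characters
    _TOKEN = re.compile(r"\r?\n|\r|[^\r\n]+")

    def read_lines(text: str) -> list[str]:
        lines, current = [], ""
        for m in _TOKEN.finditer(text):
            tok = m.group()
            if tok in ("\n", "\r\n", "\r"):
                lines.append(current)    # an eol token closes the line
                current = ""
            else:
                current = tok            # a line token
        if current:
            lines.append(current)        # final line without a trailing eol
        return lines

    assert read_lines("a\r\nb\rc\n") == ["a", "b", "c"]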
anubis_dev/library/tools/list.anubis
@@ -922,3 +922,33 @@ public define (List($T1), List($T2)) unzip(List(($T1, $T2)) l) = unzip(reverse(l @@ -922,3 +922,33 @@ public define (List($T1), List($T2)) unzip(List(($T1, $T2)) l) = unzip(reverse(l
922 922
923 923
924 924
  925 + Compute the list of all sublists of a list (beware: for a list of length n,
  926 + this gives a list of length 2^n).
  927 +
  928 +public define List(List($T))
  929 + sublists
  930 + (
  931 + List($T) l
  932 + ) =
  933 + if l is
  934 + {
  935 + [ ] then [[ ]],
  936 + [h . t] then with p = sublists(t),
  937 + map((List($T) u) |-> [h . u],p) + p
  938 + }.
  939 +
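The recursion splits on the head h: every sublist of [h . t] either contains h or is a sublist of t alone. The same definition in Python, with the small check sublists([1,2]) = [[1,2],[1],[2],[]]:

    def sublists(l: list) -> list[list]:
        if not l:
            return [[]]                  # the empty list has one sublist
        h, t = l[0], l[1:]
        p = sublists(t)
        return [[h] + u for u in p] + p  # with h first, then without

    assert sublists([1, 2]) == [[1, 2], [1], [2], []]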
  940 +
  941 + Construct a 'constant' list containing a given number of copies of the same datum.
  942 +
  943 + constant_list(x,6) is just: [x,x,x,x,x,x]
  944 +
  945 +public define List($T)
  946 + constant_list
  947 + (
  948 + $T x,
  949 + Int n
  950 + ) =
  951 + if n =< 0 then [ ] else [x . constant_list(x,n-1)].
  952 +
  953 +
  954 +
anubis_dev/library/tools/read_table.anubis
1 1
2 -transmit data_base/import_csv.anubis (since version 1.6) 2 + Obsolete file.
  3 +
  4 + transmit data_base/import_csv.anubis (since version 1.6)
3 5
anubis_dev/manuals/en/Anubis-doc-1-14.pdf
No preview for this file type
anubis_dev/manuals/en/Anubis-doc-1-14.tex
@@ -1011,6 +1011,12 @@ independently of the many definitions you may give to the symbol \cod{[~]} and t @@ -1011,6 +1011,12 @@ independently of the many definitions you may give to the symbol \cod{[~]} and t
1011 This is enough on syntax for reading the subsequent sections. We will say more on syntax later. 1011 This is enough on syntax for reading the subsequent sections. We will say more on syntax later.
1012 1012
1013 1013
  1014 +\section{\cod{\_\_LINE\_\_}, \cod{\_\_FILE\_\_}, \cod{\_\_DIR\_\_} and \cod{\_\_TIME\_\_}}
  1015 +The keyword \cod{\_\_LINE\_\_} represents the line number (of the source file) where this keyword is written. This
  1016 +is a datum of type \cod{Word32}. Similarly, \cod{\_\_FILE\_\_} and \cod{\_\_DIR\_\_} represent the absolute file path and
  1017 +absolute directory path of the source file in which they appear. They are of type \cod{String}. The keyword
  1018 +\cod{\_\_TIME\_\_} expands to the time (of type \cod{Word32}) at which the
  1019 +compiler encounters it in the source file during compilation.
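For readers used to other languages, the following Python lines compute the analogous values (an analogy only, not the Anubis implementation):

    import inspect, os

    this_file = os.path.abspath(__file__)        # ~ __FILE__
    this_dir = os.path.dirname(this_file)        # ~ __DIR__
    this_line = inspect.currentframe().f_lineno  # ~ __LINE__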
1014 1020
1015 1021
1016 \section{Automatically generated files} 1022 \section{Automatically generated files}
@@ -1019,7 +1025,7 @@ an ``automatically generated file&#39;&#39;. There are several examples @@ -1019,7 +1025,7 @@ an ``automatically generated file&#39;&#39;. There are several examples
1019 of metaprograms in the library such as~: \APG\ (see section \myref{sec:APG}), \cod{fast\_lexer\_3} (see section 1025 of metaprograms in the library such as~: \APG\ (see section \myref{sec:APG}), \cod{fast\_lexer\_3} (see section
1020 \myref{sec:fastlexer}), \cod{metaSQL} (see section \myref{sec:metaSQL}), \dots\ 1026 \myref{sec:fastlexer}), \cod{metaSQL} (see section \myref{sec:metaSQL}), \dots\
1021 1027
1022 -Autmatically generated files should never be modified because any modification is destroyed by a new execution of the 1028 +Automatically generated files should never be modified because any modification is destroyed by a new execution of the
1023 metaprogram. Only true source files should be modified. As a consequence, generated files encumber your working 1029 metaprogram. Only true source files should be modified. As a consequence, generated files encumber your working
1024 directory, and we have adopted the policy of putting them into a subdirectory (of the current directory), uniformly named \fn{generated}. 1030 directory, and we have adopted the policy of putting them into a subdirectory (of the current directory), uniformly named \fn{generated}.
1025 Putting generated files into the \fn{generated} subdirectory must be the default behavior of metaprograms. An option of the 1031 Putting generated files into the \fn{generated} subdirectory must be the default behavior of metaprograms. An option of the