Commit 633d58adaff26e3bc529a9b96dfe3c24a8cfea57
correction + iterator
Showing
23 changed files
with
747 additions
and
96 deletions
Show diff stats
anubis_dev/compiler/src/compil.h
| ... | ... | @@ -200,7 +200,10 @@ extern void NormalizeFileName(char *pathName); |
| 200 | 200 | of the form mcons3(file,line,col) where file, line and col Lisp integers. |
| 201 | 201 | */ |
| 202 | 202 | |
| 203 | +extern char *path_prefix(char *name); | |
| 204 | + | |
| 203 | 205 | #define file_in(x) (assert(((int)(integer_value(car(x)))) < max_already_included), already_included[integer_value(car(x))]) |
| 206 | +#define dir_in(x) path_prefix(file_in(x)) | |
| 204 | 207 | #define line_in(x) integer_value(second(x)) |
| 205 | 208 | #define col_in(x) integer_value(cdr2(x)) |
| 206 | 209 | |
| ... | ... | @@ -631,6 +634,7 @@ extern Expr linecol(void); |
| 631 | 634 | item(unlock_var)\ |
| 632 | 635 | item(__line__)\ |
| 633 | 636 | item(__file__)\ |
| 637 | + item(__dir__)\ | |
| 634 | 638 | item(lazy)\ |
| 635 | 639 | |
| 636 | 640 | /* true 'dynamic' modules and 'load_adm' (added in version 1.13) */ | ... | ... |
anubis_dev/compiler/src/compile.c
anubis_dev/compiler/src/expr.cpp
| ... | ... | @@ -591,6 +591,7 @@ Expr _symbols_in_interp(Expr head) |
| 591 | 591 | case operation: |
| 592 | 592 | case __line__: |
| 593 | 593 | case __file__: |
| 594 | + case __dir__: | |
| 594 | 595 | return nil; |
| 595 | 596 | |
| 596 | 597 | case macro: |
| ... | ... | @@ -755,6 +756,7 @@ Expr _symbols_in_term(Expr term) |
| 755 | 756 | case todo: /* (todo <lc> <filename> . <text>) */ |
| 756 | 757 | case __line__: |
| 757 | 758 | case __file__: |
| 759 | + case __dir__: | |
| 758 | 760 | case byte_array: |
| 759 | 761 | return nil; |
| 760 | 762 | ... | ... |
anubis_dev/compiler/src/grammar.y
| ... | ... | @@ -81,7 +81,7 @@ int is_global = 0; |
| 81 | 81 | %token<expr> yy__config_file yy__verbose yy__stop_after yy__mapsto yy__rec_mapsto yy__language |
| 82 | 82 | %token<expr> yy__mapstoo yy__rec_mapstoo yy__arroww |
| 83 | 83 | %token<expr> yy__conf_int yy__conf_string yy__conf_symbol |
| 84 | -%token<expr> yy__enddot yy__eof yy__LINE__ yy__FILE__ yy__colon_equals | |
| 84 | +%token<expr> yy__enddot yy__eof yy__LINE__ yy__FILE__ yy__DIR__ yy__colon_equals | |
| 85 | 85 | %token<expr> yy__integer yy__macro_integer yy__dummy yy__end_LBA |
| 86 | 86 | %token<expr> yy__defaults_as yy__lazy |
| 87 | 87 | |
| ... | ... | @@ -666,6 +666,7 @@ Term: yy__alert { alert_obsolete($1); |
| 666 | 666 | | yy__symbol yy__colon_equals Term { $$ = $3; } |
| 667 | 667 | | yy__LINE__ %prec prec_symbol { $$ = $1; } |
| 668 | 668 | | yy__FILE__ %prec prec_symbol { $$ = $1; } |
| 669 | +| yy__DIR__ %prec prec_symbol { $$ = $1; } | |
| 669 | 670 | //| yy__Symbol %prec prec_symbol { $$ = mcons3(symbol,linecol(),$1); } |
| 670 | 671 | | yy__lpar yy__rpar %prec prec_symbol { $$ = mcons3(symbol,linecol(),pdstr_voidpars); } |
| 671 | 672 | | yy__integer %prec prec_symbol { $$ = mcons3(car($1),linecol(), cdr($1)); } | ... | ... |
anubis_dev/compiler/src/interp.c
| ... | ... | @@ -2316,6 +2316,7 @@ term_interpretations(Expr ttype, /* required type for that term (may contai |
| 2316 | 2316 | |
| 2317 | 2317 | case __line__: /* (__line__ . <lisp integer>) */ |
| 2318 | 2318 | case __file__: /* (__file__ . <lisp string>) */ |
| 2319 | + case __dir__: | |
| 2319 | 2320 | result = list1(cons(term,env)); |
| 2320 | 2321 | break; |
| 2321 | 2322 | ... | ... |
anubis_dev/compiler/src/lexer.l
| ... | ... | @@ -326,7 +326,7 @@ extern void NormalizeFileName(char *pathName); |
| 326 | 326 | |
| 327 | 327 | |
| 328 | 328 | /* open an Anubis source file. */ |
| 329 | -FILE *fopensrc(const char *name, int n) | |
| 329 | +FILE *fopensrc(const char *name, int n) // n is for debugging purposes (identifies where the call comes from) | |
| 330 | 330 | /* The given name may be either absolute or relative. It |
| 331 | 331 | must be immediately converted into an absolute path, |
| 332 | 332 | because the compiler uses ONLY absolute file paths |
| ... | ... | @@ -544,7 +544,7 @@ int is_already_included(char *name) |
| 544 | 544 | /* same up to case */ |
| 545 | 545 | if (!strcmp(already_included[i],fopensrc_abs_path)) |
| 546 | 546 | { |
| 547 | - /* exactely same: file already compiled */ | |
| 547 | + /* exactly same: file already compiled */ | |
| 548 | 548 | fclose(fp); |
| 549 | 549 | return 1; |
| 550 | 550 | } |
| ... | ... | @@ -683,6 +683,8 @@ void come_back(void) |
| 683 | 683 | if (verbose) printf("Returning to directory: %s\n", |
| 684 | 684 | include_dir[include_stack_ptr]); |
| 685 | 685 | current_file_abs_path = abs_file_paths_stack[include_stack_ptr]; |
| 686 | + fopensrc_abs_path = current_file_abs_path; // these two variables seem to be redundant ! | |
| 687 | + fopensrc_cur_dir = include_dir[include_stack_ptr]; | |
| 686 | 688 | current_file_id = get_file_id(current_file_abs_path); |
| 687 | 689 | saturate_visi_table(); |
| 688 | 690 | if (show_reads) show_come_back(current_file_abs_path,current_file_id); |
| ... | ... | @@ -986,6 +988,8 @@ W [\ \t\r\n] |
| 986 | 988 | return yy__LINE__; } |
| 987 | 989 | <PAR>__FILE__ { yylval.expr = cons(__file__,new_string(current_file_abs_path)); |
| 988 | 990 | return yy__FILE__; } |
| 991 | +<PAR>__DIR__ { yylval.expr = cons(__dir__,new_string(path_prefix(current_file_abs_path))); | |
| 992 | + return yy__DIR__; } | |
| 989 | 993 | <PAR>__TIME__ { sprintf(str_buf,"%d",(int)time(NULL)); |
| 990 | 994 | yylval.expr = mcons4(of_type,linecol(),pdstr_Word32,mcons3(integer_10, |
| 991 | 995 | linecol(), | ... | ... |
anubis_dev/compiler/src/msgtexts.c
| ... | ... | @@ -1057,7 +1057,7 @@ const char *msgtext_undefined_macro[] = |
| 1057 | 1057 | const char *msgtext_too_big[] = |
| 1058 | 1058 | { |
| 1059 | 1059 | " Your program is too big for this version of the compiler.\n" |
| 1060 | - " Consider spilling it using secondary modules.\n\n" | |
| 1060 | + " Consider splitting it using secondary modules.\n\n" | |
| 1061 | 1061 | }; |
| 1062 | 1062 | |
| 1063 | 1063 | const char *msgtext_similar_paths[] = | ... | ... |
anubis_dev/compiler/src/replace.c
| ... | ... | @@ -505,6 +505,7 @@ static Expr rename_bound(Expr head, /* the expression within which bound |
| 505 | 505 | |
| 506 | 506 | case __line__: |
| 507 | 507 | case __file__: |
| 508 | + case __dir__: | |
| 508 | 509 | { |
| 509 | 510 | result = head; |
| 510 | 511 | } |
| ... | ... | @@ -955,6 +956,13 @@ static Expr replace_aux(Expr head, /* where bound variables are already ren |
| 955 | 956 | } |
| 956 | 957 | break; |
| 957 | 958 | |
| 959 | + case __dir__: | |
| 960 | + /* (__dir__ . <lisp string>) */ | |
| 961 | + { | |
| 962 | + return cons(__dir__,new_string(dir_in(second(op)))); /* change the value of __DIR__ */ | |
| 963 | + } | |
| 964 | + break; | |
| 965 | + | |
| 958 | 966 | default: assert(0); |
| 959 | 967 | } |
| 960 | 968 | } | ... | ... |
anubis_dev/compiler/src/show.c
| ... | ... | @@ -538,6 +538,12 @@ void show_interpretation(FILE *fp, |
| 538 | 538 | } |
| 539 | 539 | break; |
| 540 | 540 | |
| 541 | + case __dir__: | |
| 542 | + { | |
| 543 | + xpos += fprintf(fp,"__DIR__"); | |
| 544 | + } | |
| 545 | + break; | |
| 546 | + | |
| 541 | 547 | #if 0 |
| 542 | 548 | case integer: /* (integer <lc> . <Cint>) */ Obsolete: replaced by 'anb_int_10' and 'anb_int_16' |
| 543 | 549 | { | ... | ... |
anubis_dev/compiler/src/templates.c
anubis_dev/compiler/src/typetools.c
anubis_dev/library/data_base/import_csv.anubis
| 1 | 1 | ๏ปฟ |
| 2 | - | |
| 2 | + This file is obsoleted by data_base/read_csv.anubis | |
| 3 | + | |
| 4 | + | |
| 3 | 5 | The Anubis Project |
| 4 | 6 | |
| 5 | 7 | Reading CSV tables. |
| ... | ... | @@ -54,9 +56,9 @@ |
| 54 | 56 | |
| 55 | 57 | |
| 56 | 58 | |
| 57 | -read tools/basis.anubis | |
| 59 | + read tools/basis.anubis | |
| 58 | 60 | |
| 59 | -public define Maybe(List(List(String))) | |
| 61 | + public define Maybe(List(List(String))) | |
| 60 | 62 | read_table |
| 61 | 63 | ( |
| 62 | 64 | String filename, |
| ... | ... | @@ -97,7 +99,7 @@ public define Maybe(List(List(String))) |
| 97 | 99 | Checking if a list of Word8 'candidate' is a prefix in a line separator. |
| 98 | 100 | |
| 99 | 101 | |
| 100 | -define Bool | |
| 102 | + define Bool | |
| 101 | 103 | begins_line_separator |
| 102 | 104 | ( |
| 103 | 105 | List(Word8) candidate, |
| ... | ... | @@ -121,7 +123,7 @@ define Bool |
| 121 | 123 | |
| 122 | 124 | Here is the test. |
| 123 | 125 | |
| 124 | -define Bool | |
| 126 | + define Bool | |
| 125 | 127 | begins_line_separator |
| 126 | 128 | ( |
| 127 | 129 | List(Word8) candidate, |
| ... | ... | @@ -141,7 +143,7 @@ define Bool |
| 141 | 143 | |
| 142 | 144 | We have two cross recursive functions 'read_table' and 'read_more_lines'. |
| 143 | 145 | |
| 144 | -define List(List(String)) | |
| 146 | + define List(List(String)) | |
| 145 | 147 | read_table |
| 146 | 148 | ( |
| 147 | 149 | RStream file, |
| ... | ... | @@ -162,7 +164,7 @@ define List(List(String)) |
| 162 | 164 | file. If end of file is read, the last line of the table is empty. Otherwise, we |
| 163 | 165 | return to 'read_table', with the correct 'current_line' and 'current_cell'. |
| 164 | 166 | |
| 165 | -define List(List(String)) | |
| 167 | + define List(List(String)) | |
| 166 | 168 | read_more_lines |
| 167 | 169 | ( |
| 168 | 170 | RStream file, |
| ... | ... | @@ -185,7 +187,7 @@ define List(List(String)) |
| 185 | 187 | |
| 186 | 188 | Reading a table from an already opened file. |
| 187 | 189 | |
| 188 | -define List(List(String)) | |
| 190 | + define List(List(String)) | |
| 189 | 191 | read_table |
| 190 | 192 | ( |
| 191 | 193 | RStream file, |
| ... | ... | @@ -218,7 +220,7 @@ define List(List(String)) |
| 218 | 220 | |
| 219 | 221 | Now, here is our tool. |
| 220 | 222 | |
| 221 | -public define Maybe(List(List(String))) | |
| 223 | + public define Maybe(List(List(String))) | |
| 222 | 224 | read_table |
| 223 | 225 | ( |
| 224 | 226 | String filename, | ... | ... |
| 1 | + | |
| 2 | + The Anubis Project | |
| 3 | + | |
| 4 | + Reading a csv (Comma Separated Values) file. | |
| 5 | + | |
| 6 | + | |
| 7 | +read tools/basis.anubis | |
| 8 | +read tools/time.anubis | |
| 9 | +read lexical_analysis/fast_lexer_4.anubis | |
| 10 | + | |
| 11 | + | |
| 12 | + The function made by the function below reads a single record from a CSV input source. | |
| 13 | + | |
| 14 | +public type ReadCsvResult: | |
| 15 | + end_of_input, | |
| 16 | + error (String message), // an error message | |
| 17 | + ok (Int offset, List(String) record). // a single record and the offset of the end of | |
| 18 | + // this record. | |
| 19 | + | |
| 20 | +public define One -> ReadCsvResult | |
| 21 | + make_read_csv_line | |
| 22 | + ( | |
| 23 | + LexingStream ls, // lexing stream to be constructed from the input (see fast_lexer_4.anubis) | |
| 24 | + String sep, // cell separator (can be "," or ";") | |
| 25 | + List(Int) cols_to_get // list of column numbers you want to get | |
| 26 | + ). | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + --- That's all for the public part ! ------------------------------------------------------------------ | |
| 31 | + | |
| 32 | + | |
| 33 | +type CellPrefixToken: // reading the beginning of a cell until the first double quote or separator | |
| 34 | + eof, | |
| 35 | + double_quote, // if double quote, ignore the content up to here and switch to another lexer | |
| 36 | + separator(ByteArray). // if separator, keep everything before this separator | |
| 37 | + | |
| 38 | + | |
| 39 | + This lexer if for reading the beginning of a cell. | |
| 40 | + | |
| 41 | +define List(LexerItem(CellPrefixToken,One)) | |
| 42 | + begin_cell_description | |
| 43 | + ( | |
| 44 | + String sep | |
| 45 | + ) = | |
| 46 | + [ | |
| 47 | + lexer_item("[# #t]*\"", | |
| 48 | + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | |
| 49 | + token(double_quote))), | |
| 50 | + | |
| 51 | + lexer_item("[^#"+sep+"\"#r#n]*#"+sep, | |
| 52 | + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | |
| 53 | + token(separator(extract(0,l-1))))), | |
| 54 | + | |
| 55 | + lexer_item("[^#"+sep+"\"#r#n]*#n" , | |
| 56 | + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | |
| 57 | + token(separator(extract(0,l-1))))), | |
| 58 | + | |
| 59 | + lexer_item("[^#"+sep+"\"#r#n]*(#r#n)" , | |
| 60 | + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | |
| 61 | + token(separator(extract(0,l-2))))) | |
| 62 | + ]. | |
| 63 | + | |
| 64 | + | |
| 65 | + This lexer is for reading within the double quotes (if the cell is double quoted) | |
| 66 | + | |
| 67 | + | |
| 68 | +type InToken: | |
| 69 | + double_quote, // can also be the end of file | |
| 70 | + two_double_quotes, | |
| 71 | + part(ByteArray). // part of cell | |
| 72 | + | |
| 73 | + | |
| 74 | +define List(LexerItem(InToken,One)) | |
| 75 | + read_quoted_cell_description | |
| 76 | + ( | |
| 77 | + String sep | |
| 78 | + ) = | |
| 79 | + [ | |
| 80 | + lexer_item("[^\"]*" , | |
| 81 | + return((ByteArray b, LexingTools t, One u) |-> token(part(b)))), | |
| 82 | + | |
| 83 | + lexer_item("\"\"" , | |
| 84 | + return((ByteArray b, LexingTools t, One u) |-> token(two_double_quotes))), | |
| 85 | + | |
| 86 | + lexer_item("\"[# #t]*(("+sep+")|(#n)|(#r#n))" , | |
| 87 | + return((ByteArray b, LexingTools t, One u) |-> token(double_quote))) | |
| 88 | + ]. | |
| 89 | + | |
| 90 | + | |
| 91 | + The lexer described below skips a cell (and eats the trailing separator). | |
| 92 | + | |
| 93 | +define List(LexerItem(One,One)) | |
| 94 | + skip_cell_description | |
| 95 | + ( | |
| 96 | + String sep | |
| 97 | + ) = | |
| 98 | + [ | |
| 99 | + lexer_item("(([^\"#n#r#"+sep+"]*)|([# #t]*\"([^\"]|(\"\"))*\"[# #t]*))#"+sep, | |
| 100 | + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> token(unique))) | |
| 101 | + ]. | |
| 102 | + | |
| 103 | + The lexer described below skips to end of line (and eats the end of line). | |
| 104 | + | |
| 105 | +type EOL_Token: | |
| 106 | + eol_offset(Int offset). | |
| 107 | + | |
| 108 | +define List(LexerItem(EOL_Token,One)) | |
| 109 | + to_eol_description | |
| 110 | + = | |
| 111 | + [ | |
| 112 | + lexer_item("([^#r#n]*)((#n)|(#r#n))", | |
| 113 | + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> | |
| 114 | + token(eol_offset(t.offset(unique))))) | |
| 115 | + ]. | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | +global define One | |
| 120 | + make_the_lexers | |
| 121 | + ( | |
| 122 | + List(String) _ | |
| 123 | + ) = | |
| 124 | + make_precompiled_lexer("csv_c_begin_cell", begin_cell_description(","), '#'); | |
| 125 | + make_precompiled_lexer("csv_c_quoted_cell", read_quoted_cell_description(","), '#'); | |
| 126 | + make_precompiled_lexer("csv_c_skip_cell", skip_cell_description(","), '#'); | |
| 127 | + make_precompiled_lexer("csv_s_begin_cell", begin_cell_description(";"), '#'); | |
| 128 | + make_precompiled_lexer("csv_s_quoted_cell", read_quoted_cell_description(";"), '#'); | |
| 129 | + make_precompiled_lexer("csv_s_skip_cell", skip_cell_description(";"), '#'); | |
| 130 | + make_precompiled_lexer("csv_to_eol", to_eol_description, '#'). | |
| 131 | + | |
| 132 | + | |
| 133 | +execute anbexec make_the_lexers | |
| 134 | +read generated/csv_c_begin_cell.anubis | |
| 135 | +read generated/csv_c_quoted_cell.anubis | |
| 136 | +read generated/csv_c_skip_cell.anubis | |
| 137 | +read generated/csv_s_begin_cell.anubis | |
| 138 | +read generated/csv_s_quoted_cell.anubis | |
| 139 | +read generated/csv_s_skip_cell.anubis | |
| 140 | +read generated/csv_to_eol.anubis | |
| 141 | + | |
| 142 | + | |
| 143 | + define One | |
| 144 | + repeat | |
| 145 | + ( | |
| 146 | + Int n, | |
| 147 | + One -> One f | |
| 148 | + ) = | |
| 149 | + if n =< 0 then unique else f(unique); repeat(n-1,f). | |
| 150 | + | |
| 151 | + | |
| 152 | + define Int -> Result(String,ByteArray) | |
| 153 | + read_next_cell | |
| 154 | + ( | |
| 155 | + One -> One skip_cell, | |
| 156 | + One -> Result(String,ByteArray) read_cell | |
| 157 | + ) = | |
| 158 | + (Int n) |-> repeat(n,skip_cell); read_cell(unique). | |
| 159 | + | |
| 160 | + | |
| 161 | +type CB_Result: | |
| 162 | + eof, | |
| 163 | + error(String), | |
| 164 | + skip, | |
| 165 | + cell(String), | |
| 166 | + eol(Int offset). | |
| 167 | + | |
| 168 | + | |
| 169 | +define One -> ReadCsvResult | |
| 170 | + make_read_csv_line | |
| 171 | + ( | |
| 172 | + List(One -> CB_Result) cbs, | |
| 173 | + One -> One to_eol | |
| 174 | + ) = | |
| 175 | + with f = (List(One -> CB_Result) l, List(String) so_far) |-f1-> | |
| 176 | + if l is | |
| 177 | + { | |
| 178 | + [ ] then ok(0, reverse(so_far)), | |
| 179 | + [f . g] then | |
| 180 | + if f(unique) is | |
| 181 | + { | |
| 182 | + eof then end_of_input, | |
| 183 | + error(e) then to_eol(unique); error(e), | |
| 184 | + skip then (ReadCsvResult)f1(g, so_far), | |
| 185 | + cell(c) then | |
| 186 | + //print("read cell ["+c+"] "); | |
| 187 | + (ReadCsvResult)f1(g, [c. so_far]), | |
| 188 | + eol(offset) then | |
| 189 | + if g is | |
| 190 | + { | |
| 191 | + [] then ok(offset, reverse(so_far)), | |
| 192 | + [_ . _] then error("End Of Line unexpected") | |
| 193 | + } | |
| 194 | + | |
| 195 | + } | |
| 196 | + }, | |
| 197 | + (One u) |-> f(cbs, []). | |
| 198 | + | |
| 199 | + | |
| 200 | + | |
| 201 | +define List(One -> CB_Result) | |
| 202 | + make_cbs | |
| 203 | + ( | |
| 204 | + One -> CB_Result skip_cell, | |
| 205 | + One -> CB_Result read_cell, | |
| 206 | + One -> CB_Result to_eol, | |
| 207 | + List(Int) cols_to_get // must be strictly increasing | |
| 208 | + ) = | |
| 209 | + if cols_to_get is | |
| 210 | + { | |
| 211 | + [ ] then [to_eol], | |
| 212 | + [n . l] then | |
| 213 | + with rest = make_cbs(skip_cell,read_cell,to_eol, map((Int x) |-> x - n -1,l)), | |
| 214 | + constant_list(skip_cell,n) + [read_cell . rest] | |
| 215 | + }. | |
| 216 | + | |
| 217 | + | |
| 218 | + Just a test for make_cbs: | |
| 219 | + global define One | |
| 220 | + gaga | |
| 221 | + ( | |
| 222 | + List(String) args | |
| 223 | + ) = | |
| 224 | + with skip_cell = (One u) |-> skip, | |
| 225 | + read_cell = (One u) |-> (CB_Result)cell(to_byte_array("")), | |
| 226 | + to_eol = (One u) |-> (CB_Result)error(""), | |
| 227 | + l = [3,5,(Int)10], | |
| 228 | + with r = make_cbs(skip_cell,read_cell,to_eol,l), | |
| 229 | + forget(map((One -> CB_Result f) |-> if f(unique) is | |
| 230 | + { | |
| 231 | + error(e) then print("eol\n\n"), | |
| 232 | + skip then print("skip\n"), | |
| 233 | + cell(_) then print("cell\n") | |
| 234 | + }, r)). | |
| 235 | + | |
| 236 | + | |
| 237 | + | |
| 238 | + | |
| 239 | +public define One -> ReadCsvResult | |
| 240 | + make_read_csv_line | |
| 241 | + ( | |
| 242 | + LexingStream ls, | |
| 243 | + String sep, | |
| 244 | + List(Int) cols_to_get | |
| 245 | + ) = | |
| 246 | + with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell)(ls,unique), | |
| 247 | + lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell)(ls,unique), | |
| 248 | + lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell)(ls,unique), | |
| 249 | + lex_eol = retrieve_lexer(to_eol_description, csv_to_eol)(ls,unique), | |
| 250 | + skip_cell = (One u) |-> (CB_Result)if lex_skip(u) is | |
| 251 | + { | |
| 252 | + end_of_input then eof, | |
| 253 | + error(b,line,col) then error("skip "+line+":"+col+" :"+to_string(b)), | |
| 254 | + token(t) then skip | |
| 255 | + }, | |
| 256 | + begin_cell = (One u) |-> (Result(String,CellPrefixToken))if lex_begin(u) is | |
| 257 | + { | |
| 258 | + end_of_input then ok(eof), | |
| 259 | + error(b,line,col) then error("begin "+to_string(b)), | |
| 260 | + token(t) then ok(t) | |
| 261 | + }, | |
| 262 | + read_in_aux = (List(ByteArray) so_far) |-aux-> (CB_Result)if lex_in(unique) is | |
| 263 | + { | |
| 264 | + end_of_input then eof, | |
| 265 | + error(b,line,col) then error("in "+to_string(b)), | |
| 266 | + token(t) then if t is | |
| 267 | + { | |
| 268 | + double_quote then cell(to_string(concat(reverse(so_far)))), | |
| 269 | + two_double_quotes then aux([{0x22} . so_far]), | |
| 270 | + part(p) then aux([p . so_far]) | |
| 271 | + } | |
| 272 | + }, | |
| 273 | + read_in = (One u) |-> read_in_aux([]), | |
| 274 | + read_cell = (One u) |-> if begin_cell(u) is | |
| 275 | + { | |
| 276 | + error(e) then error(e), | |
| 277 | + ok(b) then if b is | |
| 278 | + { | |
| 279 | + eof then eof, | |
| 280 | + double_quote then read_in(u), | |
| 281 | + separator(c) then cell(to_string(c)) | |
| 282 | + } | |
| 283 | + }, | |
| 284 | + to_eol = (One u) |-> if lex_eol(u) is | |
| 285 | + { | |
| 286 | + end_of_input then eof, | |
| 287 | + error(b,line,col) then error("eol "+to_string(b)), | |
| 288 | + token(t) then if t is eol_offset(offset) then eol(offset) | |
| 289 | + }, | |
| 290 | + make_read_csv_line(make_cbs(skip_cell,read_cell,to_eol,cols_to_get), | |
| 291 | + (One u) |-> forget(to_eol(u))). | |
| 292 | + | |
| 293 | + | |
| 294 | + | |
| 295 | + *** Command line test. | |
| 296 | + | |
| 297 | +define Maybe(List(Int)) | |
| 298 | + map_to_Int | |
| 299 | + ( | |
| 300 | + List(String) l | |
| 301 | + ) = | |
| 302 | + if l is | |
| 303 | + { | |
| 304 | + [ ] then success([ ]), | |
| 305 | + [h . t] then if decimal_scan(h) is | |
| 306 | + { | |
| 307 | + failure then failure, | |
| 308 | + success(n1) then if map_to_Int(t) is | |
| 309 | + { | |
| 310 | + failure then failure, | |
| 311 | + success(l1) then success([n1 . l1]) | |
| 312 | + } | |
| 313 | + } | |
| 314 | + }. | |
| 315 | + | |
| 316 | + | |
| 317 | +define One | |
| 318 | + print_csv_line | |
| 319 | + ( | |
| 320 | + List(String) l | |
| 321 | + ) = | |
| 322 | + print("| "); | |
| 323 | + map_forget((String b) |-> print(b+" | "),l). | |
| 324 | + | |
| 325 | + | |
| 326 | +define One syntax = print("Usage: anbexec read_csv_file <csv file path> <sep> <n1> ... <nk>\n"+ | |
| 327 | + " where <sep> is the (double quoted) separator (can be \",\" or \";\")\n"+ | |
| 328 | + " and where the integers <n1>...<nk> are the ranks of the columns to keep,\n"+ | |
| 329 | + " (starting at 0).\n\n"). | |
| 330 | + | |
| 331 | +define One | |
| 332 | + print_to_error | |
| 333 | + ( | |
| 334 | + One -> ReadCsvResult f | |
| 335 | + ) = | |
| 336 | + if f(unique) is | |
| 337 | + { | |
| 338 | + end_of_input then print("-------- end of file --------------\n"), | |
| 339 | + error(e) then print("Error ["+e+"]\n"); | |
| 340 | + print_to_error(f), | |
| 341 | + ok(offset,n) then print_csv_line(n); | |
| 342 | + print("[at offset "+offset+"]\n"); | |
| 343 | + print_to_error(f) | |
| 344 | + }. | |
| 345 | + | |
| 346 | +define One | |
| 347 | + show_perf | |
| 348 | + ( | |
| 349 | + One -> ReadCsvResult f, | |
| 350 | + Int left, | |
| 351 | + Int read_line, | |
| 352 | + Int block_size, | |
| 353 | + UTime start_time | |
| 354 | + ) = | |
| 355 | + if f(unique) is | |
| 356 | + { | |
| 357 | + end_of_input then show_duration("lines read "+read_line, start_time); | |
| 358 | + print("----------------------\n"), | |
| 359 | + error(e) then print("error ["+e+"]\n"); print_to_error(f), | |
| 360 | + ok(o,n) then | |
| 361 | + with left1 = if left = 1 then | |
| 362 | + show_duration("lines read "+read_line+1, start_time); | |
| 363 | + block_size | |
| 364 | + else | |
| 365 | + left -1, | |
| 366 | + show_perf(f, left1, read_line+1, block_size, start_time) | |
| 367 | + }. | |
| 368 | + | |
| 369 | + | |
| 370 | +define One | |
| 371 | + show_perf | |
| 372 | + ( | |
| 373 | + One -> ReadCsvResult f, | |
| 374 | + Int block_size | |
| 375 | + )= | |
| 376 | + show_perf(f, block_size, 0, block_size, unow) | |
| 377 | + . | |
| 378 | + | |
| 379 | +global define One | |
| 380 | + read_csv_file | |
| 381 | + ( | |
| 382 | + List(String) args | |
| 383 | + ) = | |
| 384 | + if args is | |
| 385 | + { | |
| 386 | + [ ] then syntax, | |
| 387 | + [path . t] then | |
| 388 | + println("file "+path); | |
| 389 | + if t is | |
| 390 | + { | |
| 391 | + | |
| 392 | + [ ] then syntax, | |
| 393 | + [sep . l] then if sep:[",",";"] | |
| 394 | + then | |
| 395 | + if map_to_Int(l) is | |
| 396 | + { | |
| 397 | + failure then syntax, | |
| 398 | + success(List(Int) cols) then | |
| 399 | + if file(path,read) is | |
| 400 | + { | |
| 401 | + failure then print("File '"+path+"' not found.\n"), | |
| 402 | + success(f) then | |
| 403 | + if make_lexing_stream("",f,10,10) is | |
| 404 | + { | |
| 405 | + failure then print("Error while reading file '"+path+"'.\n"), | |
| 406 | + success(ls) then | |
| 407 | + with cs = no_doubles(qsort(cols,(Int x, Int y) |-> x < y)), | |
| 408 | + read_line = make_read_csv_line(ls,sep,cs), | |
| 409 | +// show_perf(read_line, 10000) | |
| 410 | + print_to_error(read_line) | |
| 411 | + } | |
| 412 | + } | |
| 413 | + } | |
| 414 | + else syntax | |
| 415 | + } | |
| 416 | + }. | |
| 417 | + | |
| 418 | + | ... | ... |
anubis_dev/library/data_base/read_csv_table.anubis
| 1 | 1 | |
| 2 | + This file is obsoleted by data_base/read_csv.anubis | |
| 3 | + | |
| 2 | 4 | |
| 3 | 5 | Try it ! |
| 4 | 6 | |
| 5 | -read import_csv.anubis | |
| 7 | + read import_csv.anubis | |
| 6 | 8 | |
| 7 | 9 | |
| 8 | -define One | |
| 10 | + define One | |
| 9 | 11 | table_print |
| 10 | 12 | ( |
| 11 | 13 | List(String) l |
| ... | ... | @@ -18,7 +20,7 @@ define One |
| 18 | 20 | table_print(t) |
| 19 | 21 | }. |
| 20 | 22 | |
| 21 | -define One | |
| 23 | + define One | |
| 22 | 24 | table_print |
| 23 | 25 | ( |
| 24 | 26 | List(List(String)) t |
| ... | ... | @@ -29,7 +31,7 @@ define One |
| 29 | 31 | [h . t2] then table_print(h); table_print(t2) |
| 30 | 32 | }. |
| 31 | 33 | |
| 32 | -define One | |
| 34 | + define One | |
| 33 | 35 | table_print |
| 34 | 36 | ( |
| 35 | 37 | Maybe(List(List(String))) t |
| ... | ... | @@ -40,7 +42,7 @@ define One |
| 40 | 42 | success(l) then table_print(l) |
| 41 | 43 | }. |
| 42 | 44 | |
| 43 | -global define One | |
| 45 | + global define One | |
| 44 | 46 | read_csv_table |
| 45 | 47 | ( |
| 46 | 48 | List(String) args | ... | ... |
anubis_dev/library/lexical_analysis/fast_lexer_4.anubis
| ... | ... | @@ -618,7 +618,7 @@ public type DFA_state($Token,$Aux): |
| 618 | 618 | variant of 'make_lexer': |
| 619 | 619 | |
| 620 | 620 | public define Result(RegExprError, |
| 621 | - (LexingStream -> One -> LexerOutput($Token), // the lexer | |
| 621 | + ((LexingStream,$Aux) -> One -> LexerOutput($Token), // the lexer | |
| 622 | 622 | List(DFA_state($Token,$Aux)))) // the automaton |
| 623 | 623 | make_lexer_and_automaton |
| 624 | 624 | ( |
| ... | ... | @@ -651,7 +651,7 @@ public define One |
| 651 | 651 | |
| 652 | 652 | *** (4.3) How to use a lexer. |
| 653 | 653 | |
| 654 | - Applying the function of type 'LexingStream($Aux) -> One -> LexerOutput($Token)' returned by | |
| 654 | + Applying the function of type 'LexingStream -> One -> LexerOutput($Token)' returned by | |
| 655 | 655 | 'make_lexer' to a lexing stream is understood as 'plugging' the lexer onto this lexing |
| 656 | 656 | stream. The result is a function of type: |
| 657 | 657 | |
| ... | ... | @@ -1314,10 +1314,34 @@ define Result(RegExprError,RegExpr) |
| 1314 | 1314 | |
| 1315 | 1315 | |
| 1316 | 1316 | |
| 1317 | + Debugging tools: | |
| 1318 | +define String | |
| 1319 | + format | |
| 1320 | + ( | |
| 1321 | + List(Word8) l | |
| 1322 | + ) = | |
| 1323 | + concat(map((Word8 c) |-> to_decimal(c) ,l)," "). | |
| 1317 | 1324 | |
| 1318 | - | |
| 1319 | - | |
| 1325 | +define String | |
| 1326 | + format | |
| 1327 | + ( | |
| 1328 | + RegExpr e | |
| 1329 | + ) = | |
| 1330 | + if e is | |
| 1331 | + { | |
| 1332 | + char(Word8 _0) then "char("+constant_string(1,_0)+")", | |
| 1333 | + choice(List(Word8) _0) then "choice("+format(_0)+")", | |
| 1334 | + plus(RegExpr _0) then "plus("+format(_0)+")", | |
| 1335 | + star(RegExpr _0) then "star("+format(_0)+")", | |
| 1336 | + cat(RegExpr _0,RegExpr _1) then "cat("+format(_0)+","+format(_1)+")", | |
| 1337 | + or(RegExpr _0,RegExpr _1) then "or("+format(_0)+","+format(_1)+")", | |
| 1338 | + dot then "dot", | |
| 1339 | + question_mark(RegExpr _0) then "question_mark("+format(_0)+")" | |
| 1340 | + }. | |
| 1341 | + | |
| 1342 | + | |
| 1320 | 1343 | *** [1.6.3] The tool for parsing regular expressions. |
| 1344 | + | |
| 1321 | 1345 | |
| 1322 | 1346 | public define Result(RegExprError,RegExpr) |
| 1323 | 1347 | parse_regular_expression |
| ... | ... | @@ -1328,13 +1352,12 @@ public define Result(RegExprError,RegExpr) |
| 1328 | 1352 | if read_regexpr(s,escape_char,[],end_of_regexpr) is |
| 1329 | 1353 | { |
| 1330 | 1354 | error(msg) then error(msg), |
| 1331 | - ok(re) then ok(re) | |
| 1355 | + ok(re) then //print("["+format(re)+"]\n"); | |
| 1356 | + ok(re) | |
| 1332 | 1357 | }. |
| 1333 | 1358 | |
| 1334 | 1359 | |
| 1335 | 1360 | |
| 1336 | - | |
| 1337 | - | |
| 1338 | 1361 | *** [1.7] Transforming a regular expression into a basic one. |
| 1339 | 1362 | |
| 1340 | 1363 | *** [1.7.1] Expanding a 'choice' of characters. |
| ... | ... | @@ -1427,7 +1450,7 @@ public define String |
| 1427 | 1450 | *** [1.1] The type 'LexingStream'. |
| 1428 | 1451 | |
| 1429 | 1452 | A lexing stream provides tools which are adhoc for using low level fast lexers as |
| 1430 | - defined in section 13 of predefined.anubis: | |
| 1453 | + defined in section 13 of predefined.anubis. | |
| 1431 | 1454 | |
| 1432 | 1455 | The type below records the information needed to come back to the state just after the |
| 1433 | 1456 | last or penultimate token was read. |
| ... | ... | @@ -1440,9 +1463,10 @@ type TokenState: |
| 1440 | 1463 | Int col |
| 1441 | 1464 | ). |
| 1442 | 1465 | |
| 1443 | - There is a ``penultimate token'' when at least one token has been successfully read since the | |
| 1466 | + There is a ``penultimate token'' when at least two tokens have been successfully read since the | |
| 1444 | 1467 | creation of the lexing stream. If it is not the case, the value of the ``penultimate state'' |
| 1445 | - defaults to the very initial state. | |
| 1468 | + defaults to the state after the very first token was read or to the very initial state if no | |
| 1469 | + token was read. | |
| 1446 | 1470 | |
| 1447 | 1471 | When the buffer is reloaded, part of the current buffer is kept. One reason for this is that |
| 1448 | 1472 | when we encounter the end of the buffer it can be the case that we are currently reading a token |
| ... | ... | @@ -1464,7 +1488,6 @@ type TokenState: |
| 1464 | 1488 | state informations for token1 and token2, the last two tokens successfully read. |
| 1465 | 1489 | |
| 1466 | 1490 | |
| 1467 | - | |
| 1468 | 1491 | public type LexingStream: |
| 1469 | 1492 | lexing_stream |
| 1470 | 1493 | ( |
| ... | ... | @@ -1546,18 +1569,18 @@ public type LexingStream: |
| 1546 | 1569 | -- the new current buffer "source text." |
| 1547 | 1570 | |
| 1548 | 1571 | -- last accepted: (s,3), because 'sou' has been accepted in state 's' and |
| 1549 | - ends at offset 0 within the new buffer, | |
| 1572 | + ends at offset 3 within the new buffer, | |
| 1550 | 1573 | |
| 1551 | 1574 | -- current_v receives the value 3, because 'sou' is already read, |
| 1552 | 1575 | |
| 1553 | 1576 | -- token_start_v receives the value 0, because the token we are currently |
| 1554 | 1577 | reading begins at offset 0. |
| 1555 | 1578 | |
| 1556 | - -- state s, because we want to try to read the sequel of 'sou'. | |
| 1579 | + -- restart in state s, because we want to try to read the sequel of 'sou'. | |
| 1557 | 1580 | |
| 1558 | 1581 | Notice that if the low level lexer had returned 'rejected(s,at_end_of_input,12,15)' |
| 1559 | 1582 | instead of 'accepted(s,at_end_of_input,12,15)', the scenario is the same one except |
| 1560 | - that last accepted is 'none'. | |
| 1583 | + that last accepted will be 'none'. | |
| 1561 | 1584 | |
| 1562 | 1585 | The low level lexer will now return 'accepted(s,not_at_end_of_input,0,6)', meaning that |
| 1563 | 1586 | it has recognized the token 'source' between positions 0 (included) and 6 (not |
| ... | ... | @@ -1574,6 +1597,7 @@ public type LexingStream: |
| 1574 | 1597 | define LexingTools |
| 1575 | 1598 | make_tools |
| 1576 | 1599 | ( |
| 1600 | + Var(Int) token_start_v, // actually not used in this function | |
| 1577 | 1601 | Var(Int) current_v, |
| 1578 | 1602 | Var(Int) line_v, |
| 1579 | 1603 | Var(Int) col_v, |
| ... | ... | @@ -1590,12 +1614,11 @@ define LexingTools |
| 1590 | 1614 | (One _) |-> *col_v, |
| 1591 | 1615 | |
| 1592 | 1616 | // get current offset: |
| 1617 | + // This is the number of bytes which are no more in the buffer plus the current position. | |
| 1593 | 1618 | (One _) |-> *past_v + *current_v, |
| 1594 | 1619 | |
| 1595 | 1620 | // go back one char: |
| 1596 | 1621 | // don't go beyond the beginning of the buffer |
| 1597 | - // No need to update line_v and col_v because they | |
| 1598 | - // refer to the beginning of the token. | |
| 1599 | 1622 | (Int n) |-> current_v <- max(*current_v - n, 0), |
| 1600 | 1623 | |
| 1601 | 1624 | // coming back to the state just after the last token was read |
| ... | ... | @@ -1603,7 +1626,6 @@ define LexingTools |
| 1603 | 1626 | current_v <- cur; |
| 1604 | 1627 | line_v <- l; |
| 1605 | 1628 | col_v <- c; |
| 1606 | - last_tok_v <- *penult_tok_v; | |
| 1607 | 1629 | last_accept_v <- none, |
| 1608 | 1630 | |
| 1609 | 1631 | // coming back to the state just after the penultimate token was read |
| ... | ... | @@ -1624,7 +1646,8 @@ public define LexingStream |
| 1624 | 1646 | String preambule, |
| 1625 | 1647 | ByteArray b |
| 1626 | 1648 | ) = |
| 1627 | - with b1_v = var(if length(preambule) = 0 then b else to_byte_array(preambule)+b), | |
| 1649 | + with b1_v = var(if length(preambule) = 0 then b else to_byte_array(preambule)+b), | |
| 1650 | + token_start_v = var((Int)0), | |
| 1628 | 1651 | current_v = var((Int)0), |
| 1629 | 1652 | line_v = var((Int)0), |
| 1630 | 1653 | col_v = var((Int)0), |
| ... | ... | @@ -1633,7 +1656,7 @@ public define LexingStream |
| 1633 | 1656 | penult_tok_v = var(tstate(0,0,0)), |
| 1634 | 1657 | last_accept_v = var((FastLexerLastAccepted)none), |
| 1635 | 1658 | lexing_stream(b1_v, // buffer |
| 1636 | - var((Int)0), // starting position | |
| 1659 | + token_start_v, // starting position | |
| 1637 | 1660 | current_v, // current position |
| 1638 | 1661 | last_accept_v, // last accepting position |
| 1639 | 1662 | last_tok_v, // last token state |
| ... | ... | @@ -1641,8 +1664,8 @@ public define LexingStream |
| 1641 | 1664 | (One u) |-> failure, // buffer is never reloaded |
| 1642 | 1665 | line_v, // current line |
| 1643 | 1666 | col_v, // current column |
| 1644 | - past_v, // past bytes | |
| 1645 | - make_tools(current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)). | |
| 1667 | + past_v, // past bytes (will remain always 0 in this case) | |
| 1668 | + make_tools(token_start_v,current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)). | |
| 1646 | 1669 | |
| 1647 | 1670 | |
| 1648 | 1671 | |
| ... | ... | @@ -1700,7 +1723,7 @@ public define Maybe(LexingStream) |
| 1700 | 1723 | //print("Keeping this from previous buffer: ["+to_string(extract(old_buffer,dropped,old_length))+"]\n"); |
| 1701 | 1724 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
| 1702 | 1725 | //print("New buffer: ["+to_string(*buffer_v)+"] size: "+to_decimal(length(*buffer_v))+"\n"); |
| 1703 | - token_start_v <- 0; | |
| 1726 | + token_start_v <- *token_start_v - dropped; | |
| 1704 | 1727 | //print("Next token starting position: "+to_decimal(*token_start_v)+"\n"); |
| 1705 | 1728 | current_v <- old_length - dropped; |
| 1706 | 1729 | //print("New current reading position: "+to_decimal(*current_v)+"\n"); |
| ... | ... | @@ -1726,7 +1749,7 @@ public define Maybe(LexingStream) |
| 1726 | 1749 | line_v, |
| 1727 | 1750 | col_v, |
| 1728 | 1751 | past_bytes_v, |
| 1729 | - make_tools(current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v))) | |
| 1752 | + make_tools(token_start_v,current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v))) | |
| 1730 | 1753 | }. |
| 1731 | 1754 | |
| 1732 | 1755 | |
| ... | ... | @@ -1785,7 +1808,7 @@ public define Maybe(LexingStream) |
| 1785 | 1808 | min(min(current(*penult_tok_v),current(*last_tok_v)),*token_start_v), |
| 1786 | 1809 | |
| 1787 | 1810 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
| 1788 | - token_start_v <- 0; | |
| 1811 | + token_start_v <- *token_start_v - dropped; | |
| 1789 | 1812 | current_v <- old_length - dropped; |
| 1790 | 1813 | past_bytes_v <- *past_bytes_v + dropped; |
| 1791 | 1814 | last_tok_v <- (if *last_tok_v is tstate(cur,l,c) then tstate(cur - dropped,l,c)); |
| ... | ... | @@ -1809,7 +1832,7 @@ public define Maybe(LexingStream) |
| 1809 | 1832 | line_v, |
| 1810 | 1833 | col_v, |
| 1811 | 1834 | past_bytes_v, |
| 1812 | - make_tools(current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v))) | |
| 1835 | + make_tools(token_start_v,current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v))) | |
| 1813 | 1836 | }. |
| 1814 | 1837 | |
| 1815 | 1838 | |
| ... | ... | @@ -1841,7 +1864,9 @@ define (Int, Int, Int) // returns new (start,line,col) |
| 1841 | 1864 | Int line, // current line |
| 1842 | 1865 | Int col // current column |
| 1843 | 1866 | ) = |
| 1844 | - if old_start >= new_start then (new_start,line,col) else | |
| 1867 | + //print("old_start = "+old_start+"\n"); | |
| 1868 | + if old_start >= new_start then //print("======== new col: "+col+"\n"); | |
| 1869 | + (new_start,line,col) else | |
| 1845 | 1870 | with c = force_nth(old_start,buffer), |
| 1846 | 1871 | if ((c >> 6) = 2) |
| 1847 | 1872 | /* |
| ... | ... | @@ -1864,6 +1889,7 @@ define One |
| 1864 | 1889 | Var(Int) line_v, |
| 1865 | 1890 | Var(Int) col_v |
| 1866 | 1891 | ) = |
| 1892 | + //print("new_start = "+new_start+"\n"); | |
| 1867 | 1893 | if compute_start_line_col(buffer,*token_start_v,new_start,*line_v,*col_v) is (s,l,c) then |
| 1868 | 1894 | token_start_v <- s; |
| 1869 | 1895 | line_v <- l; |
| ... | ... | @@ -1891,7 +1917,7 @@ public define LexerOutput($Token) |
| 1891 | 1917 | ) = |
| 1892 | 1918 | if lstream is lexing_stream(buffer_v,token_start_v,current_v,last_accept_v,last_tok_v,penult_tok_v,reload_buffer, |
| 1893 | 1919 | line_v,col_v,offset_v,tools) then |
| 1894 | - //print("starting at offset "+to_decimal(*current_v)+" with token start at "+to_decimal(*token_start_v)+"\n"); | |
| 1920 | + //print("starting at offset "+to_decimal(*current_v)+" with token start at "+to_decimal(*token_start_v)+"\n"); | |
| 1895 | 1921 | with lgbuf = length(*buffer_v), |
| 1896 | 1922 | if low_level_lexer(*buffer_v,*last_accept_v,*current_v,*token_start_v,starting_state) is |
| 1897 | 1923 | { |
| ... | ... | @@ -1908,7 +1934,7 @@ public define LexerOutput($Token) |
| 1908 | 1934 | else |
| 1909 | 1935 | ( |
| 1910 | 1936 | /* the lexeme may still be accepted after the buffer is reloaded */ |
| 1911 | - update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v); | |
| 1937 | + //update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v); | |
| 1912 | 1938 | if reload_buffer(unique) is |
| 1913 | 1939 | { |
| 1914 | 1940 | failure then |
| ... | ... | @@ -1928,13 +1954,13 @@ public define LexerOutput($Token) |
| 1928 | 1954 | |
| 1929 | 1955 | /* almost the same thing for accepted */ |
| 1930 | 1956 | accepted(s,start,end) then |
| 1931 | - //print("low level accepted start = "+to_decimal(start)+" end = "+to_decimal(end)+"\n"); | |
| 1957 | + //print("low level accepted start = "+to_decimal(start)+" end = "+to_decimal(end)+"\n"); | |
| 1932 | 1958 | last_accept_v <- last(s,end); |
| 1933 | 1959 | current_v <- end; |
| 1934 | 1960 | if end /= lgbuf then |
| 1935 | 1961 | ( |
| 1936 | 1962 | /* the lexeme just read must be accepted: the action is applied */ |
| 1937 | - update_start_line_col(*buffer_v,end,token_start_v,line_v,col_v); | |
| 1963 | + | |
| 1938 | 1964 | last_accept_v <- none; |
| 1939 | 1965 | if *actions(word32(s,0)) is |
| 1940 | 1966 | { |
| ... | ... | @@ -1945,18 +1971,25 @@ public define LexerOutput($Token) |
| 1945 | 1971 | // We must update some variables |
| 1946 | 1972 | penult_tok_v <- *last_tok_v; |
| 1947 | 1973 | last_tok_v <- tstate(end,*line_v,*col_v); |
| 1948 | - f(extract(*buffer_v,start,end),tools,aux), | |
| 1974 | + with result = f(extract(*buffer_v,start,end),tools,aux), | |
| 1975 | + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v); | |
| 1976 | + result, | |
| 1977 | + | |
| 1949 | 1978 | return(f) then |
| 1950 | 1979 | penult_tok_v <- *last_tok_v; |
| 1951 | 1980 | last_tok_v <- tstate(end,*line_v,*col_v); |
| 1952 | - f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | |
| 1953 | - end-start,tools,aux), | |
| 1981 | + with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | |
| 1982 | + end-start,tools,aux), | |
| 1983 | + //print("*token_start_v = "+*token_start_v+"\n"); | |
| 1984 | + //print("*current_v = "+*current_v+"\n"); | |
| 1985 | + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v); | |
| 1986 | + result | |
| 1954 | 1987 | } |
| 1955 | 1988 | ) |
| 1956 | 1989 | else |
| 1957 | 1990 | ( |
| 1958 | 1991 | /* the lexeme may still be accepted after the buffer is reloaded */ |
| 1959 | - update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v); | |
| 1992 | + | |
| 1960 | 1993 | if reload_buffer(unique) is |
| 1961 | 1994 | { |
| 1962 | 1995 | failure then |
| ... | ... | @@ -1970,11 +2003,15 @@ public define LexerOutput($Token) |
| 1970 | 2003 | ignore then should_not_happen(end_of_input), |
| 1971 | 2004 | return(f) then penult_tok_v <- *last_tok_v; |
| 1972 | 2005 | last_tok_v <- tstate(end,*line_v,*col_v); |
| 1973 | - f(extract(*buffer_v,start,end),tools,aux), | |
| 2006 | + with result = f(extract(*buffer_v,start,end),tools,aux), | |
| 2007 | + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v); | |
| 2008 | + result, | |
| 1974 | 2009 | return(f) then penult_tok_v <- *last_tok_v; |
| 1975 | 2010 | last_tok_v <- tstate(end,*line_v,*col_v); |
| 1976 | - f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | |
| 1977 | - end-start,tools,aux) | |
| 2011 | + with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | |
| 2012 | + end-start,tools,aux), | |
| 2013 | + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v); | |
| 2014 | + result | |
| 1978 | 2015 | }, |
| 1979 | 2016 | |
| 1980 | 2017 | success(_) then |
| ... | ... | @@ -1988,7 +2025,7 @@ public define LexerOutput($Token) |
| 1988 | 2025 | ignored_to_end then |
| 1989 | 2026 | //print("low level ignored_to_end\n"); |
| 1990 | 2027 | /* we are at end of input buffer */ |
| 1991 | - update_start_line_col(*buffer_v,lgbuf,token_start_v,line_v,col_v); | |
| 2028 | + //update_start_line_col(*buffer_v,lgbuf,token_start_v,line_v,col_v); | |
| 1992 | 2029 | if reload_buffer(unique) is |
| 1993 | 2030 | { |
| 1994 | 2031 | failure then |
| ... | ... | @@ -2010,7 +2047,7 @@ public define LexerOutput($Token) |
| 2010 | 2047 | |
| 2011 | 2048 | *** [3] Constructing the automaton. |
| 2012 | 2049 | |
| 2013 | - The description of a lexer is given as a list of 'LexerItem($Token)', where the | |
| 2050 | + The description of a lexer is given as a list of 'LexerItem($Token,$Aux)', where the | |
| 2014 | 2051 | parameter '$Token' represents the type of tokens. Each lexer item is made of a regular |
| 2015 | 2052 | expression and an action. If the action is 'ignore', the token just read is ignored and |
| 2016 | 2053 | the lexer tries to read the next one. Otherwise, the action is applied to the lexeme |
| ... | ... | @@ -3032,20 +3069,47 @@ define List(Int) |
| 3032 | 3069 | ignoring(name,transitions) then [-1 . actions_ranks(t)] |
| 3033 | 3070 | } |
| 3034 | 3071 | }. |
| 3072 | + | |
| 3073 | + | |
| 3074 | + Get a characteristic serializable datum from a lexer description (used to avoid | |
| 3075 | + reconstructing the lexer when the description did not change). The signature is | |
| 3076 | + changed if any of the regular expressions is changed or if their order is changed | |
| 3077 | + or if the sort of action is changed (this last point ensures that the list of | |
| 3078 | + action ranks remains correct in the generated file). | |
| 3079 | +define List(ByteArray) | |
| 3080 | + extract_regexprs | |
| 3081 | + ( | |
| 3082 | + List(LexerItem($Token,$Aux)) l | |
| 3083 | + ) = | |
| 3084 | + with asign = (LexerAction($Token,$Aux) a) |-> if a is | |
| 3085 | + { | |
| 3086 | + ignore then "(*i)", // something which is illegal as a regular expression | |
| 3087 | + return(_0) then "(*r1)", | |
| 3088 | + return(_0) then "(*r2)" | |
| 3089 | + }, | |
| 3090 | + map((LexerItem($Token,$Aux) i) |-> if i is | |
| 3091 | + { | |
| 3092 | + lexer_item(regular_expression,action) then to_byte_array(regular_expression+asign(action)), | |
| 3093 | + lexer_item(literal,action) then literal+to_byte_array(asign(action)) | |
| 3094 | + },l). | |
| 3095 | + | |
| 3035 | 3096 | |
| 3097 | + | |
| 3036 | 3098 | public define One |
| 3037 | - make_precompiled_lexer | |
| 3099 | + make_precompiled_lexer_aux | |
| 3038 | 3100 | ( |
| 3101 | + String signature, | |
| 3039 | 3102 | String directory, |
| 3040 | 3103 | String lexer_name, |
| 3041 | 3104 | List(LexerItem($Token,$Aux)) lexer_description, |
| 3042 | 3105 | Word8 escape_char |
| 3043 | - ) = | |
| 3106 | + ) = | |
| 3044 | 3107 | with file_name = directory/lexer_name+".anubis", |
| 3045 | 3108 | if file(file_name,new) is |
| 3046 | 3109 | { |
| 3047 | 3110 | failure then print("Cannot create file '"+file_name+"'.\n"), |
| 3048 | 3111 | success(file) then |
| 3112 | + print(weaken(file)," "+signature+"\n\n"); | |
| 3049 | 3113 | if make_DFA(lexer_description,escape_char) is |
| 3050 | 3114 | { |
| 3051 | 3115 | error(msg) then print(to_English(msg)+"\n"), |
| ... | ... | @@ -3060,6 +3124,46 @@ public define One |
| 3060 | 3124 | }. |
| 3061 | 3125 | |
| 3062 | 3126 | |
| 3127 | +define Maybe(String) | |
| 3128 | + read_signature | |
| 3129 | + ( | |
| 3130 | + String file_name | |
| 3131 | + ) = | |
| 3132 | + if file(file_name,read) is | |
| 3133 | + { | |
| 3134 | + failure then failure, | |
| 3135 | + success(f) then if read(f,43,10) is // read the first 3 (blanks) + 40 (sha1 hash) characters | |
| 3136 | + { | |
| 3137 | + error then failure, | |
| 3138 | + timeout then failure, | |
| 3139 | + ok(ba) then success(to_string(extract(ba,3,43))) | |
| 3140 | + } | |
| 3141 | + }. | |
| 3142 | + | |
| 3143 | +public define One | |
| 3144 | + make_precompiled_lexer | |
| 3145 | + ( | |
| 3146 | + String directory, | |
| 3147 | + String lexer_name, | |
| 3148 | + List(LexerItem($Token,$Aux)) lexer_description, | |
| 3149 | + Word8 escape_char | |
| 3150 | + ) = | |
| 3151 | + // avoid reconstructing the lexer if not needed | |
| 3152 | + with signature = to_hexa(sha1(extract_regexprs(lexer_description))), | |
| 3153 | + file_name = directory/lexer_name+".anubis", | |
| 3154 | + do_it = (One u) |-> | |
| 3155 | + print("Creating '"+file_name+"'. Please wait ... "); forget(flush(stdout)); | |
| 3156 | + make_precompiled_lexer_aux(signature,directory,lexer_name,lexer_description,escape_char); | |
| 3157 | + print("Done.\n"); forget(flush(stdout)), | |
| 3158 | + if read_signature(file_name) is | |
| 3159 | + { | |
| 3160 | + failure then do_it(unique), | |
| 3161 | + success(s) then if s = signature | |
| 3162 | + then unique | |
| 3163 | + else do_it(unique) | |
| 3164 | + }. | |
| 3165 | + | |
| 3166 | + | |
| 3063 | 3167 | public define One |
| 3064 | 3168 | make_precompiled_lexer |
| 3065 | 3169 | ( | ... | ... |
anubis_dev/library/system/files.anubis
| ... | ... | @@ -1020,14 +1020,14 @@ define Maybe(Int) |
| 1020 | 1020 | find_the_first |
| 1021 | 1021 | ( |
| 1022 | 1022 | Data_IO io, |
| 1023 | - String looking_for, //String to search | |
| 1023 | + ByteArray looking_for, //String to search | |
| 1024 | 1024 | Int size, //size of the string to search |
| 1025 | - String buffer, | |
| 1025 | + ByteArray buffer, | |
| 1026 | 1026 | Int current_pos, |
| 1027 | 1027 | Int buf_size, |
| 1028 | 1028 | Int buf_pos |
| 1029 | 1029 | )= |
| 1030 | - //println("buf_size :"+buf_size+ " buf_pos :"+buf_pos + " size : "+size); | |
| 1030 | + //println("general current pos: "+current_pos+" | buffer size: "+buf_size+ " | buffer pos: "+buf_pos + " | search size: "+size); | |
| 1031 | 1031 | if (buf_size - buf_pos) < size then |
| 1032 | 1032 | //println("New buffer request current pos "+current_pos+" buffer_pos "+buf_pos); |
| 1033 | 1033 | if read_bytes(io, 65536) is // <- block size is 64k |
| ... | ... | @@ -1035,23 +1035,30 @@ define Maybe(Int) |
| 1035 | 1035 | failure then println("read_bytes failure");failure, //finish |
| 1036 | 1036 | time_out then println("read_bytes timeout");failure, //finish |
| 1037 | 1037 | success(ba) then |
| 1038 | - with new_buffer = to_string(extract(to_byte_array(buffer), buf_pos, buf_size) + ba), | |
| 1038 | + //println("length of ba "+length(ba)); | |
| 1039 | + with ex_ba = extract(buffer, buf_pos, buf_size), | |
| 1040 | + //println("length of ex_ba "+length(ex_ba)); | |
| 1041 | + with new_ba = ex_ba + ba, | |
| 1042 | + //println("length of new_ba "+length(new_ba)); | |
| 1043 | + with new_buffer = ex_ba + ba, | |
| 1039 | 1044 | //println("SUCCESS New buffer length "+length(new_buffer)+" new current_pos "+current_pos); |
| 1040 | 1045 | find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0), |
| 1041 | 1046 | truncated(ba) then |
| 1042 | 1047 | if length(ba) = 0 then |
| 1048 | + //println("last buffer current position ["+current_pos+"]"); | |
| 1043 | 1049 | failure //finish |
| 1044 | 1050 | else |
| 1045 | - with new_buffer = to_string(extract(to_byte_array(buffer), buf_pos, buf_size) + ba), | |
| 1051 | + with new_buffer = extract(buffer, buf_pos, buf_size) + ba, | |
| 1046 | 1052 | // println("TRUNCATED New buffer length "+length(new_buffer)+" new current_pos "+current_pos); |
| 1047 | 1053 | find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0) |
| 1048 | 1054 | } |
| 1049 | 1055 | else |
| 1050 | - if find_string(buffer, looking_for, buf_pos) is | |
| 1056 | + if find_byte_array(buffer, looking_for, buf_pos) is | |
| 1051 | 1057 | { |
| 1052 | - failure then find_the_first(io, looking_for, size, buffer, current_pos, buf_size, buf_size - (size-1)), | |
| 1058 | + failure then | |
| 1059 | + find_the_first(io, looking_for, size, buffer, current_pos, buf_size, buf_size - (size-1)), | |
| 1053 | 1060 | success(pos) then |
| 1054 | - println("pattern ["+looking_for+"] found at offset "+(current_pos+pos)); | |
| 1061 | + //println("pattern ["+to_string(looking_for)+"] found at offset "+(current_pos+pos)); | |
| 1055 | 1062 | success(current_pos + pos) |
| 1056 | 1063 | } |
| 1057 | 1064 | . |
| ... | ... | @@ -1126,7 +1133,7 @@ public define Maybe(Int) |
| 1126 | 1133 | Data_IO io, |
| 1127 | 1134 | String search_string |
| 1128 | 1135 | ) = |
| 1129 | - find_the_first(io, search_string, length(search_string), "", 0, 0, 0). | |
| 1136 | + find_the_first(io, to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0). | |
| 1130 | 1137 | |
| 1131 | 1138 | public define Maybe(Int) |
| 1132 | 1139 | find_the_first |
| ... | ... | @@ -1138,7 +1145,7 @@ public define Maybe(Int) |
| 1138 | 1145 | { |
| 1139 | 1146 | failure then failure, |
| 1140 | 1147 | success(f) then |
| 1141 | - find_the_first(make_data_io(f), search_string, length(search_string), "", 0, 0, 0) | |
| 1148 | + find_the_first(make_data_io(f), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0) | |
| 1142 | 1149 | }. |
| 1143 | 1150 | |
| 1144 | 1151 | public define Maybe(Int) |
| ... | ... | @@ -1153,7 +1160,8 @@ public define Maybe(Int) |
| 1153 | 1160 | failure then failure, |
| 1154 | 1161 | success(f) then |
| 1155 | 1162 | with size = file_size(filename), |
| 1156 | - find_the_first(make_data_io(f, start_position, size - start_position), search_string, length(search_string), "", 0, 0, 0) | |
| 1163 | + //println("file size "+size); | |
| 1164 | + find_the_first(make_data_io(f, start_position, size - start_position), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0) | |
| 1157 | 1165 | }. |
| 1158 | 1166 | |
| 1159 | 1167 | public define Maybe(Int) |
| ... | ... | @@ -1168,6 +1176,6 @@ public define Maybe(Int) |
| 1168 | 1176 | { |
| 1169 | 1177 | failure then failure, |
| 1170 | 1178 | success(f) then |
| 1171 | - find_the_first(make_data_io(f, start_position, end_position - start_position), search_string, length(search_string), "", 0, 0, 0) | |
| 1179 | + find_the_first(make_data_io(f, start_position, end_position - start_position), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0) | |
| 1172 | 1180 | }. |
| 1173 | 1181 | ... | ... |
anubis_dev/library/tools/bytearray.anubis
| ... | ... | @@ -50,6 +50,16 @@ define One |
| 50 | 50 | anubis_format_aux(s,b,bpl,i+1,c+1,ind). |
| 51 | 51 | |
| 52 | 52 | |
| 53 | - | |
| 53 | +public define ByteArray | |
| 54 | + concat | |
| 55 | + ( | |
| 56 | + List(ByteArray) l | |
| 57 | + ) = | |
| 58 | + if l is | |
| 59 | + { | |
| 60 | + [ ] then constant_byte_array(0,0), | |
| 61 | + [h . t] then h + concat(t) | |
| 62 | + }. | |
| 63 | + | |
| 54 | 64 | |
| 55 | 65 | ... | ... |
anubis_dev/library/tools/int.anubis
| ... | ... | @@ -14,15 +14,15 @@ |
| 14 | 14 | |
| 15 | 15 | *** (1) Absolute value. |
| 16 | 16 | |
| 17 | - public define macro Int abs(Int x). returns the absolute value of x | |
| 17 | + public define inline Int abs(Int x). returns the absolute value of x | |
| 18 | 18 | |
| 19 | 19 | |
| 20 | 20 | *** (2) Max and min. |
| 21 | 21 | |
| 22 | 22 | The 'max' and 'min' functions respectively return the greatest and the smallest of their arguments. |
| 23 | 23 | |
| 24 | -public define Int max(Int x, Int y). returns the greatest of x and y | |
| 25 | -public define Int min(Int x, Int y). returns the smallest of x and y | |
| 24 | +public define inline Int max(Int x, Int y). returns the greatest of x and y | |
| 25 | +public define inline Int min(Int x, Int y). returns the smallest of x and y | |
| 26 | 26 | |
| 27 | 27 | public define Int max(NonEmptyList(Int) l). returns the greatest element of the list (which is non empty) |
| 28 | 28 | public define Int min(NonEmptyList(Int) l). returns the smallest element of the list (which is non empty) |
| ... | ... | @@ -63,20 +63,25 @@ public define Int Int x ^ Int y. raises x to the p |
| 63 | 63 | public define Bool odd (Int x). |
| 64 | 64 | public define Bool even (Int x). |
| 65 | 65 | |
| 66 | + *** (7) Greatest common divisor, ... | |
| 67 | + | |
| 68 | +public define Int gcd (Int x, Int y). | |
| 69 | +public define Int lcm (Int x, Int y). least common multiple | |
| 70 | + | |
| 66 | 71 | |
| 67 | 72 | --- That's all for the public part ! ----------------------------------------------- |
| 68 | 73 | |
| 69 | 74 | |
| 70 | 75 | |
| 71 | 76 | |
| 72 | -public define macro Int | |
| 77 | +public define inline Int | |
| 73 | 78 | abs |
| 74 | 79 | ( |
| 75 | 80 | Int x |
| 76 | 81 | ) = |
| 77 | 82 | if x < 0 then -x else x. |
| 78 | 83 | |
| 79 | -public define Int // must not be macro because x and y are always computed | |
| 84 | +public define inline Int // must not be macro because x and y are always computed | |
| 80 | 85 | // and one of them would be computed twice |
| 81 | 86 | max |
| 82 | 87 | ( |
| ... | ... | @@ -116,7 +121,7 @@ public define Int |
| 116 | 121 | |
| 117 | 122 | |
| 118 | 123 | |
| 119 | -public define Int | |
| 124 | +public define inline Int | |
| 120 | 125 | min |
| 121 | 126 | ( |
| 122 | 127 | Int x, |
| ... | ... | @@ -226,6 +231,38 @@ public define Bool |
| 226 | 231 | }. |
| 227 | 232 | |
| 228 | 233 | |
| 234 | +public define Int | |
| 235 | + gcd | |
| 236 | + ( | |
| 237 | + Int x, | |
| 238 | + Int y | |
| 239 | + ) = | |
| 240 | + /* Euclid's algorithm */ | |
| 241 | + if x > y then gcd(y,x) else | |
| 242 | + if x = 0 then abs(y) else | |
| 243 | + if x < 0 then gcd(-x,y) else | |
| 244 | + if y/x is | |
| 245 | + { | |
| 246 | + failure then should_not_happen(0), | |
| 247 | + success(p) then if p is (q,r) then gcd(r,x) | |
| 248 | + }. | |
| 249 | + | |
| 250 | + | |
| 251 | + | |
| 252 | +public define Int | |
| 253 | + lcm | |
| 254 | + ( | |
| 255 | + Int x, | |
| 256 | + Int y | |
| 257 | + ) = | |
| 258 | + with d = gcd(x,y), | |
| 259 | + if d = 0 | |
| 260 | + then 0 | |
| 261 | + else if (x*y)/d is | |
| 262 | + { | |
| 263 | + failure then should_not_happen(0), | |
| 264 | + success(p) then if p is (q,_) then q | |
| 265 | + }. | |
| 229 | 266 | |
| 230 | 267 | |
| 231 | 268 | ... | ... |
anubis_dev/library/tools/line_reader.anubis
| ... | ... | @@ -38,7 +38,7 @@ |
| 38 | 38 | |
| 39 | 39 | //--------------------------------------------------------------------------- |
| 40 | 40 | |
| 41 | -read lexical_analysis/fast_lexer_3.anubis | |
| 41 | +read lexical_analysis/fast_lexer_4.anubis | |
| 42 | 42 | |
| 43 | 43 | |
| 44 | 44 | type Token: |
| ... | ... | @@ -46,13 +46,13 @@ type Token: |
| 46 | 46 | eol. |
| 47 | 47 | |
| 48 | 48 | public type LineReaderLexer: |
| 49 | - line_reader_lexer(LexingStream(One) -> One -> LexerOutput(Token) /*lexer_base*/). | |
| 49 | + line_reader_lexer((LexingStream, One) -> One -> LexerOutput(Token) /*lexer_base*/). | |
| 50 | 50 | |
| 51 | 51 | public type LineReader: |
| 52 | 52 | line_reader(One -> LexerOutput(Token) /*lexer*/, |
| 53 | 53 | One -> Int /*offset*/, |
| 54 | 54 | LineReaderLexer /*lexer_base*/, |
| 55 | - LexingStream(One) lexing_stream). | |
| 55 | + LexingStream lexing_stream). | |
| 56 | 56 | |
| 57 | 57 | public define Int |
| 58 | 58 | current_offset |
| ... | ... | @@ -96,30 +96,30 @@ public define Maybe(String) |
| 96 | 96 | public define Maybe(LineReaderLexer) |
| 97 | 97 | make_line_reader_lexer |
| 98 | 98 | = |
| 99 | - if make_lexer_and_automaton([ | |
| 100 | - lexer_item("#r?#n", return((ByteArray b, LexingTools t, One aux) |-> token(eol))), | |
| 101 | - lexer_item("#r", return((ByteArray b, LexingTools t, One aux) |-> token(eol))), | |
| 102 | - lexer_item("[^\r\n]*", return((ByteArray b, LexingTools t, One aux) |-> token(line(to_string(b))))), | |
| 103 | - ], | |
| 99 | + if make_lexer([ | |
| 100 | + lexer_item("#r?#n", return((ByteArray b, LexingTools t, One aux) |-> token(eol))), | |
| 101 | + lexer_item("#r", return((ByteArray b, LexingTools t, One aux) |-> token(eol))), | |
| 102 | + lexer_item("[^\r\n]*", return((ByteArray b, LexingTools t, One aux) |-> token(line(to_string(b))))), | |
| 103 | + ], | |
| 104 | 104 | '#') is |
| 105 | 105 | { |
| 106 | 106 | error(msg) then print("Syntax error in regular expression: "+to_English(msg)+"\n"); failure, |
| 107 | - ok(p) then if p is (lexer, automaton) then success(line_reader_lexer(lexer)) | |
| 107 | + ok(lexer) then success(line_reader_lexer(lexer)) | |
| 108 | 108 | }. |
| 109 | 109 | |
| 110 | 110 | public define LineReader |
| 111 | 111 | make_line_reader |
| 112 | 112 | ( |
| 113 | - LexingStream(One) ls, | |
| 113 | + LexingStream ls, | |
| 114 | 114 | LineReaderLexer make_lexer |
| 115 | 115 | ) = |
| 116 | 116 | if make_lexer is line_reader_lexer(lexer) then |
| 117 | - line_reader(lexer(ls), (One u) |-> offset(ls), make_lexer, ls). | |
| 117 | + line_reader(lexer(ls, unique), (One u) |-> offset(ls), make_lexer, ls). | |
| 118 | 118 | |
| 119 | 119 | public define Maybe(LineReader) |
| 120 | 120 | make_line_reader |
| 121 | 121 | ( |
| 122 | - LexingStream(One) ls, | |
| 122 | + LexingStream ls, | |
| 123 | 123 | ) = |
| 124 | 124 | if make_line_reader_lexer is |
| 125 | 125 | { |
| ... | ... | @@ -135,7 +135,7 @@ public define Maybe(LineReader) |
| 135 | 135 | ( |
| 136 | 136 | String s, |
| 137 | 137 | ) = |
| 138 | - make_line_reader(make_lexing_stream("", s, unique)). | |
| 138 | + make_line_reader(make_lexing_stream("", s)). | |
| 139 | 139 | |
| 140 | 140 | public define Maybe(LineReader) |
| 141 | 141 | make_line_reader |
| ... | ... | @@ -146,8 +146,7 @@ public define Maybe(LineReader) |
| 146 | 146 | if make_lexing_stream("", /* preambule */ |
| 147 | 147 | f, /* the opened file */ |
| 148 | 148 | 65536, /* size of buffer for the lexing stream */ |
| 149 | - timeout, /* timeout (seconds) */ | |
| 150 | - unique) | |
| 149 | + timeout) /* timeout (seconds) */ | |
| 151 | 150 | is |
| 152 | 151 | { |
| 153 | 152 | failure then print("cannot make lexing stream.\n"); failure, |
| ... | ... | @@ -159,7 +158,7 @@ public define LineReader |
| 159 | 158 | reset_line_reader |
| 160 | 159 | ( |
| 161 | 160 | LineReader lr, |
| 162 | - LexingStream(One) ls, | |
| 161 | + LexingStream ls, | |
| 163 | 162 | ) = |
| 164 | 163 | if lr is line_reader(lexer, offset, make_lexer, _) then |
| 165 | 164 | make_line_reader(ls, make_lexer). | ... | ... |
anubis_dev/library/tools/list.anubis
| ... | ... | @@ -922,3 +922,33 @@ public define (List($T1), List($T2)) unzip(List(($T1, $T2)) l) = unzip(reverse(l |
| 922 | 922 | |
| 923 | 923 | |
| 924 | 924 | |
| 925 | + Compute the list of all sublists of a list (beware: for a list of length n, | |
| 926 | + this gives a list of length 2^n). | |
| 927 | + | |
| 928 | +public define List(List($T)) | |
| 929 | + sublists | |
| 930 | + ( | |
| 931 | + List($T) l | |
| 932 | + ) = | |
| 933 | + if l is | |
| 934 | + { | |
| 935 | + [ ] then [[ ]], | |
| 936 | + [h . t] then with p = sublists(t), | |
| 937 | + map((List($T) u) |-> [h . u],p) + p | |
| 938 | + }. | |
| 939 | + | |
| 940 | + | |
| 941 | + Construct a 'constant' list containing any number of times the same datum. | |
| 942 | + | |
| 943 | + constant_list(x,6) is just: [x,x,x,x,x,x] | |
| 944 | + | |
| 945 | +public define List($T) | |
| 946 | + constant_list | |
| 947 | + ( | |
| 948 | + $T x, | |
| 949 | + Int n | |
| 950 | + ) = | |
| 951 | + if n =< 0 then [ ] else [x . constant_list(x,n-1)]. | |
| 952 | + | |
| 953 | + | |
| 954 | + | ... | ... |
anubis_dev/library/tools/read_table.anubis
anubis_dev/manuals/en/Anubis-doc-1-14.pdf
No preview for this file type
anubis_dev/manuals/en/Anubis-doc-1-14.tex
| ... | ... | @@ -1011,6 +1011,12 @@ independently of the many definitions you may give to the symbol \cod{[~]} and t |
| 1011 | 1011 | This is enough on syntax for reading the subsequent sections. We will say more on syntax later. |
| 1012 | 1012 | |
| 1013 | 1013 | |
| 1014 | +\section{\cod{\_\_LINE\_\_}, \cod{\_\_FILE\_\_}, \cod{\_\_DIR\_\_} and \cod{\_\_TIME\_\_}} | |
| 1015 | +The keyword \cod{\_\_LINE\_\_} represents the line number (of the source file) where this keyword is written. This | |
| 1016 | +is a datum of type \cod{Word32}. Similarly, \cod{\_\_FILE\_\_} and \cod{\_\_DIR\_\_} represent the absolute file path and | |
| 1017 | +absolute directory path of the source file they are written within. They are of type \cod{String}. The keyword | |
| 1018 | +\cod{\_\_TIME\_\_} instantiates to the current time (of type \cod{Word32}) at | |
| 1019 | +which it is found in the source file during the compilation. | |
| 1014 | 1020 | |
| 1015 | 1021 | |
| 1016 | 1022 | \section{Automatically generated files} |
| ... | ... | @@ -1019,7 +1025,7 @@ an ``automatically generated file''. There are several examples |
| 1019 | 1025 | of metaprograms in the library such as~: \APG\ (see section \myref{sec:APG}), \cod{fast\_lexer\_3} (see section |
| 1020 | 1026 | \myref{sec:fastlexer}), \cod{metaSQL} (see section \myref{sec:metaSQL}), \dots\ |
| 1021 | 1027 | |
| 1022 | -Autmatically generated files should never be modified because any modification is destroyed by a new execution of the | |
| 1028 | +Automatically generated files should never be modified because any modification is destroyed by a new execution of the | |
| 1023 | 1029 | metaprogram. Only true source files should be modified. As a consequence, generated files encomber your working |
| 1024 | 1030 | directory, and we have adopted the policy of putting them into a subdirectory (of the current directory), uniformly named \fn{generated}. |
| 1025 | 1031 | Putting generated files into the \fn{generated} subdirectory must be the default behavior of metaprograms. An option of the | ... | ... |