Commit 633d58adaff26e3bc529a9b96dfe3c24a8cfea57
correction + iterator
Showing 23 changed files with 747 additions and 96 deletions
anubis_dev/compiler/src/compil.h
@@ -200,7 +200,10 @@ extern void NormalizeFileName(char *pathName); | @@ -200,7 +200,10 @@ extern void NormalizeFileName(char *pathName); | ||
200 | of the form mcons3(file,line,col) where file, line and col are Lisp integers. | 200 | of the form mcons3(file,line,col) where file, line and col are Lisp integers. |
201 | */ | 201 | */ |
202 | 202 | ||
203 | +extern char *path_prefix(char *name); | ||
204 | + | ||
203 | #define file_in(x) (assert(((int)(integer_value(car(x)))) < max_already_included), already_included[integer_value(car(x))]) | 205 | #define file_in(x) (assert(((int)(integer_value(car(x)))) < max_already_included), already_included[integer_value(car(x))]) |
206 | +#define dir_in(x) path_prefix(file_in(x)) | ||
204 | #define line_in(x) integer_value(second(x)) | 207 | #define line_in(x) integer_value(second(x)) |
205 | #define col_in(x) integer_value(cdr2(x)) | 208 | #define col_in(x) integer_value(cdr2(x)) |
206 | 209 | ||
@@ -631,6 +634,7 @@ extern Expr linecol(void); | @@ -631,6 +634,7 @@ extern Expr linecol(void); | ||
631 | item(unlock_var)\ | 634 | item(unlock_var)\ |
632 | item(__line__)\ | 635 | item(__line__)\ |
633 | item(__file__)\ | 636 | item(__file__)\ |
637 | + item(__dir__)\ | ||
634 | item(lazy)\ | 638 | item(lazy)\ |
635 | 639 | ||
636 | /* true 'dynamic' modules and 'load_adm' (added in version 1.13) */ | 640 | /* true 'dynamic' modules and 'load_adm' (added in version 1.13) */ |
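Note: the new dir_in(x) macro assumes that path_prefix returns the directory part of an absolute file path. Its definition is not part of this diff; the following C sketch only illustrates the assumed behavior, not the actual implementation:

#include <stdlib.h>
#include <string.h>

/* Hypothetical sketch: return a freshly allocated copy of 'name'
   truncated at its last '/', so that dir_in(x) names the directory
   containing file_in(x). The real path_prefix lives elsewhere in
   the compiler sources. */
char *path_prefix(char *name)
{
    char *slash = strrchr(name, '/');
    size_t n = slash ? (size_t)(slash - name) : 0;
    char *prefix = malloc(n + 1);
    if (prefix) {
        memcpy(prefix, name, n);
        prefix[n] = '\0';
    }
    return prefix;
}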
anubis_dev/compiler/src/compile.c
@@ -3439,6 +3439,7 @@ Expr compile_term(Expr head, | @@ -3439,6 +3439,7 @@ Expr compile_term(Expr head, | ||
3439 | } | 3439 | } |
3440 | break; | 3440 | break; |
3441 | 3441 | ||
3442 | + case __dir__: | ||
3442 | case __file__: /* (__file__ . <lisp string>) */ | 3443 | case __file__: /* (__file__ . <lisp string>) */ |
3443 | { | 3444 | { |
3444 | code = cons(cons(string, | 3445 | code = cons(cons(string, |
anubis_dev/compiler/src/expr.cpp
@@ -591,6 +591,7 @@ Expr _symbols_in_interp(Expr head) | @@ -591,6 +591,7 @@ Expr _symbols_in_interp(Expr head) | ||
591 | case operation: | 591 | case operation: |
592 | case __line__: | 592 | case __line__: |
593 | case __file__: | 593 | case __file__: |
594 | + case __dir__: | ||
594 | return nil; | 595 | return nil; |
595 | 596 | ||
596 | case macro: | 597 | case macro: |
@@ -755,6 +756,7 @@ Expr _symbols_in_term(Expr term) | @@ -755,6 +756,7 @@ Expr _symbols_in_term(Expr term) | ||
755 | case todo: /* (todo <lc> <filename> . <text>) */ | 756 | case todo: /* (todo <lc> <filename> . <text>) */ |
756 | case __line__: | 757 | case __line__: |
757 | case __file__: | 758 | case __file__: |
759 | + case __dir__: | ||
758 | case byte_array: | 760 | case byte_array: |
759 | return nil; | 761 | return nil; |
760 | 762 |
anubis_dev/compiler/src/grammar.y
@@ -81,7 +81,7 @@ int is_global = 0; | @@ -81,7 +81,7 @@ int is_global = 0; | ||
81 | %token<expr> yy__config_file yy__verbose yy__stop_after yy__mapsto yy__rec_mapsto yy__language | 81 | %token<expr> yy__config_file yy__verbose yy__stop_after yy__mapsto yy__rec_mapsto yy__language |
82 | %token<expr> yy__mapstoo yy__rec_mapstoo yy__arroww | 82 | %token<expr> yy__mapstoo yy__rec_mapstoo yy__arroww |
83 | %token<expr> yy__conf_int yy__conf_string yy__conf_symbol | 83 | %token<expr> yy__conf_int yy__conf_string yy__conf_symbol |
84 | -%token<expr> yy__enddot yy__eof yy__LINE__ yy__FILE__ yy__colon_equals | 84 | +%token<expr> yy__enddot yy__eof yy__LINE__ yy__FILE__ yy__DIR__ yy__colon_equals |
85 | %token<expr> yy__integer yy__macro_integer yy__dummy yy__end_LBA | 85 | %token<expr> yy__integer yy__macro_integer yy__dummy yy__end_LBA |
86 | %token<expr> yy__defaults_as yy__lazy | 86 | %token<expr> yy__defaults_as yy__lazy |
87 | 87 | ||
@@ -666,6 +666,7 @@ Term: yy__alert { alert_obsolete($1); | @@ -666,6 +666,7 @@ Term: yy__alert { alert_obsolete($1); | ||
666 | | yy__symbol yy__colon_equals Term { $$ = $3; } | 666 | | yy__symbol yy__colon_equals Term { $$ = $3; } |
667 | | yy__LINE__ %prec prec_symbol { $$ = $1; } | 667 | | yy__LINE__ %prec prec_symbol { $$ = $1; } |
668 | | yy__FILE__ %prec prec_symbol { $$ = $1; } | 668 | | yy__FILE__ %prec prec_symbol { $$ = $1; } |
669 | +| yy__DIR__ %prec prec_symbol { $$ = $1; } | ||
669 | //| yy__Symbol %prec prec_symbol { $$ = mcons3(symbol,linecol(),$1); } | 670 | //| yy__Symbol %prec prec_symbol { $$ = mcons3(symbol,linecol(),$1); } |
670 | | yy__lpar yy__rpar %prec prec_symbol { $$ = mcons3(symbol,linecol(),pdstr_voidpars); } | 671 | | yy__lpar yy__rpar %prec prec_symbol { $$ = mcons3(symbol,linecol(),pdstr_voidpars); } |
671 | | yy__integer %prec prec_symbol { $$ = mcons3(car($1),linecol(), cdr($1)); } | 672 | | yy__integer %prec prec_symbol { $$ = mcons3(car($1),linecol(), cdr($1)); } |
anubis_dev/compiler/src/interp.c
@@ -2316,6 +2316,7 @@ term_interpretations(Expr ttype, /* required type for that term (may contai | @@ -2316,6 +2316,7 @@ term_interpretations(Expr ttype, /* required type for that term (may contai | ||
2316 | 2316 | ||
2317 | case __line__: /* (__line__ . <lisp integer>) */ | 2317 | case __line__: /* (__line__ . <lisp integer>) */ |
2318 | case __file__: /* (__file__ . <lisp string>) */ | 2318 | case __file__: /* (__file__ . <lisp string>) */ |
2319 | + case __dir__: | ||
2319 | result = list1(cons(term,env)); | 2320 | result = list1(cons(term,env)); |
2320 | break; | 2321 | break; |
2321 | 2322 |
anubis_dev/compiler/src/lexer.l
@@ -326,7 +326,7 @@ extern void NormalizeFileName(char *pathName); | @@ -326,7 +326,7 @@ extern void NormalizeFileName(char *pathName); | ||
326 | 326 | ||
327 | 327 | ||
328 | /* open an Anubis source file. */ | 328 | /* open an Anubis source file. */ |
329 | -FILE *fopensrc(const char *name, int n) | 329 | +FILE *fopensrc(const char *name, int n) // n is for debugging purposes (identifies where the call comes from) |
330 | /* The given name may be either absolute or relative. It | 330 | /* The given name may be either absolute or relative. It |
331 | must be immediately converted into an absolute path, | 331 | must be immediately converted into an absolute path, |
332 | because the compiler uses ONLY absolute file paths | 332 | because the compiler uses ONLY absolute file paths |
@@ -544,7 +544,7 @@ int is_already_included(char *name) | @@ -544,7 +544,7 @@ int is_already_included(char *name) | ||
544 | /* same up to case */ | 544 | /* same up to case */ |
545 | if (!strcmp(already_included[i],fopensrc_abs_path)) | 545 | if (!strcmp(already_included[i],fopensrc_abs_path)) |
546 | { | 546 | { |
547 | - /* exactely same: file already compiled */ | 547 | + /* exactly same: file already compiled */ |
548 | fclose(fp); | 548 | fclose(fp); |
549 | return 1; | 549 | return 1; |
550 | } | 550 | } |
@@ -683,6 +683,8 @@ void come_back(void) | @@ -683,6 +683,8 @@ void come_back(void) | ||
683 | if (verbose) printf("Returning to directory: %s\n", | 683 | if (verbose) printf("Returning to directory: %s\n", |
684 | include_dir[include_stack_ptr]); | 684 | include_dir[include_stack_ptr]); |
685 | current_file_abs_path = abs_file_paths_stack[include_stack_ptr]; | 685 | current_file_abs_path = abs_file_paths_stack[include_stack_ptr]; |
686 | + fopensrc_abs_path = current_file_abs_path; // these two variables seem to be redundant ! | ||
687 | + fopensrc_cur_dir = include_dir[include_stack_ptr]; | ||
686 | current_file_id = get_file_id(current_file_abs_path); | 688 | current_file_id = get_file_id(current_file_abs_path); |
687 | saturate_visi_table(); | 689 | saturate_visi_table(); |
688 | if (show_reads) show_come_back(current_file_abs_path,current_file_id); | 690 | if (show_reads) show_come_back(current_file_abs_path,current_file_id); |
@@ -986,6 +988,8 @@ W [\ \t\r\n] | @@ -986,6 +988,8 @@ W [\ \t\r\n] | ||
986 | return yy__LINE__; } | 988 | return yy__LINE__; } |
987 | <PAR>__FILE__ { yylval.expr = cons(__file__,new_string(current_file_abs_path)); | 989 | <PAR>__FILE__ { yylval.expr = cons(__file__,new_string(current_file_abs_path)); |
988 | return yy__FILE__; } | 990 | return yy__FILE__; } |
991 | +<PAR>__DIR__ { yylval.expr = cons(__dir__,new_string(path_prefix(current_file_abs_path))); | ||
992 | + return yy__DIR__; } | ||
989 | <PAR>__TIME__ { sprintf(str_buf,"%d",(int)time(NULL)); | 993 | <PAR>__TIME__ { sprintf(str_buf,"%d",(int)time(NULL)); |
990 | yylval.expr = mcons4(of_type,linecol(),pdstr_Word32,mcons3(integer_10, | 994 | yylval.expr = mcons4(of_type,linecol(),pdstr_Word32,mcons3(integer_10, |
991 | linecol(), | 995 | linecol(), |
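Taken together, the lexer rule above and the replace.c and typetools.c cases below give __DIR__ the same behavior as __FILE__, except that only the directory part of the absolute path is kept; the replace.c case also recomputes its value when a term is substituted. A hypothetical Anubis snippet, not part of the commit, assuming a source file /home/me/project/main.anubis:

global define One
    where_am_i
    (
        List(String) args
    ) =
    print("__FILE__ = " + __FILE__ + "\n");   // prints "/home/me/project/main.anubis"
    print("__DIR__  = " + __DIR__ + "\n").    // prints "/home/me/project"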
anubis_dev/compiler/src/msgtexts.c
@@ -1057,7 +1057,7 @@ const char *msgtext_undefined_macro[] = | @@ -1057,7 +1057,7 @@ const char *msgtext_undefined_macro[] = | ||
1057 | const char *msgtext_too_big[] = | 1057 | const char *msgtext_too_big[] = |
1058 | { | 1058 | { |
1059 | " Your program is too big for this version of the compiler.\n" | 1059 | " Your program is too big for this version of the compiler.\n" |
1060 | - " Consider spilling it using secondary modules.\n\n" | 1060 | + " Consider splitting it using secondary modules.\n\n" |
1061 | }; | 1061 | }; |
1062 | 1062 | ||
1063 | const char *msgtext_similar_paths[] = | 1063 | const char *msgtext_similar_paths[] = |
anubis_dev/compiler/src/replace.c
@@ -505,6 +505,7 @@ static Expr rename_bound(Expr head, /* the expression within which bound | @@ -505,6 +505,7 @@ static Expr rename_bound(Expr head, /* the expression within which bound | ||
505 | 505 | ||
506 | case __line__: | 506 | case __line__: |
507 | case __file__: | 507 | case __file__: |
508 | + case __dir__: | ||
508 | { | 509 | { |
509 | result = head; | 510 | result = head; |
510 | } | 511 | } |
@@ -955,6 +956,13 @@ static Expr replace_aux(Expr head, /* where bound variables are already ren | @@ -955,6 +956,13 @@ static Expr replace_aux(Expr head, /* where bound variables are already ren | ||
955 | } | 956 | } |
956 | break; | 957 | break; |
957 | 958 | ||
959 | + case __dir__: | ||
960 | + /* (__dir__ . <lisp string>) */ | ||
961 | + { | ||
962 | + return cons(__dir__,new_string(dir_in(second(op)))); /* change the value of __DIR__ */ | ||
963 | + } | ||
964 | + break; | ||
965 | + | ||
958 | default: assert(0); | 966 | default: assert(0); |
959 | } | 967 | } |
960 | } | 968 | } |
anubis_dev/compiler/src/show.c
@@ -538,6 +538,12 @@ void show_interpretation(FILE *fp, | @@ -538,6 +538,12 @@ void show_interpretation(FILE *fp, | ||
538 | } | 538 | } |
539 | break; | 539 | break; |
540 | 540 | ||
541 | + case __dir__: | ||
542 | + { | ||
543 | + xpos += fprintf(fp,"__DIR__"); | ||
544 | + } | ||
545 | + break; | ||
546 | + | ||
541 | #if 0 | 547 | #if 0 |
542 | case integer: /* (integer <lc> . <Cint>) */ Obsolete: replaced by 'anb_int_10' and 'anb_int_16' | 548 | case integer: /* (integer <lc> . <Cint>) */ Obsolete: replaced by 'anb_int_10' and 'anb_int_16' |
543 | { | 549 | { |
anubis_dev/compiler/src/templates.c
anubis_dev/compiler/src/typetools.c
@@ -413,6 +413,7 @@ Expr _type_from_interpretation(Expr head, Expr env) | @@ -413,6 +413,7 @@ Expr _type_from_interpretation(Expr head, Expr env) | ||
413 | break; | 413 | break; |
414 | 414 | ||
415 | case __file__: | 415 | case __file__: |
416 | + case __dir__: | ||
416 | result = type_String; | 417 | result = type_String; |
417 | break; | 418 | break; |
418 | 419 |
anubis_dev/library/data_base/import_csv.anubis
1 | | 1 | |
2 | - | 2 | + This file is obsoleted by data_base/read_csv.anubis |
3 | + | ||
4 | + | ||
3 | The Anubis Project | 5 | The Anubis Project |
4 | 6 | ||
5 | Reading CSV tables. | 7 | Reading CSV tables. |
@@ -54,9 +56,9 @@ | @@ -54,9 +56,9 @@ | ||
54 | 56 | ||
55 | 57 | ||
56 | 58 | ||
57 | -read tools/basis.anubis | 59 | + read tools/basis.anubis |
58 | 60 | ||
59 | -public define Maybe(List(List(String))) | 61 | + public define Maybe(List(List(String))) |
60 | read_table | 62 | read_table |
61 | ( | 63 | ( |
62 | String filename, | 64 | String filename, |
@@ -97,7 +99,7 @@ public define Maybe(List(List(String))) | @@ -97,7 +99,7 @@ public define Maybe(List(List(String))) | ||
97 | Checking if a list of Word8 'candidate' is a prefix in a line separator. | 99 | Checking if a list of Word8 'candidate' is a prefix in a line separator. |
98 | 100 | ||
99 | 101 | ||
100 | -define Bool | 102 | + define Bool |
101 | begins_line_separator | 103 | begins_line_separator |
102 | ( | 104 | ( |
103 | List(Word8) candidate, | 105 | List(Word8) candidate, |
@@ -121,7 +123,7 @@ define Bool | @@ -121,7 +123,7 @@ define Bool | ||
121 | 123 | ||
122 | Here is the test. | 124 | Here is the test. |
123 | 125 | ||
124 | -define Bool | 126 | + define Bool |
125 | begins_line_separator | 127 | begins_line_separator |
126 | ( | 128 | ( |
127 | List(Word8) candidate, | 129 | List(Word8) candidate, |
@@ -141,7 +143,7 @@ define Bool | @@ -141,7 +143,7 @@ define Bool | ||
141 | 143 | ||
142 | We have two cross recursive functions 'read_table' and 'read_more_lines'. | 144 | We have two cross recursive functions 'read_table' and 'read_more_lines'. |
143 | 145 | ||
144 | -define List(List(String)) | 146 | + define List(List(String)) |
145 | read_table | 147 | read_table |
146 | ( | 148 | ( |
147 | RStream file, | 149 | RStream file, |
@@ -162,7 +164,7 @@ define List(List(String)) | @@ -162,7 +164,7 @@ define List(List(String)) | ||
162 | file. If end of file is read, the last line of the table is empty. Otherwise, we | 164 | file. If end of file is read, the last line of the table is empty. Otherwise, we |
163 | return to 'read_table', with the correct 'current_line' and 'current_cell'. | 165 | return to 'read_table', with the correct 'current_line' and 'current_cell'. |
164 | 166 | ||
165 | -define List(List(String)) | 167 | + define List(List(String)) |
166 | read_more_lines | 168 | read_more_lines |
167 | ( | 169 | ( |
168 | RStream file, | 170 | RStream file, |
@@ -185,7 +187,7 @@ define List(List(String)) | @@ -185,7 +187,7 @@ define List(List(String)) | ||
185 | 187 | ||
186 | Reading a table from an already opened file. | 188 | Reading a table from an already opened file. |
187 | 189 | ||
188 | -define List(List(String)) | 190 | + define List(List(String)) |
189 | read_table | 191 | read_table |
190 | ( | 192 | ( |
191 | RStream file, | 193 | RStream file, |
@@ -218,7 +220,7 @@ define List(List(String)) | @@ -218,7 +220,7 @@ define List(List(String)) | ||
218 | 220 | ||
219 | Now, here is our tool. | 221 | Now, here is our tool. |
220 | 222 | ||
221 | -public define Maybe(List(List(String))) | 223 | + public define Maybe(List(List(String))) |
222 | read_table | 224 | read_table |
223 | ( | 225 | ( |
224 | String filename, | 226 | String filename, |
1 | + | ||
2 | + The Anubis Project | ||
3 | + | ||
4 | + Reading a csv (Comma Separated Values) file. | ||
5 | + | ||
6 | + | ||
7 | +read tools/basis.anubis | ||
8 | +read tools/time.anubis | ||
9 | +read lexical_analysis/fast_lexer_4.anubis | ||
10 | + | ||
11 | + | ||
12 | + The function returned by the function below reads a single record from a CSV input source. | ||
13 | + | ||
14 | +public type ReadCsvResult: | ||
15 | + end_of_input, | ||
16 | + error (String message), // an error message | ||
17 | + ok (Int offset, List(String) record). // a single record and the offset of the end of | ||
18 | + // this record. | ||
19 | + | ||
20 | +public define One -> ReadCsvResult | ||
21 | + make_read_csv_line | ||
22 | + ( | ||
23 | + LexingStream ls, // lexing stream to be constructed from the input (see fast_lexer_4.anubis) | ||
24 | + String sep, // cell separator (can be "," or ";") | ||
25 | + List(Int) cols_to_get // list of column numbers you want to get | ||
26 | + ). | ||
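A hedged usage sketch (hypothetical, mirroring the command-line test at the end of this file): open the file, wrap it in a lexing stream as read_csv_file does below, then call the returned function once per record. print_csv_line is the helper defined further down.

define One
    print_first_record
    (
        String path
    ) =
    if file(path,read) is
    {
        failure then print("File '"+path+"' not found.\n"),
        success(f) then
        if make_lexing_stream("",f,10,10) is
        {
            failure then print("Error while reading file '"+path+"'.\n"),
            success(ls) then
            with read_line = make_read_csv_line(ls, ",", [0,(Int)2]),   // keep columns 0 and 2
            if read_line(unique) is
            {
                end_of_input then print("-------- end of file --------------\n"),
                error(e) then print("Error ["+e+"]\n"),
                ok(offset,record) then print_csv_line(record); print("[at offset "+offset+"]\n")
            }
        }
    }.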
27 | + | ||
28 | + | ||
29 | + | ||
30 | + --- That's all for the public part ! ------------------------------------------------------------------ | ||
31 | + | ||
32 | + | ||
33 | +type CellPrefixToken: // reading the beginning of a cell until the first double quote or separator | ||
34 | + eof, | ||
35 | + double_quote, // if double quote, ignore the content up to here and switch to another lexer | ||
36 | + separator(ByteArray). // if separator, keep everything before this separator | ||
37 | + | ||
38 | + | ||
39 | + This lexer is for reading the beginning of a cell. | ||
40 | + | ||
41 | +define List(LexerItem(CellPrefixToken,One)) | ||
42 | + begin_cell_description | ||
43 | + ( | ||
44 | + String sep | ||
45 | + ) = | ||
46 | + [ | ||
47 | + lexer_item("[# #t]*\"", | ||
48 | + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | ||
49 | + token(double_quote))), | ||
50 | + | ||
51 | + lexer_item("[^#"+sep+"\"#r#n]*#"+sep, | ||
52 | + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | ||
53 | + token(separator(extract(0,l-1))))), | ||
54 | + | ||
55 | + lexer_item("[^#"+sep+"\"#r#n]*#n" , | ||
56 | + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | ||
57 | + token(separator(extract(0,l-1))))), | ||
58 | + | ||
59 | + lexer_item("[^#"+sep+"\"#r#n]*(#r#n)" , | ||
60 | + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |-> | ||
61 | + token(separator(extract(0,l-2))))) | ||
62 | + ]. | ||
63 | + | ||
64 | + | ||
65 | + This lexer is for reading within the double quotes (if the cell is double quoted) | ||
66 | + | ||
67 | + | ||
68 | +type InToken: | ||
69 | + double_quote, // can also be the end of file | ||
70 | + two_double_quotes, | ||
71 | + part(ByteArray). // part of cell | ||
72 | + | ||
73 | + | ||
74 | +define List(LexerItem(InToken,One)) | ||
75 | + read_quoted_cell_description | ||
76 | + ( | ||
77 | + String sep | ||
78 | + ) = | ||
79 | + [ | ||
80 | + lexer_item("[^\"]*" , | ||
81 | + return((ByteArray b, LexingTools t, One u) |-> token(part(b)))), | ||
82 | + | ||
83 | + lexer_item("\"\"" , | ||
84 | + return((ByteArray b, LexingTools t, One u) |-> token(two_double_quotes))), | ||
85 | + | ||
86 | + lexer_item("\"[# #t]*(("+sep+")|(#n)|(#r#n))" , | ||
87 | + return((ByteArray b, LexingTools t, One u) |-> token(double_quote))) | ||
88 | + ]. | ||
89 | + | ||
90 | + | ||
91 | + The lexer described below skips a cell (and eats the trailing separator). | ||
92 | + | ||
93 | +define List(LexerItem(One,One)) | ||
94 | + skip_cell_description | ||
95 | + ( | ||
96 | + String sep | ||
97 | + ) = | ||
98 | + [ | ||
99 | + lexer_item("(([^\"#n#r#"+sep+"]*)|([# #t]*\"([^\"]|(\"\"))*\"[# #t]*))#"+sep, | ||
100 | + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> token(unique))) | ||
101 | + ]. | ||
102 | + | ||
103 | + The lexer described below skips to end of line (and eats the end of line). | ||
104 | + | ||
105 | +type EOL_Token: | ||
106 | + eol_offset(Int offset). | ||
107 | + | ||
108 | +define List(LexerItem(EOL_Token,One)) | ||
109 | + to_eol_description | ||
110 | + = | ||
111 | + [ | ||
112 | + lexer_item("([^#r#n]*)((#n)|(#r#n))", | ||
113 | + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> | ||
114 | + token(eol_offset(t.offset(unique))))) | ||
115 | + ]. | ||
116 | + | ||
117 | + | ||
118 | + | ||
119 | +global define One | ||
120 | + make_the_lexers | ||
121 | + ( | ||
122 | + List(String) _ | ||
123 | + ) = | ||
124 | + make_precompiled_lexer("csv_c_begin_cell", begin_cell_description(","), '#'); | ||
125 | + make_precompiled_lexer("csv_c_quoted_cell", read_quoted_cell_description(","), '#'); | ||
126 | + make_precompiled_lexer("csv_c_skip_cell", skip_cell_description(","), '#'); | ||
127 | + make_precompiled_lexer("csv_s_begin_cell", begin_cell_description(";"), '#'); | ||
128 | + make_precompiled_lexer("csv_s_quoted_cell", read_quoted_cell_description(";"), '#'); | ||
129 | + make_precompiled_lexer("csv_s_skip_cell", skip_cell_description(";"), '#'); | ||
130 | + make_precompiled_lexer("csv_to_eol", to_eol_description, '#'). | ||
131 | + | ||
132 | + | ||
133 | +execute anbexec make_the_lexers | ||
134 | +read generated/csv_c_begin_cell.anubis | ||
135 | +read generated/csv_c_quoted_cell.anubis | ||
136 | +read generated/csv_c_skip_cell.anubis | ||
137 | +read generated/csv_s_begin_cell.anubis | ||
138 | +read generated/csv_s_quoted_cell.anubis | ||
139 | +read generated/csv_s_skip_cell.anubis | ||
140 | +read generated/csv_to_eol.anubis | ||
141 | + | ||
142 | + | ||
143 | + define One | ||
144 | + repeat | ||
145 | + ( | ||
146 | + Int n, | ||
147 | + One -> One f | ||
148 | + ) = | ||
149 | + if n =< 0 then unique else f(unique); repeat(n-1,f). | ||
150 | + | ||
151 | + | ||
152 | + define Int -> Result(String,ByteArray) | ||
153 | + read_next_cell | ||
154 | + ( | ||
155 | + One -> One skip_cell, | ||
156 | + One -> Result(String,ByteArray) read_cell | ||
157 | + ) = | ||
158 | + (Int n) |-> repeat(n,skip_cell); read_cell(unique). | ||
159 | + | ||
160 | + | ||
161 | +type CB_Result: | ||
162 | + eof, | ||
163 | + error(String), | ||
164 | + skip, | ||
165 | + cell(String), | ||
166 | + eol(Int offset). | ||
167 | + | ||
168 | + | ||
169 | +define One -> ReadCsvResult | ||
170 | + make_read_csv_line | ||
171 | + ( | ||
172 | + List(One -> CB_Result) cbs, | ||
173 | + One -> One to_eol | ||
174 | + ) = | ||
175 | + with f = (List(One -> CB_Result) l, List(String) so_far) |-f1-> | ||
176 | + if l is | ||
177 | + { | ||
178 | + [ ] then ok(0, reverse(so_far)), | ||
179 | + [f . g] then | ||
180 | + if f(unique) is | ||
181 | + { | ||
182 | + eof then end_of_input, | ||
183 | + error(e) then to_eol(unique); error(e), | ||
184 | + skip then (ReadCsvResult)f1(g, so_far), | ||
185 | + cell(c) then | ||
186 | + //print("read cell ["+c+"] "); | ||
187 | + (ReadCsvResult)f1(g, [c. so_far]), | ||
188 | + eol(offset) then | ||
189 | + if g is | ||
190 | + { | ||
191 | + [] then ok(offset, reverse(so_far)), | ||
192 | + [_ . _] then error("End Of Line unexpected") | ||
193 | + } | ||
194 | + | ||
195 | + } | ||
196 | + }, | ||
197 | + (One u) |-> f(cbs, []). | ||
198 | + | ||
199 | + | ||
200 | + | ||
201 | +define List(One -> CB_Result) | ||
202 | + make_cbs | ||
203 | + ( | ||
204 | + One -> CB_Result skip_cell, | ||
205 | + One -> CB_Result read_cell, | ||
206 | + One -> CB_Result to_eol, | ||
207 | + List(Int) cols_to_get // must be strictly increasing | ||
208 | + ) = | ||
209 | + if cols_to_get is | ||
210 | + { | ||
211 | + [ ] then [to_eol], | ||
212 | + [n . l] then | ||
213 | + with rest = make_cbs(skip_cell,read_cell,to_eol, map((Int x) |-> x - n -1,l)), | ||
214 | + constant_list(skip_cell,n) + [read_cell . rest] | ||
215 | + }. | ||
216 | + | ||
217 | + | ||
218 | + Just a test for make_cbs: | ||
219 | + global define One | ||
220 | + gaga | ||
221 | + ( | ||
222 | + List(String) args | ||
223 | + ) = | ||
224 | + with skip_cell = (One u) |-> skip, | ||
225 | + read_cell = (One u) |-> (CB_Result)cell(to_byte_array("")), | ||
226 | + to_eol = (One u) |-> (CB_Result)error(""), | ||
227 | + l = [3,5,(Int)10], | ||
228 | + with r = make_cbs(skip_cell,read_cell,to_eol,l), | ||
229 | + forget(map((One -> CB_Result f) |-> if f(unique) is | ||
230 | + { | ||
231 | + error(e) then print("eol\n\n"), | ||
232 | + skip then print("skip\n"), | ||
233 | + cell(_) then print("cell\n") | ||
234 | + }, r)). | ||
235 | + | ||
236 | + | ||
237 | + | ||
238 | + | ||
239 | +public define One -> ReadCsvResult | ||
240 | + make_read_csv_line | ||
241 | + ( | ||
242 | + LexingStream ls, | ||
243 | + String sep, | ||
244 | + List(Int) cols_to_get | ||
245 | + ) = | ||
246 | + with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell)(ls,unique), | ||
247 | + lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell)(ls,unique), | ||
248 | + lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell)(ls,unique), | ||
249 | + lex_eol = retrieve_lexer(to_eol_description, csv_to_eol)(ls,unique), | ||
250 | + skip_cell = (One u) |-> (CB_Result)if lex_skip(u) is | ||
251 | + { | ||
252 | + end_of_input then eof, | ||
253 | + error(b,line,col) then error("skip "+line+":"+col+" :"+to_string(b)), | ||
254 | + token(t) then skip | ||
255 | + }, | ||
256 | + begin_cell = (One u) |-> (Result(String,CellPrefixToken))if lex_begin(u) is | ||
257 | + { | ||
258 | + end_of_input then ok(eof), | ||
259 | + error(b,line,col) then error("begin "+to_string(b)), | ||
260 | + token(t) then ok(t) | ||
261 | + }, | ||
262 | + read_in_aux = (List(ByteArray) so_far) |-aux-> (CB_Result)if lex_in(unique) is | ||
263 | + { | ||
264 | + end_of_input then eof, | ||
265 | + error(b,line,col) then error("in "+to_string(b)), | ||
266 | + token(t) then if t is | ||
267 | + { | ||
268 | + double_quote then cell(to_string(concat(reverse(so_far)))), | ||
269 | + two_double_quotes then aux([{0x22} . so_far]), | ||
270 | + part(p) then aux([p . so_far]) | ||
271 | + } | ||
272 | + }, | ||
273 | + read_in = (One u) |-> read_in_aux([]), | ||
274 | + read_cell = (One u) |-> if begin_cell(u) is | ||
275 | + { | ||
276 | + error(e) then error(e), | ||
277 | + ok(b) then if b is | ||
278 | + { | ||
279 | + eof then eof, | ||
280 | + double_quote then read_in(u), | ||
281 | + separator(c) then cell(to_string(c)) | ||
282 | + } | ||
283 | + }, | ||
284 | + to_eol = (One u) |-> if lex_eol(u) is | ||
285 | + { | ||
286 | + end_of_input then eof, | ||
287 | + error(b,line,col) then error("eol "+to_string(b)), | ||
288 | + token(t) then if t is eol_offset(offset) then eol(offset) | ||
289 | + }, | ||
290 | + make_read_csv_line(make_cbs(skip_cell,read_cell,to_eol,cols_to_get), | ||
291 | + (One u) |-> forget(to_eol(u))). | ||
292 | + | ||
293 | + | ||
294 | + | ||
295 | + *** Command line test. | ||
296 | + | ||
297 | +define Maybe(List(Int)) | ||
298 | + map_to_Int | ||
299 | + ( | ||
300 | + List(String) l | ||
301 | + ) = | ||
302 | + if l is | ||
303 | + { | ||
304 | + [ ] then success([ ]), | ||
305 | + [h . t] then if decimal_scan(h) is | ||
306 | + { | ||
307 | + failure then failure, | ||
308 | + success(n1) then if map_to_Int(t) is | ||
309 | + { | ||
310 | + failure then failure, | ||
311 | + success(l1) then success([n1 . l1]) | ||
312 | + } | ||
313 | + } | ||
314 | + }. | ||
315 | + | ||
316 | + | ||
317 | +define One | ||
318 | + print_csv_line | ||
319 | + ( | ||
320 | + List(String) l | ||
321 | + ) = | ||
322 | + print("| "); | ||
323 | + map_forget((String b) |-> print(b+" | "),l). | ||
324 | + | ||
325 | + | ||
326 | +define One syntax = print("Usage: anbexec read_csv_file <csv file path> <sep> <n1> ... <nk>\n"+ | ||
327 | + " where <sep> is the (double quoted) separator (can be \",\" or \";\")\n"+ | ||
328 | + " and where the integers <n1>...<nk> are the ranks of the columns to keep,\n"+ | ||
329 | + " (starting at 0).\n\n"). | ||
330 | + | ||
331 | +define One | ||
332 | + print_to_error | ||
333 | + ( | ||
334 | + One -> ReadCsvResult f | ||
335 | + ) = | ||
336 | + if f(unique) is | ||
337 | + { | ||
338 | + end_of_input then print("-------- end of file --------------\n"), | ||
339 | + error(e) then print("Error ["+e+"]\n"); | ||
340 | + print_to_error(f), | ||
341 | + ok(offset,n) then print_csv_line(n); | ||
342 | + print("[at offset "+offset+"]\n"); | ||
343 | + print_to_error(f) | ||
344 | + }. | ||
345 | + | ||
346 | +define One | ||
347 | + show_perf | ||
348 | + ( | ||
349 | + One -> ReadCsvResult f, | ||
350 | + Int left, | ||
351 | + Int read_line, | ||
352 | + Int block_size, | ||
353 | + UTime start_time | ||
354 | + ) = | ||
355 | + if f(unique) is | ||
356 | + { | ||
357 | + end_of_input then show_duration("lines read "+read_line, start_time); | ||
358 | + print("----------------------\n"), | ||
359 | + error(e) then print("error ["+e+"]\n"); print_to_error(f), | ||
360 | + ok(o,n) then | ||
361 | + with left1 = if left = 1 then | ||
362 | + show_duration("lines read "+(read_line+1), start_time); | ||
363 | + block_size | ||
364 | + else | ||
365 | + left -1, | ||
366 | + show_perf(f, left1, read_line+1, block_size, start_time) | ||
367 | + }. | ||
368 | + | ||
369 | + | ||
370 | +define One | ||
371 | + show_perf | ||
372 | + ( | ||
373 | + One -> ReadCsvResult f, | ||
374 | + Int block_size | ||
375 | + )= | ||
376 | + show_perf(f, block_size, 0, block_size, unow) | ||
377 | + . | ||
378 | + | ||
379 | +global define One | ||
380 | + read_csv_file | ||
381 | + ( | ||
382 | + List(String) args | ||
383 | + ) = | ||
384 | + if args is | ||
385 | + { | ||
386 | + [ ] then syntax, | ||
387 | + [path . t] then | ||
388 | + println("file "+path); | ||
389 | + if t is | ||
390 | + { | ||
391 | + | ||
392 | + [ ] then syntax, | ||
393 | + [sep . l] then if sep:[",",";"] | ||
394 | + then | ||
395 | + if map_to_Int(l) is | ||
396 | + { | ||
397 | + failure then syntax, | ||
398 | + success(List(Int) cols) then | ||
399 | + if file(path,read) is | ||
400 | + { | ||
401 | + failure then print("File '"+path+"' not found.\n"), | ||
402 | + success(f) then | ||
403 | + if make_lexing_stream("",f,10,10) is | ||
404 | + { | ||
405 | + failure then print("Error while reading file '"+path+"'.\n"), | ||
406 | + success(ls) then | ||
407 | + with cs = no_doubles(qsort(cols,(Int x, Int y) |-> x < y)), | ||
408 | + read_line = make_read_csv_line(ls,sep,cs), | ||
409 | +// show_perf(read_line, 10000) | ||
410 | + print_to_error(read_line) | ||
411 | + } | ||
412 | + } | ||
413 | + } | ||
414 | + else syntax | ||
415 | + } | ||
416 | + }. | ||
417 | + | ||
418 | + |
anubis_dev/library/data_base/read_csv_table.anubis
1 | 1 | ||
2 | + This file is obsoleted by data_base/read_csv.anubis | ||
3 | + | ||
2 | 4 | ||
3 | Try it ! | 5 | Try it ! |
4 | 6 | ||
5 | -read import_csv.anubis | 7 | + read import_csv.anubis |
6 | 8 | ||
7 | 9 | ||
8 | -define One | 10 | + define One |
9 | table_print | 11 | table_print |
10 | ( | 12 | ( |
11 | List(String) l | 13 | List(String) l |
@@ -18,7 +20,7 @@ define One | @@ -18,7 +20,7 @@ define One | ||
18 | table_print(t) | 20 | table_print(t) |
19 | }. | 21 | }. |
20 | 22 | ||
21 | -define One | 23 | + define One |
22 | table_print | 24 | table_print |
23 | ( | 25 | ( |
24 | List(List(String)) t | 26 | List(List(String)) t |
@@ -29,7 +31,7 @@ define One | @@ -29,7 +31,7 @@ define One | ||
29 | [h . t2] then table_print(h); table_print(t2) | 31 | [h . t2] then table_print(h); table_print(t2) |
30 | }. | 32 | }. |
31 | 33 | ||
32 | -define One | 34 | + define One |
33 | table_print | 35 | table_print |
34 | ( | 36 | ( |
35 | Maybe(List(List(String))) t | 37 | Maybe(List(List(String))) t |
@@ -40,7 +42,7 @@ define One | @@ -40,7 +42,7 @@ define One | ||
40 | success(l) then table_print(l) | 42 | success(l) then table_print(l) |
41 | }. | 43 | }. |
42 | 44 | ||
43 | -global define One | 45 | + global define One |
44 | read_csv_table | 46 | read_csv_table |
45 | ( | 47 | ( |
46 | List(String) args | 48 | List(String) args |
anubis_dev/library/lexical_analysis/fast_lexer_4.anubis
@@ -618,7 +618,7 @@ public type DFA_state($Token,$Aux): | @@ -618,7 +618,7 @@ public type DFA_state($Token,$Aux): | ||
618 | variant of 'make_lexer': | 618 | variant of 'make_lexer': |
619 | 619 | ||
620 | public define Result(RegExprError, | 620 | public define Result(RegExprError, |
621 | - (LexingStream -> One -> LexerOutput($Token), // the lexer | 621 | + ((LexingStream,$Aux) -> One -> LexerOutput($Token), // the lexer |
622 | List(DFA_state($Token,$Aux)))) // the automaton | 622 | List(DFA_state($Token,$Aux)))) // the automaton |
623 | make_lexer_and_automaton | 623 | make_lexer_and_automaton |
624 | ( | 624 | ( |
@@ -651,7 +651,7 @@ public define One | @@ -651,7 +651,7 @@ public define One | ||
651 | 651 | ||
652 | *** (4.3) How to use a lexer. | 652 | *** (4.3) How to use a lexer. |
653 | 653 | ||
654 | - Applying the function of type 'LexingStream($Aux) -> One -> LexerOutput($Token)' returned by | 654 | + Applying the function of type 'LexingStream -> One -> LexerOutput($Token)' returned by |
655 | 'make_lexer' to a lexing stream is understood as 'plugging' the lexer onto this lexing | 655 | 'make_lexer' to a lexing stream is understood as 'plugging' the lexer onto this lexing |
656 | stream. The result is a function of type: | 656 | stream. The result is a function of type: |
657 | 657 | ||
@@ -1314,10 +1314,34 @@ define Result(RegExprError,RegExpr) | @@ -1314,10 +1314,34 @@ define Result(RegExprError,RegExpr) | ||
1314 | 1314 | ||
1315 | 1315 | ||
1316 | 1316 | ||
1317 | + Debugging tools: | ||
1318 | +define String | ||
1319 | + format | ||
1320 | + ( | ||
1321 | + List(Word8) l | ||
1322 | + ) = | ||
1323 | + concat(map((Word8 c) |-> to_decimal(c) ,l)," "). | ||
1317 | 1324 | ||
1318 | - | ||
1319 | - | 1325 | +define String |
1326 | + format | ||
1327 | + ( | ||
1328 | + RegExpr e | ||
1329 | + ) = | ||
1330 | + if e is | ||
1331 | + { | ||
1332 | + char(Word8 _0) then "char("+constant_string(1,_0)+")", | ||
1333 | + choice(List(Word8) _0) then "choice("+format(_0)+")", | ||
1334 | + plus(RegExpr _0) then "plus("+format(_0)+")", | ||
1335 | + star(RegExpr _0) then "star("+format(_0)+")", | ||
1336 | + cat(RegExpr _0,RegExpr _1) then "cat("+format(_0)+","+format(_1)+")", | ||
1337 | + or(RegExpr _0,RegExpr _1) then "or("+format(_0)+","+format(_1)+")", | ||
1338 | + dot then "dot", | ||
1339 | + question_mark(RegExpr _0) then "question_mark("+format(_0)+")" | ||
1340 | + }. | ||
1341 | + | ||
1342 | + | ||
1320 | *** [1.6.3] The tool for parsing regular expressions. | 1343 | *** [1.6.3] The tool for parsing regular expressions. |
1344 | + | ||
1321 | 1345 | ||
1322 | public define Result(RegExprError,RegExpr) | 1346 | public define Result(RegExprError,RegExpr) |
1323 | parse_regular_expression | 1347 | parse_regular_expression |
@@ -1328,13 +1352,12 @@ public define Result(RegExprError,RegExpr) | @@ -1328,13 +1352,12 @@ public define Result(RegExprError,RegExpr) | ||
1328 | if read_regexpr(s,escape_char,[],end_of_regexpr) is | 1352 | if read_regexpr(s,escape_char,[],end_of_regexpr) is |
1329 | { | 1353 | { |
1330 | error(msg) then error(msg), | 1354 | error(msg) then error(msg), |
1331 | - ok(re) then ok(re) | 1355 | + ok(re) then //print("["+format(re)+"]\n"); |
1356 | + ok(re) | ||
1332 | }. | 1357 | }. |
1333 | 1358 | ||
1334 | 1359 | ||
1335 | 1360 | ||
1336 | - | ||
1337 | - | ||
1338 | *** [1.7] Transforming a regular expression into a basic one. | 1361 | *** [1.7] Transforming a regular expression into a basic one. |
1339 | 1362 | ||
1340 | *** [1.7.1] Expanding a 'choice' of characters. | 1363 | *** [1.7.1] Expanding a 'choice' of characters. |
@@ -1427,7 +1450,7 @@ public define String | @@ -1427,7 +1450,7 @@ public define String | ||
1427 | *** [1.1] The type 'LexingStream'. | 1450 | *** [1.1] The type 'LexingStream'. |
1428 | 1451 | ||
1429 | A lexing stream provides tools which are ad hoc for using low level fast lexers as | 1452 | A lexing stream provides tools which are ad hoc for using low level fast lexers as |
1430 | - defined in section 13 of predefined.anubis: | 1453 | + defined in section 13 of predefined.anubis. |
1431 | 1454 | ||
1432 | The type below records the information needed to come back to the state just after the | 1455 | The type below records the information needed to come back to the state just after the |
1433 | last or penultimate token was read. | 1456 | last or penultimate token was read. |
@@ -1440,9 +1463,10 @@ type TokenState: | @@ -1440,9 +1463,10 @@ type TokenState: | ||
1440 | Int col | 1463 | Int col |
1441 | ). | 1464 | ). |
1442 | 1465 | ||
1443 | - There is a ``penultimate token'' when at least one token has been successfully read since the | 1466 | + There is a ``penultimate token'' when at least two tokens have been successfully read since the |
1444 | creation of the lexing stream. If it is not the case, the value of the ``penultimate state'' | 1467 | creation of the lexing stream. If it is not the case, the value of the ``penultimate state'' |
1445 | - defaults to the very initial state. | 1468 | + defaults to the state after the very first token was read, or to the very initial state if no |
1469 | + token was read. | ||
1446 | 1470 | ||
1447 | When the buffer is reloaded, part of the current buffer is kept. One reason for this is that | 1471 | When the buffer is reloaded, part of the current buffer is kept. One reason for this is that |
1448 | when we encounter the end of the buffer it can be the case that we are currently reading a token | 1472 | when we encounter the end of the buffer it can be the case that we are currently reading a token |
@@ -1464,7 +1488,6 @@ type TokenState: | @@ -1464,7 +1488,6 @@ type TokenState: | ||
1464 | state informations for token1 and token2, the last two tokens successfully read. | 1488 | state informations for token1 and token2, the last two tokens successfully read. |
1465 | 1489 | ||
1466 | 1490 | ||
1467 | - | ||
1468 | public type LexingStream: | 1491 | public type LexingStream: |
1469 | lexing_stream | 1492 | lexing_stream |
1470 | ( | 1493 | ( |
@@ -1546,18 +1569,18 @@ public type LexingStream: | @@ -1546,18 +1569,18 @@ public type LexingStream: | ||
1546 | -- the new current buffer "source text." | 1569 | -- the new current buffer "source text." |
1547 | 1570 | ||
1548 | -- last accepted: (s,3), because 'sou' has been accepted in state 's' and | 1571 | -- last accepted: (s,3), because 'sou' has been accepted in state 's' and |
1549 | - ends at offset 0 within the new buffer, | 1572 | + ends at offset 3 within the new buffer, |
1550 | 1573 | ||
1551 | -- current_v receives the value 3, because 'sou' is already read, | 1574 | -- current_v receives the value 3, because 'sou' is already read, |
1552 | 1575 | ||
1553 | -- token_start_v receives the value 0, because the token we are currently | 1576 | -- token_start_v receives the value 0, because the token we are currently |
1554 | reading begins at offset 0. | 1577 | reading begins at offset 0. |
1555 | 1578 | ||
1556 | - -- state s, because we want to try to read the sequel of 'sou'. | 1579 | + -- restart in state s, because we want to try to read the sequel of 'sou'. |
1557 | 1580 | ||
1558 | Notice that if the low level lexer had returned 'rejected(s,at_end_of_input,12,15)' | 1581 | Notice that if the low level lexer had returned 'rejected(s,at_end_of_input,12,15)' |
1559 | instead of 'accepted(s,at_end_of_input,12,15)', the scenario is the same one except | 1582 | instead of 'accepted(s,at_end_of_input,12,15)', the scenario is the same one except |
1560 | - that last accepted is 'none'. | 1583 | + that last accepted will be 'none'. |
1561 | 1584 | ||
1562 | The low level lexer will now return 'accepted(s,not_at_end_of_input,0,6)', meaning that | 1585 | The low level lexer will now return 'accepted(s,not_at_end_of_input,0,6)', meaning that |
1563 | it has recognized the token 'source' between positions 0 (included) and 6 (not | 1586 | it has recognized the token 'source' between positions 0 (included) and 6 (not |
@@ -1574,6 +1597,7 @@ public type LexingStream: | @@ -1574,6 +1597,7 @@ public type LexingStream: | ||
1574 | define LexingTools | 1597 | define LexingTools |
1575 | make_tools | 1598 | make_tools |
1576 | ( | 1599 | ( |
1600 | + Var(Int) token_start_v, // actually not used in this function | ||
1577 | Var(Int) current_v, | 1601 | Var(Int) current_v, |
1578 | Var(Int) line_v, | 1602 | Var(Int) line_v, |
1579 | Var(Int) col_v, | 1603 | Var(Int) col_v, |
@@ -1590,12 +1614,11 @@ define LexingTools | @@ -1590,12 +1614,11 @@ define LexingTools | ||
1590 | (One _) |-> *col_v, | 1614 | (One _) |-> *col_v, |
1591 | 1615 | ||
1592 | // get current offset: | 1616 | // get current offset: |
1617 | + // This is the number of bytes which are no longer in the buffer, plus the current position. | ||
1593 | (One _) |-> *past_v + *current_v, | 1618 | (One _) |-> *past_v + *current_v, |
1594 | 1619 | ||
1595 | // go back one char: | 1620 | // go back one char: |
1596 | // don't go beyond the beginning of the buffer | 1621 | // don't go beyond the beginning of the buffer |
1597 | - // No need to update line_v and col_v because they | ||
1598 | - // refer to the beginning of the token. | ||
1599 | (Int n) |-> current_v <- max(*current_v - n, 0), | 1622 | (Int n) |-> current_v <- max(*current_v - n, 0), |
1600 | 1623 | ||
1601 | // coming back to the state just after the last token was read | 1624 | // coming back to the state just after the last token was read |
@@ -1603,7 +1626,6 @@ define LexingTools | @@ -1603,7 +1626,6 @@ define LexingTools | ||
1603 | current_v <- cur; | 1626 | current_v <- cur; |
1604 | line_v <- l; | 1627 | line_v <- l; |
1605 | col_v <- c; | 1628 | col_v <- c; |
1606 | - last_tok_v <- *penult_tok_v; | ||
1607 | last_accept_v <- none, | 1629 | last_accept_v <- none, |
1608 | 1630 | ||
1609 | // coming back to the state just after the penultimate token was read | 1631 | // coming back to the state just after the penultimate token was read |
@@ -1624,7 +1646,8 @@ public define LexingStream | @@ -1624,7 +1646,8 @@ public define LexingStream | ||
1624 | String preambule, | 1646 | String preambule, |
1625 | ByteArray b | 1647 | ByteArray b |
1626 | ) = | 1648 | ) = |
1627 | - with b1_v = var(if length(preambule) = 0 then b else to_byte_array(preambule)+b), | 1649 | + with b1_v = var(if length(preambule) = 0 then b else to_byte_array(preambule)+b), |
1650 | + token_start_v = var((Int)0), | ||
1628 | current_v = var((Int)0), | 1651 | current_v = var((Int)0), |
1629 | line_v = var((Int)0), | 1652 | line_v = var((Int)0), |
1630 | col_v = var((Int)0), | 1653 | col_v = var((Int)0), |
@@ -1633,7 +1656,7 @@ public define LexingStream | @@ -1633,7 +1656,7 @@ public define LexingStream | ||
1633 | penult_tok_v = var(tstate(0,0,0)), | 1656 | penult_tok_v = var(tstate(0,0,0)), |
1634 | last_accept_v = var((FastLexerLastAccepted)none), | 1657 | last_accept_v = var((FastLexerLastAccepted)none), |
1635 | lexing_stream(b1_v, // buffer | 1658 | lexing_stream(b1_v, // buffer |
1636 | - var((Int)0), // starting position | 1659 | + token_start_v, // starting position |
1637 | current_v, // current position | 1660 | current_v, // current position |
1638 | last_accept_v, // last accepting position | 1661 | last_accept_v, // last accepting position |
1639 | last_tok_v, // last token state | 1662 | last_tok_v, // last token state |
@@ -1641,8 +1664,8 @@ public define LexingStream | @@ -1641,8 +1664,8 @@ public define LexingStream | ||
1641 | (One u) |-> failure, // buffer is never reloaded | 1664 | (One u) |-> failure, // buffer is never reloaded |
1642 | line_v, // current line | 1665 | line_v, // current line |
1643 | col_v, // current column | 1666 | col_v, // current column |
1644 | - past_v, // past bytes | ||
1645 | - make_tools(current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)). | 1667 | + past_v, // past bytes (will remain always 0 in this case) |
1668 | + make_tools(token_start_v,current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)). | ||
1646 | 1669 | ||
1647 | 1670 | ||
1648 | 1671 | ||
@@ -1700,7 +1723,7 @@ public define Maybe(LexingStream) | @@ -1700,7 +1723,7 @@ public define Maybe(LexingStream) | ||
1700 | //print("Keeping this from previous buffer: ["+to_string(extract(old_buffer,dropped,old_length))+"]\n"); | 1723 | //print("Keeping this from previous buffer: ["+to_string(extract(old_buffer,dropped,old_length))+"]\n"); |
1701 | buffer_v <- extract(old_buffer,dropped,old_length)+more; | 1724 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
1702 | //print("New buffer: ["+to_string(*buffer_v)+"] size: "+to_decimal(length(*buffer_v))+"\n"); | 1725 | //print("New buffer: ["+to_string(*buffer_v)+"] size: "+to_decimal(length(*buffer_v))+"\n"); |
1703 | - token_start_v <- 0; | 1726 | + token_start_v <- *token_start_v - dropped; |
1704 | //print("Next token starting position: "+to_decimal(*token_start_v)+"\n"); | 1727 | //print("Next token starting position: "+to_decimal(*token_start_v)+"\n"); |
1705 | current_v <- old_length - dropped; | 1728 | current_v <- old_length - dropped; |
1706 | //print("New current reading position: "+to_decimal(*current_v)+"\n"); | 1729 | //print("New current reading position: "+to_decimal(*current_v)+"\n"); |
@@ -1726,7 +1749,7 @@ public define Maybe(LexingStream) | @@ -1726,7 +1749,7 @@ public define Maybe(LexingStream) | ||
1726 | line_v, | 1749 | line_v, |
1727 | col_v, | 1750 | col_v, |
1728 | past_bytes_v, | 1751 | past_bytes_v, |
1729 | - make_tools(current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v))) | 1752 | + make_tools(token_start_v,current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v))) |
1730 | }. | 1753 | }. |
1731 | 1754 | ||
1732 | 1755 | ||
@@ -1785,7 +1808,7 @@ public define Maybe(LexingStream) | @@ -1785,7 +1808,7 @@ public define Maybe(LexingStream) | ||
1785 | min(min(current(*penult_tok_v),current(*last_tok_v)),*token_start_v), | 1808 | min(min(current(*penult_tok_v),current(*last_tok_v)),*token_start_v), |
1786 | 1809 | ||
1787 | buffer_v <- extract(old_buffer,dropped,old_length)+more; | 1810 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
1788 | - token_start_v <- 0; | 1811 | + token_start_v <- *token_start_v - dropped; |
1789 | current_v <- old_length - dropped; | 1812 | current_v <- old_length - dropped; |
1790 | past_bytes_v <- *past_bytes_v + dropped; | 1813 | past_bytes_v <- *past_bytes_v + dropped; |
1791 | last_tok_v <- (if *last_tok_v is tstate(cur,l,c) then tstate(cur - dropped,l,c)); | 1814 | last_tok_v <- (if *last_tok_v is tstate(cur,l,c) then tstate(cur - dropped,l,c)); |
@@ -1809,7 +1832,7 @@ public define Maybe(LexingStream) | @@ -1809,7 +1832,7 @@ public define Maybe(LexingStream) | ||
1809 | line_v, | 1832 | line_v, |
1810 | col_v, | 1833 | col_v, |
1811 | past_bytes_v, | 1834 | past_bytes_v, |
1812 | - make_tools(current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v))) | 1835 | + make_tools(token_start_v,current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v))) |
1813 | }. | 1836 | }. |
1814 | 1837 | ||
1815 | 1838 | ||
@@ -1841,7 +1864,9 @@ define (Int, Int, Int) // returns new (start,line,col) | @@ -1841,7 +1864,9 @@ define (Int, Int, Int) // returns new (start,line,col) | ||
1841 | Int line, // current line | 1864 | Int line, // current line |
1842 | Int col // current column | 1865 | Int col // current column |
1843 | ) = | 1866 | ) = |
1844 | - if old_start >= new_start then (new_start,line,col) else | 1867 | + //print("old_start = "+old_start+"\n"); |
1868 | + if old_start >= new_start then //print("======== new col: "+col+"\n"); | ||
1869 | + (new_start,line,col) else | ||
1845 | with c = force_nth(old_start,buffer), | 1870 | with c = force_nth(old_start,buffer), |
1846 | if ((c >> 6) = 2) | 1871 | if ((c >> 6) = 2) |
1847 | /* | 1872 | /* |
@@ -1864,6 +1889,7 @@ define One | @@ -1864,6 +1889,7 @@ define One | ||
1864 | Var(Int) line_v, | 1889 | Var(Int) line_v, |
1865 | Var(Int) col_v | 1890 | Var(Int) col_v |
1866 | ) = | 1891 | ) = |
1892 | + //print("new_start = "+new_start+"\n"); | ||
1867 | if compute_start_line_col(buffer,*token_start_v,new_start,*line_v,*col_v) is (s,l,c) then | 1893 | if compute_start_line_col(buffer,*token_start_v,new_start,*line_v,*col_v) is (s,l,c) then |
1868 | token_start_v <- s; | 1894 | token_start_v <- s; |
1869 | line_v <- l; | 1895 | line_v <- l; |
@@ -1891,7 +1917,7 @@ public define LexerOutput($Token) | @@ -1891,7 +1917,7 @@ public define LexerOutput($Token) | ||
1891 | ) = | 1917 | ) = |
1892 | if lstream is lexing_stream(buffer_v,token_start_v,current_v,last_accept_v,last_tok_v,penult_tok_v,reload_buffer, | 1918 | if lstream is lexing_stream(buffer_v,token_start_v,current_v,last_accept_v,last_tok_v,penult_tok_v,reload_buffer, |
1893 | line_v,col_v,offset_v,tools) then | 1919 | line_v,col_v,offset_v,tools) then |
1894 | - //print("starting at offset "+to_decimal(*current_v)+" with token start at "+to_decimal(*token_start_v)+"\n"); | 1920 | + //print("starting at offset "+to_decimal(*current_v)+" with token start at "+to_decimal(*token_start_v)+"\n"); |
1895 | with lgbuf = length(*buffer_v), | 1921 | with lgbuf = length(*buffer_v), |
1896 | if low_level_lexer(*buffer_v,*last_accept_v,*current_v,*token_start_v,starting_state) is | 1922 | if low_level_lexer(*buffer_v,*last_accept_v,*current_v,*token_start_v,starting_state) is |
1897 | { | 1923 | { |
@@ -1908,7 +1934,7 @@ public define LexerOutput($Token) | @@ -1908,7 +1934,7 @@ public define LexerOutput($Token) | ||
1908 | else | 1934 | else |
1909 | ( | 1935 | ( |
1910 | /* the lexeme may still be accepted after the buffer is reloaded */ | 1936 | /* the lexeme may still be accepted after the buffer is reloaded */ |
1911 | - update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v); | 1937 | + //update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v); |
1912 | if reload_buffer(unique) is | 1938 | if reload_buffer(unique) is |
1913 | { | 1939 | { |
1914 | failure then | 1940 | failure then |
@@ -1928,13 +1954,13 @@ public define LexerOutput($Token) | @@ -1928,13 +1954,13 @@ public define LexerOutput($Token) | ||
1928 | 1954 | ||
1929 | /* almost the same thing for accepted */ | 1955 | /* almost the same thing for accepted */ |
1930 | accepted(s,start,end) then | 1956 | accepted(s,start,end) then |
1931 | - //print("low level accepted start = "+to_decimal(start)+" end = "+to_decimal(end)+"\n"); | 1957 | + //print("low level accepted start = "+to_decimal(start)+" end = "+to_decimal(end)+"\n"); |
1932 | last_accept_v <- last(s,end); | 1958 | last_accept_v <- last(s,end); |
1933 | current_v <- end; | 1959 | current_v <- end; |
1934 | if end /= lgbuf then | 1960 | if end /= lgbuf then |
1935 | ( | 1961 | ( |
1936 | /* the lexeme just read must be accepted: the action is applied */ | 1962 | /* the lexeme just read must be accepted: the action is applied */ |
1937 | - update_start_line_col(*buffer_v,end,token_start_v,line_v,col_v); | 1963 | + |
1938 | last_accept_v <- none; | 1964 | last_accept_v <- none; |
1939 | if *actions(word32(s,0)) is | 1965 | if *actions(word32(s,0)) is |
1940 | { | 1966 | { |
@@ -1945,18 +1971,25 @@ public define LexerOutput($Token) | @@ -1945,18 +1971,25 @@ public define LexerOutput($Token) | ||
1945 | // We must update some variables | 1971 | // We must update some variables |
1946 | penult_tok_v <- *last_tok_v; | 1972 | penult_tok_v <- *last_tok_v; |
1947 | last_tok_v <- tstate(end,*line_v,*col_v); | 1973 | last_tok_v <- tstate(end,*line_v,*col_v); |
1948 | - f(extract(*buffer_v,start,end),tools,aux), | 1974 | + with result = f(extract(*buffer_v,start,end),tools,aux), |
1975 | + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v); | ||
1976 | + result, | ||
1977 | + | ||
1949 | return(f) then | 1978 | return(f) then |
1950 | penult_tok_v <- *last_tok_v; | 1979 | penult_tok_v <- *last_tok_v; |
1951 | last_tok_v <- tstate(end,*line_v,*col_v); | 1980 | last_tok_v <- tstate(end,*line_v,*col_v); |
1952 | - f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | ||
1953 | - end-start,tools,aux), | 1981 | + with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), |
1982 | + end-start,tools,aux), | ||
1983 | + //print("*token_start_v = "+*token_start_v+"\n"); | ||
1984 | + //print("*current_v = "+*current_v+"\n"); | ||
1985 | + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v); | ||
1986 | + result | ||
1954 | } | 1987 | } |
1955 | ) | 1988 | ) |
1956 | else | 1989 | else |
1957 | ( | 1990 | ( |
1958 | /* the lexeme may still be accepted after the buffer is reloaded */ | 1991 | /* the lexeme may still be accepted after the buffer is reloaded */ |
1959 | - update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v); | 1992 | + |
1960 | if reload_buffer(unique) is | 1993 | if reload_buffer(unique) is |
1961 | { | 1994 | { |
1962 | failure then | 1995 | failure then |
@@ -1970,11 +2003,15 @@ public define LexerOutput($Token) | @@ -1970,11 +2003,15 @@ public define LexerOutput($Token) | ||
1970 | ignore then should_not_happen(end_of_input), | 2003 | ignore then should_not_happen(end_of_input), |
1971 | return(f) then penult_tok_v <- *last_tok_v; | 2004 | return(f) then penult_tok_v <- *last_tok_v; |
1972 | last_tok_v <- tstate(end,*line_v,*col_v); | 2005 | last_tok_v <- tstate(end,*line_v,*col_v); |
1973 | - f(extract(*buffer_v,start,end),tools,aux), | 2006 | + with result = f(extract(*buffer_v,start,end),tools,aux), |
2007 | + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v); | ||
2008 | + result, | ||
1974 | return(f) then penult_tok_v <- *last_tok_v; | 2009 | return(f) then penult_tok_v <- *last_tok_v; |
1975 | last_tok_v <- tstate(end,*line_v,*col_v); | 2010 | last_tok_v <- tstate(end,*line_v,*col_v); |
1976 | - f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | ||
1977 | - end-start,tools,aux) | 2011 | + with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), |
2012 | + end-start,tools,aux), | ||
2013 | + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v); | ||
2014 | + result | ||
1978 | }, | 2015 | }, |
1979 | 2016 | ||
1980 | success(_) then | 2017 | success(_) then |
@@ -1988,7 +2025,7 @@ public define LexerOutput($Token) | @@ -1988,7 +2025,7 @@ public define LexerOutput($Token) | ||
1988 | ignored_to_end then | 2025 | ignored_to_end then |
1989 | //print("low level ignored_to_end\n"); | 2026 | //print("low level ignored_to_end\n"); |
1990 | /* we are at end of input buffer */ | 2027 | /* we are at end of input buffer */ |
1991 | - update_start_line_col(*buffer_v,lgbuf,token_start_v,line_v,col_v); | 2028 | + //update_start_line_col(*buffer_v,lgbuf,token_start_v,line_v,col_v); |
1992 | if reload_buffer(unique) is | 2029 | if reload_buffer(unique) is |
1993 | { | 2030 | { |
1994 | failure then | 2031 | failure then |
@@ -2010,7 +2047,7 @@ public define LexerOutput($Token) | @@ -2010,7 +2047,7 @@ public define LexerOutput($Token) | ||
2010 | 2047 | ||
2011 | *** [3] Constructing the automaton. | 2048 | *** [3] Constructing the automaton. |
2012 | 2049 | ||
2013 | - The description of a lexer is given as a list of 'LexerItem($Token)', where the | 2050 | + The description of a lexer is given as a list of 'LexerItem($Token,$Aux)', where the |
2014 | parameter '$Token' represents the type of tokens. Each lexer item is made of a regular | 2051 | parameter '$Token' represents the type of tokens. Each lexer item is made of a regular |
2015 | expression and an action. If the action is 'ignore', the token just read is ignored and | 2052 | expression and an action. If the action is 'ignore', the token just read is ignored and |
2016 | the lexer tries to read the next one. Otherwise, the action is applied to the lexeme | 2053 | the lexer tries to read the next one. Otherwise, the action is applied to the lexeme |
@@ -3032,20 +3069,47 @@ define List(Int) | @@ -3032,20 +3069,47 @@ define List(Int) | ||
3032 | ignoring(name,transitions) then [-1 . actions_ranks(t)] | 3069 | ignoring(name,transitions) then [-1 . actions_ranks(t)] |
3033 | } | 3070 | } |
3034 | }. | 3071 | }. |
3072 | + | ||
3073 | + | ||
3074 | + Get a characteristic serializable datum from a lexer description (used to avoid | ||
3075 | + reconstructing the lexer when the description did not change). The signature | ||
3076 | + changes if any regular expression changes, if their order changes, or if the | ||
3077 | + kind of action changes (this last point ensures that the list of | ||
3078 | + action ranks remains correct in the generated file). | ||
3079 | +define List(ByteArray) | ||
3080 | + extract_regexprs | ||
3081 | + ( | ||
3082 | + List(LexerItem($Token,$Aux)) l | ||
3083 | + ) = | ||
3084 | + with asign = (LexerAction($Token,$Aux) a) |-> if a is | ||
3085 | + { | ||
3086 | + ignore then "(*i)", // something which is illegal as a regular expression | ||
3087 | + return(_0) then "(*r1)", | ||
3088 | + return(_0) then "(*r2)" | ||
3089 | + }, | ||
3090 | + map((LexerItem($Token,$Aux) i) |-> if i is | ||
3091 | + { | ||
3092 | + lexer_item(regular_expression,action) then to_byte_array(regular_expression+asign(action)), | ||
3093 | + lexer_item(literal,action) then literal+to_byte_array(asign(action)) | ||
3094 | + },l). | ||
3095 | + | ||
3035 | 3096 | ||
3097 | + | ||
3036 | public define One | 3098 | public define One |
3037 | - make_precompiled_lexer | 3099 | + make_precompiled_lexer_aux |
3038 | ( | 3100 | ( |
3101 | + String signature, | ||
3039 | String directory, | 3102 | String directory, |
3040 | String lexer_name, | 3103 | String lexer_name, |
3041 | List(LexerItem($Token,$Aux)) lexer_description, | 3104 | List(LexerItem($Token,$Aux)) lexer_description, |
3042 | Word8 escape_char | 3105 | Word8 escape_char |
3043 | - ) = | 3106 | + ) = |
3044 | with file_name = directory/lexer_name+".anubis", | 3107 | with file_name = directory/lexer_name+".anubis", |
3045 | if file(file_name,new) is | 3108 | if file(file_name,new) is |
3046 | { | 3109 | { |
3047 | failure then print("Cannot create file '"+file_name+"'.\n"), | 3110 | failure then print("Cannot create file '"+file_name+"'.\n"), |
3048 | success(file) then | 3111 | success(file) then |
3112 | + print(weaken(file)," "+signature+"\n\n"); | ||
3049 | if make_DFA(lexer_description,escape_char) is | 3113 | if make_DFA(lexer_description,escape_char) is |
3050 | { | 3114 | { |
3051 | error(msg) then print(to_English(msg)+"\n"), | 3115 | error(msg) then print(to_English(msg)+"\n"), |
@@ -3060,6 +3124,46 @@ public define One | @@ -3060,6 +3124,46 @@ public define One | ||
3060 | }. | 3124 | }. |
3061 | 3125 | ||
3062 | 3126 | ||
3127 | +define Maybe(String) | ||
3128 | + read_signature | ||
3129 | + ( | ||
3130 | + String file_name | ||
3131 | + ) = | ||
3132 | + if file(file_name,read) is | ||
3133 | + { | ||
3134 | + failure then failure, | ||
3135 | + success(f) then if read(f,43,10) is // read the first 43 bytes: 3 blanks + the 40 hex characters of the sha1 hash | ||
3136 | + { | ||
3137 | + error then failure, | ||
3138 | + timeout then failure, | ||
3139 | + ok(ba) then success(to_string(extract(ba,3,43))) | ||
3140 | + } | ||
3141 | + }. | ||
3142 | + | ||
3143 | +public define One | ||
3144 | + make_precompiled_lexer | ||
3145 | + ( | ||
3146 | + String directory, | ||
3147 | + String lexer_name, | ||
3148 | + List(LexerItem($Token,$Aux)) lexer_description, | ||
3149 | + Word8 escape_char | ||
3150 | + ) = | ||
3151 | + // avoid reconstructing the lexer when not needed | ||
3152 | + with signature = to_hexa(sha1(extract_regexprs(lexer_description))), | ||
3153 | + file_name = directory/lexer_name+".anubis", | ||
3154 | + do_it = (One u) |-> | ||
3155 | + print("Creating '"+file_name+"'. Please wait ... "); forget(flush(stdout)); | ||
3156 | + make_precompiled_lexer_aux(signature,directory,lexer_name,lexer_description,escape_char); | ||
3157 | + print("Done.\n"); forget(flush(stdout)), | ||
3158 | + if read_signature(file_name) is | ||
3159 | + { | ||
3160 | + failure then do_it(unique), | ||
3161 | + success(s) then if s = signature | ||
3162 | + then unique | ||
3163 | + else do_it(unique) | ||
3164 | + }. | ||
3165 | + | ||
3166 | + | ||
3063 | public define One | 3167 | public define One |
3064 | make_precompiled_lexer | 3168 | make_precompiled_lexer |
3065 | ( | 3169 | ( |
anubis_dev/library/system/files.anubis
@@ -1020,14 +1020,14 @@ define Maybe(Int) | @@ -1020,14 +1020,14 @@ define Maybe(Int) | ||
1020 | find_the_first | 1020 | find_the_first |
1021 | ( | 1021 | ( |
1022 | Data_IO io, | 1022 | Data_IO io, |
1023 | - String looking_for, //String to search | 1023 | + ByteArray looking_for, //pattern to search for
1024 | Int size, //size of the string to search | 1024 | Int size, //size of the string to search |
1025 | - String buffer, | 1025 | + ByteArray buffer, |
1026 | Int current_pos, | 1026 | Int current_pos, |
1027 | Int buf_size, | 1027 | Int buf_size, |
1028 | Int buf_pos | 1028 | Int buf_pos |
1029 | )= | 1029 | )= |
1030 | - //println("buf_size :"+buf_size+ " buf_pos :"+buf_pos + " size : "+size); | 1030 | + //println("general current pos: "+current_pos+" | buffer size: "+buf_size+ " | buffer pos: "+buf_pos + " | search size: "+size); |
1031 | if (buf_size - buf_pos) < size then | 1031 | if (buf_size - buf_pos) < size then |
1032 | //println("New buffer request current pos "+current_pos+" buffer_pos "+buf_pos); | 1032 | //println("New buffer request current pos "+current_pos+" buffer_pos "+buf_pos); |
1033 | if read_bytes(io, 65536) is // <- block size is 64k | 1033 | if read_bytes(io, 65536) is // <- block size is 64k |
@@ -1035,23 +1035,30 @@ define Maybe(Int) | @@ -1035,23 +1035,30 @@ define Maybe(Int) | ||
1035 | failure then println("read_bytes failure");failure, //finish | 1035 | failure then println("read_bytes failure");failure, //finish |
1036 | time_out then println("read_bytes timeout");failure, //finish | 1036 | time_out then println("read_bytes timeout");failure, //finish |
1037 | success(ba) then | 1037 | success(ba) then |
1038 | - with new_buffer = to_string(extract(to_byte_array(buffer), buf_pos, buf_size) + ba), | 1038 | + //println("length of ba "+length(ba)); |
1039 | + with ex_ba = extract(buffer, buf_pos, buf_size), | ||
1040 | + //println("length of ex_ba "+length(ex_ba)); | ||
1041 | + with new_ba = ex_ba + ba, | ||
1042 | + //println("length of new_ba "+length(new_ba)); | ||
1043 | + with new_buffer = new_ba, // reuse the concatenation computed above | ||
1039 | //println("SUCCESS New buffer length "+length(new_buffer)+" new current_pos "+current_pos); | 1044 | //println("SUCCESS New buffer length "+length(new_buffer)+" new current_pos "+current_pos); |
1040 | find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0), | 1045 | find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0), |
1041 | truncated(ba) then | 1046 | truncated(ba) then |
1042 | if length(ba) = 0 then | 1047 | if length(ba) = 0 then |
1048 | + //println("last buffer current position ["+current_pos+"]"); | ||
1043 | failure //finish | 1049 | failure //finish |
1044 | else | 1050 | else |
1045 | - with new_buffer = to_string(extract(to_byte_array(buffer), buf_pos, buf_size) + ba), | 1051 | + with new_buffer = extract(buffer, buf_pos, buf_size) + ba, |
1046 | // println("TRUNCATED New buffer length "+length(new_buffer)+" new current_pos "+current_pos); | 1052 | // println("TRUNCATED New buffer length "+length(new_buffer)+" new current_pos "+current_pos); |
1047 | find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0) | 1053 | find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0) |
1048 | } | 1054 | } |
1049 | else | 1055 | else |
1050 | - if find_string(buffer, looking_for, buf_pos) is | 1056 | + if find_byte_array(buffer, looking_for, buf_pos) is |
1051 | { | 1057 | { |
1052 | - failure then find_the_first(io, looking_for, size, buffer, current_pos, buf_size, buf_size - (size-1)), | 1058 | + failure then |
1059 | + find_the_first(io, looking_for, size, buffer, current_pos, buf_size, buf_size - (size-1)), | ||
1053 | success(pos) then | 1060 | success(pos) then |
1054 | - println("pattern ["+looking_for+"] found at offset "+(current_pos+pos)); | 1061 | + //println("pattern ["+to_string(looking_for)+"] found at offset "+(current_pos+pos)); |
1055 | success(current_pos + pos) | 1062 | success(current_pos + pos) |
1056 | } | 1063 | } |
1057 | . | 1064 | . |
@@ -1126,7 +1133,7 @@ public define Maybe(Int) | @@ -1126,7 +1133,7 @@ public define Maybe(Int) | ||
1126 | Data_IO io, | 1133 | Data_IO io, |
1127 | String search_string | 1134 | String search_string |
1128 | ) = | 1135 | ) = |
1129 | - find_the_first(io, search_string, length(search_string), "", 0, 0, 0). | 1136 | + find_the_first(io, to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0). |
1130 | 1137 | ||
1131 | public define Maybe(Int) | 1138 | public define Maybe(Int) |
1132 | find_the_first | 1139 | find_the_first |
@@ -1138,7 +1145,7 @@ public define Maybe(Int) | @@ -1138,7 +1145,7 @@ public define Maybe(Int) | ||
1138 | { | 1145 | { |
1139 | failure then failure, | 1146 | failure then failure, |
1140 | success(f) then | 1147 | success(f) then |
1141 | - find_the_first(make_data_io(f), search_string, length(search_string), "", 0, 0, 0) | 1148 | + find_the_first(make_data_io(f), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0) |
1142 | }. | 1149 | }. |
1143 | 1150 | ||
1144 | public define Maybe(Int) | 1151 | public define Maybe(Int) |
@@ -1153,7 +1160,8 @@ public define Maybe(Int) | @@ -1153,7 +1160,8 @@ public define Maybe(Int) | ||
1153 | failure then failure, | 1160 | failure then failure, |
1154 | success(f) then | 1161 | success(f) then |
1155 | with size = file_size(filename), | 1162 | with size = file_size(filename), |
1156 | - find_the_first(make_data_io(f, start_position, size - start_position), search_string, length(search_string), "", 0, 0, 0) | 1163 | + //println("file size "+size); |
1164 | + find_the_first(make_data_io(f, start_position, size - start_position), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0) | ||
1157 | }. | 1165 | }. |
1158 | 1166 | ||
1159 | public define Maybe(Int) | 1167 | public define Maybe(Int) |
@@ -1168,6 +1176,6 @@ public define Maybe(Int) | @@ -1168,6 +1176,6 @@ public define Maybe(Int) | ||
1168 | { | 1176 | { |
1169 | failure then failure, | 1177 | failure then failure, |
1170 | success(f) then | 1178 | success(f) then |
1171 | - find_the_first(make_data_io(f, start_position, end_position - start_position), search_string, length(search_string), "", 0, 0, 0) | 1179 | + find_the_first(make_data_io(f, start_position, end_position - start_position), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0) |
1172 | }. | 1180 | }. |
1173 | 1181 |
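The restart position in the failure branch above, buf_size - (size-1), keeps the last size-1 bytes of the old buffer, so a pattern straddling two 64k blocks is still found: with a pattern of 3 bytes, the next refill prepends the final 2 bytes of the previous buffer to the newly read block. A usage sketch of the Data_IO wrapper (the file name is hypothetical; 'file', 'make_data_io' and 'find_the_first' appear in the hunks above):

    define One
      search_demo
      (
        One u
      ) =
      if file("data/big.log", read) is            // hypothetical input file
      {
        failure then print("cannot open the file.\n"),
        success(f) then
          if find_the_first(make_data_io(f), "needle") is
          {
            failure then print("pattern not found.\n"),
            success(pos) then print("first occurrence at byte offset "+pos+".\n")
          }
      }.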
anubis_dev/library/tools/bytearray.anubis
@@ -50,6 +50,16 @@ define One | @@ -50,6 +50,16 @@ define One | ||
50 | anubis_format_aux(s,b,bpl,i+1,c+1,ind). | 50 | anubis_format_aux(s,b,bpl,i+1,c+1,ind). |
51 | 51 | ||
52 | 52 | ||
53 | - | 53 | +public define ByteArray |
54 | + concat | ||
55 | + ( | ||
56 | + List(ByteArray) l | ||
57 | + ) = | ||
58 | + if l is | ||
59 | + { | ||
60 | + [ ] then constant_byte_array(0,0), | ||
61 | + [h . t] then h + concat(t) | ||
62 | + }. | ||
63 | + | ||
54 | 64 | ||
55 | 65 |
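A quick sketch of the new 'concat'. Only 'concat', 'constant_byte_array' and 'to_byte_array' come from the library; the strings are arbitrary:

    define One
      concat_demo
      (
        One u
      ) =
      // concat right-folds the list with +, returning the empty byte array
      // constant_byte_array(0,0) for [ ].
      print(to_string(concat([to_byte_array("foo"),
                              to_byte_array("bar"),
                              to_byte_array("baz")]))+"\n").   // prints foobarbaz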
anubis_dev/library/tools/int.anubis
@@ -14,15 +14,15 @@ | @@ -14,15 +14,15 @@ | ||
14 | 14 | ||
15 | *** (1) Absolute value. | 15 | *** (1) Absolute value. |
16 | 16 | ||
17 | - public define macro Int abs(Int x). returns the absolute value of x | 17 | + public define inline Int abs(Int x). returns the absolute value of x |
18 | 18 | ||
19 | 19 | ||
20 | *** (2) Max and min. | 20 | *** (2) Max and min. |
21 | 21 | ||
22 | The 'max' and 'min' functions respectively return the greatest and the smallest of their arguments. | 22 | The 'max' and 'min' functions respectively return the greatest and the smallest of their arguments. |
23 | 23 | ||
24 | -public define Int max(Int x, Int y). returns the greatest of x and y | ||
25 | -public define Int min(Int x, Int y). returns the smallest of x and y | 24 | +public define inline Int max(Int x, Int y). returns the greatest of x and y |
25 | +public define inline Int min(Int x, Int y). returns the smallest of x and y | ||
26 | 26 | ||
27 | public define Int max(NonEmptyList(Int) l). returns the greatest element of the list (which is non empty) | 27 | public define Int max(NonEmptyList(Int) l). returns the greatest element of the list (which is non empty) |
28 | public define Int min(NonEmptyList(Int) l). returns the smallest element of the list (which is non empty) | 28 | public define Int min(NonEmptyList(Int) l). returns the smallest element of the list (which is non empty) |
@@ -63,20 +63,25 @@ public define Int Int x ^ Int y. raises x to the p | @@ -63,20 +63,25 @@ public define Int Int x ^ Int y. raises x to the p | ||
63 | public define Bool odd (Int x). | 63 | public define Bool odd (Int x). |
64 | public define Bool even (Int x). | 64 | public define Bool even (Int x). |
65 | 65 | ||
66 | + *** (7) Greatest common divisor and least common multiple. | ||
67 | + | ||
68 | +public define Int gcd (Int x, Int y). greatest common divisor | ||
69 | +public define Int lcm (Int x, Int y). least common multiple | ||
70 | + | ||
66 | 71 | ||
67 | --- That's all for the public part ! ----------------------------------------------- | 72 | --- That's all for the public part ! ----------------------------------------------- |
68 | 73 | ||
69 | 74 | ||
70 | 75 | ||
71 | 76 | ||
72 | -public define macro Int | 77 | +public define inline Int |
73 | abs | 78 | abs |
74 | ( | 79 | ( |
75 | Int x | 80 | Int x |
76 | ) = | 81 | ) = |
77 | if x < 0 then -x else x. | 82 | if x < 0 then -x else x. |
78 | 83 | ||
79 | -public define Int // must not be macro because x and y are always computed | 84 | +public define inline Int // must not be macro because x and y are always computed |
80 | // and one of them would be computed twice | 85 | // and one of them would be computed twice |
81 | max | 86 | max |
82 | ( | 87 | ( |
@@ -116,7 +121,7 @@ public define Int | @@ -116,7 +121,7 @@ public define Int | ||
116 | 121 | ||
117 | 122 | ||
118 | 123 | ||
119 | -public define Int | 124 | +public define inline Int |
120 | min | 125 | min |
121 | ( | 126 | ( |
122 | Int x, | 127 | Int x, |
@@ -226,6 +231,38 @@ public define Bool | @@ -226,6 +231,38 @@ public define Bool | ||
226 | }. | 231 | }. |
227 | 232 | ||
228 | 233 | ||
234 | +public define Int | ||
235 | + gcd | ||
236 | + ( | ||
237 | + Int x, | ||
238 | + Int y | ||
239 | + ) = | ||
240 | + /* Euclid's algorithm */ | ||
241 | + if x > y then gcd(y,x) else | ||
242 | + if x = 0 then abs(y) else | ||
243 | + if x < 0 then gcd(-x,y) else | ||
244 | + if y/x is | ||
245 | + { | ||
246 | + failure then should_not_happen(0), | ||
247 | + success(p) then if p is (q,r) then gcd(r,x) | ||
248 | + }. | ||
249 | + | ||
250 | + | ||
251 | + | ||
252 | +public define Int | ||
253 | + lcm | ||
254 | + ( | ||
255 | + Int x, | ||
256 | + Int y | ||
257 | + ) = | ||
258 | + with d = gcd(x,y), | ||
259 | + if d = 0 | ||
260 | + then 0 | ||
261 | + else if (x*y)/d is | ||
262 | + { | ||
263 | + failure then should_not_happen(0), | ||
264 | + success(p) then if p is (q,_) then q | ||
265 | + }. | ||
229 | 266 | ||
230 | 267 | ||
231 | 268 |
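A worked check of the two new definitions, following Euclid's algorithm as coded above:

    // gcd(12,18): 18 = 1*12 + 6, hence gcd(6,12);
    //             12 = 2*6 + 0,  hence gcd(0,6) = 6.
    // lcm(12,18) = (12*18)/6 = 36.
    define One
      gcd_demo
      (
        One u
      ) =
      print("gcd(12,18) = "+gcd(12,18)+"\n");     // prints 6
      print("lcm(12,18) = "+lcm(12,18)+"\n").     // prints 36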
anubis_dev/library/tools/line_reader.anubis
@@ -38,7 +38,7 @@ | @@ -38,7 +38,7 @@ | ||
38 | 38 | ||
39 | //--------------------------------------------------------------------------- | 39 | //--------------------------------------------------------------------------- |
40 | 40 | ||
41 | -read lexical_analysis/fast_lexer_3.anubis | 41 | +read lexical_analysis/fast_lexer_4.anubis |
42 | 42 | ||
43 | 43 | ||
44 | type Token: | 44 | type Token: |
@@ -46,13 +46,13 @@ type Token: | @@ -46,13 +46,13 @@ type Token: | ||
46 | eol. | 46 | eol. |
47 | 47 | ||
48 | public type LineReaderLexer: | 48 | public type LineReaderLexer: |
49 | - line_reader_lexer(LexingStream(One) -> One -> LexerOutput(Token) /*lexer_base*/). | 49 | + line_reader_lexer((LexingStream, One) -> One -> LexerOutput(Token) /*lexer_base*/). |
50 | 50 | ||
51 | public type LineReader: | 51 | public type LineReader: |
52 | line_reader(One -> LexerOutput(Token) /*lexer*/, | 52 | line_reader(One -> LexerOutput(Token) /*lexer*/, |
53 | One -> Int /*offset*/, | 53 | One -> Int /*offset*/, |
54 | LineReaderLexer /*lexer_base*/, | 54 | LineReaderLexer /*lexer_base*/, |
55 | - LexingStream(One) lexing_stream). | 55 | + LexingStream lexing_stream). |
56 | 56 | ||
57 | public define Int | 57 | public define Int |
58 | current_offset | 58 | current_offset |
@@ -96,30 +96,30 @@ public define Maybe(String) | @@ -96,30 +96,30 @@ public define Maybe(String) | ||
96 | public define Maybe(LineReaderLexer) | 96 | public define Maybe(LineReaderLexer) |
97 | make_line_reader_lexer | 97 | make_line_reader_lexer |
98 | = | 98 | = |
99 | - if make_lexer_and_automaton([ | ||
100 | - lexer_item("#r?#n", return((ByteArray b, LexingTools t, One aux) |-> token(eol))), | ||
101 | - lexer_item("#r", return((ByteArray b, LexingTools t, One aux) |-> token(eol))), | ||
102 | - lexer_item("[^\r\n]*", return((ByteArray b, LexingTools t, One aux) |-> token(line(to_string(b))))), | ||
103 | - ], | 99 | + if make_lexer([ |
100 | + lexer_item("#r?#n", return((ByteArray b, LexingTools t, One aux) |-> token(eol))), | ||
101 | + lexer_item("#r", return((ByteArray b, LexingTools t, One aux) |-> token(eol))), | ||
102 | + lexer_item("[^\r\n]*", return((ByteArray b, LexingTools t, One aux) |-> token(line(to_string(b))))), | ||
103 | + ], | ||
104 | '#') is | 104 | '#') is |
105 | { | 105 | { |
106 | error(msg) then print("Syntax error in regular expression: "+to_English(msg)+"\n"); failure, | 106 | error(msg) then print("Syntax error in regular expression: "+to_English(msg)+"\n"); failure, |
107 | - ok(p) then if p is (lexer, automaton) then success(line_reader_lexer(lexer)) | 107 | + ok(lexer) then success(line_reader_lexer(lexer)) |
108 | }. | 108 | }. |
109 | 109 | ||
110 | public define LineReader | 110 | public define LineReader |
111 | make_line_reader | 111 | make_line_reader |
112 | ( | 112 | ( |
113 | - LexingStream(One) ls, | 113 | + LexingStream ls, |
114 | LineReaderLexer make_lexer | 114 | LineReaderLexer make_lexer |
115 | ) = | 115 | ) = |
116 | if make_lexer is line_reader_lexer(lexer) then | 116 | if make_lexer is line_reader_lexer(lexer) then |
117 | - line_reader(lexer(ls), (One u) |-> offset(ls), make_lexer, ls). | 117 | + line_reader(lexer(ls, unique), (One u) |-> offset(ls), make_lexer, ls). |
118 | 118 | ||
119 | public define Maybe(LineReader) | 119 | public define Maybe(LineReader) |
120 | make_line_reader | 120 | make_line_reader |
121 | ( | 121 | ( |
122 | - LexingStream(One) ls, | 122 | + LexingStream ls, |
123 | ) = | 123 | ) = |
124 | if make_line_reader_lexer is | 124 | if make_line_reader_lexer is |
125 | { | 125 | { |
@@ -135,7 +135,7 @@ public define Maybe(LineReader) | @@ -135,7 +135,7 @@ public define Maybe(LineReader) | ||
135 | ( | 135 | ( |
136 | String s, | 136 | String s, |
137 | ) = | 137 | ) = |
138 | - make_line_reader(make_lexing_stream("", s, unique)). | 138 | + make_line_reader(make_lexing_stream("", s)). |
139 | 139 | ||
140 | public define Maybe(LineReader) | 140 | public define Maybe(LineReader) |
141 | make_line_reader | 141 | make_line_reader |
@@ -146,8 +146,7 @@ public define Maybe(LineReader) | @@ -146,8 +146,7 @@ public define Maybe(LineReader) | ||
146 | if make_lexing_stream("", /* preambule */ | 146 | if make_lexing_stream("", /* preambule */ |
147 | f, /* the opened file */ | 147 | f, /* the opened file */ |
148 | 65536, /* size of buffer for the lexing stream */ | 148 | 65536, /* size of buffer for the lexing stream */ |
149 | - timeout, /* timeout (seconds) */ | ||
150 | - unique) | 149 | + timeout) /* timeout (seconds) */ |
151 | is | 150 | is |
152 | { | 151 | { |
153 | failure then print("cannot make lexing stream.\n"); failure, | 152 | failure then print("cannot make lexing stream.\n"); failure, |
@@ -159,7 +158,7 @@ public define LineReader | @@ -159,7 +158,7 @@ public define LineReader | ||
159 | reset_line_reader | 158 | reset_line_reader |
160 | ( | 159 | ( |
161 | LineReader lr, | 160 | LineReader lr, |
162 | - LexingStream(One) ls, | 161 | + LexingStream ls, |
163 | ) = | 162 | ) = |
164 | if lr is line_reader(lexer, offset, make_lexer, _) then | 163 | if lr is line_reader(lexer, offset, make_lexer, _) then |
165 | make_line_reader(ls, make_lexer). | 164 | make_line_reader(ls, make_lexer). |
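A sketch of the updated construction path with the non-parameterized 'LexingStream'. Both 'make_line_reader' overloads appear above; 'current_offset' is declared earlier in this file, and its LineReader argument type is an assumption here:

    define One
      line_reader_demo
      (
        One u
      ) =
      if make_line_reader("first line\nsecond line\n") is
      {
        failure then print("cannot build the line reader.\n"),
        success(lr) then print("reader created at offset "+current_offset(lr)+".\n")
      }.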
anubis_dev/library/tools/list.anubis
@@ -922,3 +922,33 @@ public define (List($T1), List($T2)) unzip(List(($T1, $T2)) l) = unzip(reverse(l | @@ -922,3 +922,33 @@ public define (List($T1), List($T2)) unzip(List(($T1, $T2)) l) = unzip(reverse(l | ||
922 | 922 | ||
923 | 923 | ||
924 | 924 | ||
925 | + Compute the list of all sublists of a list (beware: for a list of length n, | ||
926 | + this gives a list of length 2^n). | ||
927 | + | ||
928 | +public define List(List($T)) | ||
929 | + sublists | ||
930 | + ( | ||
931 | + List($T) l | ||
932 | + ) = | ||
933 | + if l is | ||
934 | + { | ||
935 | + [ ] then [[ ]], | ||
936 | + [h . t] then with p = sublists(t), | ||
937 | + map((List($T) u) |-> [h . u],p) + p | ||
938 | + }. | ||
939 | + | ||
940 | + | ||
941 | + Construct a 'constant' list containing the same datum any number of times. | ||
942 | + | ||
943 | + constant_list(x,6) is just: [x,x,x,x,x,x] | ||
944 | + | ||
945 | +public define List($T) | ||
946 | + constant_list | ||
947 | + ( | ||
948 | + $T x, | ||
949 | + Int n | ||
950 | + ) = | ||
951 | + if n =< 0 then [ ] else [x . constant_list(x,n-1)]. | ||
952 | + | ||
953 | + | ||
954 | + |
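A concrete expansion of 'sublists', following the definition above (the head is either kept, via the map, or dropped):

    // sublists([1,2]) = [[1,2],[1]] + [[2],[ ]]
    //                 = [[1,2],[1],[2],[ ]]        (2^2 = 4 sublists)
    define One
      sublists_demo
      (
        One u
      ) =
      print("a list of length 3 has "+length(sublists([1,2,3]))+" sublists.\n").   // 8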
anubis_dev/library/tools/read_table.anubis
anubis_dev/manuals/en/Anubis-doc-1-14.pdf
anubis_dev/manuals/en/Anubis-doc-1-14.tex
@@ -1011,6 +1011,12 @@ independently of the many definitions you may give to the symbol \cod{[~]} and t | @@ -1011,6 +1011,12 @@ independently of the many definitions you may give to the symbol \cod{[~]} and t | ||
1011 | This is enough on syntax for reading the subsequent sections. We will say more on syntax later. | 1011 | This is enough on syntax for reading the subsequent sections. We will say more on syntax later. |
1012 | 1012 | ||
1013 | 1013 | ||
1014 | +\section{\cod{\_\_LINE\_\_}, \cod{\_\_FILE\_\_}, \cod{\_\_DIR\_\_} and \cod{\_\_TIME\_\_}} | ||
1015 | +The keyword \cod{\_\_LINE\_\_} represents the line number (of the source file) where this keyword is written. This | ||
1016 | +is a datum of type \cod{Word32}. Similarly, \cod{\_\_FILE\_\_} and \cod{\_\_DIR\_\_} represent the absolute file path and | ||
1017 | +absolute directory path of the source file in which they appear. They are of type \cod{String}. The keyword | ||
1018 | +\cod{\_\_TIME\_\_} expands to the time (of type \cod{Word32}) at which the | ||
1019 | +compiler encounters it during the compilation of the source file. | ||
1014 | 1020 | ||
1015 | 1021 | ||
1016 | \section{Automatically generated files} | 1022 | \section{Automatically generated files} |
@@ -1019,7 +1025,7 @@ an ``automatically generated file''. There are several examples | @@ -1019,7 +1025,7 @@ an ``automatically generated file''. There are several examples | ||
1019 | of metaprograms in the library such as~: \APG\ (see section \myref{sec:APG}), \cod{fast\_lexer\_3} (see section | 1025 | of metaprograms in the library such as~: \APG\ (see section \myref{sec:APG}), \cod{fast\_lexer\_3} (see section |
1020 | \myref{sec:fastlexer}), \cod{metaSQL} (see section \myref{sec:metaSQL}), \dots\ | 1026 | \myref{sec:fastlexer}), \cod{metaSQL} (see section \myref{sec:metaSQL}), \dots\ |
1021 | 1027 | ||
1022 | -Autmatically generated files should never be modified because any modification is destroyed by a new execution of the | 1028 | +Automatically generated files should never be modified because any modification is destroyed by a new execution of the |
1023 | metaprogram. Only true source files should be modified. As a consequence, generated files encumber your working | 1029 | metaprogram. Only true source files should be modified. As a consequence, generated files encumber your working
1024 | directory, and we have adopted the policy of putting them into a subdirectory (of the current directory), uniformly named \fn{generated}. | 1030 | directory, and we have adopted the policy of putting them into a subdirectory (of the current directory), uniformly named \fn{generated}. |
1025 | Putting generated files into the \fn{generated} subdirectory must be the default behavior of metaprograms. An option of the | 1031 | Putting generated files into the \fn{generated} subdirectory must be the default behavior of metaprograms. An option of the |
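To make the new manual section concrete, a hedged source-level sketch (the path is hypothetical; the semantics follow the paragraph added to the manual above):

    // If this file is /home/me/project/src/main.anubis, then at this point:
    //   __FILE__ = "/home/me/project/src/main.anubis"   (String)
    //   __DIR__  = "/home/me/project/src"               (String)
    //   __LINE__ = the number of the line where it appears   (Word32)
    define One
      where_am_i
      (
        One u
      ) =
      print("compiled from "+__FILE__+" (directory "+__DIR__+")\n").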