Commit 633d58adaff26e3bc529a9b96dfe3c24a8cfea57

Authored by HerrmannM
2 parents df8d5af6 bcb6ccd9

correction + iterator

anubis_dev/compiler/src/compil.h
... ... @@ -200,7 +200,10 @@ extern void NormalizeFileName(char *pathName);
200 200 of the form mcons3(file,line,col) where file, line and col Lisp integers.
201 201 */
202 202  
  203 +extern char *path_prefix(char *name);
  204 +
203 205 #define file_in(x) (assert(((int)(integer_value(car(x)))) < max_already_included), already_included[integer_value(car(x))])
  206 +#define dir_in(x) path_prefix(file_in(x))
204 207 #define line_in(x) integer_value(second(x))
205 208 #define col_in(x) integer_value(cdr2(x))
206 209  
... ... @@ -631,6 +634,7 @@ extern Expr linecol(void);
631 634 item(unlock_var)\
632 635 item(__line__)\
633 636 item(__file__)\
  637 + item(__dir__)\
634 638 item(lazy)\
635 639  
636 640 /* true 'dynamic' modules and 'load_adm' (added in version 1.13) */
... ...
anubis_dev/compiler/src/compile.c
... ... @@ -3439,6 +3439,7 @@ Expr compile_term(Expr head,
3439 3439 }
3440 3440 break;
3441 3441  
  3442 + case __dir__:
3442 3443 case __file__: /* (__file__ . <lisp string>) */
3443 3444 {
3444 3445 code = cons(cons(string,
... ...
anubis_dev/compiler/src/expr.cpp
... ... @@ -591,6 +591,7 @@ Expr _symbols_in_interp(Expr head)
591 591 case operation:
592 592 case __line__:
593 593 case __file__:
  594 + case __dir__:
594 595 return nil;
595 596  
596 597 case macro:
... ... @@ -755,6 +756,7 @@ Expr _symbols_in_term(Expr term)
755 756 case todo: /* (todo <lc> <filename> . <text>) */
756 757 case __line__:
757 758 case __file__:
  759 + case __dir__:
758 760 case byte_array:
759 761 return nil;
760 762  
... ...
anubis_dev/compiler/src/grammar.y
... ... @@ -81,7 +81,7 @@ int is_global = 0;
81 81 %token<expr> yy__config_file yy__verbose yy__stop_after yy__mapsto yy__rec_mapsto yy__language
82 82 %token<expr> yy__mapstoo yy__rec_mapstoo yy__arroww
83 83 %token<expr> yy__conf_int yy__conf_string yy__conf_symbol
84   -%token<expr> yy__enddot yy__eof yy__LINE__ yy__FILE__ yy__colon_equals
  84 +%token<expr> yy__enddot yy__eof yy__LINE__ yy__FILE__ yy__DIR__ yy__colon_equals
85 85 %token<expr> yy__integer yy__macro_integer yy__dummy yy__end_LBA
86 86 %token<expr> yy__defaults_as yy__lazy
87 87  
... ... @@ -666,6 +666,7 @@ Term: yy__alert { alert_obsolete($1);
666 666 | yy__symbol yy__colon_equals Term { $$ = $3; }
667 667 | yy__LINE__ %prec prec_symbol { $$ = $1; }
668 668 | yy__FILE__ %prec prec_symbol { $$ = $1; }
  669 +| yy__DIR__ %prec prec_symbol { $$ = $1; }
669 670 //| yy__Symbol %prec prec_symbol { $$ = mcons3(symbol,linecol(),$1); }
670 671 | yy__lpar yy__rpar %prec prec_symbol { $$ = mcons3(symbol,linecol(),pdstr_voidpars); }
671 672 | yy__integer %prec prec_symbol { $$ = mcons3(car($1),linecol(), cdr($1)); }
... ...
anubis_dev/compiler/src/interp.c
... ... @@ -2316,6 +2316,7 @@ term_interpretations(Expr ttype, /* required type for that term (may contai
2316 2316  
2317 2317 case __line__: /* (__line__ . <lisp integer>) */
2318 2318 case __file__: /* (__file__ . <lisp string>) */
  2319 + case __dir__:
2319 2320 result = list1(cons(term,env));
2320 2321 break;
2321 2322  
... ...
anubis_dev/compiler/src/lexer.l
... ... @@ -326,7 +326,7 @@ extern void NormalizeFileName(char *pathName);
326 326  
327 327  
328 328 /* open an Anubis source file. */
329   -FILE *fopensrc(const char *name, int n)
  329 +FILE *fopensrc(const char *name, int n) // n is for debugging purposes (identifies where the call comes from)
330 330 /* The given name may be either absolute or relative. It
331 331 must be immediately converted into an absolute path,
332 332 because the compiler uses ONLY absolute file paths
... ... @@ -544,7 +544,7 @@ int is_already_included(char *name)
544 544 /* same up to case */
545 545 if (!strcmp(already_included[i],fopensrc_abs_path))
546 546 {
547   - /* exactely same: file already compiled */
  547 + /* exactly same: file already compiled */
548 548 fclose(fp);
549 549 return 1;
550 550 }
... ... @@ -683,6 +683,8 @@ void come_back(void)
683 683 if (verbose) printf("Returning to directory: %s\n",
684 684 include_dir[include_stack_ptr]);
685 685 current_file_abs_path = abs_file_paths_stack[include_stack_ptr];
  686 + fopensrc_abs_path = current_file_abs_path; // these two variables seem to be redundant !
  687 + fopensrc_cur_dir = include_dir[include_stack_ptr];
686 688 current_file_id = get_file_id(current_file_abs_path);
687 689 saturate_visi_table();
688 690 if (show_reads) show_come_back(current_file_abs_path,current_file_id);
... ... @@ -986,6 +988,8 @@ W [\ \t\r\n]
986 988 return yy__LINE__; }
987 989 <PAR>__FILE__ { yylval.expr = cons(__file__,new_string(current_file_abs_path));
988 990 return yy__FILE__; }
  991 +<PAR>__DIR__ { yylval.expr = cons(__dir__,new_string(path_prefix(current_file_abs_path)));
  992 + return yy__DIR__; }
989 993 <PAR>__TIME__ { sprintf(str_buf,"%d",(int)time(NULL));
990 994 yylval.expr = mcons4(of_type,linecol(),pdstr_Word32,mcons3(integer_10,
991 995 linecol(),
... ...
anubis_dev/compiler/src/msgtexts.c
... ... @@ -1057,7 +1057,7 @@ const char *msgtext_undefined_macro[] =
1057 1057 const char *msgtext_too_big[] =
1058 1058 {
1059 1059 " Your program is too big for this version of the compiler.\n"
1060   - " Consider spilling it using secondary modules.\n\n"
  1060 +    " Consider splitting it using secondary modules.\n\n"
1061 1061 };
1062 1062  
1063 1063 const char *msgtext_similar_paths[] =
... ...
anubis_dev/compiler/src/replace.c
... ... @@ -505,6 +505,7 @@ static Expr rename_bound(Expr head, /* the expression within which bound
505 505  
506 506 case __line__:
507 507 case __file__:
  508 + case __dir__:
508 509 {
509 510 result = head;
510 511 }
... ... @@ -955,6 +956,13 @@ static Expr replace_aux(Expr head, /* where bound variables are already ren
955 956 }
956 957 break;
957 958  
  959 + case __dir__:
  960 + /* (__dir__ . <lisp string>) */
  961 + {
  962 + return cons(__dir__,new_string(dir_in(second(op)))); /* change the value of __DIR__ */
  963 + }
  964 + break;
  965 +
958 966 default: assert(0);
959 967 }
960 968 }
... ...
anubis_dev/compiler/src/show.c
... ... @@ -538,6 +538,12 @@ void show_interpretation(FILE *fp,
538 538 }
539 539 break;
540 540  
  541 + case __dir__:
  542 + {
  543 + xpos += fprintf(fp,"__DIR__");
  544 + }
  545 + break;
  546 +
541 547 #if 0
542 548 case integer: /* (integer <lc> . <Cint>) */ Obsolete: replaced by 'anb_int_10' and 'anb_int_16'
543 549 {
... ...
anubis_dev/compiler/src/templates.c
... ... @@ -255,6 +255,11 @@
255 255 {
256 256 }
257 257  
  258 + case __dir__:
  259 + /* (__dir__ . <lisp string>) */
  260 + {
  261 + }
  262 +
258 263 default: assert(0);
259 264 }
260 265  
... ...
anubis_dev/compiler/src/typetools.c
... ... @@ -413,6 +413,7 @@ Expr _type_from_interpretation(Expr head, Expr env)
413 413 break;
414 414  
415 415 case __file__:
  416 + case __dir__:
416 417 result = type_String;
417 418 break;
418 419  
... ...
anubis_dev/library/data_base/import_csv.anubis
1 1 ๏ปฟ
2   -
  2 + This file is obsoleted by data_base/read_csv.anubis
  3 +
  4 +
3 5 The Anubis Project
4 6  
5 7 Reading CSV tables.
... ... @@ -54,9 +56,9 @@
54 56  
55 57  
56 58  
57   -read tools/basis.anubis
  59 + read tools/basis.anubis
58 60  
59   -public define Maybe(List(List(String)))
  61 + public define Maybe(List(List(String)))
60 62 read_table
61 63 (
62 64 String filename,
... ... @@ -97,7 +99,7 @@ public define Maybe(List(List(String)))
97 99 Checking if a list of Word8 'candidate' is a prefix in a line separator.
98 100  
99 101  
100   -define Bool
  102 + define Bool
101 103 begins_line_separator
102 104 (
103 105 List(Word8) candidate,
... ... @@ -121,7 +123,7 @@ define Bool
121 123  
122 124 Here is the test.
123 125  
124   -define Bool
  126 + define Bool
125 127 begins_line_separator
126 128 (
127 129 List(Word8) candidate,
... ... @@ -141,7 +143,7 @@ define Bool
141 143  
142 144 We have two cross recursive functions 'read_table' and 'read_more_lines'.
143 145  
144   -define List(List(String))
  146 + define List(List(String))
145 147 read_table
146 148 (
147 149 RStream file,
... ... @@ -162,7 +164,7 @@ define List(List(String))
162 164 file. If end of file is read, the last line of the table is empty. Otherwise, we
163 165 return to 'read_table', with the correct 'current_line' and 'current_cell'.
164 166  
165   -define List(List(String))
  167 + define List(List(String))
166 168 read_more_lines
167 169 (
168 170 RStream file,
... ... @@ -185,7 +187,7 @@ define List(List(String))
185 187  
186 188 Reading a table from an already opened file.
187 189  
188   -define List(List(String))
  190 + define List(List(String))
189 191 read_table
190 192 (
191 193 RStream file,
... ... @@ -218,7 +220,7 @@ define List(List(String))
218 220  
219 221 Now, here is our tool.
220 222  
221   -public define Maybe(List(List(String)))
  223 + public define Maybe(List(List(String)))
222 224 read_table
223 225 (
224 226 String filename,
... ...
anubis_dev/library/data_base/read_csv.anubis 0 โ†’ 100644
  1 +
  2 + The Anubis Project
  3 +
  4 + Reading a csv (Comma Separated Values) file.
  5 +
  6 +
  7 +read tools/basis.anubis
  8 +read tools/time.anubis
  9 +read lexical_analysis/fast_lexer_4.anubis
  10 +
  11 +
  12 + The function returned by the function below reads a single record from a CSV input source.
  13 +
  14 +public type ReadCsvResult:
  15 + end_of_input,
  16 + error (String message), // an error message
  17 + ok (Int offset, List(String) record). // a single record and the offset of the end of
  18 + // this record.
  19 +
  20 +public define One -> ReadCsvResult
  21 + make_read_csv_line
  22 + (
  23 + LexingStream ls, // lexing stream to be constructed from the input (see fast_lexer_4.anubis)
  24 + String sep, // cell separator (can be "," or ";")
  25 + List(Int) cols_to_get // list of column numbers you want to get
  26 + ).
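
 For instance, with 'ls' a LexingStream built on the csv file (as in the command line test at the
 end of this file), make_read_csv_line(ls, ",", [0,2]) returns a function which, at each call, reads
 the next record and yields the cells of columns 0 and 2 (in an 'ok' result, together with the offset
 of the end of the record), until it returns end_of_input. The separator and the column list are of
 course just example values.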
  27 +
  28 +
  29 +
  30 + --- That's all for the public part ! ------------------------------------------------------------------
  31 +
  32 +
  33 +type CellPrefixToken: // reading the beginning of a cell until the first double quote or separator
  34 + eof,
  35 + double_quote, // if double quote, ignore the content up to here and switch to another lexer
  36 + separator(ByteArray). // if separator, keep everything before this separator
  37 +
  38 +
  39 + This lexer is for reading the beginning of a cell.
  40 +
  41 +define List(LexerItem(CellPrefixToken,One))
  42 + begin_cell_description
  43 + (
  44 + String sep
  45 + ) =
  46 + [
  47 + lexer_item("[# #t]*\"",
  48 + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
  49 + token(double_quote))),
  50 +
  51 + lexer_item("[^#"+sep+"\"#r#n]*#"+sep,
  52 + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
  53 + token(separator(extract(0,l-1))))),
  54 +
  55 + lexer_item("[^#"+sep+"\"#r#n]*#n" ,
  56 + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
  57 + token(separator(extract(0,l-1))))),
  58 +
  59 + lexer_item("[^#"+sep+"\"#r#n]*(#r#n)" ,
  60 + return(((Int,Int) -> ByteArray extract, Int l, LexingTools t, One u) |->
  61 + token(separator(extract(0,l-2)))))
  62 + ].
  63 +
  64 +
  65 + This lexer is for reading within the double quotes (if the cell is double quoted)
  66 +
  67 +
  68 +type InToken:
  69 + double_quote, // can also be the end of file
  70 + two_double_quotes,
  71 + part(ByteArray). // part of cell
  72 +
  73 +
  74 +define List(LexerItem(InToken,One))
  75 + read_quoted_cell_description
  76 + (
  77 + String sep
  78 + ) =
  79 + [
  80 + lexer_item("[^\"]*" ,
  81 + return((ByteArray b, LexingTools t, One u) |-> token(part(b)))),
  82 +
  83 + lexer_item("\"\"" ,
  84 + return((ByteArray b, LexingTools t, One u) |-> token(two_double_quotes))),
  85 +
  86 + lexer_item("\"[# #t]*(("+sep+")|(#n)|(#r#n))" ,
  87 + return((ByteArray b, LexingTools t, One u) |-> token(double_quote)))
  88 + ].
  89 +
  90 +
  91 + The lexer described below skips a cell (and eats the trailing separator).
  92 +
  93 +define List(LexerItem(One,One))
  94 + skip_cell_description
  95 + (
  96 + String sep
  97 + ) =
  98 + [
  99 + lexer_item("(([^\"#n#r#"+sep+"]*)|([# #t]*\"([^\"]|(\"\"))*\"[# #t]*))#"+sep,
  100 + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |-> token(unique)))
  101 + ].
  102 +
  103 + The lexer described below skips to end of line (and eats the end of line).
  104 +
  105 +type EOL_Token:
  106 + eol_offset(Int offset).
  107 +
  108 +define List(LexerItem(EOL_Token,One))
  109 + to_eol_description
  110 + =
  111 + [
  112 + lexer_item("([^#r#n]*)((#n)|(#r#n))",
  113 + return(((Int,Int) -> ByteArray b, Int l, LexingTools t, One u) |->
  114 + token(eol_offset(t.offset(unique)))))
  115 + ].
  116 +
  117 +
  118 +
  119 +global define One
  120 + make_the_lexers
  121 + (
  122 + List(String) _
  123 + ) =
  124 + make_precompiled_lexer("csv_c_begin_cell", begin_cell_description(","), '#');
  125 + make_precompiled_lexer("csv_c_quoted_cell", read_quoted_cell_description(","), '#');
  126 + make_precompiled_lexer("csv_c_skip_cell", skip_cell_description(","), '#');
  127 + make_precompiled_lexer("csv_s_begin_cell", begin_cell_description(";"), '#');
  128 + make_precompiled_lexer("csv_s_quoted_cell", read_quoted_cell_description(";"), '#');
  129 + make_precompiled_lexer("csv_s_skip_cell", skip_cell_description(";"), '#');
  130 + make_precompiled_lexer("csv_to_eol", to_eol_description, '#').
  131 +
  132 +
  133 +execute anbexec make_the_lexers
  134 +read generated/csv_c_begin_cell.anubis
  135 +read generated/csv_c_quoted_cell.anubis
  136 +read generated/csv_c_skip_cell.anubis
  137 +read generated/csv_s_begin_cell.anubis
  138 +read generated/csv_s_quoted_cell.anubis
  139 +read generated/csv_s_skip_cell.anubis
  140 +read generated/csv_to_eol.anubis
  141 +
  142 +
  143 + define One
  144 + repeat
  145 + (
  146 + Int n,
  147 + One -> One f
  148 + ) =
  149 + if n =< 0 then unique else f(unique); repeat(n-1,f).
  150 +
  151 +
  152 + define Int -> Result(String,ByteArray)
  153 + read_next_cell
  154 + (
  155 + One -> One skip_cell,
  156 + One -> Result(String,ByteArray) read_cell
  157 + ) =
  158 + (Int n) |-> repeat(n,skip_cell); read_cell(unique).
  159 +
  160 +
  161 +type CB_Result:
  162 + eof,
  163 + error(String),
  164 + skip,
  165 + cell(String),
  166 + eol(Int offset).
  167 +
  168 +
  169 +define One -> ReadCsvResult
  170 + make_read_csv_line
  171 + (
  172 + List(One -> CB_Result) cbs,
  173 + One -> One to_eol
  174 + ) =
  175 + with f = (List(One -> CB_Result) l, List(String) so_far) |-f1->
  176 + if l is
  177 + {
  178 + [ ] then ok(0, reverse(so_far)),
  179 + [f . g] then
  180 + if f(unique) is
  181 + {
  182 + eof then end_of_input,
  183 + error(e) then to_eol(unique); error(e),
  184 + skip then (ReadCsvResult)f1(g, so_far),
  185 + cell(c) then
  186 + //print("read cell ["+c+"] ");
  187 + (ReadCsvResult)f1(g, [c. so_far]),
  188 + eol(offset) then
  189 + if g is
  190 + {
  191 + [] then ok(offset, reverse(so_far)),
  192 + [_ . _] then error("End Of Line unexpected")
  193 + }
  194 +
  195 + }
  196 + },
  197 + (One u) |-> f(cbs, []).
  198 +
  199 +
  200 +
  201 +define List(One -> CB_Result)
  202 + make_cbs
  203 + (
  204 + One -> CB_Result skip_cell,
  205 + One -> CB_Result read_cell,
  206 + One -> CB_Result to_eol,
  207 + List(Int) cols_to_get // must be strictly increasing
  208 + ) =
  209 + if cols_to_get is
  210 + {
  211 + [ ] then [to_eol],
  212 + [n . l] then
  213 + with rest = make_cbs(skip_cell,read_cell,to_eol, map((Int x) |-> x - n -1,l)),
  214 + constant_list(skip_cell,n) + [read_cell . rest]
  215 + }.
  216 +
  217 +
  218 + Just a test for make_cbs:
  219 + global define One
  220 + gaga
  221 + (
  222 + List(String) args
  223 + ) =
  224 + with skip_cell = (One u) |-> skip,
  225 + read_cell = (One u) |-> (CB_Result)cell(to_byte_array("")),
  226 + to_eol = (One u) |-> (CB_Result)error(""),
  227 + l = [3,5,(Int)10],
  228 + with r = make_cbs(skip_cell,read_cell,to_eol,l),
  229 + forget(map((One -> CB_Result f) |-> if f(unique) is
  230 + {
  231 + error(e) then print("eol\n\n"),
  232 + skip then print("skip\n"),
  233 + cell(_) then print("cell\n")
  234 + }, r)).
  235 +
  236 +
  237 +
  238 +
  239 +public define One -> ReadCsvResult
  240 + make_read_csv_line
  241 + (
  242 + LexingStream ls,
  243 + String sep,
  244 + List(Int) cols_to_get
  245 + ) =
  246 + with lex_skip = retrieve_lexer(skip_cell_description(sep), if sep = "," then csv_c_skip_cell else csv_s_skip_cell)(ls,unique),
  247 + lex_begin = retrieve_lexer(begin_cell_description(sep), if sep = "," then csv_c_begin_cell else csv_s_begin_cell)(ls,unique),
  248 + lex_in = retrieve_lexer(read_quoted_cell_description(sep), if sep = "," then csv_c_quoted_cell else csv_s_quoted_cell)(ls,unique),
  249 + lex_eol = retrieve_lexer(to_eol_description, csv_to_eol)(ls,unique),
  250 + skip_cell = (One u) |-> (CB_Result)if lex_skip(u) is
  251 + {
  252 + end_of_input then eof,
  253 + error(b,line,col) then error("skip "+line+":"+col+" :"+to_string(b)),
  254 + token(t) then skip
  255 + },
  256 + begin_cell = (One u) |-> (Result(String,CellPrefixToken))if lex_begin(u) is
  257 + {
  258 + end_of_input then ok(eof),
  259 + error(b,line,col) then error("begin "+to_string(b)),
  260 + token(t) then ok(t)
  261 + },
  262 + read_in_aux = (List(ByteArray) so_far) |-aux-> (CB_Result)if lex_in(unique) is
  263 + {
  264 + end_of_input then eof,
  265 + error(b,line,col) then error("in "+to_string(b)),
  266 + token(t) then if t is
  267 + {
  268 + double_quote then cell(to_string(concat(reverse(so_far)))),
  269 + two_double_quotes then aux([{0x22} . so_far]),
  270 + part(p) then aux([p . so_far])
  271 + }
  272 + },
  273 + read_in = (One u) |-> read_in_aux([]),
  274 + read_cell = (One u) |-> if begin_cell(u) is
  275 + {
  276 + error(e) then error(e),
  277 + ok(b) then if b is
  278 + {
  279 + eof then eof,
  280 + double_quote then read_in(u),
  281 + separator(c) then cell(to_string(c))
  282 + }
  283 + },
  284 + to_eol = (One u) |-> if lex_eol(u) is
  285 + {
  286 + end_of_input then eof,
  287 + error(b,line,col) then error("eol "+to_string(b)),
  288 + token(t) then if t is eol_offset(offset) then eol(offset)
  289 + },
  290 + make_read_csv_line(make_cbs(skip_cell,read_cell,to_eol,cols_to_get),
  291 + (One u) |-> forget(to_eol(u))).
  292 +
  293 +
  294 +
  295 + *** Command line test.
  296 +
  297 +define Maybe(List(Int))
  298 + map_to_Int
  299 + (
  300 + List(String) l
  301 + ) =
  302 + if l is
  303 + {
  304 + [ ] then success([ ]),
  305 + [h . t] then if decimal_scan(h) is
  306 + {
  307 + failure then failure,
  308 + success(n1) then if map_to_Int(t) is
  309 + {
  310 + failure then failure,
  311 + success(l1) then success([n1 . l1])
  312 + }
  313 + }
  314 + }.
  315 +
  316 +
  317 +define One
  318 + print_csv_line
  319 + (
  320 + List(String) l
  321 + ) =
  322 + print("| ");
  323 + map_forget((String b) |-> print(b+" | "),l).
  324 +
  325 +
  326 +define One syntax = print("Usage: anbexec read_csv_file <csv file path> <sep> <n1> ... <nk>\n"+
  327 + " where <sep> is the (double quoted) separator (can be \",\" or \";\")\n"+
  328 + " and where the integers <n1>...<nk> are the ranks of the columns to keep,\n"+
  329 + " (starting at 0).\n\n").
  330 +
  331 +define One
  332 + print_to_error
  333 + (
  334 + One -> ReadCsvResult f
  335 + ) =
  336 + if f(unique) is
  337 + {
  338 + end_of_input then print("-------- end of file --------------\n"),
  339 + error(e) then print("Error ["+e+"]\n");
  340 + print_to_error(f),
  341 + ok(offset,n) then print_csv_line(n);
  342 + print("[at offset "+offset+"]\n");
  343 + print_to_error(f)
  344 + }.
  345 +
  346 +define One
  347 + show_perf
  348 + (
  349 + One -> ReadCsvResult f,
  350 + Int left,
  351 + Int read_line,
  352 + Int block_size,
  353 + UTime start_time
  354 + ) =
  355 + if f(unique) is
  356 + {
  357 + end_of_input then show_duration("lines read "+read_line, start_time);
  358 + print("----------------------\n"),
  359 + error(e) then print("error ["+e+"]\n"); print_to_error(f),
  360 + ok(o,n) then
  361 + with left1 = if left = 1 then
  362 + show_duration("lines read "+read_line+1, start_time);
  363 + block_size
  364 + else
  365 + left -1,
  366 + show_perf(f, left1, read_line+1, block_size, start_time)
  367 + }.
  368 +
  369 +
  370 +define One
  371 + show_perf
  372 + (
  373 + One -> ReadCsvResult f,
  374 + Int block_size
  375 + )=
  376 + show_perf(f, block_size, 0, block_size, unow)
  377 + .
  378 +
  379 +global define One
  380 + read_csv_file
  381 + (
  382 + List(String) args
  383 + ) =
  384 + if args is
  385 + {
  386 + [ ] then syntax,
  387 + [path . t] then
  388 + println("file "+path);
  389 + if t is
  390 + {
  391 +
  392 + [ ] then syntax,
  393 + [sep . l] then if sep:[",",";"]
  394 + then
  395 + if map_to_Int(l) is
  396 + {
  397 + failure then syntax,
  398 + success(List(Int) cols) then
  399 + if file(path,read) is
  400 + {
  401 + failure then print("File '"+path+"' not found.\n"),
  402 + success(f) then
  403 + if make_lexing_stream("",f,10,10) is
  404 + {
  405 + failure then print("Error while reading file '"+path+"'.\n"),
  406 + success(ls) then
  407 + with cs = no_doubles(qsort(cols,(Int x, Int y) |-> x < y)),
  408 + read_line = make_read_csv_line(ls,sep,cs),
  409 +// show_perf(read_line, 10000)
  410 + print_to_error(read_line)
  411 + }
  412 + }
  413 + }
  414 + else syntax
  415 + }
  416 + }.
  417 +
  418 +
... ...
anubis_dev/library/data_base/read_csv_table.anubis
1 1  
  2 + This file is obsoleted by data_base/read_csv.anubis
  3 +
2 4  
3 5 Try it !
4 6  
5   -read import_csv.anubis
  7 + read import_csv.anubis
6 8  
7 9  
8   -define One
  10 + define One
9 11 table_print
10 12 (
11 13 List(String) l
... ... @@ -18,7 +20,7 @@ define One
18 20 table_print(t)
19 21 }.
20 22  
21   -define One
  23 + define One
22 24 table_print
23 25 (
24 26 List(List(String)) t
... ... @@ -29,7 +31,7 @@ define One
29 31 [h . t2] then table_print(h); table_print(t2)
30 32 }.
31 33  
32   -define One
  34 + define One
33 35 table_print
34 36 (
35 37 Maybe(List(List(String))) t
... ... @@ -40,7 +42,7 @@ define One
40 42 success(l) then table_print(l)
41 43 }.
42 44  
43   -global define One
  45 + global define One
44 46 read_csv_table
45 47 (
46 48 List(String) args
... ...
anubis_dev/library/lexical_analysis/fast_lexer_4.anubis
... ... @@ -618,7 +618,7 @@ public type DFA_state($Token,$Aux):
618 618 variant of 'make_lexer':
619 619  
620 620 public define Result(RegExprError,
621   - (LexingStream -> One -> LexerOutput($Token), // the lexer
  621 + ((LexingStream,$Aux) -> One -> LexerOutput($Token), // the lexer
622 622 List(DFA_state($Token,$Aux)))) // the automaton
623 623 make_lexer_and_automaton
624 624 (
... ... @@ -651,7 +651,7 @@ public define One
651 651  
652 652 *** (4.3) How to use a lexer.
653 653  
654   - Applying the function of type 'LexingStream($Aux) -> One -> LexerOutput($Token)' returned by
  654 + Applying the function of type 'LexingStream -> One -> LexerOutput($Token)' returned by
655 655 'make_lexer' to a lexing stream is understood as 'plugging' the lexer onto this lexing
656 656 stream. The result is a function of type:
657 657  
... ... @@ -1314,10 +1314,34 @@ define Result(RegExprError,RegExpr)
1314 1314  
1315 1315  
1316 1316  
  1317 + Debugging tools:
  1318 +define String
  1319 + format
  1320 + (
  1321 + List(Word8) l
  1322 + ) =
  1323 + concat(map((Word8 c) |-> to_decimal(c) ,l)," ").
1317 1324  
1318   -
1319   -
  1325 +define String
  1326 + format
  1327 + (
  1328 + RegExpr e
  1329 + ) =
  1330 + if e is
  1331 + {
  1332 + char(Word8 _0) then "char("+constant_string(1,_0)+")",
  1333 + choice(List(Word8) _0) then "choice("+format(_0)+")",
  1334 + plus(RegExpr _0) then "plus("+format(_0)+")",
  1335 + star(RegExpr _0) then "star("+format(_0)+")",
  1336 + cat(RegExpr _0,RegExpr _1) then "cat("+format(_0)+","+format(_1)+")",
  1337 + or(RegExpr _0,RegExpr _1) then "or("+format(_0)+","+format(_1)+")",
  1338 + dot then "dot",
  1339 + question_mark(RegExpr _0) then "question_mark("+format(_0)+")"
  1340 + }.
  1341 +
  1342 +
1320 1343 *** [1.6.3] The tool for parsing regular expressions.
  1344 +
1321 1345  
1322 1346 public define Result(RegExprError,RegExpr)
1323 1347 parse_regular_expression
... ... @@ -1328,13 +1352,12 @@ public define Result(RegExprError,RegExpr)
1328 1352 if read_regexpr(s,escape_char,[],end_of_regexpr) is
1329 1353 {
1330 1354 error(msg) then error(msg),
1331   - ok(re) then ok(re)
  1355 + ok(re) then //print("["+format(re)+"]\n");
  1356 + ok(re)
1332 1357 }.
1333 1358  
1334 1359  
1335 1360  
1336   -
1337   -
1338 1361 *** [1.7] Transforming a regular expression into a basic one.
1339 1362  
1340 1363 *** [1.7.1] Expanding a 'choice' of characters.
... ... @@ -1427,7 +1450,7 @@ public define String
1427 1450 *** [1.1] The type 'LexingStream'.
1428 1451  
1429 1452 A lexing stream provides tools which are adhoc for using low level fast lexers as
1430   - defined in section 13 of predefined.anubis:
  1453 + defined in section 13 of predefined.anubis.
1431 1454  
1432 1455 The type below records the information needed to come back to the state just after the
1433 1456 last or penultimate token was read.
... ... @@ -1440,9 +1463,10 @@ type TokenState:
1440 1463 Int col
1441 1464 ).
1442 1465  
1443   - There is a ``penultimate token'' when at least one token has been successfully read since the
  1466 + There is a ``penultimate token'' when at least two tokens have been successfully read since the
1444 1467 creation of the lexing stream. If it is not the case, the value of the ``penultimate state''
1445   - defaults to the very initial state.
  1468 + defaults to the state after the very first token was read, or to the very initial state if no
  1469 + token was read.
1446 1470  
1447 1471 When the buffer is reloaded, part of the current buffer is kept. One reason for this is that
1448 1472 when we encounter the end of the buffer it can be the case that we are currently reading a token
... ... @@ -1464,7 +1488,6 @@ type TokenState:
1464 1488 state informations for token1 and token2, the last two tokens successfully read.
1465 1489  
1466 1490  
1467   -
1468 1491 public type LexingStream:
1469 1492 lexing_stream
1470 1493 (
... ... @@ -1546,18 +1569,18 @@ public type LexingStream:
1546 1569 -- the new current buffer "source text."
1547 1570  
1548 1571 -- last accepted: (s,3), because 'sou' has been accepted in state 's' and
1549   - ends at offset 0 within the new buffer,
  1572 + ends at offset 3 within the new buffer,
1550 1573  
1551 1574 -- current_v receives the value 3, because 'sou' is already read,
1552 1575  
1553 1576 -- token_start_v receives the value 0, because the token we are currently
1554 1577 reading begins at offset 0.
1555 1578  
1556   - -- state s, because we want to try to read the sequel of 'sou'.
  1579 + -- restart in state s, because we want to try to read the sequel of 'sou'.
1557 1580  
1558 1581 Notice that if the low level lexer had returned 'rejected(s,at_end_of_input,12,15)'
1559 1582 instead of 'accepted(s,at_end_of_input,12,15)', the scenario is the same one except
1560   - that last accepted is 'none'.
  1583 + that last accepted will be 'none'.
1561 1584  
1562 1585 The low level lexer will now return 'accepted(s,not_at_end_of_input,0,6)', meaning that
1563 1586 it has recognized the token 'source' between positions 0 (included) and 6 (not
... ... @@ -1574,6 +1597,7 @@ public type LexingStream:
1574 1597 define LexingTools
1575 1598 make_tools
1576 1599 (
  1600 + Var(Int) token_start_v, // actually not used in this function
1577 1601 Var(Int) current_v,
1578 1602 Var(Int) line_v,
1579 1603 Var(Int) col_v,
... ... @@ -1590,12 +1614,11 @@ define LexingTools
1590 1614 (One _) |-> *col_v,
1591 1615  
1592 1616 // get current offset:
  1617 + // This is the number of bytes which are no longer in the buffer, plus the current position.
1593 1618 (One _) |-> *past_v + *current_v,
1594 1619  
1595 1620 // go back one char:
1596 1621 // don't go beyond the beginning of the buffer
1597   - // No need to update line_v and col_v because they
1598   - // refer to the beginning of the token.
1599 1622 (Int n) |-> current_v <- max(*current_v - n, 0),
1600 1623  
1601 1624 // comming back to the state just after the last token was read
... ... @@ -1603,7 +1626,6 @@ define LexingTools
1603 1626 current_v <- cur;
1604 1627 line_v <- l;
1605 1628 col_v <- c;
1606   - last_tok_v <- *penult_tok_v;
1607 1629 last_accept_v <- none,
1608 1630  
1609 1631 // coming back to the state just after the penultimate token was read
... ... @@ -1624,7 +1646,8 @@ public define LexingStream
1624 1646 String preambule,
1625 1647 ByteArray b
1626 1648 ) =
1627   - with b1_v = var(if length(preambule) = 0 then b else to_byte_array(preambule)+b),
  1649 + with b1_v = var(if length(preambule) = 0 then b else to_byte_array(preambule)+b),
  1650 + token_start_v = var((Int)0),
1628 1651 current_v = var((Int)0),
1629 1652 line_v = var((Int)0),
1630 1653 col_v = var((Int)0),
... ... @@ -1633,7 +1656,7 @@ public define LexingStream
1633 1656 penult_tok_v = var(tstate(0,0,0)),
1634 1657 last_accept_v = var((FastLexerLastAccepted)none),
1635 1658 lexing_stream(b1_v, // buffer
1636   - var((Int)0), // starting position
  1659 + token_start_v, // starting position
1637 1660 current_v, // current position
1638 1661 last_accept_v, // last accepting position
1639 1662 last_tok_v, // last token state
... ... @@ -1641,8 +1664,8 @@ public define LexingStream
1641 1664 (One u) |-> failure, // buffer is never reloaded
1642 1665 line_v, // current line
1643 1666 col_v, // current column
1644   - past_v, // past bytes
1645   - make_tools(current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)).
  1667 + past_v, // past bytes (will remain always 0 in this case)
  1668 + make_tools(token_start_v,current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)).
1646 1669  
1647 1670  
1648 1671  
... ... @@ -1700,7 +1723,7 @@ public define Maybe(LexingStream)
1700 1723 //print("Keeping this from previous buffer: ["+to_string(extract(old_buffer,dropped,old_length))+"]\n");
1701 1724 buffer_v <- extract(old_buffer,dropped,old_length)+more;
1702 1725 //print("New buffer: ["+to_string(*buffer_v)+"] size: "+to_decimal(length(*buffer_v))+"\n");
1703   - token_start_v <- 0;
  1726 + token_start_v <- *token_start_v - dropped;
1704 1727 //print("Next token starting position: "+to_decimal(*token_start_v)+"\n");
1705 1728 current_v <- old_length - dropped;
1706 1729 //print("New current reading position: "+to_decimal(*current_v)+"\n");
... ... @@ -1726,7 +1749,7 @@ public define Maybe(LexingStream)
1726 1749 line_v,
1727 1750 col_v,
1728 1751 past_bytes_v,
1729   - make_tools(current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v)))
  1752 + make_tools(token_start_v,current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v)))
1730 1753 }.
1731 1754  
1732 1755  
... ... @@ -1785,7 +1808,7 @@ public define Maybe(LexingStream)
1785 1808 min(min(current(*penult_tok_v),current(*last_tok_v)),*token_start_v),
1786 1809  
1787 1810 buffer_v <- extract(old_buffer,dropped,old_length)+more;
1788   - token_start_v <- 0;
  1811 + token_start_v <- *token_start_v - dropped;
1789 1812 current_v <- old_length - dropped;
1790 1813 past_bytes_v <- *past_bytes_v + dropped;
1791 1814 last_tok_v <- (if *last_tok_v is tstate(cur,l,c) then tstate(cur - dropped,l,c));
... ... @@ -1809,7 +1832,7 @@ public define Maybe(LexingStream)
1809 1832 line_v,
1810 1833 col_v,
1811 1834 past_bytes_v,
1812   - make_tools(current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v)))
  1835 + make_tools(token_start_v,current_v,line_v,col_v,past_bytes_v,last_tok_v,penult_tok_v,last_accepted_v)))
1813 1836 }.
1814 1837  
1815 1838  
... ... @@ -1841,7 +1864,9 @@ define (Int, Int, Int) // returns new (start,line,col)
1841 1864 Int line, // current line
1842 1865 Int col // current column
1843 1866 ) =
1844   - if old_start >= new_start then (new_start,line,col) else
  1867 + //print("old_start = "+old_start+"\n");
  1868 + if old_start >= new_start then //print("======== new col: "+col+"\n");
  1869 + (new_start,line,col) else
1845 1870 with c = force_nth(old_start,buffer),
1846 1871 if ((c >> 6) = 2)
1847 1872 /*
... ... @@ -1864,6 +1889,7 @@ define One
1864 1889 Var(Int) line_v,
1865 1890 Var(Int) col_v
1866 1891 ) =
  1892 + //print("new_start = "+new_start+"\n");
1867 1893 if compute_start_line_col(buffer,*token_start_v,new_start,*line_v,*col_v) is (s,l,c) then
1868 1894 token_start_v <- s;
1869 1895 line_v <- l;
... ... @@ -1891,7 +1917,7 @@ public define LexerOutput($Token)
1891 1917 ) =
1892 1918 if lstream is lexing_stream(buffer_v,token_start_v,current_v,last_accept_v,last_tok_v,penult_tok_v,reload_buffer,
1893 1919 line_v,col_v,offset_v,tools) then
1894   - //print("starting at offset "+to_decimal(*current_v)+" with token start at "+to_decimal(*token_start_v)+"\n");
  1920 + //print("starting at offset "+to_decimal(*current_v)+" with token start at "+to_decimal(*token_start_v)+"\n");
1895 1921 with lgbuf = length(*buffer_v),
1896 1922 if low_level_lexer(*buffer_v,*last_accept_v,*current_v,*token_start_v,starting_state) is
1897 1923 {
... ... @@ -1908,7 +1934,7 @@ public define LexerOutput($Token)
1908 1934 else
1909 1935 (
1910 1936 /* the lexeme may still be accepted after the buffer is reloaded */
1911   - update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v);
  1937 + //update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v);
1912 1938 if reload_buffer(unique) is
1913 1939 {
1914 1940 failure then
... ... @@ -1928,13 +1954,13 @@ public define LexerOutput($Token)
1928 1954  
1929 1955 /* almost the same thing for accepted */
1930 1956 accepted(s,start,end) then
1931   - //print("low level accepted start = "+to_decimal(start)+" end = "+to_decimal(end)+"\n");
  1957 + //print("low level accepted start = "+to_decimal(start)+" end = "+to_decimal(end)+"\n");
1932 1958 last_accept_v <- last(s,end);
1933 1959 current_v <- end;
1934 1960 if end /= lgbuf then
1935 1961 (
1936 1962 /* the lexeme just read must be accepted: the action is applied */
1937   - update_start_line_col(*buffer_v,end,token_start_v,line_v,col_v);
  1963 +
1938 1964 last_accept_v <- none;
1939 1965 if *actions(word32(s,0)) is
1940 1966 {
... ... @@ -1945,18 +1971,25 @@ public define LexerOutput($Token)
1945 1971 // We must update some variables
1946 1972 penult_tok_v <- *last_tok_v;
1947 1973 last_tok_v <- tstate(end,*line_v,*col_v);
1948   - f(extract(*buffer_v,start,end),tools,aux),
  1974 + with result = f(extract(*buffer_v,start,end),tools,aux),
  1975 + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v);
  1976 + result,
  1977 +
1949 1978 return(f) then
1950 1979 penult_tok_v <- *last_tok_v;
1951 1980 last_tok_v <- tstate(end,*line_v,*col_v);
1952   - f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
1953   - end-start,tools,aux),
  1981 + with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
  1982 + end-start,tools,aux),
  1983 + //print("*token_start_v = "+*token_start_v+"\n");
  1984 + //print("*current_v = "+*current_v+"\n");
  1985 + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v);
  1986 + result
1954 1987 }
1955 1988 )
1956 1989 else
1957 1990 (
1958 1991 /* the lexeme may still be accepted after the buffer is reloaded */
1959   - update_start_line_col(*buffer_v,start,token_start_v,line_v,col_v);
  1992 +
1960 1993 if reload_buffer(unique) is
1961 1994 {
1962 1995 failure then
... ... @@ -1970,11 +2003,15 @@ public define LexerOutput($Token)
1970 2003 ignore then should_not_happen(end_of_input),
1971 2004 return(f) then penult_tok_v <- *last_tok_v;
1972 2005 last_tok_v <- tstate(end,*line_v,*col_v);
1973   - f(extract(*buffer_v,start,end),tools,aux),
  2006 + with result = f(extract(*buffer_v,start,end),tools,aux),
  2007 + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v);
  2008 + result,
1974 2009 return(f) then penult_tok_v <- *last_tok_v;
1975 2010 last_tok_v <- tstate(end,*line_v,*col_v);
1976   - f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
1977   - end-start,tools,aux)
  2011 + with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
  2012 + end-start,tools,aux),
  2013 + update_start_line_col(*buffer_v,*current_v,token_start_v,line_v,col_v);
  2014 + result
1978 2015 },
1979 2016  
1980 2017 success(_) then
... ... @@ -1988,7 +2025,7 @@ public define LexerOutput($Token)
1988 2025 ignored_to_end then
1989 2026 //print("low level ignored_to_end\n");
1990 2027 /* we are at end of input buffer */
1991   - update_start_line_col(*buffer_v,lgbuf,token_start_v,line_v,col_v);
  2028 + //update_start_line_col(*buffer_v,lgbuf,token_start_v,line_v,col_v);
1992 2029 if reload_buffer(unique) is
1993 2030 {
1994 2031 failure then
... ... @@ -2010,7 +2047,7 @@ public define LexerOutput($Token)
2010 2047  
2011 2048 *** [3] Constructing the automaton.
2012 2049  
2013   - The description of a lexer is given as a list of 'LexerItem($Token)', where the
  2050 + The description of a lexer is given as a list of 'LexerItem($Token,$Aux)', where the
2014 2051 parameter '$Token' represents the type of tokens. Each lexer item is made of a regular
2015 2052 expression and an action. If the action is 'ignore', the token just read is ignored and
2016 2053 the lexer tries to read the next one. Otherwise, the action is applied to the lexeme
... ... @@ -3032,20 +3069,47 @@ define List(Int)
3032 3069 ignoring(name,transitions) then [-1 . actions_ranks(t)]
3033 3070 }
3034 3071 }.
  3072 +
  3073 +
  3074 + Get a characteristic serializable datum from a lexer description (used to avoid
  3075 + reconstructing the lexer when the description did not change). The signature
  3076 + changes if any of the regular expressions changes, if their order changes, or if
  3077 + the kind of action changes (this last point ensures that the list of
  3078 + action ranks remains correct in the generated file).
  3079 +define List(ByteArray)
  3080 + extract_regexprs
  3081 + (
  3082 + List(LexerItem($Token,$Aux)) l
  3083 + ) =
  3084 + with asign = (LexerAction($Token,$Aux) a) |-> if a is
  3085 + {
  3086 + ignore then "(*i)", // something which is illegal as a regular expression
  3087 + return(_0) then "(*r1)",
  3088 + return(_0) then "(*r2)"
  3089 + },
  3090 + map((LexerItem($Token,$Aux) i) |-> if i is
  3091 + {
  3092 + lexer_item(regular_expression,action) then to_byte_array(regular_expression+asign(action)),
  3093 + lexer_item(literal,action) then literal+to_byte_array(asign(action))
  3094 + },l).
  3095 +
3035 3096  
  3097 +
3036 3098 public define One
3037   - make_precompiled_lexer
  3099 + make_precompiled_lexer_aux
3038 3100 (
  3101 + String signature,
3039 3102 String directory,
3040 3103 String lexer_name,
3041 3104 List(LexerItem($Token,$Aux)) lexer_description,
3042 3105 Word8 escape_char
3043   - ) =
  3106 + ) =
3044 3107 with file_name = directory/lexer_name+".anubis",
3045 3108 if file(file_name,new) is
3046 3109 {
3047 3110 failure then print("Cannot create file '"+file_name+"'.\n"),
3048 3111 success(file) then
  3112 + print(weaken(file),"   "+signature+"\n\n"); // 3 blanks, then the 40 hexadecimal characters of the signature (see read_signature below)
3049 3113 if make_DFA(lexer_description,escape_char) is
3050 3114 {
3051 3115 error(msg) then print(to_English(msg)+"\n"),
... ... @@ -3060,6 +3124,46 @@ public define One
3060 3124 }.
3061 3125  
3062 3126  
  3127 +define Maybe(String)
  3128 + read_signature
  3129 + (
  3130 + String file_name
  3131 + ) =
  3132 + if file(file_name,read) is
  3133 + {
  3134 + failure then failure,
  3135 + success(f) then if read(f,43,10) is // read the first 3 (blanks) + 40 (sha1 hash) characters
  3136 + {
  3137 + error then failure,
  3138 + timeout then failure,
  3139 + ok(ba) then success(to_string(extract(ba,3,43)))
  3140 + }
  3141 + }.
  3142 +
  3143 +public define One
  3144 + make_precompiled_lexer
  3145 + (
  3146 + String directory,
  3147 + String lexer_name,
  3148 + List(LexerItem($Token,$Aux)) lexer_description,
  3149 + Word8 escape_char
  3150 + ) =
  3151 + // avoid reconstructing the lexer if not needed
  3152 + with signature = to_hexa(sha1(extract_regexprs(lexer_description))),
  3153 + file_name = directory/lexer_name+".anubis",
  3154 + do_it = (One u) |->
  3155 + print("Creating '"+file_name+"'. Please wait ... "); forget(flush(stdout));
  3156 + make_precompiled_lexer_aux(signature,directory,lexer_name,lexer_description,escape_char);
  3157 + print("Done.\n"); forget(flush(stdout)),
  3158 + if read_signature(file_name) is
  3159 + {
  3160 + failure then do_it(unique),
  3161 + success(s) then if s = signature
  3162 + then unique
  3163 + else do_it(unique)
  3164 + }.
  3165 +
  3166 +
3063 3167 public define One
3064 3168 make_precompiled_lexer
3065 3169 (
... ...
anubis_dev/library/system/files.anubis
... ... @@ -1020,14 +1020,14 @@ define Maybe(Int)
1020 1020 find_the_first
1021 1021 (
1022 1022 Data_IO io,
1023   - String looking_for, //String to search
  1023 + ByteArray looking_for, //String to search
1024 1024 Int size, //size of the string to search
1025   - String buffer,
  1025 + ByteArray buffer,
1026 1026 Int current_pos,
1027 1027 Int buf_size,
1028 1028 Int buf_pos
1029 1029 )=
1030   - //println("buf_size :"+buf_size+ " buf_pos :"+buf_pos + " size : "+size);
  1030 + //println("general current pos: "+current_pos+" | buffer size: "+buf_size+ " | buffer pos: "+buf_pos + " | search size: "+size);
1031 1031 if (buf_size - buf_pos) < size then
1032 1032 //println("New buffer request current pos "+current_pos+" buffer_pos "+buf_pos);
1033 1033 if read_bytes(io, 65536) is // <- block size is 64k
... ... @@ -1035,23 +1035,30 @@ define Maybe(Int)
1035 1035 failure then println("read_bytes failure");failure, //finish
1036 1036 time_out then println("read_bytes timeout");failure, //finish
1037 1037 success(ba) then
1038   - with new_buffer = to_string(extract(to_byte_array(buffer), buf_pos, buf_size) + ba),
  1038 + //println("length of ba "+length(ba));
  1039 + with ex_ba = extract(buffer, buf_pos, buf_size),
  1040 + //println("length of ex_ba "+length(ex_ba));
  1041 + with new_ba = ex_ba + ba,
  1042 + //println("length of new_ba "+length(new_ba));
  1043 + with new_buffer = ex_ba + ba,
1039 1044 //println("SUCCESS New buffer length "+length(new_buffer)+" new current_pos "+current_pos);
1040 1045 find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0),
1041 1046 truncated(ba) then
1042 1047 if length(ba) = 0 then
  1048 + //println("last buffer current position ["+current_pos+"]");
1043 1049 failure //finish
1044 1050 else
1045   - with new_buffer = to_string(extract(to_byte_array(buffer), buf_pos, buf_size) + ba),
  1051 + with new_buffer = extract(buffer, buf_pos, buf_size) + ba,
1046 1052 // println("TRUNCATED New buffer length "+length(new_buffer)+" new current_pos "+current_pos);
1047 1053 find_the_first(io, looking_for, size, new_buffer, current_pos + buf_pos, length(new_buffer), 0)
1048 1054 }
1049 1055 else
1050   - if find_string(buffer, looking_for, buf_pos) is
  1056 + if find_byte_array(buffer, looking_for, buf_pos) is
1051 1057 {
1052   - failure then find_the_first(io, looking_for, size, buffer, current_pos, buf_size, buf_size - (size-1)),
  1058 + failure then
  1059 + find_the_first(io, looking_for, size, buffer, current_pos, buf_size, buf_size - (size-1)),
1053 1060 success(pos) then
1054   - println("pattern ["+looking_for+"] found at offset "+(current_pos+pos));
  1061 + //println("pattern ["+to_string(looking_for)+"] found at offset "+(current_pos+pos));
1055 1062 success(current_pos + pos)
1056 1063 }
1057 1064 .
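
 When the pattern is not found in the current buffer, the search restarts at offset
 buf_size - (size-1): the last size-1 bytes of the buffer are kept, so that an occurrence
 straddling a buffer boundary is still found after the next reload.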
... ... @@ -1126,7 +1133,7 @@ public define Maybe(Int)
1126 1133 Data_IO io,
1127 1134 String search_string
1128 1135 ) =
1129   - find_the_first(io, search_string, length(search_string), "", 0, 0, 0).
  1136 + find_the_first(io, to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0).
1130 1137  
1131 1138 public define Maybe(Int)
1132 1139 find_the_first
... ... @@ -1138,7 +1145,7 @@ public define Maybe(Int)
1138 1145 {
1139 1146 failure then failure,
1140 1147 success(f) then
1141   - find_the_first(make_data_io(f), search_string, length(search_string), "", 0, 0, 0)
  1148 + find_the_first(make_data_io(f), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0)
1142 1149 }.
1143 1150  
1144 1151 public define Maybe(Int)
... ... @@ -1153,7 +1160,8 @@ public define Maybe(Int)
1153 1160 failure then failure,
1154 1161 success(f) then
1155 1162 with size = file_size(filename),
1156   - find_the_first(make_data_io(f, start_position, size - start_position), search_string, length(search_string), "", 0, 0, 0)
  1163 + //println("file size "+size);
  1164 + find_the_first(make_data_io(f, start_position, size - start_position), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0)
1157 1165 }.
1158 1166  
1159 1167 public define Maybe(Int)
... ... @@ -1168,6 +1176,6 @@ public define Maybe(Int)
1168 1176 {
1169 1177 failure then failure,
1170 1178 success(f) then
1171   - find_the_first(make_data_io(f, start_position, end_position - start_position), search_string, length(search_string), "", 0, 0, 0)
  1179 + find_the_first(make_data_io(f, start_position, end_position - start_position), to_byte_array(search_string), length(search_string), constant_byte_array(0,0), 0, 0, 0)
1172 1180 }.
1173 1181  
... ...
anubis_dev/library/tools/bytearray.anubis
... ... @@ -50,6 +50,16 @@ define One
50 50 anubis_format_aux(s,b,bpl,i+1,c+1,ind).
51 51  
52 52  
53   -
  53 +public define ByteArray
  54 + concat
  55 + (
  56 + List(ByteArray) l
  57 + ) =
  58 + if l is
  59 + {
  60 + [ ] then constant_byte_array(0,0),
  61 + [h . t] then h + concat(t)
  62 + }.
  63 +
54 64  
55 65  
... ...
anubis_dev/library/tools/int.anubis
... ... @@ -14,15 +14,15 @@
14 14  
15 15 *** (1) Absolute value.
16 16  
17   - public define macro Int abs(Int x). returns the absolute value of x
  17 + public define inline Int abs(Int x). returns the absolute value of x
18 18  
19 19  
20 20 *** (2) Max and min.
21 21  
22 22 The 'max' and 'min' functions respectively return the greatest and the smallest of their arguments.
23 23  
24   -public define Int max(Int x, Int y). returns the greatest of x and y
25   -public define Int min(Int x, Int y). returns the smallest of x and y
  24 +public define inline Int max(Int x, Int y). returns the greatest of x and y
  25 +public define inline Int min(Int x, Int y). returns the smallest of x and y
26 26  
27 27 public define Int max(NonEmptyList(Int) l). returns the greatest element of the list (which is non empty)
28 28 public define Int min(NonEmptyList(Int) l). returns the smallest element of the list (which is non empty)
... ... @@ -63,20 +63,25 @@ public define Int Int x ^ Int y. raises x to the p
63 63 public define Bool odd (Int x).
64 64 public define Bool even (Int x).
65 65  
  66 + *** (7) Greatest common divisor and least common multiple.
  67 +
  68 +public define Int gcd (Int x, Int y).
  69 +public define Int lcm (Int x, Int y). least common multiple
  70 +
66 71  
67 72 --- That's all for the public part ! -----------------------------------------------
68 73  
69 74  
70 75  
71 76  
72   -public define macro Int
  77 +public define inline Int
73 78 abs
74 79 (
75 80 Int x
76 81 ) =
77 82 if x < 0 then -x else x.
78 83  
79   -public define Int // must not be macro because x and y are always computed
  84 +public define inline Int // must not be macro because x and y are always computed
80 85 // and one of them would be computed twice
81 86 max
82 87 (
... ... @@ -116,7 +121,7 @@ public define Int
116 121  
117 122  
118 123  
119   -public define Int
  124 +public define inline Int
120 125 min
121 126 (
122 127 Int x,
... ... @@ -226,6 +231,38 @@ public define Bool
226 231 }.
227 232  
228 233  
  234 +public define Int
  235 + gcd
  236 + (
  237 + Int x,
  238 + Int y
  239 + ) =
  240 + /* Euclid's algorithm */
  241 + if x > y then gcd(y,x) else
  242 + if x = 0 then abs(y) else
  243 + if x < 0 then gcd(-x,y) else
  244 + if y/x is
  245 + {
  246 + failure then should_not_happen(0),
  247 + success(p) then if p is (q,r) then gcd(r,x)
  248 + }.
  249 +
  250 +
  251 +
  252 +public define Int
  253 + lcm
  254 + (
  255 + Int x,
  256 + Int y
  257 + ) =
  258 + with d = gcd(x,y),
  259 + if d = 0
  260 + then 0
  261 + else if (x*y)/d is
  262 + {
  263 + failure then should_not_happen(0),
  264 + success(p) then if p is (q,_) then q
  265 + }.
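
 For instance, gcd(12,18) = 6, and lcm(4,6) = (4*6)/gcd(4,6) = 24/2 = 12.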
229 266  
230 267  
231 268  
... ...
anubis_dev/library/tools/line_reader.anubis
... ... @@ -38,7 +38,7 @@
38 38  
39 39 //---------------------------------------------------------------------------
40 40  
41   -read lexical_analysis/fast_lexer_3.anubis
  41 +read lexical_analysis/fast_lexer_4.anubis
42 42  
43 43  
44 44 type Token:
... ... @@ -46,13 +46,13 @@ type Token:
46 46 eol.
47 47  
48 48 public type LineReaderLexer:
49   - line_reader_lexer(LexingStream(One) -> One -> LexerOutput(Token) /*lexer_base*/).
  49 + line_reader_lexer((LexingStream, One) -> One -> LexerOutput(Token) /*lexer_base*/).
50 50  
51 51 public type LineReader:
52 52 line_reader(One -> LexerOutput(Token) /*lexer*/,
53 53 One -> Int /*offset*/,
54 54 LineReaderLexer /*lexer_base*/,
55   - LexingStream(One) lexing_stream).
  55 + LexingStream lexing_stream).
56 56  
57 57 public define Int
58 58 current_offset
... ... @@ -96,30 +96,30 @@ public define Maybe(String)
96 96 public define Maybe(LineReaderLexer)
97 97 make_line_reader_lexer
98 98 =
99   - if make_lexer_and_automaton([
100   - lexer_item("#r?#n", return((ByteArray b, LexingTools t, One aux) |-> token(eol))),
101   - lexer_item("#r", return((ByteArray b, LexingTools t, One aux) |-> token(eol))),
102   - lexer_item("[^\r\n]*", return((ByteArray b, LexingTools t, One aux) |-> token(line(to_string(b))))),
103   - ],
  99 + if make_lexer([
  100 + lexer_item("#r?#n", return((ByteArray b, LexingTools t, One aux) |-> token(eol))),
  101 + lexer_item("#r", return((ByteArray b, LexingTools t, One aux) |-> token(eol))),
  102 + lexer_item("[^\r\n]*", return((ByteArray b, LexingTools t, One aux) |-> token(line(to_string(b))))),
  103 + ],
104 104 '#') is
105 105 {
106 106 error(msg) then print("Syntax error in regular expression: "+to_English(msg)+"\n"); failure,
107   - ok(p) then if p is (lexer, automaton) then success(line_reader_lexer(lexer))
  107 + ok(lexer) then success(line_reader_lexer(lexer))
108 108 }.
109 109  
110 110 public define LineReader
111 111 make_line_reader
112 112 (
113   - LexingStream(One) ls,
  113 + LexingStream ls,
114 114 LineReaderLexer make_lexer
115 115 ) =
116 116 if make_lexer is line_reader_lexer(lexer) then
117   - line_reader(lexer(ls), (One u) |-> offset(ls), make_lexer, ls).
  117 + line_reader(lexer(ls, unique), (One u) |-> offset(ls), make_lexer, ls).
118 118  
119 119 public define Maybe(LineReader)
120 120 make_line_reader
121 121 (
122   - LexingStream(One) ls,
  122 + LexingStream ls,
123 123 ) =
124 124 if make_line_reader_lexer is
125 125 {
... ... @@ -135,7 +135,7 @@ public define Maybe(LineReader)
135 135 (
136 136 String s,
137 137 ) =
138   - make_line_reader(make_lexing_stream("", s, unique)).
  138 + make_line_reader(make_lexing_stream("", s)).
139 139  
140 140 public define Maybe(LineReader)
141 141 make_line_reader
... ... @@ -146,8 +146,7 @@ public define Maybe(LineReader)
146 146 if make_lexing_stream("", /* preambule */
147 147 f, /* the opened file */
148 148 65536, /* size of buffer for the lexing stream */
149   - timeout, /* timeout (seconds) */
150   - unique)
  149 + timeout) /* timeout (seconds) */
151 150 is
152 151 {
153 152 failure then print("cannot make lexing stream.\n"); failure,
... ... @@ -159,7 +158,7 @@ public define LineReader
159 158 reset_line_reader
160 159 (
161 160 LineReader lr,
162   - LexingStream(One) ls,
  161 + LexingStream ls,
163 162 ) =
164 163 if lr is line_reader(lexer, offset, make_lexer, _) then
165 164 make_line_reader(ls, make_lexer).
... ...
anubis_dev/library/tools/list.anubis
... ... @@ -922,3 +922,33 @@ public define (List($T1), List($T2)) unzip(List(($T1, $T2)) l) = unzip(reverse(l
922 922  
923 923  
924 924  
  925 + Compute the list of all sublists of a list (beware: for a list of length n,
  926 + this gives a list of length 2^n).
  927 +
  928 +public define List(List($T))
  929 + sublists
  930 + (
  931 + List($T) l
  932 + ) =
  933 + if l is
  934 + {
  935 + [ ] then [[ ]],
  936 + [h . t] then with p = sublists(t),
  937 + map((List($T) u) |-> [h . u],p) + p
  938 + }.
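
 For instance, sublists([1,2]) is [[1,2],[1],[2],[ ]].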
  939 +
  940 +
  941 + Construct a 'constant' list containing any number of times the same datum.
  942 +
  943 + constant_list(x,6) is just: [x,x,x,x,x,x]
  944 +
  945 +public define List($T)
  946 + constant_list
  947 + (
  948 + $T x,
  949 + Int n
  950 + ) =
  951 + if n =< 0 then [ ] else [x . constant_list(x,n-1)].
  952 +
  953 +
  954 +
... ...
anubis_dev/library/tools/read_table.anubis
1 1  
2   -transmit data_base/import_csv.anubis (since version 1.6)
  2 + Obsolete file.
  3 +
  4 + transmit data_base/import_csv.anubis (since version 1.6)
3 5  
... ...
anubis_dev/manuals/en/Anubis-doc-1-14.pdf
No preview for this file type
anubis_dev/manuals/en/Anubis-doc-1-14.tex
... ... @@ -1011,6 +1011,12 @@ independently of the many definitions you may give to the symbol \cod{[~]} and t
1011 1011 This is enough on syntax for reading the subsequent sections. We will say more on syntax later.
1012 1012  
1013 1013  
  1014 +\section{\cod{\_\_LINE\_\_}, \cod{\_\_FILE\_\_}, \cod{\_\_DIR\_\_} and \cod{\_\_TIME\_\_}}
  1015 +The keyword \cod{\_\_LINE\_\_} represents the line number (of the source file) where this keyword is written. This
  1016 +is a datum of type \cod{Word32}. Similarly, \cod{\_\_FILE\_\_} and \cod{\_\_DIR\_\_} represent the absolute file path and
  1017 +absolute directory path of the source file they are written within. They are of type \cod{String}. The keyword
  1018 +\cod{\_\_TIME\_\_} instantiates to the current time (of type \cod{Word32}) at the moment it is
  1019 +encountered in the source file during compilation.
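For instance, \cod{print("Compiling file "+\_\_FILE\_\_+" in directory "+\_\_DIR\_\_)} prints the
absolute path and the directory of the source file in which this line appears.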
1014 1020  
1015 1021  
1016 1022 \section{Automatically generated files}
... ... @@ -1019,7 +1025,7 @@ an ``automatically generated file&#39;&#39;. There are several examples
1019 1025 of metaprograms in the library such as~: \APG\ (see section \myref{sec:APG}), \cod{fast\_lexer\_3} (see section
1020 1026 \myref{sec:fastlexer}), \cod{metaSQL} (see section \myref{sec:metaSQL}), \dots\
1021 1027  
1022   -Autmatically generated files should never be modified because any modification is destroyed by a new execution of the
  1028 +Automatically generated files should never be modified because any modification is destroyed by a new execution of the
1023 1029 metaprogram. Only true source files should be modified. As a consequence, generated files encumber your working
1024 1030 directory, and we have adopted the policy of putting them into a subdirectory (of the current directory), uniformly named \fn{generated}.
1025 1031 Putting generated files into the \fn{generated} subdirectory must be the default behavior of metaprograms. An option of the
... ...