Commit 92c7ed18c2ee6e10f09865d2d577af4a0fcf26a9

Authored by Cédric RICARD
1 parent bde59c8c

Fix LineReader bug (infinite loop)

Add UnitTest to FastLexer revealing bug
anubis_dev/library/lexical_analysis/fast_lexer.anubis
... ... @@ -447,24 +447,24 @@ public define Maybe(LexingStream) make_lexing_stream(SSL_Connection stream,
447 447 file, in particular at the actual definition of type 'LexingStream', and write down
448 448 another such function (in a file of yours within which you put a 'read
449 449 fast_lexer.anubis').
450   -
451   -
452   - To each lexing stream is attached a function for counting of type 'One -> Int'. When
453   - applied to 'unique', this function returns the number of bytes already read from the
454   - lexing stream, i.e. the position from which the reading of te next token will
455   - occur. This function is obtained as follows:
  450 +
  451 +
  452 + To each lexing stream is attached a function for counting of type 'One -> Int'. When
  453 + applied to 'unique', this function returns the number of bytes already read from the
  454 + lexing stream, i.e. the position from which the reading of the next token will
  455 + occur. This function is obtained as follows:
456 456  
457   -public define One -> Int
458   - offset_counter
459   - (
460   - LexingStream ls
461   - ).
  457 +public define One -> Int
  458 + offset_counter
  459 + (
  460 + LexingStream ls
  461 + ).
  462 +
  463 + If you need this function, it is recommended to call 'offset_counter' only once just
  464 + after the lexing stream is created.
462 465  
463   - If youn need this function, it is recommended to call 'offset_counter' only once just
464   - after the lexing stream is created.
465 466  
466 467  
467   -
468 468 *** (4) Constructing a lexer.
469 469  
470 470 *** (4.1) Construction.
... ... @@ -580,7 +580,7 @@ public define Printable_tree
580 580  
581 581 read tools/basis.anubis
582 582 read tools/streams.anubis
583   -
  583 +read system/convert.anubis
584 584  
585 585 -------------------------------- Table of Contents ------------------------------------
586 586  
... ... @@ -740,13 +740,7 @@ define Maybe(ExChar)
740 740  
741 741 *** [1.5.1] Truncating a Word32 to a Word8.
742 742  
743   -define Word8
744   - truncate_to_Word8
745   - (
746   - Word32 x
747   - ) =
748   - if x is word32(l1,_) then if l1 is word16(l2,_) then l2.
749   -
  743 + The definition of 'truncate_to_Word8' has been moved to system/convert.anubis.
750 744  
751 745  
752 746 *** [1.5.2] Creating a range of consecutive characters.
... ... @@ -1223,24 +1217,24 @@ public define String
1223 1217 public type LexingStream:
1224 1218 lexing_stream
1225 1219 (
1226   - Var(ByteArray) buffer_v, // the current buffer
1227   - Var(Int) start_v, // start of lexem in buffer
1228   - Var(FastLexerLastAccepted) last_accept_v, // last accepting position (if any)
1229   - Var(Int) current_v, // position of reading in buffer
1230   - Int -> Maybe(One) reload_buffer, // command for loading the sequel in the buffer
1231   - Var(Int) current_offset_v // accumulator for current offset
  1220 + Var(ByteArray) buffer_v, // the current buffer
  1221 + Var(Int) start_v, // start of lexem in buffer
  1222 + Var(FastLexerLastAccepted) last_accept_v, // last accepting position (if any)
  1223 + Var(Int) current_v, // position of reading in buffer
  1224 + Int -> Maybe(One) reload_buffer, // command for loading the sequel in the buffer
  1225 + Var(Int) current_offset_v // accumulator for current offset
1232 1226 ).
1233 1227  
1234   -
1235   -public define One -> Int
1236   - offset_counter
1237   - (
1238   - LexingStream ls
1239   - ) =
1240   - if ls is lexing_stream(_,_,_,_,_,v) then
1241   - (One u) |-> *v.
1242   -
1243   -
  1228 +
  1229 +public define One -> Int
  1230 + offset_counter
  1231 + (
  1232 + LexingStream ls
  1233 + ) = // returns a function giving access to the offset accumulator of 'ls'
  1234 + if ls is lexing_stream(_,_,_,_,_,v) then // v is the sixth component, 'current_offset_v'; the returned function reads it each time it is applied
  1235 + (One u) |-> *v.
  1236 +
  1237 +
1244 1238 While we are reading a lexeme, we keep the starting position (offset of first character
1245 1239 of the current lexeme) in 'start_v' so as to be able to extract the lexeme. We also
1246 1240 keep the last position at which a lexeme was accepted. This is because the lexer always
... ... @@ -1272,8 +1266,8 @@ public define LexingStream
1272 1266 var(0), // starting position
1273 1267 var(none), // last accepting position
1274 1268 var(0), // current position
1275   - (Int u) |-> failure, // buffer cannot be reloaded
1276   - var(0)).
  1269 + (Int u) |-> failure, // buffer cannot be reloaded
  1270 + var(0)).
1277 1271  
1278 1272  
1279 1273  
... ... @@ -1308,7 +1302,7 @@ public define Maybe(LexingStream)
1308 1302 start_v = var((Int)0),
1309 1303 last_accepted_v = var((FastLexerLastAccepted)none),
1310 1304 current_v = var((Int)0),
1311   - offset_v = var((Int)0),
  1305 + offset_v = var((Int)0),
1312 1306 reload_buffer = (Int i) |->
1313 1307 if read(stream,buffer_size,timeout) is
1314 1308 {
... ... @@ -1317,13 +1311,13 @@ public define Maybe(LexingStream)
1317 1311 ok(more) then
1318 1312 //print("Buffer reloaded ("+abs_to_decimal(length(more))+" bytes).\n");
1319 1313 if length(more) = 0
1320   - then (with old_buffer = *buffer_v,
1321   - old_length = length(old_buffer),
1322   - dropped = *start_v, // number of bytes dropped from old buffer
  1314 + then (with old_buffer = *buffer_v,
  1315 + old_length = length(old_buffer),
  1316 + dropped = *start_v, // number of bytes dropped from old buffer
1323 1317 buffer_v <- extract(old_buffer,dropped,old_length);
1324 1318 start_v <- 0;
1325 1319 current_v <- *current_v - dropped;
1326   - /* reloading does not change the current offset */
  1320 + /* reloading does not change the current offset */
1327 1321 last_accepted_v <-
1328 1322 if *last_accepted_v is
1329 1323 {
... ... @@ -1337,7 +1331,7 @@ public define Maybe(LexingStream)
1337 1331 buffer_v <- extract(old_buffer,dropped,old_length)+more;
1338 1332 start_v <- 0;
1339 1333 current_v <- *current_v - dropped;
1340   - /* reloading does not change the current offset */
  1334 + /* reloading does not change the current offset */
1341 1335 last_accepted_v <-
1342 1336 if *last_accepted_v is
1343 1337 {
... ... @@ -1350,8 +1344,8 @@ public define Maybe(LexingStream)
1350 1344 start_v,
1351 1345 last_accepted_v,
1352 1346 current_v,
1353   - reload_buffer,
1354   - offset_v))
  1347 + reload_buffer,
  1348 + offset_v))
1355 1349 }.
1356 1350  
1357 1351  
... ... @@ -1390,7 +1384,7 @@ public define Maybe(LexingStream)
1390 1384 start_v = var((Int)0),
1391 1385 last_accepted_v = var((FastLexerLastAccepted)none),
1392 1386 current_v = var((Int)0),
1393   - offset_v = var((Int)0),
  1387 + offset_v = var((Int)0),
1394 1388 reload_buffer = (Int i) |->
1395 1389 if (Maybe(ByteArray))read(stream,buffer_size,timeout) is
1396 1390 {
... ... @@ -1403,7 +1397,7 @@ public define Maybe(LexingStream)
1403 1397 buffer_v <- extract(old_buffer,dropped,old_length);
1404 1398 start_v <- 0;
1405 1399 current_v <- *current_v - dropped;
1406   - /* reloading does not change the current offset */
  1400 + /* reloading does not change the current offset */
1407 1401 last_accepted_v <-
1408 1402 if *last_accepted_v is
1409 1403 {
... ... @@ -1417,7 +1411,7 @@ public define Maybe(LexingStream)
1417 1411 buffer_v <- extract(old_buffer,dropped,old_length)+more;
1418 1412 start_v <- 0;
1419 1413 current_v <- *current_v - dropped;
1420   - /* reloading does not change the current offset */
  1414 + /* reloading does not change the current offset */
1421 1415 last_accepted_v <-
1422 1416 if *last_accepted_v is
1423 1417 {
... ... @@ -1430,8 +1424,8 @@ public define Maybe(LexingStream)
1430 1424 start_v,
1431 1425 last_accepted_v,
1432 1426 current_v,
1433   - reload_buffer,
1434   - offset_v))
  1427 + reload_buffer,
  1428 + offset_v))
1435 1429 }.
1436 1430  
1437 1431  
... ... @@ -2289,9 +2283,9 @@ define One -&gt; LexerOutput($Token)
2289 2283 Word16 starting_state) -> FastLexerOutput lexer,
2290 2284 MVar(LexerAction($Token)) actions
2291 2285 ) =
2292   - if stream is lexing_stream(buffer_v,start_v,last_accept_v,current_v,reload_buffer,offset_v) then
2293   - (One _) |-l-> with old_current = *current_v,
2294   - if lexer(*buffer_v,
  2286 + if stream is lexing_stream(buffer_v,start_v,last_accept_v,current_v,reload_buffer,offset_v) then
  2287 + (One _) |-l-> with old_current = *current_v,
  2288 + if lexer(*buffer_v,
2295 2289 *last_accept_v,
2296 2290 *current_v,
2297 2291 0) // reading a new token always starts in state 0
... ... @@ -2303,7 +2297,7 @@ define One -&gt; LexerOutput($Token)
2303 2297 not_at_end_of_input then
2304 2298 with result = (LexerOutput($Token))error(extract(*buffer_v,*start_v,end)),
2305 2299 current_v <- end+1;
2306   - offset_v <- *offset_v + (end + 1 - old_current);
  2300 + offset_v <- *offset_v + (end + 1 - old_current);
2307 2301 start_v <- end+1;
2308 2302 last_accept_v <- none;
2309 2303 result,
... ... @@ -2326,7 +2320,7 @@ define One -&gt; LexerOutput($Token)
2326 2320 {
2327 2321 ignore then
2328 2322 current_v <- end;
2329   - offset_v <- *offset_v + (end - old_current);
  2323 + offset_v <- *offset_v + (end - old_current);
2330 2324 start_v <- end;
2331 2325 last_accept_v <- none;
2332 2326 l(unique), // ignore and try to read the next token
... ... @@ -2334,7 +2328,7 @@ define One -&gt; LexerOutput($Token)
2334 2328 return(f) then
2335 2329 with result = f(extract(*buffer_v,*start_v,end)),
2336 2330 current_v <- end;
2337   - offset_v <- *offset_v + (end - old_current);
  2331 + offset_v <- *offset_v + (end - old_current);
2338 2332 start_v <- end;
2339 2333 last_accept_v <- none;
2340 2334 result
... ... @@ -2351,7 +2345,7 @@ define One -&gt; LexerOutput($Token)
2351 2345 return(f) then
2352 2346 with result = f(extract(*buffer_v,*start_v,end)),
2353 2347 current_v <- end;
2354   - offset_v <- *offset_v + (end - old_current);
  2348 + offset_v <- *offset_v + (end - old_current);
2355 2349 start_v <- end;
2356 2350 last_accept_v <- none;
2357 2351 result
... ...
anubis_dev/library/system/convert.anubis
... ... @@ -92,18 +92,18 @@ public define inline ByteArray
92 92  
93 93 // Obsolete name, should be removed
94 94 public define inline Word8
95   - truncate_to_word8
96   - (
97   - Word32 value
98   - ) =
99   - truncate_to_Word8(to_Int(value)).
  95 + truncate_to_word8
  96 + (
  97 + Word32 x
  98 + ) =
  99 + if x is word32(l1,_) then if l1 is word16(l2,_) then l2.
100 100  
101 101 public define inline Word8
102   - truncate_to_Word8
103   - (
104   - Word32 value
105   - ) =
106   - truncate_to_Word8(to_Int(value)).
  102 + truncate_to_Word8
  103 + (
  104 + Word32 x
  105 + ) =
  106 + if x is word32(l1,_) then if l1 is word16(l2,_) then l2.
107 107  
108 108 public define inline Word32
109 109 word8_to_Word32
... ...
anubis_dev/library/test/Anubis UnitTest.aproj
... ... @@ -32,6 +32,7 @@
32 32 <Import Project="$(AnubisBinPath)\Anubis.Build.targets" />
33 33 <ItemGroup>
34 34 <Compile Include="all_unit_test.anubis" />
  35 + <Compile Include="lexical_analysis\fast_lexer.ut.anubis" />
35 36 <Compile Include="predefined\date_and_time.unit_test.anubis" />
36 37 <Compile Include="predefined\decimal_scan.unit_test.anubis" />
37 38 <Compile Include="predefined\fast_lexer.unit_test.anubis" />
... ... @@ -48,6 +49,7 @@
48 49 <Compile Include="tools\utf-8.unit_test.anubis" />
49 50 </ItemGroup>
50 51 <ItemGroup>
  52 + <Folder Include="lexical_analysis" />
51 53 <Folder Include="predefined" />
52 54 <Folder Include="system" />
53 55 <Folder Include="tools" />
... ...
anubis_dev/library/test/all_unit_test.anubis
... ... @@ -10,6 +10,7 @@ read test/predefined/sqlite.unit_test.anubis
10 10 read test/predefined/fast_lexer.unit_test.anubis
11 11  
12 12 read test/system/convert.unit_test.anubis
  13 +read test/lexical_analysis/fast_lexer.ut.anubis
13 14 read test/system/message_queue.unit_test.anubis
14 15 read test/system/message_transceiver.unit_test.anubis
15 16 read test/system/string.unit_test.anubis
... ... @@ -37,7 +38,8 @@ define List(UnitTestSuite)
37 38 make_SQLite_test_suite,
38 39 make_UTF8_test_suite,
39 40 make_line_reader_test_suite,
40   - make_fast_lexer_test_suite
  41 + make_fast_lexer_test_suite,
  42 + make_fast_lexer_test_suite2,
41 43 ]
42 44 .
43 45  
... ...
anubis_dev/library/test/lexical_analysis/fast_lexer.ut.anubis 0 → 100644
  1 +/*
  2 + * Created by PyramIDE.
  3 + * User: ricard
  4 + * Date: 19/09/2008
  5 + * Time: 14:24
  6 + *
  7 + */
  8 +
  9 +read tools/unit_test.anubis
  10 +read lexical_analysis/fast_lexer.anubis
  11 +
  12 +type InfLoopToken: // tokens built by the test lexer of this file: the text of a line, or an end-of-line marker
  13 + line(String),
  14 + eol.
  15 +
  16 +define String
  17 + token_to_string
  18 + (
  19 + LexerOutput(InfLoopToken) output
  20 + ) =
  21 + if output is
  22 + {
  23 + end_of_input then /* no more token: exit the main loop */
  24 + "End of input",
  25 +
  26 + error(b) then
  27 + /* should never happen with this lexer (see the above comment) */
  28 + "Lexer error: ["+to_string(b)+"]",
  29 +
  30 + token(t) then
  31 + /* a token has been recognized */
  32 + if t is
  33 + {
  34 + line(l) then
  35 + "token: line("+l+")"
  36 + eol then
  37 + "token: eol"
  38 + }
  39 + }.
  40 +
  41 +
  42 +define One
  43 + infinite_loop_test
  44 + (
  45 + UnitTestContext ut
  46 + ) =
  47 + if make_lexer_and_automaton([
  48 + lexer_item("#r?#n", return((ByteArray b) |-> token(eol))),
  49 + lexer_item("[^\r\n]*", return((ByteArray b) |-> token(line(to_string(b))))),
  50 + ],
  51 + '#') is
  52 + {
  53 + error(msg) then print("Syntax error in regular expression: "+to_English(msg)+"\n"); assertIsTrue(ut, false, "Can't create lexer"),
  54 + ok(p) then if p is (lexer, automaton) then
  55 + with next_token = lexer(make_lexing_stream("mon texte\n\r\nla suite")),
  56 + assertIsSame(ut, next_token(unique), token(line("mon texte")), token_to_string, "#1 1st line");
  57 + assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#1 1st EOF");
  58 + //assertIsSame(ut, next_token(unique), token(line("")), token_to_string, "#1 2nd line");
  59 + assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#1 2nd EOF");
  60 + assertIsSame(ut, next_token(unique), token(line("la suite")), token_to_string, "#1 3rd line");
  61 + //assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#1 3rd EOF");
  62 + assertIsSame(ut, next_token(unique), end_of_input, token_to_string, "#1 EOF");
  63 +
  64 +
  65 + with next_token = lexer(make_lexing_stream("mon texte\r\n\rla suite")),
  66 + assertIsSame(ut, next_token(unique), token(line("mon texte")), token_to_string, "#2 1st line");
  67 + assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#2 1st EOF");
  68 + assertIsSame(ut, next_token(unique), error(to_byte_array("\r")), token_to_string, "#2 error");
  69 + //assertIsSame(ut, next_token(unique), token(line("")), token_to_string, "#2 2nd line");
  70 + //assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#2 2nd EOF");
  71 + //assertIsSame(ut, next_token(unique), token(line("la suite")), token_to_string, "#2 3rd line");
  72 + //assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#2 3rd EOF");
  73 + //assertIsSame(ut, next_token(unique), end_of_input, token_to_string, "#2 EOF");
  74 + unique
  75 + }.
  76 +
  77 +
  78 +
  79 +public define UnitTestSuite
  80 + make_fast_lexer_test_suite2
  81 + = // the suite named "lexical_analysis.fast_lexer"; it holds the single fixture "infinite_loop"
  82 + ut_suite("lexical_analysis.fast_lexer",
  83 + [
  84 + ut_fixture("infinite_loop", infinite_loop_test)
  85 + ]).
  86 +
  87 +
... ...
anubis_dev/library/test/predefined/fast_lexer.unit_test.anubis
1   -
2   -
3   -read tools/unit_test.anubis
4   -
5   -
6   -define String
7   - format
8   - (
9   - AtEndOfInput a
10   - ) =
11   - if a is
12   - {
13   - not_at_end_of_input then "not_at_end_of_input",
14   - at_end_of_input then "at_end_of_input"
15   - }.
16   -
17   -
18   -define FastLexerOutput
19   - test_fast_lexer
20   - (
21   - List(FastLexerState) lexer,
22   - ByteArray input,
23   - FastLexerLastAccepted last_accepted,
24   - Int position,
25   - Word16 state
26   - ) =
27   - if make_fast_lexer(lexer) is
28   - {
29   - unknown_state(n) then print("fast lexer unknown state: "+to_decimal(n)+"\n"); alert,
30   - too_many_states then print("too many states.\n"); alert,
31   - ok(fl) then with result = fl(input,last_accepted,position,state),
32   -
33   - if result is
34   - {
35   - rejected(w,e,a) then
36   - print("\nrejected("+to_decimal(w)+","+abs_to_decimal(e)+","+format(a)+")\n"),
37   - accepted(w,e,a,_) then
38   - print("\naccepted("+to_decimal(w)+","+abs_to_decimal(e)+","+format(a)+")\n")
39   - };
40   -
41   - result
42   - }.
43   -
44   -
45   -
46   - A lexer accepting "g+abu", "g+abuzo",
47   -
48   -define List(FastLexerState)
49   - lexer_1
50   - =
51   - [
52   - /* state 0 */ rejecting([transition('g',1)]),
53   - /* state 1 */ rejecting([transition('a',2),transition('g',1)]),
54   - /* state 2 */ rejecting([transition('b',3)]),
55   - /* state 3 */ rejecting([transition('u',4)]),
56   - /* state 4 */ accepting([transition('z',5)]),
57   - /* state 5 */ rejecting([transition('o',6)]),
58   - /* state 6 */ accepting([ ])
59   - ].
60   -
61   -
62   -define One
63   - fast_lexer_test
64   - (
65   - UnitTestContext ut
66   - ) =
67   -
68   - /* starting in state 0, position 0 */
69   - assertIsSame(ut,
70   - test_fast_lexer(lexer_1,to_byte_array("gab"),none,0,0),
71   - rejected(3,3,at_end_of_input), "1");
72   - unique.
73   -
74   -
75   - assertIsSame(ut,
76   - test_fast_lexer(lexer_1,to_byte_array("gabu"),none,bol,neol,0,0),
77   - accepted(4,4,at_end_of_input), "2");
78   - assertIsSame(ut,
79   - test_fast_lexer(lexer_1,to_byte_array("ggggabu"),none,bol,neol,0,0),
80   - accepted(4,7,at_end_of_input), "2bis");
81   - assertIsSame(ut,
82   - test_fast_lexer(lexer_1,to_byte_array("gabuz"),none,bol,neol,0,0),
83   - accepted(4,4,at_end_of_input), "3");
84   - assertIsSame(ut,
85   - test_fast_lexer(lexer_1,to_byte_array("gmbuz"),none,bol,neol,0,0),
86   - rejected(1,1,not_at_end_of_input),"4");
87   - assertIsSame(ut,
88   - test_fast_lexer(lexer_1,to_byte_array("gabuzo"),none,bol,neol,0,0),
89   - accepted(6,6,at_end_of_input), "5");
90   - assertIsSame(ut,
91   - test_fast_lexer(lexer_1,to_byte_array("gabuzobof"),none,bol,neol,0,0),
92   - accepted(6,6,not_at_end_of_input), "6");
93   - assertIsSame(ut,
94   - test_fast_lexer(lexer_1,to_byte_array("gggggabuzobof"),none,bol,neol,0,0),
95   - accepted(6,10,not_at_end_of_input), "6bis");
96   -
97   - /* restarting from some other state (with or without an already accepted position) */
98   -
99   - assertIsSame(ut,
100   - test_fast_lexer(lexer_1,to_byte_array("bu"),none,bol,neol,0,2),
101   - accepted(4,2,at_end_of_input), "7");
102   - assertIsSame(ut,
103   - test_fast_lexer(lexer_1,to_byte_array("gabuzfff"),last(4,4),bol,neol,5,5),
104   - accepted(4,4,not_at_end_of_input), "8");
105   - assertIsSame(ut,
106   - test_fast_lexer(lexer_1,to_byte_array("gabuzomeu"),last(4,4),bol,neol,5,5),
107   - accepted(6,6,not_at_end_of_input), "9");
108   -
109   - /* testing bol and eol */
110   - assertIsSame(ut,
111   - test_fast_lexer(lexer_1,to_byte_array("meu"),none,bol,neol,0,0),
112   - accepted(6,6,not_at_end_of_input), "9");
113   -
114   -
115   - unique.
116   -
117   -
118   -
119   -
120   -
121   -
122   -
123   -public define UnitTestSuite
124   - make_fast_lexer_test_suite
125   - =
126   - ut_suite("predefined.fast_lexer",
127   - [
128   - ut_fixture("fast lexer", fast_lexer_test)
129   - ]).
130   -
131   -
132   -global define One
133   - fast_lexer_unit_test
134   - (
135   - List(String) args
136   - )=
137   - execute_tests([make_fast_lexer_test_suite], args).
138   -
139   -
140 1 \ No newline at end of file
  2 +
  3 +
  4 +read tools/unit_test.anubis
  5 +
  6 +
  7 +define String
  8 + format
  9 + (
  10 + AtEndOfInput a
  11 + ) =
  12 + if a is
  13 + {
  14 + not_at_end_of_input then "not_at_end_of_input",
  15 + at_end_of_input then "at_end_of_input"
  16 + }.
  17 +
  18 +
  19 +define FastLexerOutput
  20 + test_fast_lexer
  21 + (
  22 + List(FastLexerState) lexer,
  23 + ByteArray input,
  24 + FastLexerLastAccepted last_accepted,
  25 + Int position,
  26 + Word16 state
  27 + ) =
  28 + if make_fast_lexer(lexer) is
  29 + {
  30 + unknown_state(n) then print("fast lexer unknown state: "+to_decimal(n)+"\n"); alert,
  31 + too_many_states then print("too many states.\n"); alert,
  32 + ok(fl) then with result = fl(input,last_accepted,position,state),
  33 +
  34 + if result is
  35 + {
  36 + rejected(w,e,a) then
  37 + print("\nrejected("+to_decimal(w)+","+abs_to_decimal(e)+","+format(a)+")\n"),
  38 + accepted(w,e,a,_) then
  39 + print("\naccepted("+to_decimal(w)+","+abs_to_decimal(e)+","+format(a)+")\n")
  40 + };
  41 +
  42 + result
  43 + }.
  44 +
  45 +
  46 +
  47 + A lexer accepting "g+abu", "g+abuzo",
  48 +
  49 +define List(FastLexerState)
  50 + lexer_1
  51 + =
  52 + [
  53 + /* state 0 */ rejecting([transition('g',1)]),
  54 + /* state 1 */ rejecting([transition('a',2),transition('g',1)]),
  55 + /* state 2 */ rejecting([transition('b',3)]),
  56 + /* state 3 */ rejecting([transition('u',4)]),
  57 + /* state 4 */ accepting([transition('z',5)]),
  58 + /* state 5 */ rejecting([transition('o',6)]),
  59 + /* state 6 */ accepting([ ])
  60 + ].
  61 +
  62 +
  63 +define One
  64 + fast_lexer_test
  65 + (
  66 + UnitTestContext ut
  67 + ) =
  68 +
  69 + /* starting in state 0, position 0 */
  70 + assertIsSame(ut,
  71 + test_fast_lexer(lexer_1,to_byte_array("gab"),none,0,0),
  72 + rejected(3,3,at_end_of_input), "1");
  73 + unique.
  74 +
  75 +
  76 + assertIsSame(ut,
  77 + test_fast_lexer(lexer_1,to_byte_array("gabu"),none,bol,neol,0,0),
  78 + accepted(4,4,at_end_of_input), "2");
  79 + assertIsSame(ut,
  80 + test_fast_lexer(lexer_1,to_byte_array("ggggabu"),none,bol,neol,0,0),
  81 + accepted(4,7,at_end_of_input), "2bis");
  82 + assertIsSame(ut,
  83 + test_fast_lexer(lexer_1,to_byte_array("gabuz"),none,bol,neol,0,0),
  84 + accepted(4,4,at_end_of_input), "3");
  85 + assertIsSame(ut,
  86 + test_fast_lexer(lexer_1,to_byte_array("gmbuz"),none,bol,neol,0,0),
  87 + rejected(1,1,not_at_end_of_input),"4");
  88 + assertIsSame(ut,
  89 + test_fast_lexer(lexer_1,to_byte_array("gabuzo"),none,bol,neol,0,0),
  90 + accepted(6,6,at_end_of_input), "5");
  91 + assertIsSame(ut,
  92 + test_fast_lexer(lexer_1,to_byte_array("gabuzobof"),none,bol,neol,0,0),
  93 + accepted(6,6,not_at_end_of_input), "6");
  94 + assertIsSame(ut,
  95 + test_fast_lexer(lexer_1,to_byte_array("gggggabuzobof"),none,bol,neol,0,0),
  96 + accepted(6,10,not_at_end_of_input), "6bis");
  97 +
  98 + /* restarting from some other state (with or without an already accepted position) */
  99 +
  100 + assertIsSame(ut,
  101 + test_fast_lexer(lexer_1,to_byte_array("bu"),none,bol,neol,0,2),
  102 + accepted(4,2,at_end_of_input), "7");
  103 + assertIsSame(ut,
  104 + test_fast_lexer(lexer_1,to_byte_array("gabuzfff"),last(4,4),bol,neol,5,5),
  105 + accepted(4,4,not_at_end_of_input), "8");
  106 + assertIsSame(ut,
  107 + test_fast_lexer(lexer_1,to_byte_array("gabuzomeu"),last(4,4),bol,neol,5,5),
  108 + accepted(6,6,not_at_end_of_input), "9");
  109 +
  110 + /* testing bol and eol */
  111 + assertIsSame(ut,
  112 + test_fast_lexer(lexer_1,to_byte_array("meu"),none,bol,neol,0,0),
  113 + accepted(6,6,not_at_end_of_input), "9");
  114 +
  115 +
  116 + unique.
  117 +
  118 +
  119 +
  120 +
  121 +
  122 +
  123 +public define UnitTestSuite
  124 + make_fast_lexer_test_suite
  125 + =
  126 + ut_suite("predefined.fast_lexer",
  127 + [
  128 + ut_fixture("fast lexer", fast_lexer_test)
  129 + ]).
  130 +
  131 +
... ...
anubis_dev/library/test/tools/line_reader.ut.anubis
... ... @@ -31,11 +31,39 @@ define One
31 31 }
32 32 }.
33 33  
  34 +define One
  35 + infinite_loop_test
  36 + (
  37 + UnitTestContext ut
  38 + ) = // reads two texts with mixed end-of-line sequences line by line and checks every line returned by read_line
  39 + if make_line_reader("mon texte\n\r\nla suite") is // text #1: "\n" immediately followed by "\r\n"
  40 + {
  41 + failure then assertIsTrue(ut, false, "#1 make_line_reader"), // the reader could not be created: record a failed assertion
  42 + success(lr) then
  43 + assertIsSuccessString(ut, read_line(lr), "mon texte", "#1 1st line");
  44 + assertIsSuccessString(ut, read_line(lr), "", "#1 2nd line"); // the empty line between the two end-of-line sequences
  45 + assertIsSuccessString(ut, read_line(lr), "la suite", "#1 3rd line");
  46 + assertIsFailure(ut, read_line(lr), "#1 EOF"); // nothing left to read: read_line is expected to fail
  47 + unique
  48 + };
  49 +
  50 + if make_line_reader("mon texte\r\n\rla suite") is // text #2: "\r\n" immediately followed by a lone "\r"
  51 + {
  52 + failure then assertIsTrue(ut, false, "#2 make_line_reader"), // the reader could not be created: record a failed assertion
  53 + success(lr) then
  54 + assertIsSuccessString(ut, read_line(lr), "mon texte", "#2 1st line");
  55 + assertIsSuccessString(ut, read_line(lr), "", "#2 2nd line"); // the lone "\r" is expected to end an empty line
  56 + assertIsSuccessString(ut, read_line(lr), "la suite", "#2 3rd line");
  57 + assertIsFailure(ut, read_line(lr), "#2 EOF"); // nothing left to read: read_line is expected to fail
  58 + unique
  59 + }.
  60 +
34 61 public define UnitTestSuite
35 62 make_line_reader_test_suite
36 63 = // the suite named "tools.line_reader"
37 64 ut_suite("tools.line_reader",
38 65 [
39 66 ut_fixture("line_reader", line_reader_test),
  67 + ut_fixture("infinite_loop", infinite_loop_test, false), // third argument 'false': the fixture is declared inactive (ignored)
40 68 ]).
41 69  
... ...
anubis_dev/library/tools/line_reader.anubis
... ... @@ -30,7 +30,7 @@ public define Maybe(String)
30 30 (
31 31 LineReader lr,
32 32 ) =
33   - if lr is line_reader(lexer, _) then
  33 + if lr is line_reader(lexer, offset) then
34 34 if lexer(unique) is
35 35 {
36 36 end_of_input then /* no more token: exit the main loop */
... ... @@ -46,7 +46,7 @@ public define Maybe(String)
46 46 if t is
47 47 {
48 48 line(l) then
49   - //print("tk: line("+l+")\n");
  49 + //print("tk: line("+l+") @ "+abs_to_decimal(offset(unique))+"\n");
50 50 forget(lexer(unique)); // reading EOL
51 51 success(l),
52 52 eol then
... ... @@ -64,6 +64,7 @@ public define Maybe(LineReader)
64 64 ) =
65 65 if make_lexer_and_automaton([
66 66 lexer_item("#r?#n", return((ByteArray b) |-> token(eol))),
  67 + lexer_item("#r", return((ByteArray b) |-> token(eol))),
67 68 lexer_item("[^\r\n]*", return((ByteArray b) |-> token(line(to_string(b))))),
68 69 ],
69 70 '#') is
... ...
anubis_dev/library/tools/unit_test.anubis
... ... @@ -18,11 +18,13 @@ type UnitTestSuiteResult:
18 18 ut_suite_result(String test_suite_name,
19 19 Word32 passed_count,
20 20 Word32 failed_count,
  21 + Word32 ignored_count,
21 22 List(UnitTestResult) fixture_results).
22 23  
23 24 type UnitTestGlobalResult:
24 25 ut_global_result(Word32 total_passed,
25 26 Word32 total_failed,
  27 + Word32 total_ignored,
26 28 List(UnitTestSuiteResult) suite_results).
27 29  
28 30 public type UnitTestContext:
... ... @@ -51,28 +53,34 @@ define UnitTestContext
51 53  
52 54 public type UnitTestFixture:
53 55 ut_fixture(String test_name,
54   - (UnitTestContext) -> One the_test).
  56 + (UnitTestContext) -> One the_test,
  57 + Bool active). // set this to false to ignore this fixture
  58 +
  59 +public define UnitTestFixture // two-argument form: builds the fixture with 'active' set to true
  60 + ut_fixture(String test_name,
  61 + (UnitTestContext) -> One the_test) = ut_fixture(test_name, the_test, true).
55 62  
56 63 public type UnitTestSuite:
57 64 ut_suite(String suite_name,
58 65 List(UnitTestFixture) fixtures).
59 66  
60   -define (Word32 /*passed*/, Word32 /*failed*/, List(UnitTestResult))
  67 +define (Word32 /*passed*/, Word32 /*failed*/, Word32 /*ignored*/, List(UnitTestResult))
61 68 run_test_suite
62 69 (
63 70 List(UnitTestFixture) tests,
64 71 String filter,
65 72 Word32 passed,
66 73 Word32 failed,
  74 + Word32 ignored,
67 75 List(UnitTestResult) results
68 76 ) =
69 77 if tests is
70 78 {
71   - [ ] then (passed, failed, reverse(results)), //print("All tests finish"),
  79 + [ ] then (passed, failed, ignored, reverse(results)), //print("All tests finish"),
72 80 [test . t] then
73 81 //print("\nTesting " + test.test_name + "... (filter = '"+filter+"')\n");
74 82 with ut = make_new_context,
75   - if length(filter) = 0 | filter = test.test_name then
  83 + if (length(filter) = 0 & active(test)) | filter = test.test_name then
76 84 the_test(test)(ut);
77 85 if *ut.failed_tests is
78 86 {
... ... @@ -81,6 +89,7 @@ define (Word32 /*passed*/, Word32 /*failed*/, List(UnitTestResult))
81 89 filter,
82 90 passed + 1,
83 91 failed,
  92 + ignored,
84 93 [ut_result(test.test_name, ok(unique)) . results]),
85 94  
86 95 [h . _] then print("F");
... ... @@ -94,13 +103,15 @@ define (Word32 /*passed*/, Word32 /*failed*/, List(UnitTestResult))
94 103 filter,
95 104 passed,
96 105 failed + 1,
  106 + ignored,
97 107 map_add(*ut.failed_tests, results))
98 108 }
99 109 else
100 110 run_test_suite(t,
101 111 filter,
102 112 passed,
103   - failed,
  113 + failed,
  114 + ignored + if active(test) then 0 else 1, // only explicitly ignored tests are counted
104 115 results)
105 116 }.
106 117  
... ... @@ -108,18 +119,19 @@ define (Word32 /*passed*/, Word32 /*failed*/, List(UnitTestResult))
108 119  
109 120  
110 121  
111   -define (Word32 /* passed */, Word32 /* failed */, List(UnitTestSuiteResult))
  122 +define (Word32 /* passed */, Word32 /* failed */, Word32 /*ignored*/, List(UnitTestSuiteResult))
112 123 execute_tests
113 124 (
114 125 List(UnitTestSuite) suites,
115 126 String filter,
116 127 Word32 passed,
117 128 Word32 failed,
  129 + Word32 ignored,
118 130 List(UnitTestSuiteResult) results
119 131 )=
120 132 if suites is
121 133 {
122   - [ ] then (passed, failed, results),
  134 + [ ] then (passed, failed, ignored, results),
123 135 [h . t] then
124 136 with filter_length = length(filter),
125 137 fixture_name_length = length(h.suite_name),
... ... @@ -127,15 +139,16 @@ define (Word32 /* passed */, Word32 /* failed */, List(UnitTestSuiteResult))
127 139 if filter_length = 0 | start_with(filter + ".", h.suite_name + ".") | start_with(h.suite_name + ".", filter + ".") then //"." is added to avoid partial name match
128 140 with new_filter = if filter_length > fixture_name_length then force(sub_string(filter, fixture_name_length + 1, filter_length - fixture_name_length - 1), "")
129 141 else "",
130   - run_test_suite(h.fixtures, new_filter, 0, 0, [])
  142 + run_test_suite(h.fixtures, new_filter, 0, 0, 0, [])
131 143 else
132   - (0, 0, []),
133   - if all_fixture_results is (fixture_passed, fixture_failed, fixture_results) then
  144 + (0, 0, 0, []),
  145 + if all_fixture_results is (fixture_passed, fixture_failed, fixture_ignored, fixture_results) then
134 146 execute_tests(t,
135 147 filter,
136 148 passed + fixture_passed,
137 149 failed + fixture_failed,
138   - [ut_suite_result(h.suite_name, fixture_passed, fixture_failed, fixture_results)
  150 + ignored + fixture_ignored,
  151 + [ut_suite_result(h.suite_name, fixture_passed, fixture_failed, fixture_ignored, fixture_results)
139 152 . results])
140 153 }.
141 154  
... ... @@ -196,7 +209,7 @@ public define One
196 209 )=
197 210 with start_time = (UTime)unow,
198 211 filter = force(nth(0, args), ""),
199   - if execute_tests(suites, filter, 0, 0, []) is (passed, failed, results) then
  212 + if execute_tests(suites, filter, 0, 0, 0, []) is (passed, failed, ignored, results) then
200 213 println("");
201 214 output_suite_results(results);
202 215 println("----------------------------------------------------------------------");
... ... @@ -204,9 +217,13 @@ public define One
204 217 println("Ran " + (passed + failed) + " tests in " +
205 218 to_decimal(duration.seconds) + "." + zero_pad_n(3, duration.microseconds \ 1000) + "s");
206 219 println("");
  220 + println("Tests passed = " + passed);
  221 + println("Tests failed = " + failed);
  222 + println("Tests ignored = " + ignored);
  223 + println("");
207 224 (
208 225 if failed >+ 0 then
209   - println("FAILED (failures=" + failed + ")")
  226 + println("FAILED")
210 227 else
211 228 println("OK")
212 229 );
... ...