Commit 92c7ed18c2ee6e10f09865d2d577af4a0fcf26a9 (1 parent: bde59c8c)
Fix LineReader bug (infinite loop); add a unit test to FastLexer revealing the bug.
Showing 9 changed files, with 347 additions and 225 deletions.
Show diff stats
anubis_dev/library/lexical_analysis/fast_lexer.anubis
| ... | ... | @@ -447,24 +447,24 @@ public define Maybe(LexingStream) make_lexing_stream(SSL_Connection stream, |
| 447 | 447 | file, in particular at the actual definition of type 'LexingStream', and write down |
| 448 | 448 | another such function (in a file of yours within which you put a 'read |
| 449 | 449 | fast_lexer.anubis'). |
| 450 | - | |
| 451 | - | |
| 452 | - To each lexing stream is attached a function for counting of type 'One -> Int'. When | |
| 453 | - applied to 'unique', this function returns the number of bytes already read from the | |
| 454 | - lexing stream, i.e. the position from which the reading of te next token will | |
| 455 | - occur. This function is obtained as follows: | |
| 450 | + | |
| 451 | + | |
| 452 | + To each lexing stream is attached a function for counting of type 'One -> Int'. When | |
| 453 | + applied to 'unique', this function returns the number of bytes already read from the | |
 | 454 | + lexing stream, i.e. the position from which the reading of the next token will | 
| 455 | + occur. This function is obtained as follows: | |
| 456 | 456 | |
| 457 | -public define One -> Int | |
| 458 | - offset_counter | |
| 459 | - ( | |
| 460 | - LexingStream ls | |
| 461 | - ). | |
| 457 | +public define One -> Int | |
| 458 | + offset_counter | |
| 459 | + ( | |
| 460 | + LexingStream ls | |
| 461 | + ). | |
| 462 | + | |
 | 463 | + If you need this function, it is recommended to call 'offset_counter' only once just | 
| 464 | + after the lexing stream is created. | |
| 462 | 465 | |
| 463 | - If youn need this function, it is recommended to call 'offset_counter' only once just | |
| 464 | - after the lexing stream is created. | |
| 465 | 466 | |
| 466 | 467 | |
| 467 | - | |
| 468 | 468 | *** (4) Constructing a lexer. |
| 469 | 469 | |
| 470 | 470 | *** (4.1) Construction. |
| ... | ... | @@ -580,7 +580,7 @@ public define Printable_tree |
| 580 | 580 | |
| 581 | 581 | read tools/basis.anubis |
| 582 | 582 | read tools/streams.anubis |
| 583 | - | |
| 583 | +read system/convert.anubis | |
| 584 | 584 | |
| 585 | 585 | -------------------------------- Table of Contents ------------------------------------ |
| 586 | 586 | |
| ... | ... | @@ -740,13 +740,7 @@ define Maybe(ExChar) |
| 740 | 740 | |
| 741 | 741 | *** [1.5.1] Truncating a Word32 to a Word8. |
| 742 | 742 | |
| 743 | -define Word8 | |
| 744 | - truncate_to_Word8 | |
| 745 | - ( | |
| 746 | - Word32 x | |
| 747 | - ) = | |
| 748 | - if x is word32(l1,_) then if l1 is word16(l2,_) then l2. | |
| 749 | - | |
| 743 | + moved to system/convert.anubis | |
| 750 | 744 | |
| 751 | 745 | |
| 752 | 746 | *** [1.5.2] Creating a range of consecutive characters. |
| ... | ... | @@ -1223,24 +1217,24 @@ public define String |
| 1223 | 1217 | public type LexingStream: |
| 1224 | 1218 | lexing_stream |
| 1225 | 1219 | ( |
| 1226 | - Var(ByteArray) buffer_v, // the current buffer | |
| 1227 | - Var(Int) start_v, // start of lexem in buffer | |
| 1228 | - Var(FastLexerLastAccepted) last_accept_v, // last accepting position (if any) | |
| 1229 | - Var(Int) current_v, // position of reading in buffer | |
| 1230 | - Int -> Maybe(One) reload_buffer, // command for loading the sequel in the buffer | |
| 1231 | - Var(Int) current_offset_v // accumulator for current offset | |
| 1220 | + Var(ByteArray) buffer_v, // the current buffer | |
| 1221 | + Var(Int) start_v, // start of lexem in buffer | |
| 1222 | + Var(FastLexerLastAccepted) last_accept_v, // last accepting position (if any) | |
| 1223 | + Var(Int) current_v, // position of reading in buffer | |
| 1224 | + Int -> Maybe(One) reload_buffer, // command for loading the sequel in the buffer | |
| 1225 | + Var(Int) current_offset_v // accumulator for current offset | |
| 1232 | 1226 | ). |
| 1233 | 1227 | |
| 1234 | - | |
| 1235 | -public define One -> Int | |
| 1236 | - offset_counter | |
| 1237 | - ( | |
| 1238 | - LexingStream ls | |
| 1239 | - ) = | |
| 1240 | - if ls is lexing_stream(_,_,_,_,_,v) then | |
| 1241 | - (One u) |-> *v. | |
| 1242 | - | |
| 1243 | - | |
| 1228 | + | |
| 1229 | +public define One -> Int | |
| 1230 | + offset_counter | |
| 1231 | + ( | |
| 1232 | + LexingStream ls | |
| 1233 | + ) = | |
| 1234 | + if ls is lexing_stream(_,_,_,_,_,v) then | |
| 1235 | + (One u) |-> *v. | |
| 1236 | + | |
| 1237 | + | |
| 1244 | 1238 | While we are reading a lexeme, we keep the starting position (offset of first character |
| 1245 | 1239 | of the current lexeme) in 'start_v' so as to be able to extract the lexeme. We also |
| 1246 | 1240 | keep the last position at which a lexeme was accepted. This is because the lexer always |
| ... | ... | @@ -1272,8 +1266,8 @@ public define LexingStream |
| 1272 | 1266 | var(0), // starting position |
| 1273 | 1267 | var(none), // last accepting position |
| 1274 | 1268 | var(0), // current position |
| 1275 | - (Int u) |-> failure, // buffer cannot be reloaded | |
| 1276 | - var(0)). | |
| 1269 | + (Int u) |-> failure, // buffer cannot be reloaded | |
| 1270 | + var(0)). | |
| 1277 | 1271 | |
| 1278 | 1272 | |
| 1279 | 1273 | |
| ... | ... | @@ -1308,7 +1302,7 @@ public define Maybe(LexingStream) |
| 1308 | 1302 | start_v = var((Int)0), |
| 1309 | 1303 | last_accepted_v = var((FastLexerLastAccepted)none), |
| 1310 | 1304 | current_v = var((Int)0), |
| 1311 | - offset_v = var((Int)0), | |
| 1305 | + offset_v = var((Int)0), | |
| 1312 | 1306 | reload_buffer = (Int i) |-> |
| 1313 | 1307 | if read(stream,buffer_size,timeout) is |
| 1314 | 1308 | { |
| ... | ... | @@ -1317,13 +1311,13 @@ public define Maybe(LexingStream) |
| 1317 | 1311 | ok(more) then |
| 1318 | 1312 | //print("Buffer reloaded ("+abs_to_decimal(length(more))+" bytes).\n"); |
| 1319 | 1313 | if length(more) = 0 |
| 1320 | - then (with old_buffer = *buffer_v, | |
| 1321 | - old_length = length(old_buffer), | |
| 1322 | - dropped = *start_v, // number of bytes dropped from old buffer | |
| 1314 | + then (with old_buffer = *buffer_v, | |
| 1315 | + old_length = length(old_buffer), | |
| 1316 | + dropped = *start_v, // number of bytes dropped from old buffer | |
| 1323 | 1317 | buffer_v <- extract(old_buffer,dropped,old_length); |
| 1324 | 1318 | start_v <- 0; |
| 1325 | 1319 | current_v <- *current_v - dropped; |
| 1326 | - /* reloading does not change the current offset */ | |
| 1320 | + /* reloading does not change the current offset */ | |
| 1327 | 1321 | last_accepted_v <- |
| 1328 | 1322 | if *last_accepted_v is |
| 1329 | 1323 | { |
| ... | ... | @@ -1337,7 +1331,7 @@ public define Maybe(LexingStream) |
| 1337 | 1331 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
| 1338 | 1332 | start_v <- 0; |
| 1339 | 1333 | current_v <- *current_v - dropped; |
| 1340 | - /* reloading does not change the current offset */ | |
| 1334 | + /* reloading does not change the current offset */ | |
| 1341 | 1335 | last_accepted_v <- |
| 1342 | 1336 | if *last_accepted_v is |
| 1343 | 1337 | { |
| ... | ... | @@ -1350,8 +1344,8 @@ public define Maybe(LexingStream) |
| 1350 | 1344 | start_v, |
| 1351 | 1345 | last_accepted_v, |
| 1352 | 1346 | current_v, |
| 1353 | - reload_buffer, | |
| 1354 | - offset_v)) | |
| 1347 | + reload_buffer, | |
| 1348 | + offset_v)) | |
| 1355 | 1349 | }. |
| 1356 | 1350 | |
| 1357 | 1351 | |
| ... | ... | @@ -1390,7 +1384,7 @@ public define Maybe(LexingStream) |
| 1390 | 1384 | start_v = var((Int)0), |
| 1391 | 1385 | last_accepted_v = var((FastLexerLastAccepted)none), |
| 1392 | 1386 | current_v = var((Int)0), |
| 1393 | - offset_v = var((Int)0), | |
| 1387 | + offset_v = var((Int)0), | |
| 1394 | 1388 | reload_buffer = (Int i) |-> |
| 1395 | 1389 | if (Maybe(ByteArray))read(stream,buffer_size,timeout) is |
| 1396 | 1390 | { |
| ... | ... | @@ -1403,7 +1397,7 @@ public define Maybe(LexingStream) |
| 1403 | 1397 | buffer_v <- extract(old_buffer,dropped,old_length); |
| 1404 | 1398 | start_v <- 0; |
| 1405 | 1399 | current_v <- *current_v - dropped; |
| 1406 | - /* reloading does not change the current offset */ | |
| 1400 | + /* reloading does not change the current offset */ | |
| 1407 | 1401 | last_accepted_v <- |
| 1408 | 1402 | if *last_accepted_v is |
| 1409 | 1403 | { |
| ... | ... | @@ -1417,7 +1411,7 @@ public define Maybe(LexingStream) |
| 1417 | 1411 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
| 1418 | 1412 | start_v <- 0; |
| 1419 | 1413 | current_v <- *current_v - dropped; |
| 1420 | - /* reloading does not change the current offset */ | |
| 1414 | + /* reloading does not change the current offset */ | |
| 1421 | 1415 | last_accepted_v <- |
| 1422 | 1416 | if *last_accepted_v is |
| 1423 | 1417 | { |
| ... | ... | @@ -1430,8 +1424,8 @@ public define Maybe(LexingStream) |
| 1430 | 1424 | start_v, |
| 1431 | 1425 | last_accepted_v, |
| 1432 | 1426 | current_v, |
| 1433 | - reload_buffer, | |
| 1434 | - offset_v)) | |
| 1427 | + reload_buffer, | |
| 1428 | + offset_v)) | |
| 1435 | 1429 | }. |
| 1436 | 1430 | |
| 1437 | 1431 | |
| ... | ... | @@ -2289,9 +2283,9 @@ define One -> LexerOutput($Token) |
| 2289 | 2283 | Word16 starting_state) -> FastLexerOutput lexer, |
| 2290 | 2284 | MVar(LexerAction($Token)) actions |
| 2291 | 2285 | ) = |
| 2292 | - if stream is lexing_stream(buffer_v,start_v,last_accept_v,current_v,reload_buffer,offset_v) then | |
| 2293 | - (One _) |-l-> with old_current = *current_v, | |
| 2294 | - if lexer(*buffer_v, | |
| 2286 | + if stream is lexing_stream(buffer_v,start_v,last_accept_v,current_v,reload_buffer,offset_v) then | |
| 2287 | + (One _) |-l-> with old_current = *current_v, | |
| 2288 | + if lexer(*buffer_v, | |
| 2295 | 2289 | *last_accept_v, |
| 2296 | 2290 | *current_v, |
| 2297 | 2291 | 0) // reading a new token always starts in state 0 |
| ... | ... | @@ -2303,7 +2297,7 @@ define One -> LexerOutput($Token) |
| 2303 | 2297 | not_at_end_of_input then |
| 2304 | 2298 | with result = (LexerOutput($Token))error(extract(*buffer_v,*start_v,end)), |
| 2305 | 2299 | current_v <- end+1; |
| 2306 | - offset_v <- *offset_v + (end + 1 - old_current); | |
| 2300 | + offset_v <- *offset_v + (end + 1 - old_current); | |
| 2307 | 2301 | start_v <- end+1; |
| 2308 | 2302 | last_accept_v <- none; |
| 2309 | 2303 | result, |
| ... | ... | @@ -2326,7 +2320,7 @@ define One -> LexerOutput($Token) |
| 2326 | 2320 | { |
| 2327 | 2321 | ignore then |
| 2328 | 2322 | current_v <- end; |
| 2329 | - offset_v <- *offset_v + (end - old_current); | |
| 2323 | + offset_v <- *offset_v + (end - old_current); | |
| 2330 | 2324 | start_v <- end; |
| 2331 | 2325 | last_accept_v <- none; |
| 2332 | 2326 | l(unique), // ignore and try to read the next token |
| ... | ... | @@ -2334,7 +2328,7 @@ define One -> LexerOutput($Token) |
| 2334 | 2328 | return(f) then |
| 2335 | 2329 | with result = f(extract(*buffer_v,*start_v,end)), |
| 2336 | 2330 | current_v <- end; |
| 2337 | - offset_v <- *offset_v + (end - old_current); | |
| 2331 | + offset_v <- *offset_v + (end - old_current); | |
| 2338 | 2332 | start_v <- end; |
| 2339 | 2333 | last_accept_v <- none; |
| 2340 | 2334 | result |
| ... | ... | @@ -2351,7 +2345,7 @@ define One -> LexerOutput($Token) |
| 2351 | 2345 | return(f) then |
| 2352 | 2346 | with result = f(extract(*buffer_v,*start_v,end)), |
| 2353 | 2347 | current_v <- end; |
| 2354 | - offset_v <- *offset_v + (end - old_current); | |
| 2348 | + offset_v <- *offset_v + (end - old_current); | |
| 2355 | 2349 | start_v <- end; |
| 2356 | 2350 | last_accept_v <- none; |
| 2357 | 2351 | result | ... | ... |
anubis_dev/library/system/convert.anubis
| ... | ... | @@ -92,18 +92,18 @@ public define inline ByteArray |
| 92 | 92 | |
| 93 | 93 | // Obsolete name, should be removed |
| 94 | 94 | public define inline Word8 |
| 95 | - truncate_to_word8 | |
| 96 | - ( | |
| 97 | - Word32 value | |
| 98 | - ) = | |
| 99 | - truncate_to_Word8(to_Int(value)). | |
| 95 | + truncate_to_word8 | |
| 96 | + ( | |
| 97 | + Word32 x | |
| 98 | + ) = | |
| 99 | + if x is word32(l1,_) then if l1 is word16(l2,_) then l2. | |
| 100 | 100 | |
| 101 | 101 | public define inline Word8 |
| 102 | - truncate_to_Word8 | |
| 103 | - ( | |
| 104 | - Word32 value | |
| 105 | - ) = | |
| 106 | - truncate_to_Word8(to_Int(value)). | |
| 102 | + truncate_to_Word8 | |
| 103 | + ( | |
| 104 | + Word32 x | |
| 105 | + ) = | |
| 106 | + if x is word32(l1,_) then if l1 is word16(l2,_) then l2. | |
| 107 | 107 | |
| 108 | 108 | public define inline Word32 |
| 109 | 109 | word8_to_Word32 | ... | ... |
anubis_dev/library/test/Anubis UnitTest.aproj
| ... | ... | @@ -32,6 +32,7 @@ |
| 32 | 32 | <Import Project="$(AnubisBinPath)\Anubis.Build.targets" /> |
| 33 | 33 | <ItemGroup> |
| 34 | 34 | <Compile Include="all_unit_test.anubis" /> |
| 35 | + <Compile Include="lexical_analysis\fast_lexer.ut.anubis" /> | |
| 35 | 36 | <Compile Include="predefined\date_and_time.unit_test.anubis" /> |
| 36 | 37 | <Compile Include="predefined\decimal_scan.unit_test.anubis" /> |
| 37 | 38 | <Compile Include="predefined\fast_lexer.unit_test.anubis" /> |
| ... | ... | @@ -48,6 +49,7 @@ |
| 48 | 49 | <Compile Include="tools\utf-8.unit_test.anubis" /> |
| 49 | 50 | </ItemGroup> |
| 50 | 51 | <ItemGroup> |
| 52 | + <Folder Include="lexical_analysis" /> | |
| 51 | 53 | <Folder Include="predefined" /> |
| 52 | 54 | <Folder Include="system" /> |
| 53 | 55 | <Folder Include="tools" /> | ... | ... |
anubis_dev/library/test/all_unit_test.anubis
| ... | ... | @@ -10,6 +10,7 @@ read test/predefined/sqlite.unit_test.anubis |
| 10 | 10 | read test/predefined/fast_lexer.unit_test.anubis |
| 11 | 11 | |
| 12 | 12 | read test/system/convert.unit_test.anubis |
| 13 | +read test/lexical_analysis/fast_lexer.ut.anubis | |
| 13 | 14 | read test/system/message_queue.unit_test.anubis |
| 14 | 15 | read test/system/message_transceiver.unit_test.anubis |
| 15 | 16 | read test/system/string.unit_test.anubis |
| ... | ... | @@ -37,7 +38,8 @@ define List(UnitTestSuite) |
| 37 | 38 | make_SQLite_test_suite, |
| 38 | 39 | make_UTF8_test_suite, |
| 39 | 40 | make_line_reader_test_suite, |
| 40 | - make_fast_lexer_test_suite | |
| 41 | + make_fast_lexer_test_suite, | |
| 42 | + make_fast_lexer_test_suite2, | |
| 41 | 43 | ] |
| 42 | 44 | . |
| 43 | 45 | ... | ... |
anubis_dev/library/test/lexical_analysis/fast_lexer.ut.anubis
0 → 100644
| 1 | +/* | |
| 2 | + * Created by PyramIDE. | |
| 3 | + * User: ricard | |
| 4 | + * Date: 19/09/2008 | |
| 5 | + * Time: 14:24 | |
| 6 | + * | |
| 7 | + */ | |
| 8 | + | |
| 9 | +read tools/unit_test.anubis | |
| 10 | +read lexical_analysis/fast_lexer.anubis | |
| 11 | + | |
| 12 | +type InfLoopToken: | |
| 13 | + line(String), | |
| 14 | + eol. | |
| 15 | + | |
| 16 | +define String | |
| 17 | + token_to_string | |
| 18 | + ( | |
| 19 | + LexerOutput(InfLoopToken) output | |
| 20 | + ) = | |
| 21 | + if output is | |
| 22 | + { | |
| 23 | + end_of_input then /* no more token: exit the main loop */ | |
| 24 | + "End of input", | |
| 25 | + | |
| 26 | + error(b) then | |
| 27 | + /* should never happen with this lexer (see the above comment) */ | |
| 28 | + "Lexer error: ["+to_string(b)+"]", | |
| 29 | + | |
| 30 | + token(t) then | |
| 31 | + /* a token has been recognized */ | |
| 32 | + if t is | |
| 33 | + { | |
| 34 | + line(l) then | |
| 35 | + "token: line("+l+")" | |
| 36 | + eol then | |
| 37 | + "token: eol" | |
| 38 | + } | |
| 39 | + }. | |
| 40 | + | |
| 41 | + | |
| 42 | +define One | |
| 43 | + infinite_loop_test | |
| 44 | + ( | |
| 45 | + UnitTestContext ut | |
| 46 | + ) = | |
| 47 | + if make_lexer_and_automaton([ | |
| 48 | + lexer_item("#r?#n", return((ByteArray b) |-> token(eol))), | |
| 49 | + lexer_item("[^\r\n]*", return((ByteArray b) |-> token(line(to_string(b))))), | |
| 50 | + ], | |
| 51 | + '#') is | |
| 52 | + { | |
| 53 | + error(msg) then print("Syntax error in regular expression: "+to_English(msg)+"\n"); assertIsTrue(ut, false, "Can't create lexer"), | |
| 54 | + ok(p) then if p is (lexer, automaton) then | |
| 55 | + with next_token = lexer(make_lexing_stream("mon texte\n\r\nla suite")), | |
| 56 | + assertIsSame(ut, next_token(unique), token(line("mon texte")), token_to_string, "#1 1st line"); | |
| 57 | + assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#1 1st EOF"); | |
| 58 | + //assertIsSame(ut, next_token(unique), token(line("")), token_to_string, "#1 2nd line"); | |
| 59 | + assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#1 2nd EOF"); | |
| 60 | + assertIsSame(ut, next_token(unique), token(line("la suite")), token_to_string, "#1 3rd line"); | |
| 61 | + //assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#1 3rd EOF"); | |
| 62 | + assertIsSame(ut, next_token(unique), end_of_input, token_to_string, "#1 EOF"); | |
| 63 | + | |
| 64 | + | |
| 65 | + with next_token = lexer(make_lexing_stream("mon texte\r\n\rla suite")), | |
| 66 | + assertIsSame(ut, next_token(unique), token(line("mon texte")), token_to_string, "#2 1st line"); | |
| 67 | + assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#2 1st EOF"); | |
| 68 | + assertIsSame(ut, next_token(unique), error(to_byte_array("\r")), token_to_string, "#2 error"); | |
| 69 | + //assertIsSame(ut, next_token(unique), token(line("")), token_to_string, "#2 2nd line"); | |
| 70 | + //assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#2 2nd EOF"); | |
| 71 | + //assertIsSame(ut, next_token(unique), token(line("la suite")), token_to_string, "#2 3rd line"); | |
| 72 | + //assertIsSame(ut, next_token(unique), token(eol), token_to_string, "#2 3rd EOF"); | |
| 73 | + //assertIsSame(ut, next_token(unique), end_of_input, token_to_string, "#2 EOF"); | |
| 74 | + unique | |
| 75 | + }. | |
| 76 | + | |
| 77 | + | |
| 78 | + | |
| 79 | +public define UnitTestSuite | |
| 80 | + make_fast_lexer_test_suite2 | |
| 81 | + = | |
| 82 | + ut_suite("lexical_analysis.fast_lexer", | |
| 83 | + [ | |
| 84 | + ut_fixture("infinite_loop", infinite_loop_test) | |
| 85 | + ]). | |
| 86 | + | |
| 87 | + | ... | ... |
anubis_dev/library/test/predefined/fast_lexer.unit_test.anubis
| 1 | - | |
| 2 | - | |
| 3 | -read tools/unit_test.anubis | |
| 4 | - | |
| 5 | - | |
| 6 | -define String | |
| 7 | - format | |
| 8 | - ( | |
| 9 | - AtEndOfInput a | |
| 10 | - ) = | |
| 11 | - if a is | |
| 12 | - { | |
| 13 | - not_at_end_of_input then "not_at_end_of_input", | |
| 14 | - at_end_of_input then "at_end_of_input" | |
| 15 | - }. | |
| 16 | - | |
| 17 | - | |
| 18 | -define FastLexerOutput | |
| 19 | - test_fast_lexer | |
| 20 | - ( | |
| 21 | - List(FastLexerState) lexer, | |
| 22 | - ByteArray input, | |
| 23 | - FastLexerLastAccepted last_accepted, | |
| 24 | - Int position, | |
| 25 | - Word16 state | |
| 26 | - ) = | |
| 27 | - if make_fast_lexer(lexer) is | |
| 28 | - { | |
| 29 | - unknown_state(n) then print("fast lexer unknown state: "+to_decimal(n)+"\n"); alert, | |
| 30 | - too_many_states then print("too many states.\n"); alert, | |
| 31 | - ok(fl) then with result = fl(input,last_accepted,position,state), | |
| 32 | - | |
| 33 | - if result is | |
| 34 | - { | |
| 35 | - rejected(w,e,a) then | |
| 36 | - print("\nrejected("+to_decimal(w)+","+abs_to_decimal(e)+","+format(a)+")\n"), | |
| 37 | - accepted(w,e,a,_) then | |
| 38 | - print("\naccepted("+to_decimal(w)+","+abs_to_decimal(e)+","+format(a)+")\n") | |
| 39 | - }; | |
| 40 | - | |
| 41 | - result | |
| 42 | - }. | |
| 43 | - | |
| 44 | - | |
| 45 | - | |
| 46 | - A lexer accepting "g+abu", "g+abuzo", | |
| 47 | - | |
| 48 | -define List(FastLexerState) | |
| 49 | - lexer_1 | |
| 50 | - = | |
| 51 | - [ | |
| 52 | - /* state 0 */ rejecting([transition('g',1)]), | |
| 53 | - /* state 1 */ rejecting([transition('a',2),transition('g',1)]), | |
| 54 | - /* state 2 */ rejecting([transition('b',3)]), | |
| 55 | - /* state 3 */ rejecting([transition('u',4)]), | |
| 56 | - /* state 4 */ accepting([transition('z',5)]), | |
| 57 | - /* state 5 */ rejecting([transition('o',6)]), | |
| 58 | - /* state 6 */ accepting([ ]) | |
| 59 | - ]. | |
| 60 | - | |
| 61 | - | |
| 62 | -define One | |
| 63 | - fast_lexer_test | |
| 64 | - ( | |
| 65 | - UnitTestContext ut | |
| 66 | - ) = | |
| 67 | - | |
| 68 | - /* starting in state 0, position 0 */ | |
| 69 | - assertIsSame(ut, | |
| 70 | - test_fast_lexer(lexer_1,to_byte_array("gab"),none,0,0), | |
| 71 | - rejected(3,3,at_end_of_input), "1"); | |
| 72 | - unique. | |
| 73 | - | |
| 74 | - | |
| 75 | - assertIsSame(ut, | |
| 76 | - test_fast_lexer(lexer_1,to_byte_array("gabu"),none,bol,neol,0,0), | |
| 77 | - accepted(4,4,at_end_of_input), "2"); | |
| 78 | - assertIsSame(ut, | |
| 79 | - test_fast_lexer(lexer_1,to_byte_array("ggggabu"),none,bol,neol,0,0), | |
| 80 | - accepted(4,7,at_end_of_input), "2bis"); | |
| 81 | - assertIsSame(ut, | |
| 82 | - test_fast_lexer(lexer_1,to_byte_array("gabuz"),none,bol,neol,0,0), | |
| 83 | - accepted(4,4,at_end_of_input), "3"); | |
| 84 | - assertIsSame(ut, | |
| 85 | - test_fast_lexer(lexer_1,to_byte_array("gmbuz"),none,bol,neol,0,0), | |
| 86 | - rejected(1,1,not_at_end_of_input),"4"); | |
| 87 | - assertIsSame(ut, | |
| 88 | - test_fast_lexer(lexer_1,to_byte_array("gabuzo"),none,bol,neol,0,0), | |
| 89 | - accepted(6,6,at_end_of_input), "5"); | |
| 90 | - assertIsSame(ut, | |
| 91 | - test_fast_lexer(lexer_1,to_byte_array("gabuzobof"),none,bol,neol,0,0), | |
| 92 | - accepted(6,6,not_at_end_of_input), "6"); | |
| 93 | - assertIsSame(ut, | |
| 94 | - test_fast_lexer(lexer_1,to_byte_array("gggggabuzobof"),none,bol,neol,0,0), | |
| 95 | - accepted(6,10,not_at_end_of_input), "6bis"); | |
| 96 | - | |
| 97 | - /* restarting from some other state (with or without an already accepted position) */ | |
| 98 | - | |
| 99 | - assertIsSame(ut, | |
| 100 | - test_fast_lexer(lexer_1,to_byte_array("bu"),none,bol,neol,0,2), | |
| 101 | - accepted(4,2,at_end_of_input), "7"); | |
| 102 | - assertIsSame(ut, | |
| 103 | - test_fast_lexer(lexer_1,to_byte_array("gabuzfff"),last(4,4),bol,neol,5,5), | |
| 104 | - accepted(4,4,not_at_end_of_input), "8"); | |
| 105 | - assertIsSame(ut, | |
| 106 | - test_fast_lexer(lexer_1,to_byte_array("gabuzomeu"),last(4,4),bol,neol,5,5), | |
| 107 | - accepted(6,6,not_at_end_of_input), "9"); | |
| 108 | - | |
| 109 | - /* testing bol and eol */ | |
| 110 | - assertIsSame(ut, | |
| 111 | - test_fast_lexer(lexer_1,to_byte_array("meu"),none,bol,neol,0,0), | |
| 112 | - accepted(6,6,not_at_end_of_input), "9"); | |
| 113 | - | |
| 114 | - | |
| 115 | - unique. | |
| 116 | - | |
| 117 | - | |
| 118 | - | |
| 119 | - | |
| 120 | - | |
| 121 | - | |
| 122 | - | |
| 123 | -public define UnitTestSuite | |
| 124 | - make_fast_lexer_test_suite | |
| 125 | - = | |
| 126 | - ut_suite("predefined.fast_lexer", | |
| 127 | - [ | |
| 128 | - ut_fixture("fast lexer", fast_lexer_test) | |
| 129 | - ]). | |
| 130 | - | |
| 131 | - | |
| 132 | -global define One | |
| 133 | - fast_lexer_unit_test | |
| 134 | - ( | |
| 135 | - List(String) args | |
| 136 | - )= | |
| 137 | - execute_tests([make_fast_lexer_test_suite], args). | |
| 138 | - | |
| 139 | - | |
| 140 | 1 | \ No newline at end of file |
| 2 | + | |
| 3 | + | |
| 4 | +read tools/unit_test.anubis | |
| 5 | + | |
| 6 | + | |
| 7 | +define String | |
| 8 | + format | |
| 9 | + ( | |
| 10 | + AtEndOfInput a | |
| 11 | + ) = | |
| 12 | + if a is | |
| 13 | + { | |
| 14 | + not_at_end_of_input then "not_at_end_of_input", | |
| 15 | + at_end_of_input then "at_end_of_input" | |
| 16 | + }. | |
| 17 | + | |
| 18 | + | |
| 19 | +define FastLexerOutput | |
| 20 | + test_fast_lexer | |
| 21 | + ( | |
| 22 | + List(FastLexerState) lexer, | |
| 23 | + ByteArray input, | |
| 24 | + FastLexerLastAccepted last_accepted, | |
| 25 | + Int position, | |
| 26 | + Word16 state | |
| 27 | + ) = | |
| 28 | + if make_fast_lexer(lexer) is | |
| 29 | + { | |
| 30 | + unknown_state(n) then print("fast lexer unknown state: "+to_decimal(n)+"\n"); alert, | |
| 31 | + too_many_states then print("too many states.\n"); alert, | |
| 32 | + ok(fl) then with result = fl(input,last_accepted,position,state), | |
| 33 | + | |
| 34 | + if result is | |
| 35 | + { | |
| 36 | + rejected(w,e,a) then | |
| 37 | + print("\nrejected("+to_decimal(w)+","+abs_to_decimal(e)+","+format(a)+")\n"), | |
| 38 | + accepted(w,e,a,_) then | |
| 39 | + print("\naccepted("+to_decimal(w)+","+abs_to_decimal(e)+","+format(a)+")\n") | |
| 40 | + }; | |
| 41 | + | |
| 42 | + result | |
| 43 | + }. | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + A lexer accepting "g+abu", "g+abuzo", | |
| 48 | + | |
| 49 | +define List(FastLexerState) | |
| 50 | + lexer_1 | |
| 51 | + = | |
| 52 | + [ | |
| 53 | + /* state 0 */ rejecting([transition('g',1)]), | |
| 54 | + /* state 1 */ rejecting([transition('a',2),transition('g',1)]), | |
| 55 | + /* state 2 */ rejecting([transition('b',3)]), | |
| 56 | + /* state 3 */ rejecting([transition('u',4)]), | |
| 57 | + /* state 4 */ accepting([transition('z',5)]), | |
| 58 | + /* state 5 */ rejecting([transition('o',6)]), | |
| 59 | + /* state 6 */ accepting([ ]) | |
| 60 | + ]. | |
| 61 | + | |
| 62 | + | |
| 63 | +define One | |
| 64 | + fast_lexer_test | |
| 65 | + ( | |
| 66 | + UnitTestContext ut | |
| 67 | + ) = | |
| 68 | + | |
| 69 | + /* starting in state 0, position 0 */ | |
| 70 | + assertIsSame(ut, | |
| 71 | + test_fast_lexer(lexer_1,to_byte_array("gab"),none,0,0), | |
| 72 | + rejected(3,3,at_end_of_input), "1"); | |
| 73 | + unique. | |
| 74 | + | |
| 75 | + | |
| 76 | + assertIsSame(ut, | |
| 77 | + test_fast_lexer(lexer_1,to_byte_array("gabu"),none,bol,neol,0,0), | |
| 78 | + accepted(4,4,at_end_of_input), "2"); | |
| 79 | + assertIsSame(ut, | |
| 80 | + test_fast_lexer(lexer_1,to_byte_array("ggggabu"),none,bol,neol,0,0), | |
| 81 | + accepted(4,7,at_end_of_input), "2bis"); | |
| 82 | + assertIsSame(ut, | |
| 83 | + test_fast_lexer(lexer_1,to_byte_array("gabuz"),none,bol,neol,0,0), | |
| 84 | + accepted(4,4,at_end_of_input), "3"); | |
| 85 | + assertIsSame(ut, | |
| 86 | + test_fast_lexer(lexer_1,to_byte_array("gmbuz"),none,bol,neol,0,0), | |
| 87 | + rejected(1,1,not_at_end_of_input),"4"); | |
| 88 | + assertIsSame(ut, | |
| 89 | + test_fast_lexer(lexer_1,to_byte_array("gabuzo"),none,bol,neol,0,0), | |
| 90 | + accepted(6,6,at_end_of_input), "5"); | |
| 91 | + assertIsSame(ut, | |
| 92 | + test_fast_lexer(lexer_1,to_byte_array("gabuzobof"),none,bol,neol,0,0), | |
| 93 | + accepted(6,6,not_at_end_of_input), "6"); | |
| 94 | + assertIsSame(ut, | |
| 95 | + test_fast_lexer(lexer_1,to_byte_array("gggggabuzobof"),none,bol,neol,0,0), | |
| 96 | + accepted(6,10,not_at_end_of_input), "6bis"); | |
| 97 | + | |
| 98 | + /* restarting from some other state (with or without an already accepted position) */ | |
| 99 | + | |
| 100 | + assertIsSame(ut, | |
| 101 | + test_fast_lexer(lexer_1,to_byte_array("bu"),none,bol,neol,0,2), | |
| 102 | + accepted(4,2,at_end_of_input), "7"); | |
| 103 | + assertIsSame(ut, | |
| 104 | + test_fast_lexer(lexer_1,to_byte_array("gabuzfff"),last(4,4),bol,neol,5,5), | |
| 105 | + accepted(4,4,not_at_end_of_input), "8"); | |
| 106 | + assertIsSame(ut, | |
| 107 | + test_fast_lexer(lexer_1,to_byte_array("gabuzomeu"),last(4,4),bol,neol,5,5), | |
| 108 | + accepted(6,6,not_at_end_of_input), "9"); | |
| 109 | + | |
| 110 | + /* testing bol and eol */ | |
| 111 | + assertIsSame(ut, | |
| 112 | + test_fast_lexer(lexer_1,to_byte_array("meu"),none,bol,neol,0,0), | |
| 113 | + accepted(6,6,not_at_end_of_input), "9"); | |
| 114 | + | |
| 115 | + | |
| 116 | + unique. | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
| 122 | + | |
| 123 | +public define UnitTestSuite | |
| 124 | + make_fast_lexer_test_suite | |
| 125 | + = | |
| 126 | + ut_suite("predefined.fast_lexer", | |
| 127 | + [ | |
| 128 | + ut_fixture("fast lexer", fast_lexer_test) | |
| 129 | + ]). | |
| 130 | + | |
| 131 | + | ... | ... |
anubis_dev/library/test/tools/line_reader.ut.anubis
| ... | ... | @@ -31,11 +31,39 @@ define One |
| 31 | 31 | } |
| 32 | 32 | }. |
| 33 | 33 | |
| 34 | +define One | |
| 35 | + infinite_loop_test | |
| 36 | + ( | |
| 37 | + UnitTestContext ut | |
| 38 | + ) = | |
| 39 | + if make_line_reader("mon texte\n\r\nla suite") is | |
| 40 | + { | |
| 41 | + failure then assertIsTrue(ut, false, "#1 make_line_reader"), | |
| 42 | + success(lr) then | |
| 43 | + assertIsSuccessString(ut, read_line(lr), "mon texte", "#1 1st line"); | |
| 44 | + assertIsSuccessString(ut, read_line(lr), "", "#1 2nd line"); | |
| 45 | + assertIsSuccessString(ut, read_line(lr), "la suite", "#1 3rd line"); | |
| 46 | + assertIsFailure(ut, read_line(lr), "#1 EOF"); | |
| 47 | + unique | |
| 48 | + }; | |
| 49 | + | |
| 50 | + if make_line_reader("mon texte\r\n\rla suite") is | |
| 51 | + { | |
| 52 | + failure then assertIsTrue(ut, false, "#2 make_line_reader"), | |
| 53 | + success(lr) then | |
| 54 | + assertIsSuccessString(ut, read_line(lr), "mon texte", "#2 1st line"); | |
| 55 | + assertIsSuccessString(ut, read_line(lr), "", "#2 2nd line"); | |
| 56 | + assertIsSuccessString(ut, read_line(lr), "la suite", "#2 3rd line"); | |
| 57 | + assertIsFailure(ut, read_line(lr), "#2 EOF"); | |
| 58 | + unique | |
| 59 | + }. | |
| 60 | + | |
| 34 | 61 | public define UnitTestSuite |
| 35 | 62 | make_line_reader_test_suite |
| 36 | 63 | = |
| 37 | 64 | ut_suite("tools.line_reader", |
| 38 | 65 | [ |
| 39 | 66 | ut_fixture("line_reader", line_reader_test), |
| 67 | + ut_fixture("infinite_loop", infinite_loop_test, false), | |
| 40 | 68 | ]). |
| 41 | 69 | ... | ... |
anubis_dev/library/tools/line_reader.anubis
| ... | ... | @@ -30,7 +30,7 @@ public define Maybe(String) |
| 30 | 30 | ( |
| 31 | 31 | LineReader lr, |
| 32 | 32 | ) = |
| 33 | - if lr is line_reader(lexer, _) then | |
| 33 | + if lr is line_reader(lexer, offset) then | |
| 34 | 34 | if lexer(unique) is |
| 35 | 35 | { |
| 36 | 36 | end_of_input then /* no more token: exit the main loop */ |
| ... | ... | @@ -46,7 +46,7 @@ public define Maybe(String) |
| 46 | 46 | if t is |
| 47 | 47 | { |
| 48 | 48 | line(l) then |
| 49 | - //print("tk: line("+l+")\n"); | |
| 49 | + //print("tk: line("+l+") @ "+abs_to_decimal(offset(unique))+"\n"); | |
| 50 | 50 | forget(lexer(unique)); // reading EOL |
| 51 | 51 | success(l), |
| 52 | 52 | eol then |
| ... | ... | @@ -64,6 +64,7 @@ public define Maybe(LineReader) |
| 64 | 64 | ) = |
| 65 | 65 | if make_lexer_and_automaton([ |
| 66 | 66 | lexer_item("#r?#n", return((ByteArray b) |-> token(eol))), |
| 67 | + lexer_item("#r", return((ByteArray b) |-> token(eol))), | |
| 67 | 68 | lexer_item("[^\r\n]*", return((ByteArray b) |-> token(line(to_string(b))))), |
| 68 | 69 | ], |
| 69 | 70 | '#') is | ... | ... |
anubis_dev/library/tools/unit_test.anubis
| ... | ... | @@ -18,11 +18,13 @@ type UnitTestSuiteResult: |
| 18 | 18 | ut_suite_result(String test_suite_name, |
| 19 | 19 | Word32 passed_count, |
| 20 | 20 | Word32 failed_count, |
| 21 | + Word32 ignored_count, | |
| 21 | 22 | List(UnitTestResult) fixture_results). |
| 22 | 23 | |
| 23 | 24 | type UnitTestGlobalResult: |
| 24 | 25 | ut_global_result(Word32 total_passed, |
| 25 | 26 | Word32 total_failed, |
| 27 | + Word32 total_ignored, | |
| 26 | 28 | List(UnitTestSuiteResult) suite_results). |
| 27 | 29 | |
| 28 | 30 | public type UnitTestContext: |
| ... | ... | @@ -51,28 +53,34 @@ define UnitTestContext |
| 51 | 53 | |
| 52 | 54 | public type UnitTestFixture: |
| 53 | 55 | ut_fixture(String test_name, |
| 54 | - (UnitTestContext) -> One the_test). | |
| 56 | + (UnitTestContext) -> One the_test, | |
| 57 | + Bool active). // set this to false to ignore this fixture | |
| 58 | + | |
| 59 | +public define UnitTestFixture | |
| 60 | + ut_fixture(String test_name, | |
| 61 | + (UnitTestContext) -> One the_test) = ut_fixture(test_name, the_test, true). | |
| 55 | 62 | |
| 56 | 63 | public type UnitTestSuite: |
| 57 | 64 | ut_suite(String suite_name, |
| 58 | 65 | List(UnitTestFixture) fixtures). |
| 59 | 66 | |
| 60 | -define (Word32 /*passed*/, Word32 /*failed*/, List(UnitTestResult)) | |
| 67 | +define (Word32 /*passed*/, Word32 /*failed*/, Word32 /*ignored*/, List(UnitTestResult)) | |
| 61 | 68 | run_test_suite |
| 62 | 69 | ( |
| 63 | 70 | List(UnitTestFixture) tests, |
| 64 | 71 | String filter, |
| 65 | 72 | Word32 passed, |
| 66 | 73 | Word32 failed, |
| 74 | + Word32 ignored, | |
| 67 | 75 | List(UnitTestResult) results |
| 68 | 76 | ) = |
| 69 | 77 | if tests is |
| 70 | 78 | { |
| 71 | - [ ] then (passed, failed, reverse(results)), //print("All tests finish"), | |
| 79 | + [ ] then (passed, failed, ignored, reverse(results)), //print("All tests finish"), | |
| 72 | 80 | [test . t] then |
| 73 | 81 | //print("\nTesting " + test.test_name + "... (filter = '"+filter+"')\n"); |
| 74 | 82 | with ut = make_new_context, |
| 75 | - if length(filter) = 0 | filter = test.test_name then | |
| 83 | + if (length(filter) = 0 & active(test)) | filter = test.test_name then | |
| 76 | 84 | the_test(test)(ut); |
| 77 | 85 | if *ut.failed_tests is |
| 78 | 86 | { |
| ... | ... | @@ -81,6 +89,7 @@ define (Word32 /*passed*/, Word32 /*failed*/, List(UnitTestResult)) |
| 81 | 89 | filter, |
| 82 | 90 | passed + 1, |
| 83 | 91 | failed, |
| 92 | + ignored, | |
| 84 | 93 | [ut_result(test.test_name, ok(unique)) . results]), |
| 85 | 94 | |
| 86 | 95 | [h . _] then print("F"); |
| ... | ... | @@ -94,13 +103,15 @@ define (Word32 /*passed*/, Word32 /*failed*/, List(UnitTestResult)) |
| 94 | 103 | filter, |
| 95 | 104 | passed, |
| 96 | 105 | failed + 1, |
| 106 | + ignored, | |
| 97 | 107 | map_add(*ut.failed_tests, results)) |
| 98 | 108 | } |
| 99 | 109 | else |
| 100 | 110 | run_test_suite(t, |
| 101 | 111 | filter, |
| 102 | 112 | passed, |
| 103 | - failed, | |
| 113 | + failed, | |
| 114 | + ignored + if active(test) then 0 else 1, // only explicitely ignored test are counted | |
| 104 | 115 | results) |
| 105 | 116 | }. |
| 106 | 117 | |
| ... | ... | @@ -108,18 +119,19 @@ define (Word32 /*passed*/, Word32 /*failed*/, List(UnitTestResult)) |
| 108 | 119 | |
| 109 | 120 | |
| 110 | 121 | |
| 111 | -define (Word32 /* passed */, Word32 /* failed */, List(UnitTestSuiteResult)) | |
| 122 | +define (Word32 /* passed */, Word32 /* failed */, Word32 /*ignored*/, List(UnitTestSuiteResult)) | |
| 112 | 123 | execute_tests |
| 113 | 124 | ( |
| 114 | 125 | List(UnitTestSuite) suites, |
| 115 | 126 | String filter, |
| 116 | 127 | Word32 passed, |
| 117 | 128 | Word32 failed, |
| 129 | + Word32 ignored, | |
| 118 | 130 | List(UnitTestSuiteResult) results |
| 119 | 131 | )= |
| 120 | 132 | if suites is |
| 121 | 133 | { |
| 122 | - [ ] then (passed, failed, results), | |
| 134 | + [ ] then (passed, failed, ignored, results), | |
| 123 | 135 | [h . t] then |
| 124 | 136 | with filter_length = length(filter), |
| 125 | 137 | fixture_name_length = length(h.suite_name), |
| ... | ... | @@ -127,15 +139,16 @@ define (Word32 /* passed */, Word32 /* failed */, List(UnitTestSuiteResult)) |
| 127 | 139 | if filter_length = 0 | start_with(filter + ".", h.suite_name + ".") | start_with(h.suite_name + ".", filter + ".") then //"." is added to avoid partial name match |
| 128 | 140 | with new_filter = if filter_length > fixture_name_length then force(sub_string(filter, fixture_name_length + 1, filter_length - fixture_name_length - 1), "") |
| 129 | 141 | else "", |
| 130 | - run_test_suite(h.fixtures, new_filter, 0, 0, []) | |
| 142 | + run_test_suite(h.fixtures, new_filter, 0, 0, 0, []) | |
| 131 | 143 | else |
| 132 | - (0, 0, []), | |
| 133 | - if all_fixture_results is (fixture_passed, fixture_failed, fixture_results) then | |
| 144 | + (0, 0, 0, []), | |
| 145 | + if all_fixture_results is (fixture_passed, fixture_failed, fixture_ignored, fixture_results) then | |
| 134 | 146 | execute_tests(t, |
| 135 | 147 | filter, |
| 136 | 148 | passed + fixture_passed, |
| 137 | 149 | failed + fixture_failed, |
| 138 | - [ut_suite_result(h.suite_name, fixture_passed, fixture_failed, fixture_results) | |
| 150 | + ignored + fixture_ignored, | |
| 151 | + [ut_suite_result(h.suite_name, fixture_passed, fixture_failed, fixture_ignored, fixture_results) | |
| 139 | 152 | . results]) |
| 140 | 153 | }. |
| 141 | 154 | |
| ... | ... | @@ -196,7 +209,7 @@ public define One |
| 196 | 209 | )= |
| 197 | 210 | with start_time = (UTime)unow, |
| 198 | 211 | filter = force(nth(0, args), ""), |
| 199 | - if execute_tests(suites, filter, 0, 0, []) is (passed, failed, results) then | |
| 212 | + if execute_tests(suites, filter, 0, 0, 0, []) is (passed, failed, ignored, results) then | |
| 200 | 213 | println(""); |
| 201 | 214 | output_suite_results(results); |
| 202 | 215 | println("----------------------------------------------------------------------"); |
| ... | ... | @@ -204,9 +217,13 @@ public define One |
| 204 | 217 | println("Ran " + (passed + failed) + " tests in " + |
| 205 | 218 | to_decimal(duration.seconds) + "." + zero_pad_n(3, duration.microseconds \ 1000) + "s"); |
| 206 | 219 | println(""); |
| 220 | + println("Tests passed = " + passed); | |
| 221 | + println("Tests failed = " + failed); | |
| 222 | + println("Tests ignored = " + ignored); | |
| 223 | + println(""); | |
| 207 | 224 | ( |
| 208 | 225 | if failed >+ 0 then |
| 209 | - println("FAILED (failures=" + failed + ")") | |
| 226 | + println("FAILED") | |
| 210 | 227 | else |
| 211 | 228 | println("OK") |
| 212 | 229 | ); | ... | ... |