Commit 9f65ef33bd96add8fb512fa3153bc1d76e8ff8f4

Authored by totoro
1 parent 159a6521

Revert "Definitive (I hope so) fast_lexer_5 with its documentation updated."

This reverts commit eace497398506d94a2b8b8725b6d1004f7275b18.
anubis_dev/compiler/src/predef.anubis
... ... @@ -720,9 +720,6 @@ public define One truncate(ByteArray s,
720 720 public define ByteArray extract(ByteArray s,
721 721 Int start,
722 722 Int end) = +++avm{ extract_byte_array }.
723   -public define One write(ByteArray src,
724   - ByteArray dest,
725   - Int where) = +++avm{ write_byte_array }.
726 723 public define ByteArray ByteArray s + ByteArray t = +++avm{ concat_byte_array }.
727 724 public define String to_ascii(ByteArray s) = +++avm{ byte_array_to_ascii }.
728 725 public define String to_string(ByteArray s) = +++avm{ byte_array_to_string }.
... ...
anubis_dev/include/bytecode.h
... ... @@ -598,7 +598,6 @@ typedef enum {
598 598 sc_item(s_cnl_b,2,0)\
599 599 sc_item(s_cnl_s,2,0)\
600 600 sc_item(constant_byte_array_16,2,0)\
601   - sc_item(write_byte_array,3,0)\
602 601  
603 602  
604 603  
... ...
anubis_dev/include/minver.h
1 1 #define maj_version (1)
2 2 #define min_version (14)
3   -#define rel_version (3)
  3 +#define rel_version (2)
4 4 #define build_version (0)
5 5  
6 6  
... ...
anubis_dev/library/data_base/read_csv.anubis
... ... @@ -9,14 +9,14 @@ read tools/time.anubis
9 9 read lexical_analysis/fast_lexer_4.anubis
10 10  
11 11  
  12 + The function made by the function below reads a single record from a CSV input source.
  13 +
12 14 public type ReadCsvResult:
13 15 end_of_input,
14 16 error (String message), // an error message
15 17 ok (Int offset, List(String) record). // a single record and the offset of the end of
16 18 // this record.
17 19  
18   - The function constructed by the function below reads a single record from a CSV input source.
19   -
20 20 public define One -> ReadCsvResult
21 21 make_read_csv_line
22 22 (
... ...
anubis_dev/library/lexical_analysis/fast_lexer_4.anubis
... ... @@ -12,8 +12,7 @@
12 12  
13 13 This is the reason why this file, which replaces the previous fast_lexer_4 is in fact
14 14 just a compatibility file between fast_lexer_4 and fast_lexer_5. The old version
15   - of fast_lexer_4 is kept into fast_lexer_4.anubis.old, but you cannot use it because
16   - it is incompatible with the stuff in predefined.anubis.
  15 + of fast_lexer_4 is kept into fast_lexer_4.anubis.old.
17 16  
18 17 However, in order to compile a file using fast_lexer_4.anubis, you have to replace:
19 18  
... ... @@ -69,25 +68,6 @@ public define LexerOutput($Token)
69 68 }
70 69 }.
71 70  
72   -public define LexerOutput($Token)
73   - convert
74   - (
75   - LexerResult(ExToken($Token),$Aux) r,
76   - ) =
77   - if r is
78   - {
79   - ignore then should_not_happen(end_of_input), // because fast_lexer_4 has no 'ignore with action'
80   - error(e) then
81   - if e is lex_error(b,tools,aux) then
82   - error(b,line(tools)(unique),column(tools)(unique)),
83   -
84   - token(tok) then if tok is
85   - {
86   - end_of_input then end_of_input, // both 'end_of_input' are not of the same type
87   - token(t) then token(t) // idem
88   - }
89   - }.
90   -
91 71 Conversion in the other direction:
92 72  
93 73 public define Result(LexicalError($Aux),ExToken($Token))
... ...
anubis_dev/library/lexical_analysis/fast_lexer_5.anubis
... ... @@ -22,16 +22,13 @@
22 22  
23 23 $authorline(Author:) (Alain Prouté (2016 from previous versions 2008, 2013 and 2014).)
24 24 $authorline(Contributions by:) (Matthieu Herrmann (2014, 2016).)
25   - $authorline(Last revision:) (Dec. 2016)
  25 + $authorline(Last revision:) (nov. 2016)
26 26 $par
27 27  
28 28  
29 29 This is the fifth version of this tool. The main novelties compared to previous versions are:
30 30 $list(
31 31 $item Automatic generation of a $em(multistate lexer) (gathering several $em(sublexers)).
32   - $item Possibility to switch the state of the lexer from within itself.
33   - $item Possibility to 'ignore with action'. (These two points are useful for example for ignoring nested
34   - left and right delimited comments.)
35 32 $item The type schema $att(LexerOutput($Token)) is no more used. It is replaced by a type which makes the
36 33 lexer directly usable as an argument to your APG parser (if any).
37 34 $item This documentation written in MAML and included into the source file.
... ... @@ -76,19 +73,7 @@
76 73 The lexers can be constructed by this program in two different ways, either statically
77 74 (i.e. at compile time) or dynamically (i.e. at run time). Use the second possibility only
78 75 if the $em(regular expressions) (see the definition below) needed for constructing the lexer
79   - are not known at compile time.$p
80   -
81   - See also APG (the $em(Anubis Parser Generator)) in$par
82   - $center($fname(library/syntactic_analysis/parser_generator.anubis))
83   - which is the companion tool to this one.$p
84   -
85   - Note: If you want to scan utf-8 texts you just have to design your regular expressions
86   - accordingly. Indeed, the lexer considers all bytes from 0 to 255. Notice
87   - that the lexer computes column numbers in utf-8 mode, i.e. each multibyte utf-8 character counts
88   - for just one column.
89   -
90   -
91   -
  76 + are not known at compile time.
92 77  
93 78 $section(Regular expressions)
94 79  
... ... @@ -291,12 +276,7 @@ public type LexingTools:
291 276 is to prepare the token after it is just recognized, i.e. to transform the token, which comes as a byte array,
292 277 into a datum of type $att($Token).$p
293 278  
294   - You can also get the tools associated to a lexing stream by using the following:
295   - $ecode(public define LexingTools tools(LexingStream ls).)
296   - This is useful for example if you want to transmit the lexing tools to an APG parser (through the
297   - $att(extra datum); see APG documentation).$p
298   -
299   - The first three tools provide the line and column number of the current token together with its offset in
  279 + The first three tools provide the line and column number of the token together with its offset in
300 280 the input stream.$p
301 281  
302 282 The last tool enables to go several bytes backward in the input. It is not guaranteed that the lexer can go
... ... @@ -309,7 +289,7 @@ public type LexingTools:
309 289  
310 290  
311 291 $section(Describing a single state lexer)
312   - As we shall see below, a lexer can have so-called $em(states). These are of course not the states of the constructed
  292 + As we shall see below, a lexer can have so called $em(states). These are of course not the states of the constructed
313 293 automaton, but a notion similar to the LEX/FLEX notion of state. For the time being we consider lexer with only one
314 294 (default) state.
315 295  
... ... @@ -338,7 +318,7 @@ public type LexingTools:
338 318 Hence, in most cases, it is preferable to write an APG file first, and
339 319 to define your lexer later on. $p
340 320  
341   - The type of tokens for a given lexer is represented in this documentation by the type parameter
  321 + The type of tokens for a given lexer is represented in this file by the type parameter
342 322 $att($ Token).
343 323  
344 324  
... ... @@ -355,12 +335,12 @@ public type LexicalError($Aux):
355 335 )
356 336 The component $att(b) is the text which produced the error. From this and the components $att(t) and $att(a), you can
357 337 produce well documented error messages, containing for example a line number, a column number. You can also use the
358   - tool $att(back(t)) (if needed, and with care).$p
  338 + tool $att(t(back)) in order to come back to the beginning (for example) of the faulty text, change the state of the
  339 + lexer, and reread the token with an other sublexer.$p
359 340  
360   - A lexer returns a datum of type:$par
  341 + A lexer returns a datum of type:
361 342 $center($att(Result(LexicalError($Aux),$Token)))
362   - instead of $att(LexerOutput($Token)) which was the case
363   - in $fname(fast_lexer_4). The advantage is that the constructed lexer is
  343 + instead of $att(LexerOutput($Token)) in $fname(fast_lexer_4). The advantage is that the constructed lexer is
364 344 directly usable as an argument to parsers produced by APG.$p
365 345  
366 346 Actually, the main structural difference with $att(LexerOutput($Token)) which had a alternative named $att(end_of_input) is
... ... @@ -370,6 +350,11 @@ public type LexicalError($Aux):
370 350 we shall see below, this function takes this token as an argument. If you don't use APG, you must create a type of
371 351 tokens with an alternative for representing the end of input.$p
372 352  
  353 + Note: If you want to scan utf-8 texts you just have to design your regular expressions
  354 + accordingly. Indeed, the lexer considers all bytes from 0 to 255. Notice
  355 + that the lexer computes column numbers in utf-8 mode, i.e. each multibyte utf-8 character counts
  356 + for just one column.
  357 +
373 358  
374 359  
375 360 $subsection(Actions)
... ... @@ -377,9 +362,6 @@ public type LexicalError($Aux):
377 362 $acode(
378 363 public type LexerAction($Token,$Aux):
379 364 ignore,
380   - ignore((ByteArray token,
381   - LexingTools tools,
382   - $Aux aux) -> One),
383 365 return((ByteArray token,
384 366 LexingTools tools,
385 367 $Aux aux) -> Result(LexicalError($Aux),$Token)),
... ... @@ -391,37 +373,22 @@ public type LexerAction($Token,$Aux):
391 373 The first alternative $att(ignore) means that no action should be performed so that the recognized token is just
392 374 $em(ignored) (and the low level lexer keeps on reading without returning).$p
393 375  
394   - The second alternative also ignores the token but allows to perform an action.
395   - The action is a function receiving the
  376 + The second alternative is used when a token is recognized and
  377 + an actual action must be performed. The action is a function receiving the
396 378 following arguments:
397 379 $list(
398 380 $item the token in the form of a byte array,
399 381 $item the toolbox containing the lexing tools,
400 382 $item auxiliary data whose type is to be defined by yourself.
401 383 )
402   - and it returns a datum
403   - of type $att(One) because the lexer will not return anything. Instead, it will resume reading from
404   - the input stream.$p
405   -
406   - $bold(Important note): If a token is to be ignored and no action is needed, use the first alternative,
407   - not the second one (with a dummy action). Not doing so could dramatically degrade performances. This is
408   - due to the fact that the first alternative doesn't make the low level lexer (defined in
409   - $fname(predefined.anubis)) return.$p
410   -
411   - The third alternative is used when a token is recognized and
412   - an actual action must be performed. The action is a function receiving the
413   - same arguments as above.
414 384 It must return the token as a datum of type $att(Result(LexicalError($Aux),$Token)). The reason why it doesn't just return
415   - a datum of type $att($Token) is that the function itself must be able to reject (for some reason of
416   - your own) the recognized
  385 + a datum of type $att($Token) is that the function itself must be able to reject (for some reason) the recognized
417 386 token, and to return an error (see example below).$p
418 387  
419   - The fourth alternative in $att(LexerAction($Token,$Aux)) is a variant of the third one. Instead of extracting
  388 + The third alternative in $att(LexerAction($Token,$Aux)) is a variant of the second one. Instead of extracting
420 389 the token from the buffer, the function provides tools for extracting a part of the token. The argument
421   - $att(length) is the total length of the token. The function $att(extract) enables
422   - to extract the part of the token
423   - located between positions $att(s) (included) and $att(e) (not included), relative to the beginning of
424   - the token. For example,
  390 + $att(length) is the total length of the token. The function $att(extract) enables to extract the part of the token
  391 + located between positions $att(s) (included) and $att(e) (not included), relative to the token. For example,
425 392 $att(extract(0,length)) gives the whole token.
426 393  
427 394  
... ... @@ -495,6 +462,17 @@ public type LexerItem($Token,$Aux):
495 462 for the users of your program.
496 463  
497 464  
  465 + $subsection(Ignoring a token)
  466 + If you don't provide a function in a lexer item (using $att(ignore) instead of $att(return)),
  467 + the recognized token is just ignored and the lexer tries to read the next token. For example,
  468 + this may be used for ignoring white spaces. $p
  469 +
  470 + If you want to ignore a token and nevertheless execute an action, use the $att(return)
  471 + alternative and discard the token instead of returning it to the parser. Don't use this
  472 + possibility when $att(ignore) can be used (i.e. if no action is required), because performances
  473 + could be dramatically degraded. Indeed, the Anubis virtual machine instruction implementing the low
  474 + level fast lexer doesn't return on an $att(ignore) and keeps on reading.
  475 +
498 476  
499 477 $subsection(Putting lexer items in the right order)
500 478 The order of the lexer items in a lexer description can be important. The lexer can
... ... @@ -555,13 +533,114 @@ public define Result(RegExprError,
555 533  
556 534  
557 535  
  536 +
  537 + $section(Multistate lexers)
  538 + The LEX/FLEX software has a notion of $em(state) for lexers. This means that regular expressions are partitioned into
  539 + several (disjoint) sets, one for each $em(state), and that the lexer, when called in a given state, uses only those
  540 + regular expressions which belong to the set corresponding to this state.$p
  541 +
  542 + It is equivalent to say that the lexer is made of several $em(sublexers), one for each state, and that the $em(global
  543 + lexer) uses only one sublexer depending on its state. In this section, we describe how to create a $em(multistate)
  544 + lexer.$p
  545 +
  546 + $subsection(Describing a multistate lexer)
  547 + In order to create a multistate lexer, you first describe $em(several) sublexers, one for each state. Each sublexer is of
  548 + type $att(List(LexerItem($Token,$Aux))), in other words, it is described exactly in the same way as a single state
  549 + lexer.
  550 + The values of the type parameters $att($Token) and $att($Aux) must be
  551 + the same one for all sublexers. In other words, there is a unique token type for your multistate lexer, and also a
  552 + unique auxiliary data type.$p
  553 +
  554 + In order to associate a name to a sublexer description, we need the following type:
  555 + $acode(
  556 +public type SubLexer($Token,$Aux):
  557 + sublexer (String sublexer_name,
  558 + List(LexerItem($Token,$Aux)) description).
  559 + )
  560 + where $att(sublexer_name) is the name you want to give to the corresponding sublexer. This name must be valid as an
  561 + Anubis symbol, because it is used as such in the generated file in case you want to precompile your lexer.$p
  562 +
  563 + $define(LexerState)(0)($ LexerState)
  564 +
  565 + Actually, what this program will do is to set (within a generated file) the definition of a
  566 + $em(type of states) for the lexer, whose name is constructed by prefixing $att(LexerState_) in front of
  567 + the name of your multistate lexer. This type has one alternative (without component) for each sublexer.
  568 + It is represented below by the type parameter $att($LexerState).$p
  569 +
  570 + In order to avoid a circularity problem with APG, the definition of the type $att(LexerState_<lexer name>) is placed
  571 + into a separate file whose name is $fname(<lexer name>_states.anubis). The rest of the generated lexer is placed into
  572 + the file $fname(<lexer name>.anubis). Indeed, APG generates the type of tokens which is to be used from within the
  573 + lexer, and fast_lexer_5 generates the type of states of the lexer which is to be used from within the APG grammar.$p
  574 +
  575 + Thanks to the above separation of the generated lexer into two generated files, the parser can $att(read) (actually
  576 + $att(transmit)) the file $fname(<lexer name>_states.anubis) and the lexer can $att(read) the declaration file
  577 + generated by APG without creating a circularity problem.$p
  578 +
  579 + If you are using APG, remember that you should produce all generated files (those generated by APG and those
  580 + generated by fast_lexer_5) before reading any of them.
  581 +
  582 +
  583 + $subsection(Creating a multistate lexer)
  584 + This done, you can construct your multistate lexer (at run time) as follows:
  585 + $acode(
  586 +public define Result(RegExprError,
  587 + ((LexingStream,$Aux) -> One -> Result(LexicalError($Aux),$Token),
  588 + $LexerState -> One))
  589 + make_lexer
  590 + (
  591 + String lexer_name,
  592 + String name_of_initial_sublexer,
  593 + List(SubLexer($Token,$Aux)) sublexers_descriptions,
  594 + Word8 escape_char // '#' recommended here
  595 + ).
  596 + )
  597 + The differences with the previous $att(make_lexer) are:
  598 + $list(
  599 + $item after plugging the lexer onto a lexing stream, you get two functions instead of one. The first one is the
  600 + lexer itself of type $att(One -> Result(LexicalError($Aux),$Token)), and the second one of type $att($LexerState -> One) is the
  601 + command for changing the state of the lexer,
  602 +
  603 + $item $att(make_lexer) needs to know the names of the lexer and of the initial state (i.e. of the sublexer to be used
  604 + in the first place),
  605 +
  606 + $item you must give a list of sublexers descriptions instead of a single lexer description.
  607 + )
  608 +
  609 +
  610 + $subsection(Using a multistate lexer)
  611 + A multistate lexer is used exactly in the same way as a single state lexer. You just have the extra possibility to
  612 + change its state at will.$p
  613 +
  614 + If you are not using APG, you can change the state of the lexer between two calls.$p
  615 +
  616 + If you are using APG, the best is to change the state of the lexer from within the grammar rules. This can be done
  617 + by way of $em(immediate commands). In order to make the change of state function available in grammar rules,
  618 + you must transmit it within the so-called $em(extra datum) accepted by APG. See APG's documentation.
  619 +
  620 +
  621 +
  622 +
  623 +
  624 + $section(Computing a lexer at compile time)
  625 + If you do as explained above, your lexer is constructed at run time. If the
  626 + lexer description is already known at compile time, it is preferable to construct
  627 + the lexer at compile time.
  628 +
  629 +
558 630 $subsection(Precompiling a single state lexer)
559   - If the description of your lexer is known at compile time, it is preferable to
560   - $em(precompile) your lexer. This is done by the function $att(make_precompiled_lexer)
561   - defined below. It creates an Anubis source file (that we call below the $em(generated file))
562   - containing your already compiled lexer,
563   - but not containing the actions. We explain below how you can get your working
564   - lexer from this file and the original lexer description.$p
  631 + In order to do that, write the following into your
  632 + source file: $label(precompilemylexer)
  633 + $ecode( global define One
  634 + precompile_my_lexer // of course, you can choose another name here
  635 + (
  636 + List(String) _ // not used
  637 + ) =
  638 + make_precompiled_lexer(lexer_name,lexer_description,'#',end_of_input).
  639 +
  640 + execute anbexec precompile_my_lexer)
  641 + This creates an Anubis source file whose name is $fname(lexer_name.anubis)
  642 + within a subdirectory (of the current directory) named $fname(generated), which is created
  643 + if needed. This execution prints error messages (if any) on the standard output.$p
565 644  
566 645 The function $att(make_precompiled_lexer) is declared as follows:
567 646 $acode(
... ... @@ -572,35 +651,11 @@ public define One
572 651 List(LexerItem($Token,$Aux)) lexer_description,
573 652 Word8 escape_char,
574 653 String end_of_input
575   - ). )
576   - The argument $att(lexer_name) is the name of the lexer. It will become the name of the generated
577   - file (with $fname(.anubis) appended). For example, if the first argument is $att("my_lexer"), the
578   - name of the file will be $fname(my_lexer.anubis).$p
579   -
580   - The argument $att(end_of_input) is necessary because the
581   - lexer needs to know which token is to be returned when the end of the input is encountered.
582   - Notice that here, the end of input is not given as a token but as a character string.
583   - This is because this string will
584   - be printed into the generated file in order to make the actual token.$p
  654 + ).
  655 + )
  656 + Notice that here the $att(end_of_input) is not given as a token but as a character string. This is because this string will
  657 + be printed into the generated file in order to make the actual token.$p
585 658  
586   -
587   -
588   - The generated file is created by default in the subdirectory $fname(generated) of the current
589   - directory (this subdirectory is automatically created if needed).
590   - In order to produce this file, write the following into your
591   - source file: $label(precompilemylexer)
592   - $ecode(global define One
593   - precompile_my_lexer // of course, you can choose another name here
594   - (
595   - List(String) _ // not used
596   - ) =
597   - make_precompiled_lexer("my_lexer",
598   - my_lexer_description,
599   - '#',
600   - "end_of_input").)
601   - $ecode(execute anbexec precompile_my_lexer)
602   - This execution prints error messages (if any) on the standard output.$p
603   -
604 659 If you want to create the target files in another directory than $fname(generated), use
605 660 the variant below:
606 661 $acode(
... ... @@ -619,20 +674,20 @@ public define One
619 674 don't even need to have a look at it.
620 675  
621 676 $subsection(Plugging a precompiled single state lexer onto a lexing stream)
622   - In order to get your precompiled lexer in the form of
  677 + In order to get your lexer in the form of
623 678 a function of type:$par
624 679  
625 680 $center($att((LexingStream,$Aux) -> One -> Result(LexicalError($Aux),$Token)))
626 681  
627   - as above, just write this (still assuming that the name of your lexer is $att(my_lexer)):
628   - $ecode(read generated/my_lexer.anubis)
629   - This defines a datum whose name is $att(my_lexer). This datum contains the compiled automaton itself,
  682 + as above, just write this:
  683 + $ecode(read generated/lexer_name.anubis)
  684 + This defines a datum whose name is $att(lexer_name). This datum contains the compiled automaton itself,
630 685 but not the actions.$p
631 686  
632   - At the place in your program where you want to have your lexer, write this:
633   - $ecode(retrieve_lexer(my_lexer_description,my_lexer))
634   - Actually, $att(my_lexer_description) is used here only for retrieving the actions. The automaton
635   - is contained into $att(my_lexer).$p
  687 + At the place in your program where you want to have your lexer:
  688 + $ecode(retrieve_lexer(lexer_description,lexer_name))
  689 + Actually, $att(lexer_description) is used here only for retrieving the actions. The automaton
  690 + is contained into $att(lexer_name).$p
636 691  
637 692 The function $att(retrieve_lexer) is declared as follows:
638 693 $acode(
... ... @@ -643,61 +698,22 @@ public define One
643 698 PrecompiledLexer automaton // the datum in the generated file
644 699 ).
645 700 )
646   - At this moment no error can happen if the file $fname(my_lexer.anubis) did not contain
  701 + At this moment no error can happen if the file $fname(lexer_name.anubis) did not contain
647 702 any error (this automatically generated file should not contain any error, anyway).
648 703  
649 704  
650 705  
651 706  
652   -
653   -
654   -
655   -
656   -
657   -
658   -
659   - $section(Multistate lexers)
660   - The LEX/FLEX software has a notion of $em(state) for lexers. This means that regular expressions are partitioned into
661   - several (disjoint) sets, one for each $em(state), and that the lexer, when called in a given state, uses only those
662   - regular expressions which belong to the set corresponding to this state.$p
663   -
664   - It is equivalent to say that the lexer is made of several $em(sublexers), one for each state, and that the $em(global
665   - lexer) uses only one sublexer depending on its state. In this section, we describe how to create a $em(multistate)
666   - lexer.$p
667   -
668   - Multistate lexers are always precompiled. Hence, their description must be known at compile time.
669   -
670   -
671   - $subsection(Describing a multistate lexer)
672   - In order to create a multistate lexer, you first describe $em(several) sublexers, one for each state. Each sublexer is of
673   - type $att(List(LexerItem($Token,$Aux))), in other words, it is described exactly in the same way as a single state
674   - lexer.
675   - The values of the type parameters $att($Token) and $att($Aux) must be
676   - the same one for all sublexers. In other words, there is a unique token type for your multistate lexer, and also a
677   - unique auxiliary data type.$p
678   -
679   - In order to associate a name to a sublexer description, we need the following type:
680   - $acode(
681   -public type SubLexer($Token,$Aux):
682   - sublexer (String sublexer_name,
683   - List(LexerItem($Token,$Aux)) description).
684   - )
685   - where $att(sublexer_name) is the name you want to give to the corresponding sublexer. This name must be valid as an
686   - Anubis symbol, because it is used as such in the generated file in case you want to precompile your lexer.$p
687   -
688   - $define(LexerState)(0)($ LexerState)
689   -
690   -
691 707  
692 708 $subsection(Precompiling a multistate lexer)
693   - Precompiling a multistate lexer is similar to precompiling a single state lexer.
694   - To precompile a multistate lexer, use the following:
  709 + The above $att(make_lexer) function creates a multistate lexer at run time. If you want to compile a multistate lexer
  710 + at compile time, use the following:
695 711 $acode(
696 712 public define One
697 713 make_precompiled_lexer
698 714 (
699 715 String lexer_name,
700   - List(String) reads,
  716 + String name_of_initial_sublexer,
701 717 List(SubLexer($Token,$Aux)) sublexers_descriptions,
702 718 Word8 escape_char,
703 719 String end_of_input
... ... @@ -711,69 +727,47 @@ public define One
711 727 (
712 728 String directory,
713 729 String lexer_name,
714   - List(String) reads,
  730 + String name_of_initial_sublexer,
715 731 List(SubLexer($Token,$Aux)) sublexers_descriptions,
716 732 Word8 escape_char,
717 733 String end_of_input
718 734 ).
719 735 )
720   - The argument $att(reads) let you give a list of Anubis source file names that will be $att(read) at
721   - the beginning of the generated file. You need to put at least one such file name, because the generated
722   - program looks for a type whose name is $att(LexerState_my_lexer) (again assuming that $att(my_lexer)
723   - is the name of your lexer). This is the type of the states of the
724   - lexer. This type must be a enumerated type with one alternative for each sublexer (in the same order),
725   - and the name of this
726   - alternative must be the name of the corresponding sublexer.$p
727   -
728   - The reason why this type is not automaticallly generated at the beginning of the generated file is to avoid
729   - a possible circularity problem in case you also use APG. Indeed, APG defines the type of tokens, so that we
730   - will have to put $att(my_parser.apg.dec.anubis) (assuming that the name of your parser is $att(my_parser))
731   - into the list $att(reads). But your parser will also need
732   - to know the type of states of the lexer, since it must be able to switch between these states.$p
733   -
734   - As a consequence, this type should be defined (by hand) in another file. If you are using APG, you should
735   - define it (as a $att(public type)) in the public preambule of your APG file, so that it appears at the
736   - beginning of
737   - $fname(my_parser.apg.dec.anubis).$p
738   -
739   - In order to actually precompile your lexer, you must execute the above function. You can do this in exactly
740   - the same
741   - way as for a single state lexer (see the example $ref(precompilemylexer)($att(precompile_my_lexer)) above).$p
742   -
743   -
  736 + In order to actually precompile your lexer, you must execute the above function. You can do this in exactly the same
  737 + way as for a single state lexer (see $ref(precompilemylexer)($att(precompile_my_lexer)) above).
744 738  
745 739 $subsection(Plugging a precompiled multistate lexer onto a lexing stream)
746   - The file generated by the above function $att(make_precompiled_lexer) has a very short public part which
747   - contains the declaration of a public function for creating an instance of the lexer plugged onto a given
748   - lexing stream.$p
  740 + The file generated by the above function $att(make_precompiled_lexer) has a very short public part which contains:
  741 + $list(
  742 + $item the definition of the public type of lexer states,
  743 + $item a public function for creating an instance of the lexer plugged onto a given lexing stream.
  744 + )
  745 + The name of the type of lexer states is $att(LexerState_<lexer name>), where $att(<lexer name>) is the
  746 + actual name of your lexer (i.e. the value of the $att(lexer_name) argument in the above function). Assuming that this name is
  747 + $att(my_lexer), the type of lexer states has name $att(LexerState_my_lexer). This is an enumerated type with one
  748 + alternative for each sublexer. The name of such an alternative is the name of the corresponding sublexer, but of
  749 + course in the form of an Anubis symbol instead of a character string.$p
749 750  
750   - This public function has name $att(plug_my_lexer) (again assuming that the name of the lexer is $att(my_lexer)). This
  751 + The public function has name $att(plug_my_lexer) (again assuming that the name of the lexer is $att(my_lexer)). This
751 752 function is declared as follows:
752   - $ecode(public define Maybe(One -> Result(LexicalError($Aux),$Token))
  753 + $ecode(public define Maybe((One -> Result(LexicalError($Aux),$Token), // The actual lexer
  754 + LexerState_my_lexer -> One)) // The change of state command
753 755 plug_my_lexer
754 756 (
755 757 LexingStream ls,
756   - Var(LexerState_my_lexer) lexer_state_v,
757 758 $Aux aux,
758   - List(SubLexer($Token,$Aux)) lexer_description,
759   - $Token end_of_input
  759 + List(SubLexer($Token,$Aux)) lexer_description
760 760 ).)
761 761 This function returns $att(failure) if the given $att(lexer_description) doesn't correspond to the precompiled lexer.
762   - Otherwise, it returns a function of type$par
763   - $center($att(One -> Result(LexicalError($Aux),$Token)))
764   - which is the lexer itself already plugged
765   - onto the lexing
766   - stream.$p
767   -
768   - Remark that you must provide a dynamic variable whose role is to hold the current state of the lexer. You can
769   - transmit this variable (or a function able to assign a value to it) to your parser. If you are using APG, you
770   - can transmit it through the $em(extra datum), so that it is available from within $em(immediate commands).
771   - The content of this dynamic variable will of course be the initial
772   - state of the multistate lexer.
  762 + Otherwise, it returns a pair of functions.$p
773 763  
  764 + The first function, of type $att(One -> Result(LexicalError($Aux),$Token)), is the lexer itself already plugged onto the lexing
  765 + stream. It returns the next token (or $att(end_of_input) or an error) at each call.$p
774 766  
  767 + The second one is the function you can use for changing the state of the lexer. It takes the wanted new state as its
  768 + argument. This function is normally used from within your parser, and if you are using APG, you should transmit it to
  769 + the parser through the $att(extra) datum, and use it within $em(immediate commands) (see APG documentation).
775 770  
776   -
777 771 $section(Useful tricks)
778 772  
779 773 $subsection(Testing if a whole string is a single token)
... ... @@ -832,6 +826,9 @@ public define One
832 826 obtained at each reading of this variable.
833 827  
834 828  
  829 + $subsection(Viewing the automaton)
  830 +
  831 +
835 832  
836 833  
837 834  
... ... @@ -865,7 +862,6 @@ read tools/2-4tree.anubis
865 862  
866 863 type LexerRankAction($Token,$Aux):
867 864 ignore(Int rk),
868   - ignore(Int rk, (ByteArray,LexingTools,$Aux) -> One),
869 865 return(Int rk, (ByteArray,LexingTools,$Aux) -> Result(LexicalError($Aux),$Token)),
870 866 return(Int rk, ((Int,Int) -> ByteArray,Int,LexingTools,$Aux) -> Result(LexicalError($Aux),$Token)).
871 867  
... ... @@ -882,7 +878,6 @@ define LexerRankAction($Token,$Aux)
882 878 if a is
883 879 {
884 880 ignore then ignore(rank),
885   - ignore(f) then ignore(rank,f),
886 881 return(f) then return(rank,f),
887 882 return(f) then return(rank,f)
888 883 }.
... ... @@ -2462,20 +2457,14 @@ public type DFA_state($Token,$Aux):
2462 2457 List(DFA_transition) transitions,
2463 2458 Int action_rank,
2464 2459 (ByteArray,LexingTools,$Aux)
2465   - -> Result(LexicalError($Aux),$Token) action),
2466   -
2467   - ignoring (Word16 name,
2468   - List(DFA_transition) transitions,
2469   - Int action_rank,
2470   - (ByteArray,LexingTools,$Aux)
2471   - -> One action),
  2460 + -> Result(LexicalError($Aux),$Token) action),
2472 2461  
2473 2462 accepting (Word16 name,
2474 2463 List(DFA_transition) transitions,
2475 2464 Int action_rank,
2476 2465 ((Int,Int)
2477 2466 -> ByteArray,Int,LexingTools,$Aux)
2478   - -> Result(LexicalError($Aux),$Token) action),
  2467 + -> Result(LexicalError($Aux),$Token) action),
2479 2468  
2480 2469 ignoring (Word16 name,
2481 2470 List(DFA_transition) transitions).
... ... @@ -2586,8 +2575,6 @@ define List(DFA_state($Token,$Aux))
2586 2575 {
2587 2576 ignore(rk) then // a state has a 'ignore' if and only if it is 'ignoring'
2588 2577 [ignoring(get_new_name(old_name,nlist),trs) . rename(t,nlist)],
2589   - ignore(rk,a) then
2590   - [ignoring(get_new_name(old_name,nlist),trs,rk,a) . rename(t,nlist)],
2591 2578 return(rk,a) then
2592 2579 [accepting(get_new_name(old_name,nlist),trs,rk,a) . rename(t,nlist)],
2593 2580 return(rk,a) then
... ... @@ -2705,9 +2692,6 @@ public define List(FastLexerState)
2705 2692 {
2706 2693 rejecting(n,trs) then rejecting(to_fast_lexer_transitions(trs))
2707 2694 accepting(n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs))
2708   - // ignoring with action translates to 'accepting' in the low level lexer, because
2709   - // the low level lexer must return so that the action is performed.
2710   - ignoring (n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs))
2711 2695 accepting(n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs))
2712 2696 ignoring (n,trs) then ignoring(to_fast_lexer_transitions(trs))
2713 2697 } . to_fast_lexer_description(t)]
... ... @@ -2752,9 +2736,8 @@ define TreeKV(Word16,LexerAction($Token,$Aux))
2752 2736 {
2753 2737 rejecting(name,trs) then tree,
2754 2738 accepting(name,trs,rank,action) then insert(name,return(action),tree),
2755   - ignoring (name,trs,rank,action) then insert(name,ignore(action),tree),
2756 2739 accepting(name,trs,rank,action) then insert(name,return(action),tree),
2757   - ignoring (name,trs) then tree
  2740 + ignoring(name,trs) then tree
2758 2741 },
2759 2742 fill_actions(t,next_tree)
2760 2743 }.
... ... @@ -2858,14 +2841,6 @@ public define TreeKV(Word16,LexerAction($Token,$Aux))
2858 2841  
2859 2842  
2860 2843 *** [7.3] Reading the next token.
2861   -
2862   - What the lexer returns:
2863   -
2864   -public type LexerResult($Token,$Aux):
2865   - ignore, // something to be ignored (this happens because of ignore+action)
2866   - error (LexicalError($Aux)), // a lexical error
2867   - token ($Token). // a token to be returned
2868   -
2869 2844  
2870 2845 A special debugging macro for the function 'read_next_token'.
2871 2846  
... ... @@ -2874,7 +2849,8 @@ define macro One debug_rnt(String s) = unique.
2874 2849  
2875 2850 The function which reads the next token:
2876 2851  
2877   -public define LexerResult($Token,$Aux)
  2852 +public define Result(LexicalError($Aux), // can return a lexical error
  2853 + $Token) // or a token (which can be 'end_of_input')
2878 2854 read_next_token
2879 2855 (
2880 2856 (ByteArray,FastLexerLastAccepted,Int,Int,Word16) -> FastLexerOutput low_level_lexer,
... ... @@ -3009,7 +2985,7 @@ public define LexerResult($Token,$Aux)
3009 2985 current_v <- end;
3010 2986 last_accept_v <- none;
3011 2987 if start >= end
3012   - then debug_rnt("case (2a1)"); token(end_of_input)
  2988 + then debug_rnt("case (2a1)"); ok(end_of_input)
3013 2989 else debug_rnt("case (2a2)"); error(lex_error(extract(*buffer_v,start,end),tools,aux)),
3014 2990  
3015 2991 success(_) then /*** Case (2b) ***/
... ... @@ -3044,33 +3020,21 @@ public define LexerResult($Token,$Aux)
3044 3020 last_accept_v <- none;
3045 3021 if get(s,actions) is
3046 3022 {
3047   - failure then should_not_happen(token(end_of_input)),
  3023 + failure then should_not_happen(ok(end_of_input)),
3048 3024 success(ac) then if ac is
3049 3025 {
3050 3026 ignore then /* this should not happen */
3051 3027 show(lstream);
3052 3028 should_not_happen((String file, Word32 line) |->
3053 3029 print("In '"+file+"' at line "+to_decimal(line)+
3054   - ": no action found for accepting state "+to_decimal(s)+".\n"),
3055   - token(end_of_input)),
3056   -
3057   - ignore(f) then f(extract(*buffer_v,start,end),tools,aux);
3058   - debug_rnt("ignore with action and restart: "+to_string(extract(*buffer_v,start,end)));
3059   - ignore,
3060   -
  3030 + ": no action found for accepting state "+to_decimal(s)+".\n"), ok(end_of_input)),
3061 3031 return(f) then
3062   - if f(extract(*buffer_v,start,end),tools,aux) is
3063   - {
3064   - error(e) then error(e),
3065   - ok(tok) then token(tok)
3066   - },
  3032 + with result = f(extract(*buffer_v,start,end),tools,aux),
  3033 + result,
3067 3034  
3068 3035 return(f) then
3069   - if f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),end-start,tools,aux) is
3070   - {
3071   - error(e) then error(e),
3072   - ok(tok) then token(tok)
3073   - }
  3036 + with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),end-start,tools,aux),
  3037 + result
3074 3038 }
3075 3039 }
3076 3040 )
... ... @@ -3115,26 +3079,17 @@ public define LexerResult($Token,$Aux)
3115 3079 last_accept_v <- none;
3116 3080 if get(s,actions) is
3117 3081 {
3118   - failure then should_not_happen(token(end_of_input)),
  3082 + failure then should_not_happen(ok(end_of_input)),
3119 3083 success(ac) then if ac is
3120 3084 {
3121   - ignore then should_not_happen(token(end_of_input)),
3122   - ignore(f) then f(extract(*buffer_v,start,end),tools,aux); // execute the 'ignoring' action
3123   - debug_rnt("ignore with action and no restart");
3124   - token(end_of_input),
  3085 + ignore then should_not_happen(ok(end_of_input)),
3125 3086 return(f) then
3126   - if f(extract(*buffer_v,start,end),tools,aux) is
3127   - {
3128   - error(e) then error(e),
3129   - ok(tok) then token(tok)
3130   - },
  3087 + with result = f(extract(*buffer_v,start,end),tools,aux),
  3088 + result,
3131 3089 return(f) then
3132   - if f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
3133   - end-start,tools,aux) is
3134   - {
3135   - error(e) then error(e),
3136   - ok(tok) then token(tok)
3137   - }
  3090 + with result = f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
  3091 + end-start,tools,aux),
  3092 + result
3138 3093 }
3139 3094 },
3140 3095  
... ... @@ -3178,7 +3133,7 @@ public define LexerResult($Token,$Aux)
3178 3133 debug_rnt("buffer cannot be reloaded (case (5a))");
3179 3134 current_v <- lgbuf;
3180 3135 last_accept_v <- none;
3181   - token(end_of_input),
  3136 + ok(end_of_input),
3182 3137  
3183 3138 success(_) then
3184 3139 /* Warning: after reloading of the buffer 'lgbuf' is invalid. */
... ... @@ -3197,10 +3152,8 @@ public define LexerResult($Token,$Aux)
3197 3152  
3198 3153  
3199 3154 *** [7.4] Plugging a low level lexer onto a lexing stream.
3200   -
3201   -
3202 3155  
3203   -define One -> LexerResult($Token,$Aux)
  3156 +define One -> Result(LexicalError($Aux),$Token)
3204 3157 plug_lexer
3205 3158 (
3206 3159 LexingStream stream,
... ... @@ -3219,7 +3172,7 @@ define One -&gt; LexerResult($Token,$Aux)
3219 3172  
3220 3173 *** [7.5] Making a single state lexer at run time.
3221 3174  
3222   -public define Result(RegExprError, (LexingStream,$Aux)-> One -> LexerResult($Token,$Aux))
  3175 +public define Result(RegExprError, (LexingStream,$Aux)-> One -> Result(LexicalError($Aux),$Token))
3223 3176 make_lexer
3224 3177 (
3225 3178 List(LexerRankItem($Token,$Aux)) lexer_description,
... ... @@ -3322,14 +3275,8 @@ define One
3322 3275 [ ] then if (i&15) = 15 then unique else print(f,"\n"),
3323 3276 [h . t] then if h is transition(label,target_name) then
3324 3277 if label is char(c) then
3325   - (if ' ' +=< c & c +=< '}' // don't print non utf-8 characters
3326   - then
3327   - (
3328   - print(f," "+implode([c])+"->"+to_decimal(target_name));
3329   - (if (i&15) = 15 then print(f,"\n") else unique)
3330   - )
3331   - else unique
3332   - );
  3278 + print(f," '"+implode([c])+"'>"+to_decimal(target_name));
  3279 + (if (i&15) = 15 then print(f,"\n") else unique);
3333 3280 dump(f,t,i+1)
3334 3281 }.
3335 3282  
... ... @@ -3352,10 +3299,6 @@ define One
3352 3299 accepting(name,transitions,action_rank,action) then
3353 3300 print(f,"\n --- state "+to_decimal(name)+" (accepting with action number "+to_decimal(action_rank)+") ---\n");
3354 3301 dump(f,transitions,0),
3355   -
3356   - ignoring(name,transitions,action_rank,action) then
3357   - print(f,"\n --- state "+to_decimal(name)+" (ignoring with action number "+to_decimal(action_rank)+") ---\n");
3358   - dump(f,transitions,0),
3359 3302  
3360 3303 accepting(name,transitions,action_rank,action) then
3361 3304 print(f,"\n --- state "+to_decimal(name)+" (accepting with action number "+to_decimal(action_rank)+") ---\n");
... ... @@ -3383,7 +3326,7 @@ define One
3383 3326 if l is precompiled_fast_lexer(fba,sba) then
3384 3327 print(f,"\n *** '"+lexer_name+"'.\n");
3385 3328 print(f,"\n This (deterministic) automaton has "+to_decimal(length(actions_ranks))+" states.\n");
3386   - dump(f,dfa);
  3329 + //dump(f,dfa);
3387 3330 print(f,"\ndefine (List(Int),PrecompiledFastLexer)\n");
3388 3331 print(f," "+lexer_name+" =\n");
3389 3332 print(f," // The list below gives the action associated to each state:\n"+
... ... @@ -3424,7 +3367,6 @@ define List(Int)
3424 3367 {
3425 3368 rejecting(name,transitions) then [-1 . actions_ranks(t)],
3426 3369 accepting(name,transitions,rank,action) then [rank . actions_ranks(t)],
3427   - ignoring (name,transitions,rank,action) then [rank . actions_ranks(t)],
3428 3370 accepting(name,transitions,rank,action) then [rank . actions_ranks(t)],
3429 3371 ignoring(name,transitions) then [-1 . actions_ranks(t)]
3430 3372 }
... ... @@ -3448,7 +3390,6 @@ define List(ByteArray)
3448 3390 with asign = (LexerAction($Token,$Aux) a) |-> if a is
3449 3391 {
3450 3392 ignore then "(*i)", // something which is illegal as a regular expression
3451   - ignore(ac) then "(*ia)",
3452 3393 return(ac) then "(*r1)",
3453 3394 return(ac) then "(*r2)"
3454 3395 },
... ... @@ -3567,7 +3508,7 @@ public define One
3567 3508  
3568 3509  
3569 3510  
3570   -public define (LexingStream,$Aux) -> One -> LexerResult($Token,$Aux)
  3511 +public define (LexingStream,$Aux) -> One -> Result(LexicalError($Aux),$Token)
3571 3512 retrieve_lexer
3572 3513 (
3573 3514 List(LexerItem($Token,$Aux)) lexer_description,
... ... @@ -3588,7 +3529,6 @@ public define (LexingStream,$Aux) -&gt; One -&gt; LexerResult($Token,$Aux)
3588 3529  
3589 3530  
3590 3531  
3591   -
3592 3532 *** [8.2] Dumping the definitions of sublexers.
3593 3533  
3594 3534 For each sublexer we dump a definition of its precompiled form.
... ... @@ -3651,12 +3591,7 @@ define One
3651 3591 {
3652 3592 [ ] then unique,
3653 3593 [h . t] then if h is sublexer(name,_) then
3654   - print(target," "+pad(name,20)+" then if subl_"+name+"(unique) is\n");
3655   - print(target," {\n");
3656   - print(target," ignore then lex(unique),\n");
3657   - print(target," error(e) then error(e),\n");
3658   - print(target," token(tok) then ok(tok)\n");
3659   - print(target," }");
  3594 + print(target," "+pad(name,20)+" then subl_"+name+"(unique)");
3660 3595 print(target,if t is [] then "\n" else ",\n");
3661 3596 dump_switching_cases(target,lexer_name,t)
3662 3597 }.
... ... @@ -3702,18 +3637,19 @@ define One
3702 3637 (
3703 3638 WStream target,
3704 3639 String lexer_name,
  3640 + String initial_state_name,
3705 3641 List(SubLexer($Token,$Aux)) sublexers
3706 3642 ) =
3707 3643 print(target,"\nread lexical_analysis/fast_lexer_5.anubis\n");
3708 3644 print(target,"\n The function below creates an instance of your lexer from a lexing stream.\n");
3709   - print(target,"\n You must provide the dynamic variable containing the state of the lexer.");
3710   - print(target,"\n You have to provide the auxiliary datum.");
  3645 + print(target," It also provides the function for switching the states of the lexer.\n");
  3646 + print(target,"\n You have to provide the auxiliary datum.\n");
3711 3647 print(target,"\n The argument 'description' is needed for recovering the actions.\n");
3712   - print(target,"\npublic define Maybe(One -> Result(LexicalError($Aux),$Token))\n");
  3648 + print(target,"\n"+pad("public define Maybe((One -> Result(LexicalError($Aux),$Token),",70)+" // The actual lexer\n");
  3649 + print(target,pad(" LexerState_"+lexer_name+" -> One))",70)+" // The change of state command\n");
3713 3650 print(target," plug_"+lexer_name+"\n");
3714 3651 print(target," (\n");
3715 3652 print(target," LexingStream ls,\n");
3716   - print(target," Var(LexerState_"+lexer_name+") lexer_state_v,\n");
3717 3653 print(target," $Aux aux,\n");
3718 3654 print(target," List(SubLexer($Token,$Aux)) description,\n");
3719 3655 print(target," $Token end_of_input\n");
... ... @@ -3726,14 +3662,15 @@ define One
3726 3662 WStream target,
3727 3663 String signature,
3728 3664 String lexer_name,
  3665 + String initial_state_name,
3729 3666 List(SubLexer($Token,$Aux)) sublexers,
3730 3667 String end_of_input
3731 3668 ) =
3732   - print(target,"\npublic define Maybe(One -> Result(LexicalError($Aux),$Token))\n");
  3669 + print(target,"\npublic define Maybe((One -> Result(LexicalError($Aux),$Token),\n");
  3670 + print(target," LexerState_"+lexer_name+" -> One))\n");
3733 3671 print(target," plug_"+lexer_name+"\n");
3734 3672 print(target," (\n");
3735 3673 print(target," LexingStream ls,\n");
3736   - print(target," Var(LexerState_"+lexer_name+") lexer_state_v,\n");
3737 3674 print(target," $Aux aux,\n");
3738 3675 print(target," List(SubLexer($Token,$Aux)) description,\n");
3739 3676 print(target," $Token end_of_input\n");
... ... @@ -3742,11 +3679,14 @@ define One
3742 3679 concat(map(sublexer_name,sublexers),"\",\"")+"\"],description)\n");
3743 3680 print(target," then failure else\n");
3744 3681 dump_plug_sublexers(target,lexer_name,sublexers,end_of_input);
3745   - print(target," success(\n");
3746   - print(target," (One u) |-lex-> if *lexer_state_v is\n");
  3682 + print(target," with sv = var((LexerState_"+lexer_name+")"+initial_state_name+"),\n");
  3683 + print(target," success((\n");
  3684 + print(target," (One u) |-> if *sv is\n");
3747 3685 print(target," {\n");
3748 3686 dump_switching_cases(target,lexer_name,sublexers);
3749   - print(target," }).\n\n").
  3687 + print(target," },\n");
  3688 + print(target," (LexerState_"+lexer_name+" st) |-> sv <- st\n");
  3689 + print(target," )).\n\n").
3750 3690  
3751 3691  
3752 3692 public define Bool
... ... @@ -3773,6 +3713,7 @@ public define One
3773 3713 String directory,
3774 3714 String lexer_name,
3775 3715 List(String) reads,
  3716 + String initial_state_name,
3776 3717 List(SubLexer($Token,$Aux)) sublexers,
3777 3718 Word8 escape_char,
3778 3719 String end_of_input
... ... @@ -3799,7 +3740,7 @@ public define One
3799 3740 print(target,"\n");
3800 3741  
3801 3742 // lexer making function declaration.
3802   - dump_switching_function_dec(target,lexer_name,sublexers);
  3743 + dump_switching_function_dec(target,lexer_name,initial_state_name,sublexers);
3803 3744  
3804 3745 print(target,"\n --- Thats all for the public part ! --------------------------------------------------\n\n");
3805 3746  
... ... @@ -3808,7 +3749,7 @@ public define One
3808 3749  
3809 3750 // the switching function.
3810 3751 print(target,"\nread tools/bool.anubis\n\n");
3811   - dump_switching_function(target,signature,lexer_name,sublexers,end_of_input);
  3752 + dump_switching_function(target,signature,lexer_name,initial_state_name,sublexers,end_of_input);
3812 3753 print("Done.\n"); forget(flush(stdout))
3813 3754 },
3814 3755 if read_signature(path) is
... ... @@ -3827,11 +3768,12 @@ public define One
3827 3768 (
3828 3769 String lexer_name,
3829 3770 List(String) reads,
  3771 + String initial_state_name,
3830 3772 List(SubLexer($Token,$Aux)) sublexers,
3831 3773 Word8 escape_char,
3832 3774 String end_of_input
3833 3775 ) =
3834   - make_precompiled_lexer("generated",lexer_name,reads,sublexers,escape_char,end_of_input).
  3776 + make_precompiled_lexer("generated",lexer_name,reads,initial_state_name,sublexers,escape_char,end_of_input).
3835 3777  
3836 3778  
3837 3779  
... ...
anubis_dev/library/predefined.anubis
... ... @@ -994,16 +994,9 @@ public define ByteArray extract(ByteArray s, Int start, Int end).
994 994 shorter than 'end - start', if 'end' and/or 'start' are out of bounds. In particular,
995 995 it may be the empty byte array. There is no side effect.
996 996  
997   - The function below writes a byte array 'src' into a byte array 'dest' at a given position.
998   -
999   -public define One write(ByteArray scr, ByteArray dest, Int position).
1000   -
1001   - If some bytes of 'src' are to be written outside 'dest' (on one side or the other one),
1002   - they are simply not written. Of course, 'write' produces a side effet on 'dest'.
1003   -
1004 997 public define ByteArray ByteArray s + ByteArray t.
1005 998  
1006   - This concatenates byte arrays.
  999 + This concatenates byte arrays.
1007 1000  
1008 1001  
1009 1002 The following two primitives transform a byte array into a string. The first one
... ... @@ -1998,7 +1991,7 @@ public type SocketLinger:
1998 1991  
1999 1992 public type SocketOption:
2000 1993 // Socket level
2001   - so_broadcast(Bool), // Allows transmission of broadcast messages on the socket.
  1994 + so_broadcast(Bool), // Allows transmission of broadcast messages on the socket.
2002 1995 so_debug(Bool), // Records debugging information.
2003 1996 so_dontroute(Bool), // Does not route: sends directly to interface.
2004 1997 // Not supported on ATM sockets (results in an error).
... ... @@ -2124,7 +2117,7 @@ public type PacketSocketProtocol:
2124 2117 * Non DIX types. Won't clash for 1500 types.
2125 2118 */
2126 2119 eth_p_802_3, /* Dummy type for 802.3 frames */
2127   - eth_p_ax25, /* Dummy protocol id for AX.25 */
  2120 + eth_p_ax25, /* Dummy protocol id for AX.25 */
2128 2121 eth_h_802_2, /* 802.2 frames */
2129 2122 eth_h_tr_802_2. /* 802.2 frames */
2130 2123  
... ...
anubis_dev/vm/src/syscall.cpp
... ... @@ -5902,60 +5902,6 @@ Returns: ExecuteStatus (see predefined.anubis)
5902 5902 MAM(m_IP) += 1+2;
5903 5903 return;
5904 5904  
5905   -
5906   - /* Expects:
5907   - - at *(MAM(m_SP)-1): ByteArray src
5908   - - at *(MAM(m_SP)-2): ByteArray dest
5909   - - at *(MAM(m_SP)-3): Int where
5910   -
5911   - Returns (One)unique.
5912   - */
5913   -syscall_case(write_byte_array)
5914   - {
5915   - U8 *src = (U8 *)(*(MAM(m_SP)-1)) + 8;
5916   - U8 *dest = (U8 *)(*(MAM(m_SP)-2)) + 8;
5917   - U32 where = *(MAM(m_SP)-3);
5918   - U32 src_length = ((U32 *)(*(MAM(m_SP)-1)))[1];
5919   - U32 dest_length = ((U32 *)(*(MAM(m_SP)-2)))[1];
5920   - U32 i;
5921   -
5922   - /* get the sign of where (which can be negative) */
5923   - int where_sign = +1;
5924   - if (where&2) where_sign = -1;
5925   -
5926   - /* The result is always (One)unique */
5927   - MAM(m_R) = 0;
5928   -
5929   - /* compute absolute value of 'where' as an U32 */
5930   - if (where&1)
5931   - where = where>>2;
5932   - else
5933   - where = ((U32 *)where)[2];
5934   -
5935   - /* if 'where' is negative: */
5936   - if (where_sign == -1)
5937   - {
5938   - if (where >= src_length)
5939   - { /* all of 'src' is on the left of 'dest' */
5940   - MAM(m_IP) += 1+2;
5941   - return;
5942   - }
5943   - else
5944   - { /* write only those bytes of 'src' which fall into 'dest' */
5945   - src += where;
5946   - src_length -= where;
5947   - where = 0;
5948   - }
5949   - }
5950   - for (i = 0; i < src_length && where+i < dest_length; i++)
5951   - {
5952   - dest[where+i] = src[i];
5953   - }
5954   - /* length of 'dest' is unchanged. */
5955   - }
5956   - MAM(m_IP) += 1+2;
5957   - return;
5958   -
5959 5905  
5960 5906 syscall_case(virtual_machine_id)
5961 5907 {
... ...