fast_lexer_4.anubis 6.04 KB

                   Compatibility file for enabling obsolete fast_lexer_4. 

   I designed fast_lexer_5.anubis because I wanted to fix some bugs in fast_lexer_4.anubis, 
   enhance the interface with APG, and include a notion of multistate lexer. 
   
   While fixing the bugs (all related to the reloading of the buffer in a lexing stream), I
   discovered that the low level fast lexer defined in predefined.anubis was not correctly
   designed. I redesigned it and this led me to change the definition of some types. As a
   consequence, fast_lexer_4 was no longer compatible with the low level lexer, hence became
   unusable. 
   
   This is the reason why this file, which replaces the previous fast_lexer_4, is in fact 
   just a compatibility file between fast_lexer_4 and fast_lexer_5. The old version
   of fast_lexer_4 is kept in fast_lexer_4.anubis.old. 
   
   However, in order to compile a file using fast_lexer_4.anubis, you have to replace:
   
      LexerItem         by         LexerItem4            (if any)
      LexerAction       by         LexerAction4          (if any)
      
   everywhere in that file (i.e. in your own source file, not in this one).
   
   If you are writing a new source file needing a lexer, don't use this file. Use 
   fast_lexer_5 instead. 
   
                                                                A.P.
                                                                
                                                                
   --- That's all for the public part ! ----------------------------------------                                                                
   
transmit fast_lexer_5.anubis

   Below is the compatibility stuff:
   
   
   This type has disappeared from fast_lexer_5:
   
public type LexerOutput($Token):   
   // Old (fast_lexer_4) result type of one lexing step, kept for compatibility
   //   end_of_input : the end of the input has been reached
   //   error        : lexical error; 'line' and 'col' are filled from the lexing tools
   //                  by 'convert' ('b' is presumably the offending bytes - to be confirmed)
   //   token        : a token of the user defined type $Token
   end_of_input, 
   error            (ByteArray b, Int line, Int col),
   token            ($Token t).   

   Because 'end_of_input' is now a token in fast_lexer_5, we need an extension of type $Token:
   
type ExToken($Token):
   // Private extension of the user token type $Token with an explicit 'end_of_input'
   // alternative; this is the token type given to fast_lexer_5 by this file
   end_of_input,
   token($Token). 
      
   LexerOutput($Token) is replaced by Result(LexicalError($Aux),$Token). We need a function for converting
   from the new type to the old one.
   
public define LexerOutput($Token)
   convert
   (
     Result(LexicalError($Aux),ExToken($Token))    r
   ) =
   // Converts a fast_lexer_5 style result into the old fast_lexer_4 'LexerOutput'
   //   error(lex_error(b,tools,aux))  gives  error(b,line,col)
   //   ok(end_of_input)               gives  end_of_input
   //   ok(token(t))                   gives  token(t)
   if r is 
   {
     error(e) then 
       // the old 'error' alternative carries the position explicitly:
       // line and column are read from the lexing tools found in the error
       if e is lex_error(b,tools,aux) then
       error(b,line(tools)(unique),column(tools)(unique)), 
       
     ok(tok)  then if tok is
                   {
                     end_of_input  then end_of_input,   // both 'end_of_input' are not of the same type
                     token(t)      then token(t)        // idem
                   }
   }. 
   
   Conversion in the other direction:
   
public define Result(LexicalError($Aux),ExToken($Token))
   convert
   (
     LexerOutput($Token)      r,
     LexingTools              tools,
     $Aux                     aux     
   ) =
   // Converts an old fast_lexer_4 'LexerOutput' into a fast_lexer_5 style result
   // 'tools' and 'aux' are only used for building the 'lex_error' of the error case
   if r is 
   {
     end_of_input then 
       ok(end_of_input),

     // the old line and column are not copied: the new error carries 'tools' instead
     error(bad,old_line,old_col) then 
       error(lex_error(bad,tools,aux)),

     token(user_token) then 
       ok(token(user_token))
   }. 
   
   
public type LexerAction4($Token,$Aux):
   // Old (fast_lexer_4) lexer action, i.e. what to do with a recognized lexeme
   // Renamed from 'LexerAction' because fast_lexer_5 now defines a type of that name
   // Both 'return' alternatives hold a user function producing a 'LexerOutput($Token)'
   // ('ignore' presumably discards the lexeme - handled by fast_lexer_5, to be confirmed)
   ignore,
   return((ByteArray                   token,             
           LexingTools                 tools,
           $Aux                        aux) -> LexerOutput($Token)),  // return the token using this function
   return(((Int s,Int e) -> ByteArray  extract,    // extract token from buffer (start/end relative to token)
           Int                         length,     // length of token
           LexingTools                 tools,
           $Aux                        aux) -> LexerOutput($Token)).  // idem but allowing to extract part of
                                                                      // the token 
   
public type LexerItem4($Token,$Aux):
  // Old (fast_lexer_4) lexer description item: something to recognize, given either as
  // a regular expression (String) or as a literal (ByteArray), together with its action
  // Renamed from 'LexerItem' because fast_lexer_5 now defines a type of that name
  lexer_item(String                         regular_expression,
             LexerAction4($Token,$Aux)      action),
  lexer_item(ByteArray                      literal,
             LexerAction4($Token,$Aux)      action).


                                                                      
define LexerAction(ExToken($Token),$Aux)
   extend
   (
     LexerAction4($Token,$Aux)  ac
   ) =
   // Lifts an old style action to a fast_lexer_5 action over 'ExToken($Token)'
   // The user function is kept; its 'LexerOutput' is translated by 'convert'
   if ac is 
   {
     ignore then 
       ignore, 

     // variant receiving the whole lexeme
     return(old_action) then 
       return((ByteArray lexeme, LexingTools lt, $Aux user_data) |->
                 convert(old_action(lexeme,lt,user_data),lt,user_data)), 

     // variant receiving an extraction function and the length of the lexeme
     return(old_action) then 
       return(((Int,Int) -> ByteArray get_part, Int size, LexingTools lt, $Aux user_data) |-> 
                 convert(old_action(get_part,size,lt,user_data),lt,user_data))
   }. 
   
define LexerItem(ExToken($Token),$Aux)
   extend
   (
     LexerItem4($Token,$Aux)  li
   ) =
   // Lifts an old style lexer item to a fast_lexer_5 item: the regular expression or
   // literal is kept as it is, only the action is extended
   if li is 
   {
     lexer_item(regexp,old_action) then 
       lexer_item(regexp,extend(old_action)),

     lexer_item(literal_bytes,old_action) then 
       lexer_item(literal_bytes,extend(old_action))
   }. 
   
public define Result(RegExprError, 
                     (LexingStream,$Aux) -> One -> LexerOutput($Token))
   make_lexer
     (
       List(LexerItem4($Token,$Aux))   lexer_description,
       Word8                           escape_char               // '#' recommended here
     ) = 
     // Old style 'make_lexer' implemented on top of the fast_lexer_5 one:
     //   - each item of the description is lifted by 'extend'
     //   - 'end_of_input' (of type ExToken) is given as the end of input token
     //   - the resulting lexer is wrapped so that it returns the old 'LexerOutput'
     // A 'RegExprError' coming from fast_lexer_5 is returned unchanged
     if make_lexer(map(extend,lexer_description),end_of_input,escape_char) is 
     {
       error(e)    then error(e),
       ok(lexer)   then 
         ok(
             (LexingStream ls, $Aux aux) |-> (One u) |-> convert(lexer(ls,aux)(u))
           )
     }. 
     
     
public define One
  make_precompiled_lexer
  (
    String                          lexer_name, 
    List(LexerItem4($Token,$Aux))   lexer_description,
    Word8                           escape_char    
  ) =
  // Old style 'make_precompiled_lexer': lifts the description with 'extend' and
  // hands everything over to the fast_lexer_5 function of the same name
  with new_description = map(extend,lexer_description), 
    make_precompiled_lexer(lexer_name,new_description,escape_char). 
                         
     
public define (LexingStream,$Aux) -> One -> LexerOutput($Token)
  retrieve_lexer
  (
    List(LexerItem4($Token,$Aux))              lexer_description,
    (List(Int),PrecompiledFastLexer)           p
  ) =
  // Old style 'retrieve_lexer': the fast_lexer_5 lexer is retrieved once, from the lifted
  // description, 'p' and the 'end_of_input' token, then wrapped so that each call
  // returns the old 'LexerOutput'
  with new_lexer = retrieve_lexer(map(extend,lexer_description),p,end_of_input), 
    (LexingStream stream, $Aux user_data) |-> 
      (One go) |-> convert(new_lexer(stream,user_data)(go)).