Commit 3746a4f77160ca7d7ad8e9aa26401b0897e86934
1 parent
baf88b9d
Added a variant to the 'return' function, allowing part of the token to be extracted.
Showing
1 changed file
with
47 additions
and
8 deletions
Show diff stats
anubis_dev/library/lexical_analysis/fast_lexer_3.anubis
| ... | ... | @@ -387,9 +387,22 @@ public type LexingTools: |
| 387 | 387 | |
| 388 | 388 | |
| 389 | 389 | public type LexerAction($Token,$Aux): |
| 390 | - ignore, // ignore the token (no action) | |
| 391 | - return((ByteArray,LexingTools,$Aux) -> LexerOutput($Token)). // return the token using this function | |
| 392 | - | |
| 390 | + ignore, // ignore the token (no action) | |
| 391 | + return((ByteArray token, | |
| 392 | + LexingTools tools, | |
| 393 | + $Aux aux) -> LexerOutput($Token)), // return the token using this function | |
| 394 | + return(((Int s,Int e) -> ByteArray extract, // extract token from buffer (start/end relative to token) | |
| 395 | + Int length, // length of token | |
| 396 | + LexingTools tools, | |
| 397 | + $Aux aux) -> LexerOutput($Token)). // idem but allowing to extract part of | |
| 398 | + // the token | |
| 399 | + | |
| 400 | + The third alternative in 'LexerAction($Token,$Aux)' is a variant of the second one. Instead of extracting | |
| 401 | + the token from the buffer, the function provides tools for extracting a part of the token. The argument | |
| 402 | + 'length' is the total length of the token. The function 'extract' makes it possible to extract the part of the token | |
| 403 | + located between positions 's' (included) and 'e' (not included). | |
| 404 | + | |
| 405 | + | |
| 393 | 406 | public type LexerItem($Token,$Aux): |
| 394 | 407 | lexer_item(String regular_expression, |
| 395 | 408 | LexerAction($Token,$Aux) action), |
| ... | ... | @@ -589,6 +602,11 @@ public type DFA_state($Token,$Aux): |
| 589 | 602 | Int action_rank, |
| 590 | 603 | (ByteArray,LexingTools,$Aux) -> LexerOutput($Token) action), |
| 591 | 604 | |
| 605 | + accepting (Word16 name, | |
| 606 | + List(DFA_transition) transitions, | |
| 607 | + Int action_rank, | |
| 608 | + ((Int,Int) -> ByteArray,Int,LexingTools,$Aux) -> LexerOutput($Token) action), | |
| 609 | + | |
| 592 | 610 | ignoring (Word16 name, |
| 593 | 611 | List(DFA_transition) transitions). |
| 594 | 612 | |
| ... | ... | @@ -903,7 +921,8 @@ public type RegExpr: |
| 903 | 921 | |
| 904 | 922 | type LexerRankAction($Token,$Aux): |
| 905 | 923 | ignore, |
| 906 | - return(Int rk, (ByteArray,LexingTools,$Aux) -> LexerOutput($Token)). | |
| 924 | + return(Int rk, (ByteArray,LexingTools,$Aux) -> LexerOutput($Token)), | |
| 925 | + return(Int rk, ((Int,Int) -> ByteArray,Int,LexingTools,$Aux) -> LexerOutput($Token)). | |
| 907 | 926 | |
| 908 | 927 | |
| 909 | 928 | public type BasicRegExpr($Token,$Aux): |
| ... | ... | @@ -1927,12 +1946,17 @@ public define LexerOutput($Token) |
| 1927 | 1946 | { |
| 1928 | 1947 | ignore then should_not_happen(end_of_input), |
| 1929 | 1948 | // because the low level lexer doesn't return in this case |
| 1930 | - return(_0) then | |
| 1949 | + return(f) then | |
| 1931 | 1950 | // At this point a token is successfully read. |
| 1932 | 1951 | // We must update some variables |
| 1933 | 1952 | penult_tok_v <- *last_tok_v; |
| 1934 | 1953 | last_tok_v <- tstate(end,*line_v,*col_v); |
| 1935 | - _0(extract(*buffer_v,start,end),tools,aux) | |
| 1954 | + f(extract(*buffer_v,start,end),tools,aux), | |
| 1955 | + return(f) then | |
| 1956 | + penult_tok_v <- *last_tok_v; | |
| 1957 | + last_tok_v <- tstate(end,*line_v,*col_v); | |
| 1958 | + f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | |
| 1959 | + end-start,tools,aux), | |
| 1936 | 1960 | } |
| 1937 | 1961 | ) |
| 1938 | 1962 | else |
| ... | ... | @@ -1950,9 +1974,13 @@ public define LexerOutput($Token) |
| 1950 | 1974 | else if *actions(word32(s,0)) is |
| 1951 | 1975 | { |
| 1952 | 1976 | ignore then should_not_happen(end_of_input), |
| 1953 | - return(_0) then penult_tok_v <- *last_tok_v; | |
| 1977 | + return(f) then penult_tok_v <- *last_tok_v; | |
| 1954 | 1978 | last_tok_v <- tstate(end,*line_v,*col_v); |
| 1955 | - _0(extract(*buffer_v,start,end),tools,aux) | |
| 1979 | + f(extract(*buffer_v,start,end),tools,aux), | |
| 1980 | + return(f) then penult_tok_v <- *last_tok_v; | |
| 1981 | + last_tok_v <- tstate(end,*line_v,*col_v); | |
| 1982 | + f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | |
| 1983 | + end-start,tools,aux) | |
| 1956 | 1984 | }, |
| 1957 | 1985 | |
| 1958 | 1986 | success(_) then |
| ... | ... | @@ -2651,6 +2679,8 @@ define List(DFA_state($Token,$Aux)) |
| 2651 | 2679 | ignore then |
| 2652 | 2680 | [ignoring(get_new_name(old_name,nlist),trs) . rename(t,nlist)], |
| 2653 | 2681 | return(rk,a) then |
| 2682 | + [accepting(get_new_name(old_name,nlist),trs,rk,a) . rename(t,nlist)], | |
| 2683 | + return(rk,a) then | |
| 2654 | 2684 | [accepting(get_new_name(old_name,nlist),trs,rk,a) . rename(t,nlist)] |
| 2655 | 2685 | } |
| 2656 | 2686 | } |
| ... | ... | @@ -2673,6 +2703,7 @@ define LexerRankAction($Token,$Aux) |
| 2673 | 2703 | if a is |
| 2674 | 2704 | { |
| 2675 | 2705 | ignore then ignore, |
| 2706 | + return(f) then return(rank,f), | |
| 2676 | 2707 | return(f) then return(rank,f) |
| 2677 | 2708 | }. |
| 2678 | 2709 | |
| ... | ... | @@ -2769,6 +2800,7 @@ public define List(FastLexerState) |
| 2769 | 2800 | { |
| 2770 | 2801 | rejecting(n,trs) then rejecting(to_fast_lexer_transitions(trs)) |
| 2771 | 2802 | accepting(n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs)) |
| 2803 | + accepting(n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs)) | |
| 2772 | 2804 | ignoring (n,trs) then ignoring(to_fast_lexer_transitions(trs)) |
| 2773 | 2805 | } . to_fast_lexer_description(t)] |
| 2774 | 2806 | }. |
| ... | ... | @@ -2809,6 +2841,7 @@ define One |
| 2809 | 2841 | { |
| 2810 | 2842 | rejecting(name,trs) then unique, |
| 2811 | 2843 | accepting(name,trs,rk,action) then v(word32(name,0)) <- return(action), |
| 2844 | + accepting(name,trs,rk,action) then v(word32(name,0)) <- return(action), | |
| 2812 | 2845 | ignoring (name,trs) then unique |
| 2813 | 2846 | }; |
| 2814 | 2847 | fill_actions(t,v) |
| ... | ... | @@ -2952,6 +2985,10 @@ define One |
| 2952 | 2985 | print(f,"\n --- state "+to_decimal(name)+" (accepting with action number "+to_decimal(action_rank)+") ---\n"); |
| 2953 | 2986 | dump(f,transitions,0), |
| 2954 | 2987 | |
| 2988 | + accepting(name,transitions,action_rank,action) then | |
| 2989 | + print(f,"\n --- state "+to_decimal(name)+" (accepting with action number "+to_decimal(action_rank)+") ---\n"); | |
| 2990 | + dump(f,transitions,0), | |
| 2991 | + | |
| 2955 | 2992 | ignoring(name,transitions) then |
| 2956 | 2993 | print(f,"\n --- state "+to_decimal(name)+" (ignoring no action) ---\n"); |
| 2957 | 2994 | dump(f,transitions,0) |
| ... | ... | @@ -2995,6 +3032,7 @@ define List(Int) |
| 2995 | 3032 | { |
| 2996 | 3033 | rejecting(name,transitions) then [-1 . actions_ranks(t)], |
| 2997 | 3034 | accepting(name,transitions,rank,action) then [rank . actions_ranks(t)], |
| 3035 | + accepting(name,transitions,rank,action) then [rank . actions_ranks(t)], | |
| 2998 | 3036 | ignoring(name,transitions) then [-1 . actions_ranks(t)] |
| 2999 | 3037 | } |
| 3000 | 3038 | }. |
| ... | ... | @@ -3200,6 +3238,7 @@ define GResult dump_all_states( |
| 3200 | 3238 | { |
| 3201 | 3239 | rejecting(_, _) then color("red") |
| 3202 | 3240 | accepting(_, _, _, _) then color("green") |
| 3241 | + accepting(_, _, _, _) then color("green") | |
| 3203 | 3242 | ignoring(_, _) then color("blue") |
| 3204 | 3243 | }, |
| 3205 | 3244 | if tuple is (tkv, baseg) then | ... | ... |