Commit 3746a4f77160ca7d7ad8e9aa26401b0897e86934

Authored by Alain Prouté
1 parent baf88b9d

Added a variant to the 'return' function, allowing extraction of only a part of the token.

Showing 1 changed file with 47 additions and 8 deletions   Show diff stats
anubis_dev/library/lexical_analysis/fast_lexer_3.anubis
... ... @@ -387,9 +387,22 @@ public type LexingTools:
387 387  
388 388  
389 389 public type LexerAction($Token,$Aux):
390   - ignore, // ignore the token (no action)
391   - return((ByteArray,LexingTools,$Aux) -> LexerOutput($Token)). // return the token using this function
392   -
  390 + ignore, // ignore the token (no action)
  391 + return((ByteArray token,
  392 + LexingTools tools,
  393 + $Aux aux) -> LexerOutput($Token)), // return the token using this function
  394 + return(((Int s,Int e) -> ByteArray extract, // extract token from buffer (start/end relative to token)
  395 + Int length, // length of token
  396 + LexingTools tools,
  397 + $Aux aux) -> LexerOutput($Token)). // idem but allowing to extract part of
  398 + // the token
  399 +
  400 + The third alternative in 'LexerAction($Token,$Aux)' is a variant of the second one. Instead of receiving
  401 + the token already extracted from the buffer, the function receives tools for extracting a part of it. The
  402 + argument 'length' is the total length of the token. The function 'extract' returns the part of the token
  403 + located between positions 's' (inclusive) and 'e' (exclusive), both relative to the start of the token.
  404 +
  405 +
393 406 public type LexerItem($Token,$Aux):
394 407 lexer_item(String regular_expression,
395 408 LexerAction($Token,$Aux) action),
... ... @@ -589,6 +602,11 @@ public type DFA_state($Token,$Aux):
589 602 Int action_rank,
590 603 (ByteArray,LexingTools,$Aux) -> LexerOutput($Token) action),
591 604  
  605 + accepting (Word16 name,
  606 + List(DFA_transition) transitions,
  607 + Int action_rank,
  608 + ((Int,Int) -> ByteArray,Int,LexingTools,$Aux) -> LexerOutput($Token) action),
  609 +
592 610 ignoring (Word16 name,
593 611 List(DFA_transition) transitions).
594 612  
... ... @@ -903,7 +921,8 @@ public type RegExpr:
903 921  
904 922 type LexerRankAction($Token,$Aux):
905 923 ignore,
906   - return(Int rk, (ByteArray,LexingTools,$Aux) -> LexerOutput($Token)).
  924 + return(Int rk, (ByteArray,LexingTools,$Aux) -> LexerOutput($Token)),
  925 + return(Int rk, ((Int,Int) -> ByteArray,Int,LexingTools,$Aux) -> LexerOutput($Token)).
907 926  
908 927  
909 928 public type BasicRegExpr($Token,$Aux):
... ... @@ -1927,12 +1946,17 @@ public define LexerOutput($Token)
1927 1946 {
1928 1947 ignore then should_not_happen(end_of_input),
1929 1948 // because the low level lexer doesn't return in this case
1930   - return(_0) then
  1949 + return(f) then
1931 1950 // At this point a token is successfully read.
1932 1951 // We must update some variables
1933 1952 penult_tok_v <- *last_tok_v;
1934 1953 last_tok_v <- tstate(end,*line_v,*col_v);
1935   - _0(extract(*buffer_v,start,end),tools,aux)
  1954 + f(extract(*buffer_v,start,end),tools,aux),
  1955 + return(f) then
  1956 + penult_tok_v <- *last_tok_v;
  1957 + last_tok_v <- tstate(end,*line_v,*col_v);
  1958 + f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
  1959 + end-start,tools,aux),
1936 1960 }
1937 1961 )
1938 1962 else
... ... @@ -1950,9 +1974,13 @@ public define LexerOutput($Token)
1950 1974 else if *actions(word32(s,0)) is
1951 1975 {
1952 1976 ignore then should_not_happen(end_of_input),
1953   - return(_0) then penult_tok_v <- *last_tok_v;
  1977 + return(f) then penult_tok_v <- *last_tok_v;
1954 1978 last_tok_v <- tstate(end,*line_v,*col_v);
1955   - _0(extract(*buffer_v,start,end),tools,aux)
  1979 + f(extract(*buffer_v,start,end),tools,aux),
  1980 + return(f) then penult_tok_v <- *last_tok_v;
  1981 + last_tok_v <- tstate(end,*line_v,*col_v);
  1982 + f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l),
  1983 + end-start,tools,aux)
1956 1984 },
1957 1985  
1958 1986 success(_) then
... ... @@ -2651,6 +2679,8 @@ define List(DFA_state($Token,$Aux))
2651 2679 ignore then
2652 2680 [ignoring(get_new_name(old_name,nlist),trs) . rename(t,nlist)],
2653 2681 return(rk,a) then
  2682 + [accepting(get_new_name(old_name,nlist),trs,rk,a) . rename(t,nlist)],
  2683 + return(rk,a) then
2654 2684 [accepting(get_new_name(old_name,nlist),trs,rk,a) . rename(t,nlist)]
2655 2685 }
2656 2686 }
... ... @@ -2673,6 +2703,7 @@ define LexerRankAction($Token,$Aux)
2673 2703 if a is
2674 2704 {
2675 2705 ignore then ignore,
  2706 + return(f) then return(rank,f),
2676 2707 return(f) then return(rank,f)
2677 2708 }.
2678 2709  
... ... @@ -2769,6 +2800,7 @@ public define List(FastLexerState)
2769 2800 {
2770 2801 rejecting(n,trs) then rejecting(to_fast_lexer_transitions(trs))
2771 2802 accepting(n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs))
  2803 + accepting(n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs))
2772 2804 ignoring (n,trs) then ignoring(to_fast_lexer_transitions(trs))
2773 2805 } . to_fast_lexer_description(t)]
2774 2806 }.
... ... @@ -2809,6 +2841,7 @@ define One
2809 2841 {
2810 2842 rejecting(name,trs) then unique,
2811 2843 accepting(name,trs,rk,action) then v(word32(name,0)) <- return(action),
  2844 + accepting(name,trs,rk,action) then v(word32(name,0)) <- return(action),
2812 2845 ignoring (name,trs) then unique
2813 2846 };
2814 2847 fill_actions(t,v)
... ... @@ -2952,6 +2985,10 @@ define One
2952 2985 print(f,"\n --- state "+to_decimal(name)+" (accepting with action number "+to_decimal(action_rank)+") ---\n");
2953 2986 dump(f,transitions,0),
2954 2987  
  2988 + accepting(name,transitions,action_rank,action) then
  2989 + print(f,"\n --- state "+to_decimal(name)+" (accepting with action number "+to_decimal(action_rank)+") ---\n");
  2990 + dump(f,transitions,0),
  2991 +
2955 2992 ignoring(name,transitions) then
2956 2993 print(f,"\n --- state "+to_decimal(name)+" (ignoring no action) ---\n");
2957 2994 dump(f,transitions,0)
... ... @@ -2995,6 +3032,7 @@ define List(Int)
2995 3032 {
2996 3033 rejecting(name,transitions) then [-1 . actions_ranks(t)],
2997 3034 accepting(name,transitions,rank,action) then [rank . actions_ranks(t)],
  3035 + accepting(name,transitions,rank,action) then [rank . actions_ranks(t)],
2998 3036 ignoring(name,transitions) then [-1 . actions_ranks(t)]
2999 3037 }
3000 3038 }.
... ... @@ -3200,6 +3238,7 @@ define GResult dump_all_states(
3200 3238 {
3201 3239 rejecting(_, _) then color("red")
3202 3240 accepting(_, _, _, _) then color("green")
  3241 + accepting(_, _, _, _) then color("green")
3203 3242 ignoring(_, _) then color("blue")
3204 3243 },
3205 3244 if tuple is (tkv, baseg) then
... ...