Commit 3746a4f77160ca7d7ad8e9aa26401b0897e86934
1 parent
baf88b9d
Added a variant to the 'return' function, allowing part of the token to be extracted.
Showing
1 changed file
with
47 additions
and
8 deletions
Show diff stats
anubis_dev/library/lexical_analysis/fast_lexer_3.anubis
| ... | ... | @@ -387,9 +387,22 @@ public type LexingTools: |
| 387 | 387 | |
| 388 | 388 | |
| 389 | 389 | public type LexerAction($Token,$Aux): |
| 390 | - ignore, // ignore the token (no action) | |
| 391 | - return((ByteArray,LexingTools,$Aux) -> LexerOutput($Token)). // return the token using this function | |
| 392 | - | |
| 390 | + ignore, // ignore the token (no action) | |
| 391 | + return((ByteArray token, | |
| 392 | + LexingTools tools, | |
| 393 | + $Aux aux) -> LexerOutput($Token)), // return the token using this function | |
| 394 | + return(((Int s,Int e) -> ByteArray extract, // extract token from buffer (start/end relative to token) | |
| 395 | + Int length, // length of token | |
| 396 | + LexingTools tools, | |
| 397 | + $Aux aux) -> LexerOutput($Token)). // idem but allowing to extract part of | |
| 398 | + // the token | |
| 399 | + | |
| 400 | + The third alternative in 'LexerAction($Token,$Aux)' is a variant of the second one. Instead of extracting | |
| 401 | + the token from the buffer, the function provides tools for extracting a part of the token. The argument | |
| 402 | + 'length' is the total length of the token. The function 'extract' makes it possible to extract the part of the token | |
| 403 | + located between positions 's' (included) and 'e' (not included). | |
| 404 | + | |
| 405 | + | |
| 393 | 406 | public type LexerItem($Token,$Aux): |
| 394 | 407 | lexer_item(String regular_expression, |
| 395 | 408 | LexerAction($Token,$Aux) action), |
| ... | ... | @@ -589,6 +602,11 @@ public type DFA_state($Token,$Aux): |
| 589 | 602 | Int action_rank, |
| 590 | 603 | (ByteArray,LexingTools,$Aux) -> LexerOutput($Token) action), |
| 591 | 604 | |
| 605 | + accepting (Word16 name, | |
| 606 | + List(DFA_transition) transitions, | |
| 607 | + Int action_rank, | |
| 608 | + ((Int,Int) -> ByteArray,Int,LexingTools,$Aux) -> LexerOutput($Token) action), | |
| 609 | + | |
| 592 | 610 | ignoring (Word16 name, |
| 593 | 611 | List(DFA_transition) transitions). |
| 594 | 612 | |
| ... | ... | @@ -903,7 +921,8 @@ public type RegExpr: |
| 903 | 921 | |
| 904 | 922 | type LexerRankAction($Token,$Aux): |
| 905 | 923 | ignore, |
| 906 | - return(Int rk, (ByteArray,LexingTools,$Aux) -> LexerOutput($Token)). | |
| 924 | + return(Int rk, (ByteArray,LexingTools,$Aux) -> LexerOutput($Token)), | |
| 925 | + return(Int rk, ((Int,Int) -> ByteArray,Int,LexingTools,$Aux) -> LexerOutput($Token)). | |
| 907 | 926 | |
| 908 | 927 | |
| 909 | 928 | public type BasicRegExpr($Token,$Aux): |
| ... | ... | @@ -1927,12 +1946,17 @@ public define LexerOutput($Token) |
| 1927 | 1946 | { |
| 1928 | 1947 | ignore then should_not_happen(end_of_input), |
| 1929 | 1948 | // because the low level lexer doesn't return in this case |
| 1930 | - return(_0) then | |
| 1949 | + return(f) then | |
| 1931 | 1950 | // At this point a token is successfully read. |
| 1932 | 1951 | // We must update some variables |
| 1933 | 1952 | penult_tok_v <- *last_tok_v; |
| 1934 | 1953 | last_tok_v <- tstate(end,*line_v,*col_v); |
| 1935 | - _0(extract(*buffer_v,start,end),tools,aux) | |
| 1954 | + f(extract(*buffer_v,start,end),tools,aux), | |
| 1955 | + return(f) then | |
| 1956 | + penult_tok_v <- *last_tok_v; | |
| 1957 | + last_tok_v <- tstate(end,*line_v,*col_v); | |
| 1958 | + f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | |
| 1959 | + end-start,tools,aux), | |
| 1936 | 1960 | } |
| 1937 | 1961 | ) |
| 1938 | 1962 | else |
| ... | ... | @@ -1950,9 +1974,13 @@ public define LexerOutput($Token) |
| 1950 | 1974 | else if *actions(word32(s,0)) is |
| 1951 | 1975 | { |
| 1952 | 1976 | ignore then should_not_happen(end_of_input), |
| 1953 | - return(_0) then penult_tok_v <- *last_tok_v; | |
| 1977 | + return(f) then penult_tok_v <- *last_tok_v; | |
| 1954 | 1978 | last_tok_v <- tstate(end,*line_v,*col_v); |
| 1955 | - _0(extract(*buffer_v,start,end),tools,aux) | |
| 1979 | + f(extract(*buffer_v,start,end),tools,aux), | |
| 1980 | + return(f) then penult_tok_v <- *last_tok_v; | |
| 1981 | + last_tok_v <- tstate(end,*line_v,*col_v); | |
| 1982 | + f((Int k, Int l) |-> extract(*buffer_v,start+k,start+l), | |
| 1983 | + end-start,tools,aux) | |
| 1956 | 1984 | }, |
| 1957 | 1985 | |
| 1958 | 1986 | success(_) then |
| ... | ... | @@ -2651,6 +2679,8 @@ define List(DFA_state($Token,$Aux)) |
| 2651 | 2679 | ignore then |
| 2652 | 2680 | [ignoring(get_new_name(old_name,nlist),trs) . rename(t,nlist)], |
| 2653 | 2681 | return(rk,a) then |
| 2682 | + [accepting(get_new_name(old_name,nlist),trs,rk,a) . rename(t,nlist)], | |
| 2683 | + return(rk,a) then | |
| 2654 | 2684 | [accepting(get_new_name(old_name,nlist),trs,rk,a) . rename(t,nlist)] |
| 2655 | 2685 | } |
| 2656 | 2686 | } |
| ... | ... | @@ -2673,6 +2703,7 @@ define LexerRankAction($Token,$Aux) |
| 2673 | 2703 | if a is |
| 2674 | 2704 | { |
| 2675 | 2705 | ignore then ignore, |
| 2706 | + return(f) then return(rank,f), | |
| 2676 | 2707 | return(f) then return(rank,f) |
| 2677 | 2708 | }. |
| 2678 | 2709 | |
| ... | ... | @@ -2769,6 +2800,7 @@ public define List(FastLexerState) |
| 2769 | 2800 | { |
| 2770 | 2801 | rejecting(n,trs) then rejecting(to_fast_lexer_transitions(trs)) |
| 2771 | 2802 | accepting(n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs)) |
| 2803 | + accepting(n,trs,rk,a) then accepting(to_fast_lexer_transitions(trs)) | |
| 2772 | 2804 | ignoring (n,trs) then ignoring(to_fast_lexer_transitions(trs)) |
| 2773 | 2805 | } . to_fast_lexer_description(t)] |
| 2774 | 2806 | }. |
| ... | ... | @@ -2809,6 +2841,7 @@ define One |
| 2809 | 2841 | { |
| 2810 | 2842 | rejecting(name,trs) then unique, |
| 2811 | 2843 | accepting(name,trs,rk,action) then v(word32(name,0)) <- return(action), |
| 2844 | + accepting(name,trs,rk,action) then v(word32(name,0)) <- return(action), | |
| 2812 | 2845 | ignoring (name,trs) then unique |
| 2813 | 2846 | }; |
| 2814 | 2847 | fill_actions(t,v) |
| ... | ... | @@ -2952,6 +2985,10 @@ define One |
| 2952 | 2985 | print(f,"\n --- state "+to_decimal(name)+" (accepting with action number "+to_decimal(action_rank)+") ---\n"); |
| 2953 | 2986 | dump(f,transitions,0), |
| 2954 | 2987 | |
| 2988 | + accepting(name,transitions,action_rank,action) then | |
| 2989 | + print(f,"\n --- state "+to_decimal(name)+" (accepting with action number "+to_decimal(action_rank)+") ---\n"); | |
| 2990 | + dump(f,transitions,0), | |
| 2991 | + | |
| 2955 | 2992 | ignoring(name,transitions) then |
| 2956 | 2993 | print(f,"\n --- state "+to_decimal(name)+" (ignoring no action) ---\n"); |
| 2957 | 2994 | dump(f,transitions,0) |
| ... | ... | @@ -2995,6 +3032,7 @@ define List(Int) |
| 2995 | 3032 | { |
| 2996 | 3033 | rejecting(name,transitions) then [-1 . actions_ranks(t)], |
| 2997 | 3034 | accepting(name,transitions,rank,action) then [rank . actions_ranks(t)], |
| 3035 | + accepting(name,transitions,rank,action) then [rank . actions_ranks(t)], | |
| 2998 | 3036 | ignoring(name,transitions) then [-1 . actions_ranks(t)] |
| 2999 | 3037 | } |
| 3000 | 3038 | }. |
| ... | ... | @@ -3200,6 +3238,7 @@ define GResult dump_all_states( |
| 3200 | 3238 | { |
| 3201 | 3239 | rejecting(_, _) then color("red") |
| 3202 | 3240 | accepting(_, _, _, _) then color("green") |
| 3241 | + accepting(_, _, _, _) then color("green") | |
| 3203 | 3242 | ignoring(_, _) then color("blue") |
| 3204 | 3243 | }, |
| 3205 | 3244 | if tuple is (tkv, baseg) then | ... | ... |