Commit d8c550092832aa9e5f7c3203977be9ecbdbacac7
1 parent
c8e4349b
fixed the ''small buffer'' bug.
Showing
1 changed file
with
12 additions
and
15 deletions
Show diff stats
anubis_dev/library/lexical_analysis/fast_lexer_4.anubis
| ... | ... | @@ -1450,7 +1450,7 @@ public define String |
| 1450 | 1450 | *** [1.1] The type 'LexingStream'. |
| 1451 | 1451 | |
| 1452 | 1452 | A lexing stream provides tools which are adhoc for using low level fast lexers as |
| 1453 | - defined in section 13 of predefined.anubis: | |
| 1453 | + defined in section 13 of predefined.anubis. | |
| 1454 | 1454 | |
| 1455 | 1455 | The type below records the information needed to come back to the state just after the |
| 1456 | 1456 | last or penultimate token was read. |
| ... | ... | @@ -1463,9 +1463,10 @@ type TokenState: |
| 1463 | 1463 | Int col |
| 1464 | 1464 | ). |
| 1465 | 1465 | |
| 1466 | - There is a ``penultimate token'' when at least one token has been successfully read since the | |
| 1466 | + There is a ``penultimate token'' when at least two tokens have been successfully read since the |
| 1467 | 1467 | creation of the lexing stream. If it is not the case, the value of the ``penultimate state'' |
| 1468 | - defaults to the very initial state. | |
| 1468 | + defaults to the state after the very first token was read or to the very initial state if no | |
| 1469 | + token was read. |
| 1469 | 1470 | |
| 1470 | 1471 | When the buffer is reloaded, part of the current buffer is kept. One reason for this is that |
| 1471 | 1472 | when we encounter the end of the buffer it can be the case that we are currently reading a token |
| ... | ... | @@ -1487,7 +1488,6 @@ type TokenState: |
| 1487 | 1488 | state informations for token1 and token2, the last two tokens successfully read. |
| 1488 | 1489 | |
| 1489 | 1490 | |
| 1490 | - | |
| 1491 | 1491 | public type LexingStream: |
| 1492 | 1492 | lexing_stream |
| 1493 | 1493 | ( |
| ... | ... | @@ -1569,18 +1569,18 @@ public type LexingStream: |
| 1569 | 1569 | -- the new current buffer "source text." |
| 1570 | 1570 | |
| 1571 | 1571 | -- last accepted: (s,3), because 'sou' has been accepted in state 's' and |
| 1572 | - ends at offset 0 within the new buffer, | |
| 1572 | + ends at offset 3 within the new buffer, | |
| 1573 | 1573 | |
| 1574 | 1574 | -- current_v receives the value 3, because 'sou' is already read, |
| 1575 | 1575 | |
| 1576 | 1576 | -- token_start_v receives the value 0, because the token we are currently |
| 1577 | 1577 | reading begins at offset 0. |
| 1578 | 1578 | |
| 1579 | - -- state s, because we want to try to read the sequel of 'sou'. | |
| 1579 | + -- restart in state s, because we want to try to read the sequel of 'sou'. | |
| 1580 | 1580 | |
| 1581 | 1581 | Notice that if the low level lexer had returned 'rejected(s,at_end_of_input,12,15)' |
| 1582 | 1582 | instead of 'accepted(s,at_end_of_input,12,15)', the scenario is the same one except |
| 1583 | - that last accepted is 'none'. | |
| 1583 | + that last accepted will be 'none'. | |
| 1584 | 1584 | |
| 1585 | 1585 | The low level lexer will now return 'accepted(s,not_at_end_of_input,0,6)', meaning that |
| 1586 | 1586 | it has recognized the token 'source' between positions 0 (included) and 6 (not |
| ... | ... | @@ -1597,7 +1597,7 @@ public type LexingStream: |
| 1597 | 1597 | define LexingTools |
| 1598 | 1598 | make_tools |
| 1599 | 1599 | ( |
| 1600 | - Var(Int) token_start_v, | |
| 1600 | + Var(Int) token_start_v, // actually not used in this function | |
| 1601 | 1601 | Var(Int) current_v, |
| 1602 | 1602 | Var(Int) line_v, |
| 1603 | 1603 | Var(Int) col_v, |
| ... | ... | @@ -1614,21 +1614,18 @@ define LexingTools |
| 1614 | 1614 | (One _) |-> *col_v, |
| 1615 | 1615 | |
| 1616 | 1616 | // get current offset: |
| 1617 | + // This is the number of bytes which are no more in the buffer plus the current position. | |
| 1617 | 1618 | (One _) |-> *past_v + *current_v, |
| 1618 | 1619 | |
| 1619 | 1620 | // go back one char: |
| 1620 | 1621 | // don't go beyond the beginning of the buffer |
| 1621 | - // No need to update line_v and col_v because they | |
| 1622 | - // refer to the beginning of the token. | |
| 1623 | 1622 | (Int n) |-> current_v <- max(*current_v - n, 0), |
| 1624 | - //token_start_v <- *current_v, | |
| 1625 | 1623 | |
| 1626 | 1624 | // comming back to the state just after the last token was read |
| 1627 | 1625 | (One _) |-> if *last_tok_v is tstate(cur,l,c) then |
| 1628 | 1626 | current_v <- cur; |
| 1629 | 1627 | line_v <- l; |
| 1630 | 1628 | col_v <- c; |
| 1631 | - last_tok_v <- *penult_tok_v; | |
| 1632 | 1629 | last_accept_v <- none, |
| 1633 | 1630 | |
| 1634 | 1631 | // comming back to the state just after the penultimate token was read |
| ... | ... | @@ -1667,7 +1664,7 @@ public define LexingStream |
| 1667 | 1664 | (One u) |-> failure, // buffer is never reloaded |
| 1668 | 1665 | line_v, // current line |
| 1669 | 1666 | col_v, // current column |
| 1670 | - past_v, // past bytes | |
| 1667 | + past_v, // past bytes (will remain always 0 in this case) | |
| 1671 | 1668 | make_tools(token_start_v,current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)). |
| 1672 | 1669 | |
| 1673 | 1670 | |
| ... | ... | @@ -1726,7 +1723,7 @@ public define Maybe(LexingStream) |
| 1726 | 1723 | //print("Keeping this from previous buffer: ["+to_string(extract(old_buffer,dropped,old_length))+"]\n"); |
| 1727 | 1724 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
| 1728 | 1725 | //print("New buffer: ["+to_string(*buffer_v)+"] size: "+to_decimal(length(*buffer_v))+"\n"); |
| 1729 | - token_start_v <- 0; | |
| 1726 | + token_start_v <- *token_start_v - dropped; | |
| 1730 | 1727 | //print("Next token starting position: "+to_decimal(*token_start_v)+"\n"); |
| 1731 | 1728 | current_v <- old_length - dropped; |
| 1732 | 1729 | //print("New current reading position: "+to_decimal(*current_v)+"\n"); |
| ... | ... | @@ -1811,7 +1808,7 @@ public define Maybe(LexingStream) |
| 1811 | 1808 | min(min(current(*penult_tok_v),current(*last_tok_v)),*token_start_v), |
| 1812 | 1809 | |
| 1813 | 1810 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
| 1814 | - token_start_v <- 0; | |
| 1811 | + token_start_v <- *token_start_v - dropped; | |
| 1815 | 1812 | current_v <- old_length - dropped; |
| 1816 | 1813 | past_bytes_v <- *past_bytes_v + dropped; |
| 1817 | 1814 | last_tok_v <- (if *last_tok_v is tstate(cur,l,c) then tstate(cur - dropped,l,c)); | ... | ... |