Commit d8c550092832aa9e5f7c3203977be9ecbdbacac7

Authored by Alain Prouté
1 parent c8e4349b

fixed the ``small buffer'' bug.

Showing 1 changed file with 12 additions and 15 deletions
anubis_dev/library/lexical_analysis/fast_lexer_4.anubis
... ... @@ -1450,7 +1450,7 @@ public define String
1450 1450 *** [1.1] The type 'LexingStream'.
1451 1451  
1452 1452 A lexing stream provides ad hoc tools for using low level fast lexers as
1453   - defined in section 13 of predefined.anubis:
  1453 + defined in section 13 of predefined.anubis.
1454 1454  
1455 1455 The type below records the information needed to come back to the state just after the
1456 1456 last or penultimate token was read.
... ... @@ -1463,9 +1463,10 @@ type TokenState:
1463 1463 Int col
1464 1464 ).
1465 1465  
1466   - There is a ``penultimate token'' when at least one token has been successfully read since the
  1466 + There is a ``penultimate token'' when at least two tokens have been successfully read since the
1467 1467 creation of the lexing stream. If it is not the case, the value of the ``penultimate state''
1468   - defaults to the very initial state.
  1468 + defaults to the state after the very first token was read or to the very initial state if no
  1469 + token was read.
1469 1470  
1470 1471 When the buffer is reloaded, part of the current buffer is kept. One reason for this is that
1471 1472 when we encounter the end of the buffer it can be the case that we are currently reading a token
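
To make the bookkeeping above concrete, here is a minimal OCaml sketch of the token state record and of how the last and penultimate states are rotated when a token is read. This is an illustration only, not the Anubis source; all names (and the initial line/column values) are ours.

    (* Illustrative OCaml model of the TokenState bookkeeping; not the
       Anubis implementation. *)
    type token_state = {
      cur  : int;   (* offset just after the token, within the current buffer *)
      line : int;   (* line number at that point *)
      col  : int;   (* column number at that point *)
    }

    (* Assumed initial state: offset 0, line 1, column 0. *)
    let initial = { cur = 0; line = 1; col = 0 }

    type stream = {
      mutable last_tok   : token_state;  (* state after the last token *)
      mutable penult_tok : token_state;  (* state after the penultimate token *)
    }

    (* Both slots default to the initial state, so after a single token the
       "penultimate" slot still holds the initial state, as described above. *)
    let create () = { last_tok = initial; penult_tok = initial }

    (* Reading a token successfully promotes the last state to penultimate. *)
    let token_read s st =
      s.penult_tok <- s.last_tok;
      s.last_tok   <- st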
... ... @@ -1487,7 +1488,6 @@ type TokenState:
1487 1488 state information for token1 and token2, the last two tokens successfully read.
1488 1489  
1489 1490  
1490   -
1491 1491 public type LexingStream:
1492 1492 lexing_stream
1493 1493 (
... ... @@ -1569,18 +1569,18 @@ public type LexingStream:
1569 1569 -- the new current buffer "source text."
1570 1570  
1571 1571 -- last accepted: (s,3), because 'sou' has been accepted in state 's' and
1572   - ends at offset 0 within the new buffer,
  1572 + ends at offset 3 within the new buffer,
1573 1573  
1574 1574 -- current_v receives the value 3, because 'sou' has already been read,
1575 1575  
1576 1576 -- token_start_v receives the value 0, because the token we are currently
1577 1577 reading begins at offset 0.
1578 1578  
1579   - -- state s, because we want to try to read the sequel of 'sou'.
  1579 + -- restart in state s, because we want to try to read the sequel of 'sou'.
1580 1580  
1581 1581 Notice that if the low level lexer had returned 'rejected(s,at_end_of_input,12,15)'
1582 1582 instead of 'accepted(s,at_end_of_input,12,15)', the scenario is the same one except
1583   - that last accepted is 'none'.
  1583 + that last accepted will be 'none'.
1584 1584  
1585 1585 The low level lexer will now return 'accepted(s,not_at_end_of_input,0,6)', meaning that
1586 1586 it has recognized the token 'source' between positions 0 (included) and 6 (not
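
As a sanity check of the offsets in this scenario, here is a small OCaml replay (our names only; the lexer state s is symbolic and does not appear in the code):

    (* Replay of the reload scenario above: the partially read token 'sou'
       is kept at the front of the new buffer "source text.". *)
    let () =
      let buffer      = "source text." in
      let token_start = 0 in   (* the pending token now begins the buffer *)
      let current     = 3 in   (* 'sou' (3 bytes) was already consumed *)
      (* last accepted is (s,3): 'sou' was accepted in state s and ends at
         offset 3 within the new buffer. Restarting in state s, the lexer
         then accepts 'source' between offsets 0 (included) and 6 (excluded). *)
      let token = String.sub buffer token_start (6 - token_start) in
      assert (token = "source");
      assert (current + 3 = 6);  (* 3 bytes of 'sou' plus 3 of 'rce' *)
      print_endline token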
... ... @@ -1597,7 +1597,7 @@ public type LexingStream:
1597 1597 define LexingTools
1598 1598 make_tools
1599 1599 (
1600   - Var(Int) token_start_v,
  1600 + Var(Int) token_start_v, // actually not used in this function
1601 1601 Var(Int) current_v,
1602 1602 Var(Int) line_v,
1603 1603 Var(Int) col_v,
... ... @@ -1614,21 +1614,18 @@ define LexingTools
1614 1614 (One _) |-> *col_v,
1615 1615  
1616 1616 // get current offset:
  1617 + // This is the number of bytes which are no longer in the buffer, plus the current position.
1617 1618 (One _) |-> *past_v + *current_v,
1618 1619  
1619 1620 // go back one char:
1620 1621 // don't go beyond the beginning of the buffer
1621   - // No need to update line_v and col_v because they
1622   - // refer to the beginning of the token.
1623 1622 (Int n) |-> current_v <- max(*current_v - n, 0),
1624   - //token_start_v <- *current_v,
1625 1623  
1626 1624 // coming back to the state just after the last token was read
1627 1625 (One _) |-> if *last_tok_v is tstate(cur,l,c) then
1628 1626 current_v <- cur;
1629 1627 line_v <- l;
1630 1628 col_v <- c;
1631   - last_tok_v <- *penult_tok_v;
1632 1629 last_accept_v <- none,
1633 1630  
1634 1631 // coming back to the state just after the penultimate token was read
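
A rough OCaml analogue of the tools defined in this hunk, with refs standing in for Anubis Var(Int) (a sketch under assumed types, not the library itself):

    (* Each tool is a closure over the shared mutable lexing state. *)
    type tools = {
      current_offset : unit -> int;   (* absolute offset in the whole input *)
      go_back        : int -> unit;   (* step back n chars, clamped at 0 *)
      back_to_last   : unit -> unit;  (* restore state after the last token *)
    }

    let make_tools ~current_v ~line_v ~col_v ~past_v ~last_tok_v ~last_accept_v =
      {
        (* past_v counts the bytes no longer in the buffer, so the absolute
           offset is past_v plus the position inside the current buffer. *)
        current_offset = (fun () -> !past_v + !current_v);

        (* don't go beyond the beginning of the buffer *)
        go_back = (fun n -> current_v := max (!current_v - n) 0);

        (* restore offset, line and column recorded just after the last
           token; as in the fixed code, the penultimate state is left
           untouched and the last acceptance is forgotten. *)
        back_to_last = (fun () ->
          let (cur, l, c) = !last_tok_v in
          current_v := cur;
          line_v    := l;
          col_v     := c;
          last_accept_v := None);
      }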
... ... @@ -1667,7 +1664,7 @@ public define LexingStream
1667 1664 (One u) |-> failure, // buffer is never reloaded
1668 1665 line_v, // current line
1669 1666 col_v, // current column
1670   - past_v, // past bytes
  1667 + past_v, // past bytes (will remain always 0 in this case)
1671 1668 make_tools(token_start_v,current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)).
1672 1669  
1673 1670  
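Continuing the sketch above, the non-reloading case of this hunk can be modelled like this (again our names; the buffer is never reloaded, so past stays 0):

    (* Fixed, never-reloaded buffer: past_v remains 0, so current_offset
       reduces to the position inside the buffer. Reuses the make_tools
       sketch above. *)
    let make_fixed_tools () =
      let current_v     = ref 0
      and line_v        = ref 1          (* assumed starting line *)
      and col_v         = ref 0
      and past_v        = ref 0          (* will remain 0: no reloads *)
      and last_tok_v    = ref (0, 1, 0)
      and last_accept_v = ref None in
      make_tools ~current_v ~line_v ~col_v ~past_v ~last_tok_v ~last_accept_v
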
... ... @@ -1726,7 +1723,7 @@ public define Maybe(LexingStream)
1726 1723 //print("Keeping this from previous buffer: ["+to_string(extract(old_buffer,dropped,old_length))+"]\n");
1727 1724 buffer_v <- extract(old_buffer,dropped,old_length)+more;
1728 1725 //print("New buffer: ["+to_string(*buffer_v)+"] size: "+to_decimal(length(*buffer_v))+"\n");
1729   - token_start_v <- 0;
  1726 + token_start_v <- *token_start_v - dropped;
1730 1727 //print("Next token starting position: "+to_decimal(*token_start_v)+"\n");
1731 1728 current_v <- old_length - dropped;
1732 1729 //print("New current reading position: "+to_decimal(*current_v)+"\n");
... ... @@ -1811,7 +1808,7 @@ public define Maybe(LexingStream)
1811 1808 min(min(current(*penult_tok_v),current(*last_tok_v)),*token_start_v),
1812 1809  
1813 1810 buffer_v <- extract(old_buffer,dropped,old_length)+more;
1814   - token_start_v <- 0;
  1811 + token_start_v <- *token_start_v - dropped;
1815 1812 current_v <- old_length - dropped;
1816 1813 past_bytes_v <- *past_bytes_v + dropped;
1817 1814 last_tok_v <- (if *last_tok_v is tstate(cur,l,c) then tstate(cur - dropped,l,c));
... ...
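
The ``small buffer'' fix in the last two hunks is the same in both places: when the first `dropped` bytes of the old buffer are discarded, token_start_v must be shifted by `dropped` like every other recorded offset, not reset to 0. A compressed OCaml sketch of the reload arithmetic (our names, not the Anubis source):

    (* dropped = min(current(penult_tok), current(last_tok), token_start),
       so every shifted offset stays >= 0. *)
    let reload ~old_buffer ~more ~dropped
               ~token_start_v ~current_v ~past_bytes_v
               ~last_tok_v ~penult_tok_v =
      let old_length = String.length old_buffer in
      (* keep the tail of the old buffer and append the new bytes *)
      let buffer = String.sub old_buffer dropped (old_length - dropped) ^ more in
      token_start_v := !token_start_v - dropped;  (* the fix: shift, don't zero *)
      current_v     := old_length - dropped;
      past_bytes_v  := !past_bytes_v + dropped;
      (* the recorded token states are shifted the same way *)
      let shift (cur, l, c) = (cur - dropped, l, c) in
      last_tok_v   := shift !last_tok_v;
      penult_tok_v := shift !penult_tok_v;
      buffer

With the old `token_start_v <- 0`, a pending token that did not start at the very beginning of the kept region (i.e. when token_start exceeds `dropped`) would be given a wrong starting offset, which is presumably the bug that surfaced with small buffers.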