Commit d8c550092832aa9e5f7c3203977be9ecbdbacac7
1 parent
c8e4349b
fixed the ''small buffer'' bug.
Showing
1 changed file
with
12 additions
and
15 deletions
Show diff stats
anubis_dev/library/lexical_analysis/fast_lexer_4.anubis
| ... | ... | @@ -1450,7 +1450,7 @@ public define String |
| 1450 | 1450 | *** [1.1] The type 'LexingStream'. |
| 1451 | 1451 | |
| 1452 | 1452 | A lexing stream provides tools which are adhoc for using low level fast lexers as |
| 1453 | - defined in section 13 of predefined.anubis: | |
| 1453 | + defined in section 13 of predefined.anubis. | |
| 1454 | 1454 | |
| 1455 | 1455 | The type below records the information needed to come back to the state just after the |
| 1456 | 1456 | last or penultimate token was read. |
| ... | ... | @@ -1463,9 +1463,10 @@ type TokenState: |
| 1463 | 1463 | Int col |
| 1464 | 1464 | ). |
| 1465 | 1465 | |
| 1466 | - There is a ``penultimate token'' when at least one token has been successfully read since the | |
| 1466 | + There is a ``penultimate token'' when at least two tokens have been successfully read since the |
| 1467 | 1467 | creation of the lexing stream. If it is not the case, the value of the ``penultimate state'' |
| 1468 | - defaults to the very initial state. | |
| 1468 | + defaults to the state after the very first token was read or to the very initial state if no | |
| 1469 | + token was read. |
| 1469 | 1470 | |
| 1470 | 1471 | When the buffer is reloaded, part of the current buffer is kept. One reason for this is that |
| 1471 | 1472 | when we encounter the end of the buffer it can be the case that we are currently reading a token |
| ... | ... | @@ -1487,7 +1488,6 @@ type TokenState: |
| 1487 | 1488 | state informations for token1 and token2, the last two tokens successfully read. |
| 1488 | 1489 | |
| 1489 | 1490 | |
| 1490 | - | |
| 1491 | 1491 | public type LexingStream: |
| 1492 | 1492 | lexing_stream |
| 1493 | 1493 | ( |
| ... | ... | @@ -1569,18 +1569,18 @@ public type LexingStream: |
| 1569 | 1569 | -- the new current buffer "source text." |
| 1570 | 1570 | |
| 1571 | 1571 | -- last accepted: (s,3), because 'sou' has been accepted in state 's' and |
| 1572 | - ends at offset 0 within the new buffer, | |
| 1572 | + ends at offset 3 within the new buffer, | |
| 1573 | 1573 | |
| 1574 | 1574 | -- current_v receives the value 3, because 'sou' is already read, |
| 1575 | 1575 | |
| 1576 | 1576 | -- token_start_v receives the value 0, because the token we are currently |
| 1577 | 1577 | reading begins at offset 0. |
| 1578 | 1578 | |
| 1579 | - -- state s, because we want to try to read the sequel of 'sou'. | |
| 1579 | + -- restart in state s, because we want to try to read the sequel of 'sou'. | |
| 1580 | 1580 | |
| 1581 | 1581 | Notice that if the low level lexer had returned 'rejected(s,at_end_of_input,12,15)' |
| 1582 | 1582 | instead of 'accepted(s,at_end_of_input,12,15)', the scenario is the same one except |
| 1583 | - that last accepted is 'none'. | |
| 1583 | + that last accepted will be 'none'. | |
| 1584 | 1584 | |
| 1585 | 1585 | The low level lexer will now return 'accepted(s,not_at_end_of_input,0,6)', meaning that |
| 1586 | 1586 | it has recognized the token 'source' between positions 0 (included) and 6 (not |
| ... | ... | @@ -1597,7 +1597,7 @@ public type LexingStream: |
| 1597 | 1597 | define LexingTools |
| 1598 | 1598 | make_tools |
| 1599 | 1599 | ( |
| 1600 | - Var(Int) token_start_v, | |
| 1600 | + Var(Int) token_start_v, // actually not used in this function | |
| 1601 | 1601 | Var(Int) current_v, |
| 1602 | 1602 | Var(Int) line_v, |
| 1603 | 1603 | Var(Int) col_v, |
| ... | ... | @@ -1614,21 +1614,18 @@ define LexingTools |
| 1614 | 1614 | (One _) |-> *col_v, |
| 1615 | 1615 | |
| 1616 | 1616 | // get current offset: |
| 1617 | + // This is the number of bytes which are no more in the buffer plus the current position. | |
| 1617 | 1618 | (One _) |-> *past_v + *current_v, |
| 1618 | 1619 | |
| 1619 | 1620 | // go back one char: |
| 1620 | 1621 | // don't go beyond the beginning of the buffer |
| 1621 | - // No need to update line_v and col_v because they | |
| 1622 | - // refer to the beginning of the token. | |
| 1623 | 1622 | (Int n) |-> current_v <- max(*current_v - n, 0), |
| 1624 | - //token_start_v <- *current_v, | |
| 1625 | 1623 | |
| 1626 | 1624 | // comming back to the state just after the last token was read |
| 1627 | 1625 | (One _) |-> if *last_tok_v is tstate(cur,l,c) then |
| 1628 | 1626 | current_v <- cur; |
| 1629 | 1627 | line_v <- l; |
| 1630 | 1628 | col_v <- c; |
| 1631 | - last_tok_v <- *penult_tok_v; | |
| 1632 | 1629 | last_accept_v <- none, |
| 1633 | 1630 | |
| 1634 | 1631 | // comming back to the state just after the penultimate token was read |
| ... | ... | @@ -1667,7 +1664,7 @@ public define LexingStream |
| 1667 | 1664 | (One u) |-> failure, // buffer is never reloaded |
| 1668 | 1665 | line_v, // current line |
| 1669 | 1666 | col_v, // current column |
| 1670 | - past_v, // past bytes | |
| 1667 | + past_v, // past bytes (will remain always 0 in this case) | |
| 1671 | 1668 | make_tools(token_start_v,current_v,line_v,col_v,past_v,last_tok_v,penult_tok_v,last_accept_v)). |
| 1672 | 1669 | |
| 1673 | 1670 | |
| ... | ... | @@ -1726,7 +1723,7 @@ public define Maybe(LexingStream) |
| 1726 | 1723 | //print("Keeping this from previous buffer: ["+to_string(extract(old_buffer,dropped,old_length))+"]\n"); |
| 1727 | 1724 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
| 1728 | 1725 | //print("New buffer: ["+to_string(*buffer_v)+"] size: "+to_decimal(length(*buffer_v))+"\n"); |
| 1729 | - token_start_v <- 0; | |
| 1726 | + token_start_v <- *token_start_v - dropped; | |
| 1730 | 1727 | //print("Next token starting position: "+to_decimal(*token_start_v)+"\n"); |
| 1731 | 1728 | current_v <- old_length - dropped; |
| 1732 | 1729 | //print("New current reading position: "+to_decimal(*current_v)+"\n"); |
| ... | ... | @@ -1811,7 +1808,7 @@ public define Maybe(LexingStream) |
| 1811 | 1808 | min(min(current(*penult_tok_v),current(*last_tok_v)),*token_start_v), |
| 1812 | 1809 | |
| 1813 | 1810 | buffer_v <- extract(old_buffer,dropped,old_length)+more; |
| 1814 | - token_start_v <- 0; | |
| 1811 | + token_start_v <- *token_start_v - dropped; | |
| 1815 | 1812 | current_v <- old_length - dropped; |
| 1816 | 1813 | past_bytes_v <- *past_bytes_v + dropped; |
| 1817 | 1814 | last_tok_v <- (if *last_tok_v is tstate(cur,l,c) then tstate(cur - dropped,l,c)); | ... | ... |