wip AST generation

2024-11-20 16:52:40 -07:00
parent 609e7ccdb3
commit f756e3e29f
4 changed files with 388 additions and 96 deletions

@@ -1,6 +1,7 @@
pub mod token;
use std::{
collections::VecDeque,
fs::File,
io::{BufReader, Cursor, Read, Seek, SeekFrom},
path::PathBuf,
@@ -123,6 +124,8 @@ where
Ok(())
}
/// Consumes and returns the next token in the stream.
/// Returns `None` if there are no more tokens in the stream.
pub fn next_token(&mut self) -> Result<Option<Token>, TokenizerError> {
while let Some(next_char) = self.next_char()? {
// skip whitespace
@@ -167,6 +170,8 @@ where
}
}
/// Peeks at the next token in the stream without consuming it.
/// Returns `None` if there are no more tokens in the stream.
pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
let current_pos = self.reader.stream_position()?;
let column = self.column.clone();
@@ -408,6 +413,106 @@ where
}
}
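/// A buffering wrapper around a `Tokenizer` that records consumed tokens in a
/// history stack, enabling lookahead and backward/forward seeking over the
/// token stream (e.g. for parser backtracking).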
pub struct TokenizerBuffer<T>
where
T: Read + Seek,
{
tokenizer: Tokenizer<T>,
buffer: VecDeque<Token>,
history: VecDeque<Token>,
}
impl<T> TokenizerBuffer<T>
where
T: Seek + Read,
{
pub fn new(tokenizer: Tokenizer<T>) -> Self {
Self {
tokenizer,
buffer: VecDeque::new(),
history: VecDeque::with_capacity(128),
}
}
/// Returns the next token, drawing from the internal buffer first and falling
/// back to the underlying tokenizer, and pushes the returned token onto the
/// history stack.
pub fn next(&mut self) -> Result<Option<Token>, TokenizerError> {
if let Some(token) = self.buffer.pop_front() {
self.history.push_back(token.clone());
return Ok(Some(token));
}
let token = self.tokenizer.next_token()?;
if let Some(ref token) = token {
self.history.push_back(token.clone());
}
Ok(token)
}
/// Peeks the next token in the stream without adding to the history stack
pub fn peek(&mut self) -> Result<Option<Token>, TokenizerError> {
if let Some(token) = self.buffer.front() {
return Ok(Some(token.clone()));
}
self.tokenizer.peek_next()
}
fn seek_from_start(&mut self, pos: usize) -> Result<(), TokenizerError> {
    // The current position in the token stream is the number of tokens
    // already consumed, i.e. the length of the history stack, so an
    // absolute seek is just a relative seek from that position.
    let current = self.history.len() as i64;
    self.seek_from_current(pos as i64 - current)
}
fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
// seek_to > 0: consume tokens forward, draining the buffer before falling
//   back to the tokenizer, pushing each consumed token onto the history
// seek_to < 0: pop tokens from the history and push them to the front of
//   the buffer; if the history runs out, we have seeked before the start
// seek_to == 0: nothing to do
if seek_to > 0 {
    for _ in 0..seek_to {
        // drain any buffered tokens first, then fall back to the tokenizer
        let token = match self.buffer.pop_front() {
            Some(token) => Some(token),
            None => self.tokenizer.next_token()?,
        };
        if let Some(token) = token {
            self.history.push_back(token);
        } else {
            return Err(TokenizerError::IOError(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                "Unexpected EOF",
            )));
        }
    }
} else if seek_to < 0 {
let seek_to = seek_to.unsigned_abs() as usize;
let mut tokens = Vec::with_capacity(seek_to);
for _ in 0..seek_to {
    if let Some(token) = self.history.pop_back() {
        tokens.push(token);
    } else {
        return Err(TokenizerError::IOError(std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            "Cannot seek before the start of the token history",
        )));
}
}
self.buffer.extend(tokens.into_iter().rev());
}
Ok(())
}
/// Seeks within the token stream by moving tokens between the history stack
/// and the buffer, allowing the caller to move back and forth in the stream
pub fn seek(&mut self, from: SeekFrom) -> Result<(), TokenizerError> {
    match from {
        SeekFrom::Start(pos) => self.seek_from_start(pos as usize),
        SeekFrom::Current(seek_to) => self.seek_from_current(seek_to),
        SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"),
    }
}
}
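// A minimal usage sketch (hedged; `source` stands in for any `String` input,
// matching the `Tokenizer::from(String)` conversion used in the tests below):
//
//     let mut buffer = TokenizerBuffer::new(Tokenizer::from(source));
//     let first = buffer.next()?;           // consume one token
//     buffer.seek(SeekFrom::Current(-1))?;  // rewind one token
//     let again = buffer.next()?;           // yields the same token again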
#[cfg(test)]
mod tests {
use super::*;
@@ -422,6 +527,35 @@ mod tests {
}
"#;
#[test]
fn test_tokenizer_buffer_seek_from_start() -> Result<()> {
let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
let mut buffer = TokenizerBuffer::new(tokenizer);
let token = buffer.next()?;
assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
let token = buffer.next()?;
assert_eq!(
token.unwrap().token_type,
TokenType::Identifier(String::from("test"))
);
buffer.seek(SeekFrom::Start(0))?;
let token = buffer.next()?;
assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
buffer.seek(SeekFrom::Start(16))?;
let token = buffer.next()?;
assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Let));
Ok(())
}
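// A hedged companion sketch: exercises relative seeking in both directions,
// assuming the same TEST_STRING token layout the test above relies on
// (token 0 = `fn`, token 1 = `test`, token 16 = `let`).
#[test]
fn test_tokenizer_buffer_seek_from_current() -> Result<()> {
    let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
    let mut buffer = TokenizerBuffer::new(tokenizer);
    let token = buffer.next()?;
    assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
    // rewind one token; the next read yields `fn` again
    buffer.seek(SeekFrom::Current(-1))?;
    let token = buffer.next()?;
    assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
    // from position 1, move forward 15 tokens to the `let` at index 16
    buffer.seek(SeekFrom::Current(15))?;
    let token = buffer.next()?;
    assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Let));
    Ok(())
}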
#[test]
fn test_tokenizer_from_path_ok() {
let tokenizer = Tokenizer::from_path(TEST_FILE);