still WIP

2024-11-21 12:12:06 -07:00
parent f756e3e29f
commit 62bf83e277
3 changed files with 145 additions and 136 deletions


@@ -2,11 +2,11 @@ mod tree_node;
 use crate::tokenizer::{
     token::{Symbol, Token, TokenType},
-    Tokenizer, TokenizerError,
+    Tokenizer, TokenizerBuffer, TokenizerError,
 };
 use std::{
     collections::VecDeque,
-    io::{Read, Seek},
+    io::{Read, Seek, SeekFrom},
 };
 use thiserror::Error;
 use tree_node::*;
@@ -24,7 +24,7 @@ pub enum ParseError {
 }
 
 pub struct Parser<R: Read + Seek> {
-    tokenizer: Tokenizer<R>,
+    tokenizer: TokenizerBuffer<R>,
     current_token: Option<Token>,
 }
@@ -34,40 +34,64 @@ where
 {
     pub fn new(tokenizer: Tokenizer<R>) -> Self {
         Parser {
-            tokenizer,
+            tokenizer: TokenizerBuffer::new(tokenizer),
             current_token: None,
         }
     }
 
-    pub fn parse(&mut self) -> Result<tree_node::Expression, ParseError> {
-        self.current_token = self.tokenizer.next_token()?;
+    pub fn parse(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
+        self.current_token = self.tokenizer.next()?;
         self.expression()
     }
 
-    fn expression(&mut self) -> Result<tree_node::Expression, ParseError> {
-        let current_token = self
-            .current_token
-            .as_ref()
-            .ok_or(ParseError::UnknownError)?;
+    fn expression(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
+        /// Helper macro to match the next token in the tokenizer buffer to a pattern
+        /// with an optional if condition. The token is peeked and not consumed.
+        macro_rules! matches_peek {
+            ($pattern:pat) => {
+                matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }))
+            };
+            ($pattern:pat if $cond:expr) => {
+                matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond)
+            };
+        }
+
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Ok(None);
+        };
 
         Ok(match current_token.token_type {
-            // Match a number or string literal as long as the next token is not an operator
+            // Assignment expression
+            TokenType::Identifier(_) if matches_peek!(TokenType::Symbol(Symbol::Assign)) => {
+                Some(Expression::AssignmentExpression(self.assignment()?))
+            }
+            // Negation expression
+            TokenType::Symbol(Symbol::Minus) if matches_peek!(TokenType::Number(_)) => {
+                self.tokenizer.next()?;
+                Some(Expression::Negation(Box::new(
+                    self.parse()?.ok_or(ParseError::UnexpectedEOF)?,
+                )))
+            }
+            // Literal expression
             TokenType::Number(_) | TokenType::String(_)
-                if !matches!(
-                    self.tokenizer.peek_next()?, Some(Token { token_type: TokenType::Symbol(e), .. }) if e.is_operator()
+                if !matches_peek!(
+                    TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical()
                 ) =>
             {
-                Expression::Literal(self.literal()?)
+                Some(Expression::Literal(self.literal()?))
             }
-            // Match a negation operator
-            TokenType::Symbol(Symbol::Minus) => Expression::Negation(Box::new(self.parse()?)),
-
-            _ if matches!(self.tokenizer.peek_next()?, Some(Token { token_type: TokenType::Symbol(e), .. }) if e.is_operator()) => {
-                Expression::BinaryExpression(self.binary()?)
+            // Logical expression
+            TokenType::Number(_) | TokenType::String(_)
+                if matches_peek!(
+                    TokenType::Symbol(s) if s.is_comparison() || s.is_logical()
+                ) =>
+            {
+                Some(Expression::LogicalExpression(self.logical()?))
             }
+            // Something went wrong. Return an error
             _ => {
                 return Err(ParseError::UnexpectedToken {
                     token: current_token.clone(),
@@ -76,56 +100,61 @@ where
         })
     }
 
+    fn assignment(&mut self) -> Result<tree_node::AssignmentExpression, ParseError> {
+        let Some(Token {
+            token_type: TokenType::Identifier(identifier),
+            ..
+        }) = self.current_token.as_ref()
+        else {
+            return Err(ParseError::UnexpectedToken {
+                // Safety: We have already checked that `self.current_token` is `Some` in the `parse()` function
+                token: self.current_token.clone().unwrap(),
+            });
+        };
+
+        // make sure the next token is `=` for sanity
+        if let Some(Token {
+            token_type: TokenType::Symbol(Symbol::Assign),
+            ..
+        }) = self.tokenizer.next()?
+        {
+        } else {
+            self.tokenizer.seek(SeekFrom::Current(-1))?;
+            return Err(ParseError::UnexpectedToken {
+                token: self.tokenizer.next()?.unwrap(),
+            });
+        };
+
+        Ok(AssignmentExpression {
+            identifier: identifier.clone(),
+            expression: Box::new(self.parse()?.ok_or(ParseError::UnexpectedEOF)?),
+        })
+    }
+
+    fn logical(&mut self) -> Result<tree_node::LogicalExpression, ParseError> {
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
+        };
+
+        todo!()
+    }
+
     fn binary(&mut self) -> Result<tree_node::BinaryExpression, ParseError> {
-        let literal = self.literal()?;
-
-        let Some(Token {
-            token_type: TokenType::Symbol(operator),
-            ..
-        }) = self.current_token
-        else {
-            return Err(ParseError::UnknownError);
-        };
-
-        self.current_token = self.tokenizer.next_token()?;
-
-        Ok(match operator {
-            Symbol::Plus => BinaryExpression::Add(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Asterisk => BinaryExpression::Multiply(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Slash => BinaryExpression::Divide(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Minus => BinaryExpression::Subtract(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            _ => {
-                return Err(ParseError::UnexpectedToken {
-                    token: Token {
-                        token_type: TokenType::Symbol(operator),
-                        line: 0,
-                        column: 0,
-                    },
-                })
-            }
-        })
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
+        };
+
+        todo!()
     }
 
     fn literal(&mut self) -> Result<tree_node::Literal, ParseError> {
-        let current_token = self
-            .current_token
-            .as_ref()
-            .ok_or(ParseError::UnknownError)?;
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
+        };
 
         let to_return = match current_token.token_type {
-            TokenType::Number(ref number) => tree_node::Literal::Number(number.clone()),
-            TokenType::String(ref string) => tree_node::Literal::String(string.clone()),
+            TokenType::Number(n) => Literal::Number(n),
+            TokenType::String(ref s) => Literal::String(s.clone()),
             _ => {
                 return Err(ParseError::UnexpectedToken {
                     token: current_token.clone(),
@@ -133,78 +162,59 @@ where
             }
         };
 
-        self.current_token = self.tokenizer.next_token()?;
+        // Advance the tokenizer if the next token is a semicolon
+        if let Some(Token {
+            token_type: TokenType::Symbol(Symbol::Semicolon),
+            ..
+        }) = self.tokenizer.peek()?
+        {
+            self.tokenizer.next()?;
+        }
 
         Ok(to_return)
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use super::tree_node::*;
     use super::*;
     use anyhow::Result;
 
     #[test]
-    fn test_add_expr() -> Result<()> {
-        let input = "123 + 456";
-
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-        let formatted_output = format!("{}", result);
-        assert_eq!(formatted_output, "(123 + 456)");
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse_number() -> Result<()> {
-        let input = "123";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-        let formatted_output = format!("{}", result);
-        assert_eq!(formatted_output, "123");
+    fn test_assignment() -> Result<()> {
+        let input = r#"
+            x = 10;
+            y = "testing";
+        "#;
+        let tokenizer = Tokenizer::from(input.to_owned());
+        let mut parser = Parser::new(tokenizer);
+
+        let expr = parser.parse()?.unwrap();
+        assert_eq!("x = 10", format!("{}", expr));
+
+        let expr = parser.parse()?.unwrap();
+        assert_eq!("y = \"testing\"", format!("{}", expr));
 
         Ok(())
     }
 
     #[test]
-    fn test_parse_negation() -> Result<()> {
-        let input = "-123";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-
-        assert_eq!(formatted_output, "(-123)");
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_order_of_operations() -> Result<()> {
-        let input = "123 - 456 + 789";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-        let formatted_output = format!("{}", result);
-        println!("{}", formatted_output);
-        Ok(())
-    }
-
-    #[test]
-    fn test_chained_operators() -> Result<()> {
-        let input = "123 + 456 * 789";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-        let formatted_output = format!("{}", result);
-        assert_eq!(formatted_output, "(123 + (456 * 789))");
+    fn test_literal() -> Result<()> {
+        let input = r#"
+            10;
+            "testing";
+        "#;
+        let tokenizer = Tokenizer::from(input.to_owned());
+        let mut parser = Parser::new(tokenizer);
+
+        let expr = parser.parse()?.unwrap();
+        assert_eq!("10", format!("{}", expr));
+
+        let expr = parser.parse()?.unwrap();
+        assert_eq!("\"testing\"", format!("{}", expr));
 
         Ok(())
     }
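
Note: the `parse()` signature change above (returning `Result<Option<Expression>, ParseError>` instead of `Result<Expression, ParseError>`) means end-of-input is now an `Ok(None)` value rather than an error. A minimal sketch of a caller-side loop built on that contract; the `parse_all` helper is hypothetical and not part of this commit:

fn parse_all<R: std::io::Read + std::io::Seek>(
    parser: &mut Parser<R>,
) -> Result<Vec<Expression>, ParseError> {
    // Keep parsing until parse() signals end of input with Ok(None);
    // any Err still propagates immediately via `?`.
    let mut expressions = Vec::new();
    while let Some(expr) = parser.parse()? {
        expressions.push(expr);
    }
    Ok(expressions)
}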


@@ -1,4 +1,4 @@
-use crate::tokenizer::token::Number;
+use crate::tokenizer::token::{Number, TokenType};
 
 #[derive(Debug, Eq, PartialEq)]
 pub enum Literal {
@@ -10,7 +10,7 @@ impl std::fmt::Display for Literal {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Literal::Number(n) => write!(f, "{}", n),
-            Literal::String(s) => write!(f, "{}", s),
+            Literal::String(s) => write!(f, "\"{}\"", s),
         }
     }
 }
@@ -63,12 +63,25 @@ impl std::fmt::Display for LogicalExpression {
     }
 }
 
+#[derive(Debug, PartialEq, Eq)]
+pub struct AssignmentExpression {
+    pub identifier: String,
+    pub expression: Box<Expression>,
+}
+
+impl std::fmt::Display for AssignmentExpression {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} = {}", self.identifier, self.expression)
+    }
+}
+
 #[derive(Debug, PartialEq, Eq)]
 pub enum Expression {
     Literal(Literal),
     Negation(Box<Expression>),
     BinaryExpression(BinaryExpression),
     LogicalExpression(LogicalExpression),
+    AssignmentExpression(AssignmentExpression),
 }
 
 impl std::fmt::Display for Expression {
@@ -78,6 +91,7 @@ impl std::fmt::Display for Expression {
             Expression::Negation(e) => write!(f, "(-{})", e),
             Expression::BinaryExpression(e) => write!(f, "{}", e),
             Expression::LogicalExpression(e) => write!(f, "{}", e),
+            Expression::AssignmentExpression(e) => write!(f, "{}", e),
         }
     }
 }
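
Note: these `Display` impls are what the parser tests above assert against. A quick sketch of the expected formatting, constructing a tree by hand with the types defined in this file (purely for illustration):

// An assignment whose right-hand side is a string literal:
// Display for AssignmentExpression writes `{} = {}`, and the new
// Literal::String formatting wraps the value in quotes.
let expr = Expression::AssignmentExpression(AssignmentExpression {
    identifier: String::from("y"),
    expression: Box::new(Expression::Literal(Literal::String(String::from(
        "testing",
    )))),
});
assert_eq!(format!("{}", expr), "y = \"testing\"");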


@@ -459,13 +459,6 @@ where
         Ok(token)
     }
 
-    fn seek_from_start(&mut self, pos: usize) -> Result<(), TokenizerError> {
-        // if pos
-
-        Ok(())
-    }
-
     fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
         // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer
         // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history)
@@ -506,9 +499,9 @@ where
     /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream
     pub fn seek(&mut self, from: SeekFrom) -> Result<(), TokenizerError> {
         Ok(match from {
-            SeekFrom::Start(pos) => self.seek_from_start(pos as usize)?,
             SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?,
             SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"),
+            SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"),
         })
     }
 }
@@ -528,30 +521,22 @@ mod tests {
     "#;
 
     #[test]
-    fn test_tokenizer_buffer_seek_from_start() -> Result<()> {
+    fn test_tokenizer_buffer_seek_from_current() -> Result<()> {
         let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
         let mut buffer = TokenizerBuffer::new(tokenizer);
 
         let token = buffer.next()?;
         assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
 
-        let token = buffer.next()?;
-        assert_eq!(
-            token.unwrap().token_type,
-            TokenType::Identifier(String::from("test"))
-        );
-
-        buffer.seek(SeekFrom::Start(0))?;
+        buffer.seek(SeekFrom::Current(1))?;
 
         let token = buffer.next()?;
-        assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
+        assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
 
-        buffer.seek(SeekFrom::Start(16))?;
+        buffer.seek(SeekFrom::Current(-1))?;
 
         let token = buffer.next()?;
-        assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Let));
+        assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
 
         Ok(())
     }
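
Note: the body of `seek_from_current` is elided from this diff, but the two comments at its top pin down the intended behavior. A sketch of that logic under stated assumptions: `buffer` is a `VecDeque<Token>`, consumed tokens land on a `history` stack via `next()`, and the error variant name `TokenizerError::SeekOutOfBounds` is hypothetical:

fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
    if seek_to >= 0 {
        // Forward: drain tokens through next(), which pops the buffer
        // first and reads from the tokenizer once the buffer is empty.
        for _ in 0..seek_to {
            if self.next()?.is_none() {
                break; // hit EOF; nothing further to skip
            }
        }
    } else {
        // Backward: move tokens off the history stack onto the front of
        // the buffer; past the front of the history, error out.
        for _ in 0..seek_to.unsigned_abs() {
            let token = self
                .history
                .pop()
                .ok_or(TokenizerError::SeekOutOfBounds)?; // hypothetical variant
            self.buffer.push_front(token);
        }
    }
    Ok(())
}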