diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index f91dc89..5a0cf2c 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -2,11 +2,11 @@ mod tree_node;
 
 use crate::tokenizer::{
     token::{Symbol, Token, TokenType},
-    Tokenizer, TokenizerError,
+    Tokenizer, TokenizerBuffer, TokenizerError,
 };
 use std::{
     collections::VecDeque,
-    io::{Read, Seek},
+    io::{Read, Seek, SeekFrom},
 };
 use thiserror::Error;
 use tree_node::*;
@@ -24,7 +24,7 @@ pub enum ParseError {
 }
 
 pub struct Parser<T> {
-    tokenizer: Tokenizer<T>,
+    tokenizer: TokenizerBuffer<T>,
     current_token: Option<Token>,
 }
 
@@ -34,40 +34,64 @@ where
 {
     pub fn new(tokenizer: Tokenizer<T>) -> Self {
         Parser {
-            tokenizer,
+            tokenizer: TokenizerBuffer::new(tokenizer),
             current_token: None,
         }
     }
 
-    pub fn parse(&mut self) -> Result<Expression, ParseError> {
-        self.current_token = self.tokenizer.next_token()?;
+    pub fn parse(&mut self) -> Result<Option<Expression>, ParseError> {
+        self.current_token = self.tokenizer.next()?;
         self.expression()
     }
 
-    fn expression(&mut self) -> Result<Expression, ParseError> {
-        let current_token = self
-            .current_token
-            .as_ref()
-            .ok_or(ParseError::UnknownError)?;
+    fn expression(&mut self) -> Result<Option<Expression>, ParseError> {
+        /// Helper macro to match the next token in the tokenizer buffer to a pattern
+        /// with an optional if condition. The token is peeked and not consumed.
+        macro_rules! matches_peek {
+            ($pattern:pat) => {
+                matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }))
+            };
+            ($pattern:pat if $cond:expr) => {
+                matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond)
+            };
+        }
+
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Ok(None);
+        };
 
         Ok(match current_token.token_type {
-            // Match a number or string literal as long as the next token is not an operator
+            // Assignment expression
+            TokenType::Identifier(_) if matches_peek!(TokenType::Symbol(Symbol::Assign)) => {
+                Some(Expression::AssignmentExpression(self.assignment()?))
+            }
+
+            // Negation expression; `parse()` advances the tokenizer itself, so the
+            // peeked number must not be consumed here
+            TokenType::Symbol(Symbol::Minus) if matches_peek!(TokenType::Number(_)) => {
+                Some(Expression::Negation(Box::new(
+                    self.parse()?.ok_or(ParseError::UnexpectedEOF)?,
+                )))
+            }
+
+            // Literal expression
             TokenType::Number(_) | TokenType::String(_)
-                if !matches!(
-                    self.tokenizer.peek_next()?, Some(Token { token_type: TokenType::Symbol(e), .. }) if e.is_operator()
+                if !matches_peek!(
+                    TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical()
                 ) =>
             {
-                Expression::Literal(self.literal()?)
+                Some(Expression::Literal(self.literal()?))
            }
 
-            // Match a negation operator
-            TokenType::Symbol(Symbol::Minus) => Expression::Negation(Box::new(self.parse()?)),
-
-            _ if matches!(self.tokenizer.peek_next()?, Some(Token { token_type: TokenType::Symbol(e), .. }) if e.is_operator()) => {
-                Expression::BinaryExpression(self.binary()?)
+            // Logical expression
+            TokenType::Number(_) | TokenType::String(_)
+                if matches_peek!(
+                    TokenType::Symbol(s) if s.is_comparison() || s.is_logical()
+                ) =>
+            {
+                Some(Expression::LogicalExpression(self.logical()?))
             }
 
-            // Something went wrong. Return an error
             _ => {
                 return Err(ParseError::UnexpectedToken {
                     token: current_token.clone(),
                })
            }
        })
    }
 
@@ -76,56 +100,61 @@ where
 
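+    /// Parses an `identifier = expression` pair into an [`AssignmentExpression`].
+    /// Expects `current_token` to already hold the identifier (checked by the
+    /// caller in `expression()`); the `=` is re-verified below before the
+    /// right-hand side is parsed recursively.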
+    fn assignment(&mut self) -> Result<AssignmentExpression, ParseError> {
+        let Some(Token {
+            token_type: TokenType::Identifier(identifier),
+            ..
+        }) = self.current_token.as_ref()
+        else {
+            return Err(ParseError::UnexpectedToken {
+                // Safety: We have already checked that `self.current_token` is `Some` in the `parse()` function
+                token: self.current_token.clone().unwrap(),
+            });
+        };
+
+        // make sure the next token is `=` for sanity
+        if let Some(Token {
+            token_type: TokenType::Symbol(Symbol::Assign),
+            ..
+        }) = self.tokenizer.next()?
+        {
+        } else {
+            self.tokenizer.seek(SeekFrom::Current(-1))?;
+            return Err(ParseError::UnexpectedToken {
+                token: self.tokenizer.next()?.unwrap(),
+            });
+        };
+
+        Ok(AssignmentExpression {
+            identifier: identifier.clone(),
+            expression: Box::new(self.parse()?.ok_or(ParseError::UnexpectedEOF)?),
+        })
+    }
+
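+    /// Parses an expression joined by a comparison or logical operator.
+    /// Currently a stub: the dispatch in `expression()` already routes here,
+    /// but the body is still `todo!()`.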
+    fn logical(&mut self) -> Result<LogicalExpression, ParseError> {
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
+        };
+
+        todo!()
+    }
 
     fn binary(&mut self) -> Result<BinaryExpression, ParseError> {
-        let literal = self.literal()?;
-
-        let Some(Token {
-            token_type: TokenType::Symbol(operator),
-            ..
-        }) = self.current_token
-        else {
-            return Err(ParseError::UnknownError);
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
         };
-        self.current_token = self.tokenizer.next_token()?;
 
-        Ok(match operator {
-            Symbol::Plus => BinaryExpression::Add(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Asterisk => BinaryExpression::Multiply(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Slash => BinaryExpression::Divide(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Minus => BinaryExpression::Subtract(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            _ => {
-                return Err(ParseError::UnexpectedToken {
-                    token: Token {
-                        token_type: TokenType::Symbol(operator),
-                        line: 0,
-                        column: 0,
-                    },
-                })
-            }
-        })
+        todo!()
     }
 
     fn literal(&mut self) -> Result<Literal, ParseError> {
-        let current_token = self
-            .current_token
-            .as_ref()
-            .ok_or(ParseError::UnknownError)?;
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
+        };
 
         let to_return = match current_token.token_type {
-            TokenType::Number(ref number) => tree_node::Literal::Number(number.clone()),
-            TokenType::String(ref string) => tree_node::Literal::String(string.clone()),
+            TokenType::Number(n) => Literal::Number(n),
+            TokenType::String(ref s) => Literal::String(s.clone()),
             _ => {
                 return Err(ParseError::UnexpectedToken {
                     token: current_token.clone(),
                })
            }
        };
 
@@ -133,78 +162,59 @@ where
-        self.current_token = self.tokenizer.next_token()?;
+        // Advance the tokenizer if the next token is a semicolon
+        if let Some(Token {
+            token_type: TokenType::Symbol(Symbol::Semicolon),
+            ..
+        }) = self.tokenizer.peek()?
+        {
+            self.tokenizer.next()?;
+        }
+
         Ok(to_return)
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::tree_node::*;
     use super::*;
     use anyhow::Result;
 
     #[test]
-    fn test_add_expr() -> Result<()> {
-        let input = "123 + 456";
+    fn test_assignment() -> Result<()> {
+        let input = r#"
+            x = 10;
+            y = "testing";
+        "#;
+        let tokenizer = Tokenizer::from(input.to_owned());
+        let mut parser = Parser::new(tokenizer);
 
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
+        let expr = parser.parse()?.unwrap();
 
-        let result = parser.parse()?;
-        let formatted_output = format!("{}", result);
+        assert_eq!("x = 10", format!("{}", expr));
 
-        assert_eq!(formatted_output, "(123 + 456)");
-        Ok(())
-    }
+        let expr = parser.parse()?.unwrap();
 
-    #[test]
-    fn test_parse_number() -> Result<()> {
-        let input = "123";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-
-        assert_eq!(formatted_output, "123");
+        assert_eq!("y = \"testing\"", format!("{}", expr));
 
         Ok(())
     }
 
     #[test]
-    fn test_parse_negation() -> Result<()> {
-        let input = "-123";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
+    fn test_literal() -> Result<()> {
+        let input = r#"
+            10;
+            "testing";
+        "#;
 
-        let formatted_output = format!("{}", result);
+        let tokenizer = Tokenizer::from(input.to_owned());
+        let mut parser = Parser::new(tokenizer);
 
-        assert_eq!(formatted_output, "(-123)");
+        let expr = parser.parse()?.unwrap();
+        assert_eq!("10", format!("{}", expr));
 
-        Ok(())
-    }
-
-    #[test]
-    fn test_order_of_operations() -> Result<()> {
-        let input = "123 - 456 + 789";
-
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-        println!("{}", formatted_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_chained_operators() -> Result<()> {
-        let input = "123 + 456 * 789";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-
-        assert_eq!(formatted_output, "(123 + (456 * 789))");
+        let expr = parser.parse()?.unwrap();
+        assert_eq!("\"testing\"", format!("{}", expr));
 
         Ok(())
     }
diff --git a/src/parser/tree_node.rs b/src/parser/tree_node.rs
index a74ed18..0b960d8 100644
--- a/src/parser/tree_node.rs
+++ b/src/parser/tree_node.rs
@@ -1,4 +1,4 @@
-use crate::tokenizer::token::Number;
+use crate::tokenizer::token::{Number, TokenType};
 
 #[derive(Debug, Eq, PartialEq)]
 pub enum Literal {
@@ -10,7 +10,7 @@ impl std::fmt::Display for Literal {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Literal::Number(n) => write!(f, "{}", n),
-            Literal::String(s) => write!(f, "{}", s),
+            Literal::String(s) => write!(f, "\"{}\"", s),
         }
     }
 }
@@ -63,12 +63,25 @@ impl std::fmt::Display for LogicalExpression {
     }
 }
 
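+/// An `identifier = expression` binding, produced by `Parser::assignment`.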
+#[derive(Debug, PartialEq, Eq)]
+pub struct AssignmentExpression {
+    pub identifier: String,
+    pub expression: Box<Expression>,
+}
+
+impl std::fmt::Display for AssignmentExpression {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} = {}", self.identifier, self.expression)
+    }
+}
+
 #[derive(Debug, PartialEq, Eq)]
 pub enum Expression {
     Literal(Literal),
     Negation(Box<Expression>),
     BinaryExpression(BinaryExpression),
     LogicalExpression(LogicalExpression),
+    AssignmentExpression(AssignmentExpression),
 }
 
 impl std::fmt::Display for Expression {
@@ -78,6 +91,7 @@ impl std::fmt::Display for Expression {
             Expression::Negation(e) => write!(f, "(-{})", e),
             Expression::BinaryExpression(e) => write!(f, "{}", e),
             Expression::LogicalExpression(e) => write!(f, "{}", e),
+            Expression::AssignmentExpression(e) => write!(f, "{}", e),
         }
     }
 }
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index a712bc3..6aa3b73 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -459,13 +459,6 @@ where
         Ok(token)
     }
 
-    fn seek_from_start(&mut self, pos: usize) -> Result<(), TokenizerError> {
-        // if pos
-
-
-        Ok(())
-    }
-
     fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
         // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer
         // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history)
@@ -506,9 +499,9 @@ where
     /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream
     pub fn seek(&mut self, from: SeekFrom) -> Result<(), TokenizerError> {
         Ok(match from {
-            SeekFrom::Start(pos) => self.seek_from_start(pos as usize)?,
             SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?,
             SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"),
+            SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"),
         })
     }
 }
@@ -528,30 +521,22 @@ mod tests {
     "#;
 
     #[test]
-    fn test_tokenizer_buffer_seek_from_start() -> Result<()> {
+    fn test_tokenizer_buffer_seek_from_current() -> Result<()> {
         let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
         let mut buffer = TokenizerBuffer::new(tokenizer);
 
         let token = buffer.next()?;
         assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
 
-        let token = buffer.next()?;
-        assert_eq!(
-            token.unwrap().token_type,
-            TokenType::Identifier(String::from("test"))
-        );
-
-        buffer.seek(SeekFrom::Start(0))?;
+        buffer.seek(SeekFrom::Current(1))?;
 
         let token = buffer.next()?;
+        assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
 
-        assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
-
-        buffer.seek(SeekFrom::Start(16))?;
+        buffer.seek(SeekFrom::Current(-1))?;
 
         let token = buffer.next()?;
-
-        assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Let));
+        assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
 
         Ok(())
     }