diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 5a0cf2c..7df6b41 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1,13 +1,10 @@
 mod tree_node;
 
 use crate::tokenizer::{
-    token::{Symbol, Token, TokenType},
+    token::{Keyword, Symbol, Token, TokenType},
     Tokenizer, TokenizerBuffer, TokenizerError,
 };
-use std::{
-    collections::VecDeque,
-    io::{Read, Seek, SeekFrom},
-};
+use std::io::{Read, Seek};
 use thiserror::Error;
 use tree_node::*;
 
@@ -15,7 +12,7 @@ use tree_node::*;
 pub enum ParseError {
     #[error(transparent)]
     TokenizerError(#[from] TokenizerError),
-    #[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}", token.line, token.column, token.token_type)]
+    #[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)]
     UnexpectedToken { token: Token },
     #[error("Unexpected EOF")]
     UnexpectedEOF,
@@ -23,6 +20,71 @@ pub enum ParseError {
     UnknownError,
 }
 
+macro_rules! self_matches_peek {
+    ($self:ident, $pattern:pat) => {
+        matches!($self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }))
+    };
+    ($self:ident, $pattern:pat if $cond:expr) => {
+        matches!($self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond)
+    };
+}
+
+macro_rules! token_from_option {
+    ($token:expr) => {
+        match $token {
+            Some(ref token) => token.clone(),
+            None => return Err(ParseError::UnexpectedEOF),
+        }
+    };
+}
+
+macro_rules! extract_token_data {
+    ($token:ident, $pattern:pat, $extraction:expr) => {
+        match $token.token_type {
+            $pattern => $extraction,
+            _ => {
+                return Err(ParseError::UnexpectedToken {
+                    token: $token.clone(),
+                })
+            }
+        }
+    };
+    ($token:expr, $pattern:pat, $extraction:expr) => {
+        match $token.token_type {
+            $pattern => $extraction,
+            _ => {
+                return Err(ParseError::UnexpectedToken {
+                    token: $token.clone(),
+                })
+            }
+        }
+    };
+}
+
+macro_rules! self_matches_current {
+    ($self:ident, $pattern:pat) => {
+        matches!($self.current_token, Some(Token { token_type: $pattern, .. }))
+    };
+    ($self:ident, $pattern:pat if $cond:expr) => {
+        matches!($self.current_token, Some(Token { token_type: $pattern, .. }) if $cond)
+    };
+}
+
+macro_rules! token_matches {
+    ($token:ident, $pattern:pat) => {
+        matches!($token.token_type, $pattern)
+    };
+    ($token:expr, $pattern:pat) => {
+        matches!($token.token_type, $pattern)
+    };
+    ($token:ident, $pattern:pat if $cond:expr) => {
+        matches!($token.token_type, $pattern if $cond)
+    };
+    ($token:expr, $pattern:pat if $cond:expr) => {
+        matches!($token.token_type, $pattern if $cond)
+    };
+}
+
 pub struct Parser<R: Read + Seek> {
     tokenizer: TokenizerBuffer<R>,
     current_token: Option<Token>,
@@ -39,57 +101,40 @@ where
         }
     }
 
+    /// Parses the input from the tokenizer buffer and returns the resulting expression
     pub fn parse(&mut self) -> Result<Option<Expression>, ParseError> {
-        self.current_token = self.tokenizer.next()?;
+        self.assign_next()?;
         self.expression()
     }
 
-    fn expression(&mut self) -> Result<Option<Expression>, ParseError> {
-        /// Helper macro to match the next token in the tokenizer buffer to a pattern
-        /// with an optional if condition. The token is peeked and not consumed.
-        macro_rules! matches_peek {
-            ($pattern:pat) => {
-                matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }))
-            };
-            ($pattern:pat if $cond:expr) => {
-                matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond)
-            };
-        }
+    /// Assigns the next token in the tokenizer buffer to the current token
+    fn assign_next(&mut self) -> Result<(), ParseError> {
+        self.current_token = self.tokenizer.next()?;
+        Ok(())
+    }
 
+    fn get_next(&mut self) -> Result<Option<&Token>, ParseError> {
+        self.assign_next()?;
+        Ok(self.current_token.as_ref())
+    }
+
+    fn expression(&mut self) -> Result<Option<Expression>, ParseError> {
         let Some(current_token) = self.current_token.as_ref() else {
            return Ok(None);
        };
 
-        Ok(match current_token.token_type {
-            // Assignment expression
-            TokenType::Identifier(_) if matches_peek!(TokenType::Symbol(Symbol::Assign)) => {
-                Some(Expression::AssignmentExpression(self.assignment()?))
-            }
+        let to_return = Some(match current_token.token_type {
+            // match declarations with a `let` keyword
+            TokenType::Keyword(Keyword::Let) => self.declaration()?,
 
-            // Negation expression
-            TokenType::Symbol(Symbol::Minus) if matches_peek!(TokenType::Number(_)) => {
-                self.tokenizer.next()?;
-                Some(Expression::Negation(Box::new(
-                    self.parse()?.ok_or(ParseError::UnexpectedEOF)?,
-                )))
-            }
+            // match functions with a `fn` keyword
+            TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?),
 
-            // Literal expression
+            // match literal expressions with a semi-colon afterwards
             TokenType::Number(_) | TokenType::String(_)
-                if !matches_peek!(
-                    TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical()
-                ) =>
+                if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) =>
             {
-                Some(Expression::Literal(self.literal()?))
-            }
-
-            // Logical expression
-            TokenType::Number(_) | TokenType::String(_)
-                if matches_peek!(
-                    TokenType::Symbol(s) if s.is_comparison() || s.is_logical()
-                ) =>
-            {
-                Some(Expression::LogicalExpression(self.logical()?))
+                Expression::Literal(self.literal()?)
             }
 
             _ => {
@@ -97,82 +142,66 @@ where
                     token: current_token.clone(),
                 })
             }
-        })
-    }
-
-    fn assignment(&mut self) -> Result<AssignmentExpression, ParseError> {
-        let Some(Token {
-            token_type: TokenType::Identifier(identifier),
-            ..
-        }) = self.current_token.as_ref()
-        else {
-            return Err(ParseError::UnexpectedToken {
-                // Safety: We have already checked that `self.current_token` is `Some` in the `parse()` function
-                token: self.current_token.clone().unwrap(),
-            });
-        };
-
-        // make sure the next token is `=` for sanity
-        if let Some(Token {
-            token_type: TokenType::Symbol(Symbol::Assign),
-            ..
-        }) = self.tokenizer.next()?
-        {
-        } else {
-            self.tokenizer.seek(SeekFrom::Current(-1))?;
-            return Err(ParseError::UnexpectedToken {
-                token: self.tokenizer.next()?.unwrap(),
-            });
-        };
-
-        Ok(AssignmentExpression {
-            identifier: identifier.clone(),
-            expression: Box::new(self.parse()?.ok_or(ParseError::UnexpectedEOF)?),
-        })
-    }
-
-    fn logical(&mut self) -> Result<LogicalExpression, ParseError> {
-        let Some(current_token) = self.current_token.as_ref() else {
-            return Err(ParseError::UnexpectedEOF);
-        };
-
-        todo!()
-    }
-
-    fn binary(&mut self) -> Result<BinaryExpression, ParseError> {
-        let Some(current_token) = self.current_token.as_ref() else {
-            return Err(ParseError::UnexpectedEOF);
-        };
-
-        todo!()
-    }
-
-    fn literal(&mut self) -> Result<Literal, ParseError> {
-        let Some(current_token) = self.current_token.as_ref() else {
-            return Err(ParseError::UnexpectedEOF);
-        };
-
-        let to_return = match current_token.token_type {
-            TokenType::Number(n) => Literal::Number(n),
-            TokenType::String(ref s) => Literal::String(s.clone()),
-            _ => {
-                return Err(ParseError::UnexpectedToken {
-                    token: current_token.clone(),
-                })
-            }
-        };
-
-        // Advance the tokenizer if the next token is a semicolon
-        if let Some(Token {
-            token_type: TokenType::Symbol(Symbol::Semicolon),
-            ..
-        }) = self.tokenizer.peek()?
-        {
-            self.tokenizer.next()?;
-        }
+        });
 
         Ok(to_return)
     }
+
+    fn declaration(&mut self) -> Result<Expression, ParseError> {
+        let current_token = token_from_option!(self.current_token);
+        if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) {
+            return Err(ParseError::UnexpectedToken {
+                token: current_token.clone(),
+            });
+        }
+        let identifier = extract_token_data!(
+            token_from_option!(self.get_next()?),
+            TokenType::Identifier(ref id),
+            id.clone()
+        );
+
+        let current_token = token_from_option!(self.get_next()?).clone();
+
+        if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) {
+            return Err(ParseError::UnexpectedToken {
+                token: current_token,
+            });
+        }
+
+        let assignment_expression = self.parse()?.ok_or(ParseError::UnexpectedEOF)?;
+
+        // make sure the next token is a semi-colon
+        let current_token = token_from_option!(self.get_next()?);
+        if !token_matches!(current_token, TokenType::Symbol(Symbol::Semicolon)) {
+            return Err(ParseError::UnexpectedToken {
+                token: current_token.clone(),
+            });
+        }
+
+        Ok(Expression::DeclarationExpression(
+            identifier,
+            Box::new(assignment_expression),
+        ))
+    }
+
+    fn literal(&mut self) -> Result<Literal, ParseError> {
+        let current_token = token_from_option!(self.current_token);
+        let literal = match current_token.token_type {
+            TokenType::Number(ref num) => Literal::Number(num.clone()),
+            TokenType::String(ref string) => Literal::String(string.clone()),
+            _ => {
+                return Err(ParseError::UnexpectedToken {
+                    token: current_token.clone(),
+                })
+            }
+        };
+
+        Ok(literal)
+    }
+
+    fn function(&mut self) -> Result<FunctionExpression, ParseError> {
+        todo!("Implement function parsing")
+    }
 }
 
 #[cfg(test)]
@@ -181,40 +210,20 @@ mod tests {
     use anyhow::Result;
 
     #[test]
-    fn test_assignment() -> Result<()> {
+    fn test_declarations() -> Result<()> {
         let input = r#"
-            x = 10;
-            y = "testing";
+            let x = 5;
+            // The below line should fail
+            let y = 234
         "#;
 
         let tokenizer = Tokenizer::from(input.to_owned());
         let mut parser = Parser::new(tokenizer);
 
-        let expr = parser.parse()?.unwrap();
+        let expression = parser.parse()?.unwrap();
 
-        assert_eq!("x = 10", format!("{}", expr));
+        assert_eq!("(let x = 5)", expression.to_string());
 
-        let expr = parser.parse()?.unwrap();
-
-        assert_eq!("y = \"testing\"", format!("{}", expr));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_literal() -> Result<()> {
-        let input = r#"
-            10;
-            "testing";
-        "#;
-
-        let tokenizer = Tokenizer::from(input.to_owned());
-        let mut parser = Parser::new(tokenizer);
-
-        let expr = parser.parse()?.unwrap();
-        assert_eq!("10", format!("{}", expr));
-
-        let expr = parser.parse()?.unwrap();
-        assert_eq!("\"testing\"", format!("{}", expr));
+        assert!(parser.parse().is_err());
 
         Ok(())
     }
diff --git a/src/parser/tree_node.rs b/src/parser/tree_node.rs
index 0b960d8..19235eb 100644
--- a/src/parser/tree_node.rs
+++ b/src/parser/tree_node.rs
@@ -1,4 +1,6 @@
-use crate::tokenizer::token::{Number, TokenType};
+use std::collections::HashSet;
+
+use crate::tokenizer::token::Number;
 
 #[derive(Debug, Eq, PartialEq)]
 pub enum Literal {
@@ -71,7 +73,30 @@ pub struct AssignmentExpression {
 
 impl std::fmt::Display for AssignmentExpression {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{} = {}", self.identifier, self.expression)
+        write!(f, "({} = {})", self.identifier, self.expression)
     }
 }
 
+#[derive(Debug, PartialEq, Eq)]
+pub struct FunctionExpression {
+    pub name: String,
+    pub arguments: HashSet<String>,
+    pub body: Box<Expression>,
+}
+
+impl std::fmt::Display for FunctionExpression {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "(fn {}({}) {{ {} }})",
+            self.name,
+            self.arguments
+                .iter()
+                .cloned()
+                .collect::<Vec<_>>()
+                .join(", "),
+            self.body
+        )
+    }
+}
+
@@ -82,6 +107,8 @@ pub enum Expression {
     BinaryExpression(BinaryExpression),
     LogicalExpression(LogicalExpression),
     AssignmentExpression(AssignmentExpression),
+    DeclarationExpression(String, Box<Expression>),
+    FunctionExpression(FunctionExpression),
 }
 
 impl std::fmt::Display for Expression {
@@ -92,6 +119,8 @@ impl std::fmt::Display for Expression {
             Expression::BinaryExpression(e) => write!(f, "{}", e),
             Expression::LogicalExpression(e) => write!(f, "{}", e),
             Expression::AssignmentExpression(e) => write!(f, "{}", e),
+            Expression::DeclarationExpression(id, e) => write!(f, "(let {} = {})", id, e),
+            Expression::FunctionExpression(e) => write!(f, "{}", e),
         }
     }
 }
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 6aa3b73..ccebf61 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -520,27 +520,6 @@ mod tests {
     }
     "#;
 
-    #[test]
-    fn test_tokenizer_buffer_seek_from_current() -> Result<()> {
-        let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
-        let mut buffer = TokenizerBuffer::new(tokenizer);
-
-        let token = buffer.next()?;
-        assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
-
-        buffer.seek(SeekFrom::Current(1))?;
-
-        let token = buffer.next()?;
-        assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
-
-        buffer.seek(SeekFrom::Current(-1))?;
-
-        let token = buffer.next()?;
-        assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
-
-        Ok(())
-    }
-
     #[test]
     fn test_tokenizer_from_path_ok() {
         let tokenizer = Tokenizer::from_path(TEST_FILE);
diff --git a/src/tokenizer/token.rs b/src/tokenizer/token.rs
index 46465dd..5a04c91 100644
--- a/src/tokenizer/token.rs
+++ b/src/tokenizer/token.rs
@@ -1,4 +1,4 @@
-#[derive(Debug, PartialEq, Clone)]
+#[derive(Debug, PartialEq, Eq, Clone)]
 pub struct Token {
     /// The type of the token
     pub token_type: TokenType,
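
Note on the `todo!()` left in `function()`: below is a minimal sketch of how it could be filled in, reusing this diff's own macros and matching the `fn name(arg, ...) { body }` shape implied by `FunctionExpression`'s `Display` impl. This is an illustration only, not part of the change: the `Symbol::RParen`, `Symbol::Comma`, `Symbol::LBrace`, and `Symbol::RBrace` variants are assumed to exist (only `Symbol::LParen` is visible elsewhere in this diff), and the body is assumed to be a single expression.

    fn function(&mut self) -> Result<FunctionExpression, ParseError> {
        // the caller matched the `fn` keyword; the next token is the name
        let name = extract_token_data!(
            token_from_option!(self.get_next()?),
            TokenType::Identifier(ref id),
            id.clone()
        );

        // expect `(`
        let current_token = token_from_option!(self.get_next()?).clone();
        if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) {
            return Err(ParseError::UnexpectedToken {
                token: current_token,
            });
        }

        // collect comma-separated argument names until `)`
        // (full path avoids adding a `use std::collections::HashSet;` to parser/mod.rs)
        let mut arguments = std::collections::HashSet::new();
        loop {
            let token = token_from_option!(self.get_next()?).clone();
            match token.token_type {
                TokenType::Symbol(Symbol::RParen) => break,
                TokenType::Symbol(Symbol::Comma) => continue,
                TokenType::Identifier(ref id) => {
                    arguments.insert(id.clone());
                }
                _ => return Err(ParseError::UnexpectedToken { token }),
            }
        }

        // expect `{`, parse the single-expression body, then expect `}`
        let current_token = token_from_option!(self.get_next()?).clone();
        if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) {
            return Err(ParseError::UnexpectedToken {
                token: current_token,
            });
        }

        let body = self.parse()?.ok_or(ParseError::UnexpectedEOF)?;

        let current_token = token_from_option!(self.get_next()?).clone();
        if !token_matches!(current_token, TokenType::Symbol(Symbol::RBrace)) {
            return Err(ParseError::UnexpectedToken {
                token: current_token,
            });
        }

        Ok(FunctionExpression {
            name,
            arguments,
            body: Box::new(body),
        })
    }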