From c9fc59a03495e5899a403f781f7016c4cc13b163 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Thu, 21 Nov 2024 22:07:01 -0700 Subject: [PATCH] Start implementing binary expressions --- src/main.rs | 7 +-- src/parser/mod.rs | 122 +++++++++++++++++++++++++++++++++++++--- src/parser/tree_node.rs | 2 +- 3 files changed, 117 insertions(+), 14 deletions(-) diff --git a/src/main.rs b/src/main.rs index 86e6bea..024271e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,9 @@ mod parser; mod tokenizer; -use std::io::{Read, Seek}; - use clap::Parser; use parser::Parser as ASTParser; +use std::io::Read; use tokenizer::{Tokenizer, TokenizerError}; #[derive(Debug, thiserror::Error)] @@ -53,7 +52,7 @@ fn run_logic() -> Result<(), StationlangError> { let mut parser = ASTParser::new(tokenizer); - let ast = parser.parse()?; + let ast = parser.parse_all()?; if let Some(ast) = ast { println!("{}", ast); @@ -64,6 +63,6 @@ fn run_logic() -> Result<(), StationlangError> { fn main() { if let Err(e) = run_logic() { - eprintln!("\n\n{}", e); + eprintln!("\n{}", e); } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0daba7f..338d7fc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,13 +1,10 @@ mod tree_node; use crate::tokenizer::{ - token::{self, Keyword, Symbol, Token, TokenType}, + token::{Keyword, Symbol, Token, TokenType}, Tokenizer, TokenizerBuffer, TokenizerError, }; -use std::{ - collections::HashSet, - io::{Read, Seek}, -}; +use std::collections::HashSet; use thiserror::Error; use tree_node::*; @@ -19,6 +16,8 @@ pub enum ParseError { UnexpectedToken { token: Token }, #[error("Duplicated Identifer\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)] DuplicateIdentifier { token: Token }, + #[error("Invalid Syntax\n\nLine: {0}, Column: {1}\nReason: {reason}", token.line, token.column)] + InvalidSyntax { token: Token, reason: String }, #[error("Unexpected EOF")] UnexpectedEOF, #[error("An unknown error has occurred")] @@ -103,6 +102,20 @@ impl Parser { } } + /// Parses all the input from the tokenizer buffer and returns the resulting expression + /// Expressions are returned in a root block expression node + pub fn parse_all(&mut self) -> Result, ParseError> { + let mut expressions = Vec::::new(); + + while let Some(expression) = self.parse()? { + expressions.push(expression); + } + + Ok(Some(Expression::BlockExpression(BlockExpression( + expressions, + )))) + } + /// Parses the input from the tokenizer buffer and returns the resulting expression pub fn parse(&mut self) -> Result, ParseError> { self.assign_next()?; @@ -115,6 +128,7 @@ impl Parser { Ok(()) } + /// Calls `assign_next` and returns the next token in the tokenizer buffer fn get_next(&mut self) -> Result, ParseError> { self.assign_next()?; Ok(self.current_token.as_ref()) @@ -125,6 +139,10 @@ impl Parser { return Ok(None); }; + if token_matches!(current_token, TokenType::EOF) { + return Ok(None); + } + let to_return = Some(match current_token.token_type { // match declarations with a `let` keyword TokenType::Keyword(Keyword::Let) => self.declaration()?, @@ -132,6 +150,14 @@ impl Parser { // match functions with a `fn` keyword TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?), + // match a variable expression with opening parenthesis + TokenType::Identifier(_) + if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => + { + Expression::InvocationExpression(self.invocation()?) + } + + // match variable expressions with an identifier TokenType::Identifier(ref id) => Expression::Variable(id.clone()), // match block expressions with a `{` symbol @@ -139,7 +165,7 @@ impl Parser { // match literal expressions with a semi-colon afterwards TokenType::Number(_) | TokenType::String(_) - if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) => + if !self_matches_peek!(self, TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical()) => { Expression::Literal(self.literal()?) } @@ -154,6 +180,68 @@ impl Parser { Ok(to_return) } + fn binary(&mut self) -> Result { + todo!() + } + + fn invocation(&mut self) -> Result { + let identifier = extract_token_data!( + token_from_option!(self.current_token), + TokenType::Identifier(ref id), + id.clone() + ); + + // Ensure the next token is a left parenthesis + let current_token = token_from_option!(self.get_next()?); + if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) { + return Err(ParseError::UnexpectedToken { + token: current_token.clone(), + }); + } + + let mut arguments = Vec::::new(); + // We need to make sure the expressions are NOT BlockExpressions, as they are not allowed + + while !token_matches!( + token_from_option!(self.get_next()?), + TokenType::Symbol(Symbol::RParen) + ) { + let current_token = token_from_option!(self.current_token); + let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?; + + if let Expression::BlockExpression(_) = expression { + return Err(ParseError::InvalidSyntax { + token: current_token, + reason: "Block expressions are not allowed in function invocations".to_owned(), + }); + } + + arguments.push(expression); + + // make sure the next token is a comma or right parenthesis + if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) + && !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) + { + return Err(ParseError::UnexpectedToken { + token: token_from_option!(self.get_next()?).clone(), + }); + } + + // edge case: if the next token is not a right parenthesis, increment the current token + // + // This will allow the loop to break on a right parenthesis with the next iteration + // which is incremented by the loop + if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) { + self.assign_next()?; + } + } + + Ok(InvocationExpression { + name: identifier, + arguments, + }) + } + fn block(&mut self) -> Result { let mut expressions = Vec::::new(); let current_token = token_from_option!(self.current_token); @@ -251,7 +339,7 @@ impl Parser { }); } - let mut arguments = HashSet::::new(); + let mut arguments = Vec::::new(); // iterate through the arguments. While expression while increment the current token // with the `token_from_option!(self.get_next()?)` macro @@ -269,7 +357,7 @@ impl Parser { }); } - arguments.insert(argument); + arguments.push(argument); // make sure the next token is a comma or right parenthesis if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma)) @@ -362,10 +450,26 @@ mod tests { let expression = parser.parse()?.unwrap(); assert_eq!( - "(fn add(x, y) { { (let z = 5); } })", + "(fn add(x, y) { { (let z = x); } })", expression.to_string() ); Ok(()) } + + #[test] + fn test_function_invocation() -> Result<()> { + let input = r#" + add(); + "#; + + let tokenizer = Tokenizer::from(input.to_owned()); + let mut parser = Parser::new(tokenizer); + + let expression = parser.parse()?.unwrap(); + + assert_eq!("add()", expression.to_string()); + + Ok(()) + } } diff --git a/src/parser/tree_node.rs b/src/parser/tree_node.rs index 1b40e35..ce32fdf 100644 --- a/src/parser/tree_node.rs +++ b/src/parser/tree_node.rs @@ -80,7 +80,7 @@ impl std::fmt::Display for AssignmentExpression { #[derive(Debug, PartialEq, Eq)] pub struct FunctionExpression { pub name: String, - pub arguments: HashSet, + pub arguments: Vec, pub body: BlockExpression, }