refactor AST parsing
This commit is contained in:
@@ -1,10 +1,13 @@
|
|||||||
mod tree_node;
|
mod tree_node;
|
||||||
|
|
||||||
use crate::tokenizer::{
|
use crate::tokenizer::{
|
||||||
token::{Keyword, Symbol, Token, TokenType},
|
token::{self, Keyword, Symbol, Token, TokenType},
|
||||||
Tokenizer, TokenizerBuffer, TokenizerError,
|
Tokenizer, TokenizerBuffer, TokenizerError,
|
||||||
};
|
};
|
||||||
use std::io::{Read, Seek};
|
use std::{
|
||||||
|
collections::HashSet,
|
||||||
|
io::{Read, Seek},
|
||||||
|
};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tree_node::*;
|
use tree_node::*;
|
||||||
|
|
||||||
@@ -14,6 +17,8 @@ pub enum ParseError {
|
|||||||
TokenizerError(#[from] TokenizerError),
|
TokenizerError(#[from] TokenizerError),
|
||||||
#[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)]
|
#[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)]
|
||||||
UnexpectedToken { token: Token },
|
UnexpectedToken { token: Token },
|
||||||
|
#[error("Duplicated Identifer\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)]
|
||||||
|
DuplicateIdentifier { token: Token },
|
||||||
#[error("Unexpected EOF")]
|
#[error("Unexpected EOF")]
|
||||||
UnexpectedEOF,
|
UnexpectedEOF,
|
||||||
#[error("An unknown error has occurred")]
|
#[error("An unknown error has occurred")]
|
||||||
@@ -130,6 +135,9 @@ where
|
|||||||
// match functions with a `fn` keyword
|
// match functions with a `fn` keyword
|
||||||
TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?),
|
TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?),
|
||||||
|
|
||||||
|
// match block expressions with a `{` symbol
|
||||||
|
TokenType::Symbol(Symbol::LBrace) => Expression::BlockExpression(self.block()?),
|
||||||
|
|
||||||
// match literal expressions with a semi-colon afterwards
|
// match literal expressions with a semi-colon afterwards
|
||||||
TokenType::Number(_) | TokenType::String(_)
|
TokenType::Number(_) | TokenType::String(_)
|
||||||
if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) =>
|
if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) =>
|
||||||
@@ -147,6 +155,28 @@ where
|
|||||||
Ok(to_return)
|
Ok(to_return)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn block(&mut self) -> Result<BlockExpression, ParseError> {
|
||||||
|
let mut expressions = Vec::<Expression>::new();
|
||||||
|
let current_token = token_from_option!(self.current_token);
|
||||||
|
|
||||||
|
// sanity check: make sure the current token is a left brace
|
||||||
|
if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) {
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: current_token.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
while !token_matches!(
|
||||||
|
token_from_option!(self.get_next()?),
|
||||||
|
TokenType::Symbol(Symbol::RBrace)
|
||||||
|
) {
|
||||||
|
let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?;
|
||||||
|
expressions.push(expression);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(BlockExpression(expressions))
|
||||||
|
}
|
||||||
|
|
||||||
fn declaration(&mut self) -> Result<Expression, ParseError> {
|
fn declaration(&mut self) -> Result<Expression, ParseError> {
|
||||||
let current_token = token_from_option!(self.current_token);
|
let current_token = token_from_option!(self.current_token);
|
||||||
if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) {
|
if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) {
|
||||||
@@ -200,7 +230,79 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn function(&mut self) -> Result<FunctionExpression, ParseError> {
|
fn function(&mut self) -> Result<FunctionExpression, ParseError> {
|
||||||
todo!("Implement function parsing")
|
let current_token = token_from_option!(self.current_token);
|
||||||
|
// Sanify check that the current token is a `fn` keyword
|
||||||
|
if !self_matches_current!(self, TokenType::Keyword(Keyword::Fn)) {
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: current_token.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let fn_ident = extract_token_data!(
|
||||||
|
token_from_option!(self.get_next()?),
|
||||||
|
TokenType::Identifier(ref id),
|
||||||
|
id.clone()
|
||||||
|
);
|
||||||
|
|
||||||
|
// make sure next token is a left parenthesis
|
||||||
|
let current_token = token_from_option!(self.get_next()?);
|
||||||
|
if !token_matches!(current_token, TokenType::Symbol(Symbol::LParen)) {
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: current_token.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut arguments = HashSet::<String>::new();
|
||||||
|
|
||||||
|
// iterate through the arguments. While expression while increment the current token
|
||||||
|
// with the `token_from_option!(self.get_next()?)` macro
|
||||||
|
while !token_matches!(
|
||||||
|
token_from_option!(self.get_next()?),
|
||||||
|
TokenType::Symbol(Symbol::RParen)
|
||||||
|
) {
|
||||||
|
let current_token = token_from_option!(self.current_token);
|
||||||
|
let argument =
|
||||||
|
extract_token_data!(current_token, TokenType::Identifier(ref id), id.clone());
|
||||||
|
|
||||||
|
if arguments.contains(&argument) {
|
||||||
|
return Err(ParseError::DuplicateIdentifier {
|
||||||
|
token: current_token.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
arguments.insert(argument);
|
||||||
|
|
||||||
|
// make sure the next token is a comma or right parenthesis
|
||||||
|
if !self_matches_peek!(self, TokenType::Symbol(Symbol::Comma))
|
||||||
|
&& !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen))
|
||||||
|
{
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: token_from_option!(self.get_next()?).clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// edge case: if the next token is not a right parenthesis, increment the current token
|
||||||
|
//
|
||||||
|
// This will allow the loop to break on a right parenthesis with the next iteration
|
||||||
|
// which is incremented by the loop
|
||||||
|
if !self_matches_peek!(self, TokenType::Symbol(Symbol::RParen)) {
|
||||||
|
self.assign_next()?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// make sure the next token is a left brace
|
||||||
|
let current_token = token_from_option!(self.get_next()?);
|
||||||
|
if !token_matches!(current_token, TokenType::Symbol(Symbol::LBrace)) {
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: current_token.clone(),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(FunctionExpression {
|
||||||
|
name: fn_ident,
|
||||||
|
arguments,
|
||||||
|
body: self.block()?,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -227,4 +329,44 @@ mod tests {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_block() -> Result<()> {
    // Two declarations wrapped in braces should parse as one block expression.
    let source = r#"
        {
            let x = 5;
            let y = 10;
        }
        "#;
    let mut parser = Parser::new(Tokenizer::from(source.to_owned()));

    let parsed = parser.parse()?.unwrap();
    assert_eq!("{ (let x = 5); (let y = 10); }", parsed.to_string());

    Ok(())
}
|
||||||
|
|
||||||
|
#[test]
fn test_function_expression() -> Result<()> {
    // A commented function with two arguments and a single-declaration body.
    let source = r#"
        // This is a function. The parser is starting to get more complex
        fn add(x, y) {
            let z = 5;
        }
        "#;
    let mut parser = Parser::new(Tokenizer::from(source.to_owned()));

    let parsed = parser.parse()?.unwrap();
    let rendered = parsed.to_string();

    assert_eq!("(fn add(x, y) { { (let z = 5); } })", rendered);

    Ok(())
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ impl std::fmt::Display for AssignmentExpression {
|
|||||||
pub struct FunctionExpression {
|
pub struct FunctionExpression {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub arguments: HashSet<String>,
|
pub arguments: HashSet<String>,
|
||||||
pub body: Box<Expression>,
|
pub body: BlockExpression,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for FunctionExpression {
|
impl std::fmt::Display for FunctionExpression {
|
||||||
@@ -100,15 +100,56 @@ impl std::fmt::Display for FunctionExpression {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
pub struct BlockExpression(pub Vec<Expression>);
|
||||||
|
|
||||||
|
impl std::fmt::Display for BlockExpression {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{{ {}; }}",
|
||||||
|
self.0
|
||||||
|
.iter()
|
||||||
|
.map(|e| e.to_string())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join("; ")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
pub struct InvocationExpression {
|
||||||
|
pub name: String,
|
||||||
|
pub arguments: Vec<Expression>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for InvocationExpression {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}({})",
|
||||||
|
self.name,
|
||||||
|
self.arguments
|
||||||
|
.iter()
|
||||||
|
.map(|e| e.to_string())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(", ")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A node of the parsed syntax tree; each variant carries the data for one
/// kind of expression.
#[derive(Debug, PartialEq, Eq)]
pub enum Expression {
    /// A literal value, stored as a `Literal`.
    Literal(Literal),
    /// A bare identifier; displayed as the identifier itself.
    Variable(String),
    /// A negation wrapping a boxed inner expression
    /// (presumably the operand being negated; confirm against the parser).
    Negation(Box<Expression>),
    /// Wraps a `BinaryExpression`.
    BinaryExpression(BinaryExpression),
    /// Wraps a `LogicalExpression`.
    LogicalExpression(LogicalExpression),
    /// Wraps an `AssignmentExpression`.
    AssignmentExpression(AssignmentExpression),
    /// A `let` declaration: the declared identifier and the boxed expression
    /// bound to it; displayed as `(let <id> = <expr>)`.
    DeclarationExpression(String, Box<Expression>),
    /// Wraps a `FunctionExpression` (produced for the `fn` keyword).
    FunctionExpression(FunctionExpression),
    /// Wraps a `BlockExpression` (produced for a `{` symbol).
    BlockExpression(BlockExpression),
    /// Wraps an `InvocationExpression`.
    InvocationExpression(InvocationExpression),
}
|
||||||
|
|
||||||
impl std::fmt::Display for Expression {
|
impl std::fmt::Display for Expression {
|
||||||
@@ -121,6 +162,9 @@ impl std::fmt::Display for Expression {
|
|||||||
Expression::AssignmentExpression(e) => write!(f, "{}", e),
|
Expression::AssignmentExpression(e) => write!(f, "{}", e),
|
||||||
Expression::DeclarationExpression(id, e) => write!(f, "(let {} = {})", id, e),
|
Expression::DeclarationExpression(id, e) => write!(f, "(let {} = {})", id, e),
|
||||||
Expression::FunctionExpression(e) => write!(f, "{}", e),
|
Expression::FunctionExpression(e) => write!(f, "{}", e),
|
||||||
|
Expression::BlockExpression(e) => write!(f, "{}", e),
|
||||||
|
Expression::InvocationExpression(e) => write!(f, "{}", e),
|
||||||
|
Expression::Variable(id) => write!(f, "{}", id),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user