This commit is contained in:
2024-11-21 16:28:38 -07:00
parent 62bf83e277
commit 67c22e1f6b
4 changed files with 185 additions and 168 deletions

View File

@@ -1,13 +1,10 @@
mod tree_node; mod tree_node;
use crate::tokenizer::{ use crate::tokenizer::{
token::{Symbol, Token, TokenType}, token::{Keyword, Symbol, Token, TokenType},
Tokenizer, TokenizerBuffer, TokenizerError, Tokenizer, TokenizerBuffer, TokenizerError,
}; };
use std::{ use std::io::{Read, Seek};
collections::VecDeque,
io::{Read, Seek, SeekFrom},
};
use thiserror::Error; use thiserror::Error;
use tree_node::*; use tree_node::*;
@@ -15,7 +12,7 @@ use tree_node::*;
pub enum ParseError { pub enum ParseError {
#[error(transparent)] #[error(transparent)]
TokenizerError(#[from] TokenizerError), TokenizerError(#[from] TokenizerError),
#[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}", token.line, token.column, token.token_type)] #[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)]
UnexpectedToken { token: Token }, UnexpectedToken { token: Token },
#[error("Unexpected EOF")] #[error("Unexpected EOF")]
UnexpectedEOF, UnexpectedEOF,
@@ -23,6 +20,71 @@ pub enum ParseError {
UnknownError, UnknownError,
} }
/// Peeks at the next token of `$self.tokenizer` and evaluates to `true` when that token
/// exists and its `token_type` matches `$pattern` (optionally narrowed by an `if` guard).
///
/// The expansion applies `?` to the result of `peek()`, so the macro can only be used
/// inside a function that returns a compatible `Result`.
macro_rules! self_matches_peek {
    ($self:ident, $pattern:pat $(if $cond:expr)?) => {
        matches!(
            $self.tokenizer.peek()?,
            Some(Token { token_type: $pattern, .. }) $(if $cond)?
        )
    };
}
/// Unwraps an `Option` that may hold a token.
///
/// Evaluates to a clone of the contained value; when the option is `None` the enclosing
/// function returns `Err(ParseError::UnexpectedEOF)`.
macro_rules! token_from_option {
    ($token:expr) => {
        if let Some(ref found) = $token {
            found.clone()
        } else {
            return Err(ParseError::UnexpectedEOF)
        }
    };
}
/// Extracts data from a token whose `token_type` matches `$pattern`.
///
/// Evaluates to `$extraction` (which may use the bindings introduced by `$pattern`) when
/// the token matches; otherwise the enclosing function returns
/// `Err(ParseError::UnexpectedToken { .. })` carrying a clone of the offending token.
///
/// The first arm takes a plain identifier and only borrows it. The second arm takes an
/// arbitrary expression and evaluates it exactly once: the expression is frequently
/// side-effecting (e.g. it advances the tokenizer), so expanding it a second time in the
/// error path would consume another token and report the wrong one.
macro_rules! extract_token_data {
    ($token:ident, $pattern:pat, $extraction:expr) => {
        match $token.token_type {
            $pattern => $extraction,
            _ => {
                return Err(ParseError::UnexpectedToken {
                    token: $token.clone(),
                })
            }
        }
    };
    ($token:expr, $pattern:pat, $extraction:expr) => {{
        // Bind the expression's value once; both the match and the error path reuse it.
        let token = $token;
        match token.token_type {
            $pattern => $extraction,
            _ => {
                return Err(ParseError::UnexpectedToken {
                    token: token.clone(),
                })
            }
        }
    }};
}
/// Evaluates to `true` when `$self.current_token` is `Some` token whose `token_type`
/// matches `$pattern` (optionally narrowed by an `if` guard). Nothing is consumed.
macro_rules! self_matches_current {
    ($self:ident, $pattern:pat $(if $cond:expr)?) => {
        matches!(
            $self.current_token,
            Some(Token { token_type: $pattern, .. }) $(if $cond)?
        )
    };
}
/// Evaluates to `true` when the `token_type` of `$token` matches `$pattern`, optionally
/// narrowed by an `if` guard. `$token` may be a plain identifier or any expression that
/// has a `token_type` field.
macro_rules! token_matches {
    ($token:expr, $pattern:pat $(if $cond:expr)?) => {
        matches!($token.token_type, $pattern $(if $cond)?)
    };
}
pub struct Parser<R: Read + Seek> { pub struct Parser<R: Read + Seek> {
tokenizer: TokenizerBuffer<R>, tokenizer: TokenizerBuffer<R>,
current_token: Option<Token>, current_token: Option<Token>,
@@ -39,57 +101,40 @@ where
} }
} }
/// Parses the input from the tokenizer buffer and returns the resulting expression
pub fn parse(&mut self) -> Result<Option<tree_node::Expression>, ParseError> { pub fn parse(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
self.current_token = self.tokenizer.next()?; self.assign_next()?;
self.expression() self.expression()
} }
fn expression(&mut self) -> Result<Option<tree_node::Expression>, ParseError> { /// Assigns the next token in the tokenizer buffer to the current token
/// Helper macro to match the next token in the tokenizer buffer to a pattern fn assign_next(&mut self) -> Result<(), ParseError> {
/// with an optional if condition. The token is peeked and not consumed. self.current_token = self.tokenizer.next()?;
macro_rules! matches_peek { Ok(())
($pattern:pat) => {
matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }))
};
($pattern:pat if $cond:expr) => {
matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond)
};
} }
fn get_next(&mut self) -> Result<Option<&Token>, ParseError> {
self.assign_next()?;
Ok(self.current_token.as_ref())
}
fn expression(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
let Some(current_token) = self.current_token.as_ref() else { let Some(current_token) = self.current_token.as_ref() else {
return Ok(None); return Ok(None);
}; };
Ok(match current_token.token_type { let to_return = Some(match current_token.token_type {
// Assignment expression // match declarations with a `let` keyword
TokenType::Identifier(_) if matches_peek!(TokenType::Symbol(Symbol::Assign)) => { TokenType::Keyword(Keyword::Let) => self.declaration()?,
Some(Expression::AssignmentExpression(self.assignment()?))
}
// Negation expression // match functions with a `fn` keyword
TokenType::Symbol(Symbol::Minus) if matches_peek!(TokenType::Number(_)) => { TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?),
self.tokenizer.next()?;
Some(Expression::Negation(Box::new(
self.parse()?.ok_or(ParseError::UnexpectedEOF)?,
)))
}
// Literal expression // match literal expressions with a semi-colon afterwards
TokenType::Number(_) | TokenType::String(_) TokenType::Number(_) | TokenType::String(_)
if !matches_peek!( if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) =>
TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical()
) =>
{ {
Some(Expression::Literal(self.literal()?)) Expression::Literal(self.literal()?)
}
// Logical expression
TokenType::Number(_) | TokenType::String(_)
if matches_peek!(
TokenType::Symbol(s) if s.is_comparison() || s.is_logical()
) =>
{
Some(Expression::LogicalExpression(self.logical()?))
} }
_ => { _ => {
@@ -97,82 +142,66 @@ where
token: current_token.clone(), token: current_token.clone(),
}) })
} }
})
}
fn assignment(&mut self) -> Result<tree_node::AssignmentExpression, ParseError> {
let Some(Token {
token_type: TokenType::Identifier(identifier),
..
}) = self.current_token.as_ref()
else {
return Err(ParseError::UnexpectedToken {
// Safety: We have already checked that `self.current_token` is `Some` in the `parse()` function
token: self.current_token.clone().unwrap(),
}); });
};
// make sure the next token is `=` for sanity
if let Some(Token {
token_type: TokenType::Symbol(Symbol::Assign),
..
}) = self.tokenizer.next()?
{
} else {
self.tokenizer.seek(SeekFrom::Current(-1))?;
return Err(ParseError::UnexpectedToken {
token: self.tokenizer.next()?.unwrap(),
});
};
Ok(AssignmentExpression {
identifier: identifier.clone(),
expression: Box::new(self.parse()?.ok_or(ParseError::UnexpectedEOF)?),
})
}
fn logical(&mut self) -> Result<tree_node::LogicalExpression, ParseError> {
let Some(current_token) = self.current_token.as_ref() else {
return Err(ParseError::UnexpectedEOF);
};
todo!()
}
fn binary(&mut self) -> Result<tree_node::BinaryExpression, ParseError> {
let Some(current_token) = self.current_token.as_ref() else {
return Err(ParseError::UnexpectedEOF);
};
todo!()
}
fn literal(&mut self) -> Result<tree_node::Literal, ParseError> {
let Some(current_token) = self.current_token.as_ref() else {
return Err(ParseError::UnexpectedEOF);
};
let to_return = match current_token.token_type {
TokenType::Number(n) => Literal::Number(n),
TokenType::String(ref s) => Literal::String(s.clone()),
_ => {
return Err(ParseError::UnexpectedToken {
token: current_token.clone(),
})
}
};
// Advance the tokenizer if the next token is a semicolon
if let Some(Token {
token_type: TokenType::Symbol(Symbol::Semicolon),
..
}) = self.tokenizer.peek()?
{
self.tokenizer.next()?;
}
Ok(to_return) Ok(to_return)
} }
/// Parses a declaration of the form `let <identifier> = <expression>;`.
///
/// Expects the current token to be the `let` keyword. On success returns
/// `Expression::DeclarationExpression` holding the identifier and the parsed value.
///
/// # Errors
///
/// * `ParseError::UnexpectedToken` when the current token is not `let`, when the token
///   after it is not an identifier, when `=` is missing, or when the declaration is not
///   terminated by `;`.
/// * `ParseError::UnexpectedEOF` when the input ends before the declaration is complete.
fn declaration(&mut self) -> Result<Expression, ParseError> {
    // The declaration must start on the `let` keyword.
    let let_token = token_from_option!(self.current_token);
    if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) {
        return Err(ParseError::UnexpectedToken { token: let_token });
    }

    // Next comes the name being declared.
    let identifier = extract_token_data!(
        token_from_option!(self.get_next()?),
        TokenType::Identifier(ref name),
        name.clone()
    );

    // Then the `=` separating the name from its value.
    let assign_token = token_from_option!(self.get_next()?).clone();
    if !token_matches!(assign_token, TokenType::Symbol(Symbol::Assign)) {
        return Err(ParseError::UnexpectedToken {
            token: assign_token,
        });
    }

    // `parse` advances to the next token itself before parsing the value expression.
    let Some(value) = self.parse()? else {
        return Err(ParseError::UnexpectedEOF);
    };

    // The declaration has to be closed by a semi-colon.
    match self.get_next()? {
        None => return Err(ParseError::UnexpectedEOF),
        Some(terminator)
            if !token_matches!(terminator, TokenType::Symbol(Symbol::Semicolon)) =>
        {
            return Err(ParseError::UnexpectedToken {
                token: terminator.clone(),
            });
        }
        Some(_) => {}
    }

    Ok(Expression::DeclarationExpression(identifier, Box::new(value)))
}
/// Builds a [`Literal`] from the current token without advancing the tokenizer.
///
/// # Errors
///
/// * `ParseError::UnexpectedEOF` when there is no current token.
/// * `ParseError::UnexpectedToken` when the current token is neither a number nor a
///   string.
fn literal(&mut self) -> Result<Literal, ParseError> {
    let Some(current) = self.current_token.as_ref() else {
        return Err(ParseError::UnexpectedEOF);
    };

    match current.token_type {
        TokenType::Number(ref value) => Ok(Literal::Number(value.clone())),
        TokenType::String(ref text) => Ok(Literal::String(text.clone())),
        _ => Err(ParseError::UnexpectedToken {
            token: current.clone(),
        }),
    }
}
/// Parses a function expression (reached from `expression` on the `fn` keyword).
///
/// # Panics
///
/// Not implemented yet: every call currently panics through `todo!`.
fn function(&mut self) -> Result<FunctionExpression, ParseError> {
todo!("Implement function parsing")
}
} }
#[cfg(test)] #[cfg(test)]
@@ -181,40 +210,20 @@ mod tests {
use anyhow::Result; use anyhow::Result;
#[test] #[test]
fn test_assignment() -> Result<()> { fn test_declarations() -> Result<()> {
let input = r#" let input = r#"
x = 10; let x = 5;
y = "testing"; // The below line should fail
let y = 234
"#; "#;
let tokenizer = Tokenizer::from(input.to_owned()); let tokenizer = Tokenizer::from(input.to_owned());
let mut parser = Parser::new(tokenizer); let mut parser = Parser::new(tokenizer);
let expr = parser.parse()?.unwrap(); let expression = parser.parse()?.unwrap();
assert_eq!("x = 10", format!("{}", expr)); assert_eq!("(let x = 5)", expression.to_string());
let expr = parser.parse()?.unwrap(); assert!(parser.parse().is_err());
assert_eq!("y = \"testing\"", format!("{}", expr));
Ok(())
}
#[test]
fn test_literal() -> Result<()> {
let input = r#"
10;
"testing";
"#;
let tokenizer = Tokenizer::from(input.to_owned());
let mut parser = Parser::new(tokenizer);
let expr = parser.parse()?.unwrap();
assert_eq!("10", format!("{}", expr));
let expr = parser.parse()?.unwrap();
assert_eq!("\"testing\"", format!("{}", expr));
Ok(()) Ok(())
} }

View File

@@ -1,4 +1,6 @@
use crate::tokenizer::token::{Number, TokenType}; use std::collections::HashSet;
use crate::tokenizer::token::Number;
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
pub enum Literal { pub enum Literal {
@@ -71,7 +73,30 @@ pub struct AssignmentExpression {
impl std::fmt::Display for AssignmentExpression { impl std::fmt::Display for AssignmentExpression {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{} = {}", self.identifier, self.expression) write!(f, "({} = {})", self.identifier, self.expression)
}
}
/// A function definition: its name, its argument names and its body expression.
///
/// NOTE(review): `arguments` is a `HashSet`, which has no defined iteration order and
/// rejects duplicates. Anything that iterates it (such as the `Display` impl) may list
/// the arguments in a different order than they were written — confirm whether an
/// ordered collection is intended here.
#[derive(Debug, PartialEq, Eq)]
pub struct FunctionExpression {
/// Name of the function.
pub name: String,
/// Names of the function's arguments (unordered, see the note on the struct).
pub arguments: HashSet<String>,
/// Expression that forms the function body.
pub body: Box<Expression>,
}
impl std::fmt::Display for FunctionExpression {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"(fn {}({}) {{ {} }})",
self.name,
self.arguments
.iter()
.cloned()
.collect::<Vec<String>>()
.join(", "),
self.body
)
} }
} }
@@ -82,6 +107,8 @@ pub enum Expression {
BinaryExpression(BinaryExpression), BinaryExpression(BinaryExpression),
LogicalExpression(LogicalExpression), LogicalExpression(LogicalExpression),
AssignmentExpression(AssignmentExpression), AssignmentExpression(AssignmentExpression),
DeclarationExpression(String, Box<Expression>),
FunctionExpression(FunctionExpression),
} }
impl std::fmt::Display for Expression { impl std::fmt::Display for Expression {
@@ -92,6 +119,8 @@ impl std::fmt::Display for Expression {
Expression::BinaryExpression(e) => write!(f, "{}", e), Expression::BinaryExpression(e) => write!(f, "{}", e),
Expression::LogicalExpression(e) => write!(f, "{}", e), Expression::LogicalExpression(e) => write!(f, "{}", e),
Expression::AssignmentExpression(e) => write!(f, "{}", e), Expression::AssignmentExpression(e) => write!(f, "{}", e),
Expression::DeclarationExpression(id, e) => write!(f, "(let {} = {})", id, e),
Expression::FunctionExpression(e) => write!(f, "{}", e),
} }
} }
} }

View File

@@ -520,27 +520,6 @@ mod tests {
} }
"#; "#;
#[test]
fn test_tokenizer_buffer_seek_from_current() -> Result<()> {
let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
let mut buffer = TokenizerBuffer::new(tokenizer);
let token = buffer.next()?;
assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
buffer.seek(SeekFrom::Current(1))?;
let token = buffer.next()?;
assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
buffer.seek(SeekFrom::Current(-1))?;
let token = buffer.next()?;
assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
Ok(())
}
#[test] #[test]
fn test_tokenizer_from_path_ok() { fn test_tokenizer_from_path_ok() {
let tokenizer = Tokenizer::from_path(TEST_FILE); let tokenizer = Tokenizer::from_path(TEST_FILE);

View File

@@ -1,4 +1,4 @@
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token { pub struct Token {
/// The type of the token /// The type of the token
pub token_type: TokenType, pub token_type: TokenType,