wip
This commit is contained in:
@@ -1,13 +1,10 @@
|
|||||||
mod tree_node;
|
mod tree_node;
|
||||||
|
|
||||||
use crate::tokenizer::{
|
use crate::tokenizer::{
|
||||||
token::{Symbol, Token, TokenType},
|
token::{Keyword, Symbol, Token, TokenType},
|
||||||
Tokenizer, TokenizerBuffer, TokenizerError,
|
Tokenizer, TokenizerBuffer, TokenizerError,
|
||||||
};
|
};
|
||||||
use std::{
|
use std::io::{Read, Seek};
|
||||||
collections::VecDeque,
|
|
||||||
io::{Read, Seek, SeekFrom},
|
|
||||||
};
|
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tree_node::*;
|
use tree_node::*;
|
||||||
|
|
||||||
@@ -15,7 +12,7 @@ use tree_node::*;
|
|||||||
pub enum ParseError {
|
pub enum ParseError {
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
TokenizerError(#[from] TokenizerError),
|
TokenizerError(#[from] TokenizerError),
|
||||||
#[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}", token.line, token.column, token.token_type)]
|
#[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)]
|
||||||
UnexpectedToken { token: Token },
|
UnexpectedToken { token: Token },
|
||||||
#[error("Unexpected EOF")]
|
#[error("Unexpected EOF")]
|
||||||
UnexpectedEOF,
|
UnexpectedEOF,
|
||||||
@@ -23,6 +20,71 @@ pub enum ParseError {
|
|||||||
UnknownError,
|
UnknownError,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tests whether the next token in the tokenizer buffer has a `token_type` matching
/// `$pattern`, with an optional `if` guard. The token is peeked and not consumed.
///
/// Expands to a `bool` expression. The result of `$self.tokenizer.peek()` is unwrapped
/// with `?`, so the macro can only be used inside a function that can propagate that
/// error. A peek result of `None` never matches.
macro_rules! self_matches_peek {
    ($self:ident, $pattern:pat $(if $cond:expr)?) => {
        match $self.tokenizer.peek()? {
            Some(Token { token_type: $pattern, .. }) $(if $cond)? => true,
            _ => false,
        }
    };
}
|
||||||
|
|
||||||
|
/// Unwraps an `Option` that holds a token, yielding a clone of the contained value.
///
/// On `None` the enclosing function returns early with `Err(ParseError::UnexpectedEOF)`,
/// so the macro can only be used inside a function returning a compatible `Result`.
///
/// Note: the contained value is bound with `ref` before `.clone()` is called. When the
/// argument is an `Option<&Token>`, that clones the reference, so the macro yields a
/// `&Token` rather than an owned `Token`.
macro_rules! token_from_option {
    ($token:expr) => {
        if let Some(ref token) = $token {
            token.clone()
        } else {
            return Err(ParseError::UnexpectedEOF);
        }
    };
}
|
||||||
|
|
||||||
|
/// Extracts data from a token whose `token_type` matches `$pattern`.
///
/// Evaluates to `$extraction` (which may use the bindings introduced by `$pattern`) when
/// the token type matches. Otherwise the enclosing function returns early with
/// `Err(ParseError::UnexpectedToken { token })`, carrying a clone of the offending token.
///
/// `$token` may be an owned token or a reference to one.
macro_rules! extract_token_data {
    // Plain identifier: naming a variable twice has no side effects, so the token can be
    // inspected in place.
    ($token:ident, $pattern:pat, $extraction:expr) => {
        match $token.token_type {
            $pattern => $extraction,
            _ => {
                return Err(ParseError::UnexpectedToken {
                    token: $token.clone(),
                })
            }
        }
    };
    // Arbitrary expression: evaluate it exactly once and bind the result by reference.
    // Expanding `$token` a second time in the error arm would re-run any side effects
    // (e.g. advancing a tokenizer) and report a different token than the one inspected.
    ($token:expr, $pattern:pat, $extraction:expr) => {
        match $token {
            ref token => match token.token_type {
                $pattern => $extraction,
                _ => {
                    return Err(ParseError::UnexpectedToken {
                        // `*token` strips the `ref` binding so that `.clone()` yields an
                        // owned token whether `$token` was a `Token` or a `&Token`.
                        token: (*token).clone(),
                    })
                }
            },
        }
    };
}
|
||||||
|
|
||||||
|
/// Tests whether `$self.current_token` holds a token whose `token_type` matches
/// `$pattern`, with an optional `if` guard.
///
/// Expands to a `bool` expression; a `current_token` of `None` never matches.
macro_rules! self_matches_current {
    ($self:ident, $pattern:pat $(if $cond:expr)?) => {
        match $self.current_token {
            Some(Token { token_type: $pattern, .. }) $(if $cond)? => true,
            _ => false,
        }
    };
}
|
||||||
|
|
||||||
|
/// Tests whether the `token_type` of `$token` matches `$pattern`, with an optional
/// `if` guard. Expands to a `bool` expression.
///
/// `$token` may be any expression with a `token_type` field (an identifier is itself an
/// expression, so no separate identifier arm is needed).
macro_rules! token_matches {
    ($token:expr, $pattern:pat $(if $cond:expr)?) => {
        matches!($token.token_type, $pattern $(if $cond)?)
    };
}
|
||||||
|
|
||||||
pub struct Parser<R: Read + Seek> {
|
pub struct Parser<R: Read + Seek> {
|
||||||
tokenizer: TokenizerBuffer<R>,
|
tokenizer: TokenizerBuffer<R>,
|
||||||
current_token: Option<Token>,
|
current_token: Option<Token>,
|
||||||
@@ -39,57 +101,40 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses the input from the tokenizer buffer and returns the resulting expression
|
||||||
pub fn parse(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
|
pub fn parse(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
|
||||||
self.current_token = self.tokenizer.next()?;
|
self.assign_next()?;
|
||||||
self.expression()
|
self.expression()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn expression(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
|
/// Assigns the next token in the tokenizer buffer to the current token
|
||||||
/// Helper macro to match the next token in the tokenizer buffer to a pattern
|
fn assign_next(&mut self) -> Result<(), ParseError> {
|
||||||
/// with an optional if condition. The token is peeked and not consumed.
|
self.current_token = self.tokenizer.next()?;
|
||||||
macro_rules! matches_peek {
|
Ok(())
|
||||||
($pattern:pat) => {
|
|
||||||
matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }))
|
|
||||||
};
|
|
||||||
($pattern:pat if $cond:expr) => {
|
|
||||||
matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond)
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Advances to the next token via `assign_next` and returns a reference to the new
/// current token, or `None` when there is no current token after advancing.
///
/// Any error from `assign_next` is propagated.
fn get_next(&mut self) -> Result<Option<&Token>, ParseError> {
    self.assign_next()?;
    let advanced = self.current_token.as_ref();
    Ok(advanced)
}
|
||||||
|
|
||||||
|
fn expression(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
|
||||||
let Some(current_token) = self.current_token.as_ref() else {
|
let Some(current_token) = self.current_token.as_ref() else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(match current_token.token_type {
|
let to_return = Some(match current_token.token_type {
|
||||||
// Assignment expression
|
// match declarations with a `let` keyword
|
||||||
TokenType::Identifier(_) if matches_peek!(TokenType::Symbol(Symbol::Assign)) => {
|
TokenType::Keyword(Keyword::Let) => self.declaration()?,
|
||||||
Some(Expression::AssignmentExpression(self.assignment()?))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Negation expression
|
// match functions with a `fn` keyword
|
||||||
TokenType::Symbol(Symbol::Minus) if matches_peek!(TokenType::Number(_)) => {
|
TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?),
|
||||||
self.tokenizer.next()?;
|
|
||||||
Some(Expression::Negation(Box::new(
|
|
||||||
self.parse()?.ok_or(ParseError::UnexpectedEOF)?,
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Literal expression
|
// match literal expressions with a semi-colon afterwards
|
||||||
TokenType::Number(_) | TokenType::String(_)
|
TokenType::Number(_) | TokenType::String(_)
|
||||||
if !matches_peek!(
|
if self_matches_peek!(self, TokenType::Symbol(Symbol::Semicolon)) =>
|
||||||
TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical()
|
|
||||||
) =>
|
|
||||||
{
|
{
|
||||||
Some(Expression::Literal(self.literal()?))
|
Expression::Literal(self.literal()?)
|
||||||
}
|
|
||||||
|
|
||||||
// Logical expression
|
|
||||||
TokenType::Number(_) | TokenType::String(_)
|
|
||||||
if matches_peek!(
|
|
||||||
TokenType::Symbol(s) if s.is_comparison() || s.is_logical()
|
|
||||||
) =>
|
|
||||||
{
|
|
||||||
Some(Expression::LogicalExpression(self.logical()?))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_ => {
|
_ => {
|
||||||
@@ -97,82 +142,66 @@ where
|
|||||||
token: current_token.clone(),
|
token: current_token.clone(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn assignment(&mut self) -> Result<tree_node::AssignmentExpression, ParseError> {
|
|
||||||
let Some(Token {
|
|
||||||
token_type: TokenType::Identifier(identifier),
|
|
||||||
..
|
|
||||||
}) = self.current_token.as_ref()
|
|
||||||
else {
|
|
||||||
return Err(ParseError::UnexpectedToken {
|
|
||||||
// Safety: We have already checked that `self.current_token` is `Some` in the `parse()` function
|
|
||||||
token: self.current_token.clone().unwrap(),
|
|
||||||
});
|
});
|
||||||
};
|
|
||||||
|
|
||||||
// make sure the next token is `=` for sanity
|
|
||||||
if let Some(Token {
|
|
||||||
token_type: TokenType::Symbol(Symbol::Assign),
|
|
||||||
..
|
|
||||||
}) = self.tokenizer.next()?
|
|
||||||
{
|
|
||||||
} else {
|
|
||||||
self.tokenizer.seek(SeekFrom::Current(-1))?;
|
|
||||||
return Err(ParseError::UnexpectedToken {
|
|
||||||
token: self.tokenizer.next()?.unwrap(),
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(AssignmentExpression {
|
|
||||||
identifier: identifier.clone(),
|
|
||||||
expression: Box::new(self.parse()?.ok_or(ParseError::UnexpectedEOF)?),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn logical(&mut self) -> Result<tree_node::LogicalExpression, ParseError> {
|
|
||||||
let Some(current_token) = self.current_token.as_ref() else {
|
|
||||||
return Err(ParseError::UnexpectedEOF);
|
|
||||||
};
|
|
||||||
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn binary(&mut self) -> Result<tree_node::BinaryExpression, ParseError> {
|
|
||||||
let Some(current_token) = self.current_token.as_ref() else {
|
|
||||||
return Err(ParseError::UnexpectedEOF);
|
|
||||||
};
|
|
||||||
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn literal(&mut self) -> Result<tree_node::Literal, ParseError> {
|
|
||||||
let Some(current_token) = self.current_token.as_ref() else {
|
|
||||||
return Err(ParseError::UnexpectedEOF);
|
|
||||||
};
|
|
||||||
|
|
||||||
let to_return = match current_token.token_type {
|
|
||||||
TokenType::Number(n) => Literal::Number(n),
|
|
||||||
TokenType::String(ref s) => Literal::String(s.clone()),
|
|
||||||
_ => {
|
|
||||||
return Err(ParseError::UnexpectedToken {
|
|
||||||
token: current_token.clone(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Advance the tokenizer if the next token is a semicolon
|
|
||||||
if let Some(Token {
|
|
||||||
token_type: TokenType::Symbol(Symbol::Semicolon),
|
|
||||||
..
|
|
||||||
}) = self.tokenizer.peek()?
|
|
||||||
{
|
|
||||||
self.tokenizer.next()?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(to_return)
|
Ok(to_return)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn declaration(&mut self) -> Result<Expression, ParseError> {
|
||||||
|
let current_token = token_from_option!(self.current_token);
|
||||||
|
if !self_matches_current!(self, TokenType::Keyword(Keyword::Let)) {
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: current_token.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
let identifier = extract_token_data!(
|
||||||
|
token_from_option!(self.get_next()?),
|
||||||
|
TokenType::Identifier(ref id),
|
||||||
|
id.clone()
|
||||||
|
);
|
||||||
|
|
||||||
|
let current_token = token_from_option!(self.get_next()?).clone();
|
||||||
|
|
||||||
|
if !token_matches!(current_token, TokenType::Symbol(Symbol::Assign)) {
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: current_token,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let assignment_expression = self.parse()?.ok_or(ParseError::UnexpectedEOF)?;
|
||||||
|
|
||||||
|
// make sure the next token is a semi-colon
|
||||||
|
let current_token = token_from_option!(self.get_next()?);
|
||||||
|
if !token_matches!(current_token, TokenType::Symbol(Symbol::Semicolon)) {
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: current_token.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Expression::DeclarationExpression(
|
||||||
|
identifier,
|
||||||
|
Box::new(assignment_expression),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn literal(&mut self) -> Result<Literal, ParseError> {
|
||||||
|
let current_token = token_from_option!(self.current_token);
|
||||||
|
let literal = match current_token.token_type {
|
||||||
|
TokenType::Number(ref num) => Literal::Number(num.clone()),
|
||||||
|
TokenType::String(ref string) => Literal::String(string.clone()),
|
||||||
|
_ => {
|
||||||
|
return Err(ParseError::UnexpectedToken {
|
||||||
|
token: current_token.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(literal)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a function definition into a `FunctionExpression`.
///
/// # Panics
///
/// Always panics: function parsing is not implemented yet.
fn function(&mut self) -> Result<FunctionExpression, ParseError> {
    todo!("Implement function parsing")
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -181,40 +210,20 @@ mod tests {
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_assignment() -> Result<()> {
|
fn test_declarations() -> Result<()> {
|
||||||
let input = r#"
|
let input = r#"
|
||||||
x = 10;
|
let x = 5;
|
||||||
y = "testing";
|
// The below line should fail
|
||||||
|
let y = 234
|
||||||
"#;
|
"#;
|
||||||
let tokenizer = Tokenizer::from(input.to_owned());
|
let tokenizer = Tokenizer::from(input.to_owned());
|
||||||
let mut parser = Parser::new(tokenizer);
|
let mut parser = Parser::new(tokenizer);
|
||||||
|
|
||||||
let expr = parser.parse()?.unwrap();
|
let expression = parser.parse()?.unwrap();
|
||||||
|
|
||||||
assert_eq!("x = 10", format!("{}", expr));
|
assert_eq!("(let x = 5)", expression.to_string());
|
||||||
|
|
||||||
let expr = parser.parse()?.unwrap();
|
assert!(parser.parse().is_err());
|
||||||
|
|
||||||
assert_eq!("y = \"testing\"", format!("{}", expr));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_literal() -> Result<()> {
|
|
||||||
let input = r#"
|
|
||||||
10;
|
|
||||||
"testing";
|
|
||||||
"#;
|
|
||||||
|
|
||||||
let tokenizer = Tokenizer::from(input.to_owned());
|
|
||||||
let mut parser = Parser::new(tokenizer);
|
|
||||||
|
|
||||||
let expr = parser.parse()?.unwrap();
|
|
||||||
assert_eq!("10", format!("{}", expr));
|
|
||||||
|
|
||||||
let expr = parser.parse()?.unwrap();
|
|
||||||
assert_eq!("\"testing\"", format!("{}", expr));
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
use crate::tokenizer::token::{Number, TokenType};
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
use crate::tokenizer::token::Number;
|
||||||
|
|
||||||
#[derive(Debug, Eq, PartialEq)]
|
#[derive(Debug, Eq, PartialEq)]
|
||||||
pub enum Literal {
|
pub enum Literal {
|
||||||
@@ -71,7 +73,30 @@ pub struct AssignmentExpression {
|
|||||||
|
|
||||||
impl std::fmt::Display for AssignmentExpression {
|
impl std::fmt::Display for AssignmentExpression {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
write!(f, "{} = {}", self.identifier, self.expression)
|
write!(f, "({} = {})", self.identifier, self.expression)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
pub struct FunctionExpression {
|
||||||
|
pub name: String,
|
||||||
|
pub arguments: HashSet<String>,
|
||||||
|
pub body: Box<Expression>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for FunctionExpression {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"(fn {}({}) {{ {} }})",
|
||||||
|
self.name,
|
||||||
|
self.arguments
|
||||||
|
.iter()
|
||||||
|
.cloned()
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(", "),
|
||||||
|
self.body
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,6 +107,8 @@ pub enum Expression {
|
|||||||
BinaryExpression(BinaryExpression),
|
BinaryExpression(BinaryExpression),
|
||||||
LogicalExpression(LogicalExpression),
|
LogicalExpression(LogicalExpression),
|
||||||
AssignmentExpression(AssignmentExpression),
|
AssignmentExpression(AssignmentExpression),
|
||||||
|
DeclarationExpression(String, Box<Expression>),
|
||||||
|
FunctionExpression(FunctionExpression),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for Expression {
|
impl std::fmt::Display for Expression {
|
||||||
@@ -92,6 +119,8 @@ impl std::fmt::Display for Expression {
|
|||||||
Expression::BinaryExpression(e) => write!(f, "{}", e),
|
Expression::BinaryExpression(e) => write!(f, "{}", e),
|
||||||
Expression::LogicalExpression(e) => write!(f, "{}", e),
|
Expression::LogicalExpression(e) => write!(f, "{}", e),
|
||||||
Expression::AssignmentExpression(e) => write!(f, "{}", e),
|
Expression::AssignmentExpression(e) => write!(f, "{}", e),
|
||||||
|
Expression::DeclarationExpression(id, e) => write!(f, "(let {} = {})", id, e),
|
||||||
|
Expression::FunctionExpression(e) => write!(f, "{}", e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -520,27 +520,6 @@ mod tests {
|
|||||||
}
|
}
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_tokenizer_buffer_seek_from_current() -> Result<()> {
|
|
||||||
let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
|
|
||||||
let mut buffer = TokenizerBuffer::new(tokenizer);
|
|
||||||
|
|
||||||
let token = buffer.next()?;
|
|
||||||
assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
|
|
||||||
|
|
||||||
buffer.seek(SeekFrom::Current(1))?;
|
|
||||||
|
|
||||||
let token = buffer.next()?;
|
|
||||||
assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
|
|
||||||
|
|
||||||
buffer.seek(SeekFrom::Current(-1))?;
|
|
||||||
|
|
||||||
let token = buffer.next()?;
|
|
||||||
assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_tokenizer_from_path_ok() {
|
fn test_tokenizer_from_path_ok() {
|
||||||
let tokenizer = Tokenizer::from_path(TEST_FILE);
|
let tokenizer = Tokenizer::from_path(TEST_FILE);
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
#[derive(Debug, PartialEq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub struct Token {
|
pub struct Token {
|
||||||
/// The type of the token
|
/// The type of the token
|
||||||
pub token_type: TokenType,
|
pub token_type: TokenType,
|
||||||
|
|||||||
Reference in New Issue
Block a user