still WIP
@@ -2,11 +2,11 @@ mod tree_node;
 use crate::tokenizer::{
     token::{Symbol, Token, TokenType},
-    Tokenizer, TokenizerError,
+    Tokenizer, TokenizerBuffer, TokenizerError,
 };
 use std::{
     collections::VecDeque,
-    io::{Read, Seek},
+    io::{Read, Seek, SeekFrom},
 };
 use thiserror::Error;
 use tree_node::*;
@@ -24,7 +24,7 @@ pub enum ParseError {
 }
 
 pub struct Parser<R: Read + Seek> {
-    tokenizer: Tokenizer<R>,
+    tokenizer: TokenizerBuffer<R>,
     current_token: Option<Token>,
 }
 
@@ -34,40 +34,64 @@ where
 {
     pub fn new(tokenizer: Tokenizer<R>) -> Self {
         Parser {
-            tokenizer,
+            tokenizer: TokenizerBuffer::new(tokenizer),
             current_token: None,
         }
     }
 
-    pub fn parse(&mut self) -> Result<tree_node::Expression, ParseError> {
-        self.current_token = self.tokenizer.next_token()?;
+    pub fn parse(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
+        self.current_token = self.tokenizer.next()?;
         self.expression()
     }
 
-    fn expression(&mut self) -> Result<tree_node::Expression, ParseError> {
-        let current_token = self
-            .current_token
-            .as_ref()
-            .ok_or(ParseError::UnknownError)?;
+    fn expression(&mut self) -> Result<Option<tree_node::Expression>, ParseError> {
+        /// Helper macro to match the next token in the tokenizer buffer to a pattern
+        /// with an optional if condition. The token is peeked and not consumed.
+        macro_rules! matches_peek {
+            ($pattern:pat) => {
+                matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }))
+            };
+            ($pattern:pat if $cond:expr) => {
+                matches!(self.tokenizer.peek()?, Some(Token { token_type: $pattern, .. }) if $cond)
+            };
+        }
+
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Ok(None);
+        };
 
         Ok(match current_token.token_type {
-            // Match a number or string literal as long as the next token is not an operator
+            // Assignment expression
+            TokenType::Identifier(_) if matches_peek!(TokenType::Symbol(Symbol::Assign)) => {
+                Some(Expression::AssignmentExpression(self.assignment()?))
+            }
+
+            // Negation expression
+            TokenType::Symbol(Symbol::Minus) if matches_peek!(TokenType::Number(_)) => {
+                self.tokenizer.next()?;
+                Some(Expression::Negation(Box::new(
+                    self.parse()?.ok_or(ParseError::UnexpectedEOF)?,
+                )))
+            }
+
+            // Literal expression
             TokenType::Number(_) | TokenType::String(_)
-                if !matches!(
-                    self.tokenizer.peek_next()?, Some(Token { token_type: TokenType::Symbol(e), .. }) if e.is_operator()
+                if !matches_peek!(
+                    TokenType::Symbol(s) if s.is_operator() || s.is_comparison() || s.is_logical()
                 ) =>
             {
-                Expression::Literal(self.literal()?)
+                Some(Expression::Literal(self.literal()?))
             }
 
-            // Match a negation operator
-            TokenType::Symbol(Symbol::Minus) => Expression::Negation(Box::new(self.parse()?)),
-
-            _ if matches!(self.tokenizer.peek_next()?, Some(Token { token_type: TokenType::Symbol(e), .. }) if e.is_operator()) => {
-                Expression::BinaryExpression(self.binary()?)
-            }
-
-            // Something went wrong. Return an error
+            // Logical expression
+            TokenType::Number(_) | TokenType::String(_)
+                if matches_peek!(
+                    TokenType::Symbol(s) if s.is_comparison() || s.is_logical()
+                ) =>
+            {
+                Some(Expression::LogicalExpression(self.logical()?))
+            }
+
             _ => {
                 return Err(ParseError::UnexpectedToken {
                     token: current_token.clone(),
@@ -76,56 +100,61 @@ where
         })
     }
 
+    fn assignment(&mut self) -> Result<tree_node::AssignmentExpression, ParseError> {
+        let Some(Token {
+            token_type: TokenType::Identifier(identifier),
+            ..
+        }) = self.current_token.as_ref()
+        else {
+            return Err(ParseError::UnexpectedToken {
+                // Safety: We have already checked that `self.current_token` is `Some` in the `parse()` function
+                token: self.current_token.clone().unwrap(),
+            });
+        };
+
+        // make sure the next token is `=` for sanity
+        if let Some(Token {
+            token_type: TokenType::Symbol(Symbol::Assign),
+            ..
+        }) = self.tokenizer.next()?
+        {
+        } else {
+            self.tokenizer.seek(SeekFrom::Current(-1))?;
+            return Err(ParseError::UnexpectedToken {
+                token: self.tokenizer.next()?.unwrap(),
+            });
+        };
+
+        Ok(AssignmentExpression {
+            identifier: identifier.clone(),
+            expression: Box::new(self.parse()?.ok_or(ParseError::UnexpectedEOF)?),
+        })
+    }
+
+    fn logical(&mut self) -> Result<tree_node::LogicalExpression, ParseError> {
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
+        };
+
+        todo!()
+    }
+
     fn binary(&mut self) -> Result<tree_node::BinaryExpression, ParseError> {
-        let literal = self.literal()?;
-
-        let Some(Token {
-            token_type: TokenType::Symbol(operator),
-            ..
-        }) = self.current_token
-        else {
-            return Err(ParseError::UnknownError);
-        };
-
-        self.current_token = self.tokenizer.next_token()?;
-
-        Ok(match operator {
-            Symbol::Plus => BinaryExpression::Add(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Asterisk => BinaryExpression::Multiply(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Slash => BinaryExpression::Divide(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            Symbol::Minus => BinaryExpression::Subtract(
-                Box::new(Expression::Literal(literal)),
-                Box::new(self.expression()?),
-            ),
-            _ => {
-                return Err(ParseError::UnexpectedToken {
-                    token: Token {
-                        token_type: TokenType::Symbol(operator),
-                        line: 0,
-                        column: 0,
-                    },
-                })
-            }
-        })
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
+        };
+
+        todo!()
     }
 
     fn literal(&mut self) -> Result<tree_node::Literal, ParseError> {
-        let current_token = self
-            .current_token
-            .as_ref()
-            .ok_or(ParseError::UnknownError)?;
+        let Some(current_token) = self.current_token.as_ref() else {
+            return Err(ParseError::UnexpectedEOF);
+        };
 
         let to_return = match current_token.token_type {
-            TokenType::Number(ref number) => tree_node::Literal::Number(number.clone()),
-            TokenType::String(ref string) => tree_node::Literal::String(string.clone()),
+            TokenType::Number(n) => Literal::Number(n),
+            TokenType::String(ref s) => Literal::String(s.clone()),
            _ => {
                 return Err(ParseError::UnexpectedToken {
                     token: current_token.clone(),
@@ -133,78 +162,59 @@ where
             }
         };
 
-        self.current_token = self.tokenizer.next_token()?;
+        // Advance the tokenizer if the next token is a semicolon
+        if let Some(Token {
+            token_type: TokenType::Symbol(Symbol::Semicolon),
+            ..
+        }) = self.tokenizer.peek()?
+        {
+            self.tokenizer.next()?;
+        }
 
         Ok(to_return)
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::tree_node::*;
     use super::*;
     use anyhow::Result;
 
     #[test]
-    fn test_add_expr() -> Result<()> {
-        let input = "123 + 456";
-
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-
-        assert_eq!(formatted_output, "(123 + 456)");
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse_number() -> Result<()> {
-        let input = "123";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-
-        assert_eq!(formatted_output, "123");
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse_negation() -> Result<()> {
-        let input = "-123";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-
-        assert_eq!(formatted_output, "(-123)");
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_order_of_operations() -> Result<()> {
-        let input = "123 - 456 + 789";
-
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-        println!("{}", formatted_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_chained_operators() -> Result<()> {
-        let input = "123 + 456 * 789";
-        let mut parser = Parser::new(Tokenizer::from(input.to_owned()));
-        let result = parser.parse()?;
-
-        let formatted_output = format!("{}", result);
-
-        assert_eq!(formatted_output, "(123 + (456 * 789))");
-
-        Ok(())
-    }
+    fn test_assignment() -> Result<()> {
+        let input = r#"
+            x = 10;
+            y = "testing";
+        "#;
+        let tokenizer = Tokenizer::from(input.to_owned());
+        let mut parser = Parser::new(tokenizer);
+
+        let expr = parser.parse()?.unwrap();
+
+        assert_eq!("x = 10", format!("{}", expr));
+
+        let expr = parser.parse()?.unwrap();
+
+        assert_eq!("y = \"testing\"", format!("{}", expr));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_literal() -> Result<()> {
+        let input = r#"
+            10;
+            "testing";
+        "#;
+
+        let tokenizer = Tokenizer::from(input.to_owned());
+        let mut parser = Parser::new(tokenizer);
+
+        let expr = parser.parse()?.unwrap();
+        assert_eq!("10", format!("{}", expr));
+
+        let expr = parser.parse()?.unwrap();
+        assert_eq!("\"testing\"", format!("{}", expr));
+
+        Ok(())
+    }
 }
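With this change, `parse` yields `Ok(None)` at end of input instead of an error, so callers can pull expressions in a loop. A minimal sketch of the intended call pattern, assuming the `Parser`, `Tokenizer`, `Expression`, and `ParseError` types from this commit are in scope (`parse_all` itself is a hypothetical helper, not part of the commit):

    // Sketch only: drains the parser until `parse` returns Ok(None) at end of input.
    fn parse_all(source: &str) -> Result<Vec<Expression>, ParseError> {
        let mut parser = Parser::new(Tokenizer::from(source.to_owned()));
        let mut expressions = Vec::new();
        // Ok(None) now means "no more input", distinct from a real parse failure.
        while let Some(expr) = parser.parse()? {
            expressions.push(expr);
        }
        Ok(expressions)
    }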
@@ -1,4 +1,4 @@
-use crate::tokenizer::token::Number;
+use crate::tokenizer::token::{Number, TokenType};
 
 #[derive(Debug, Eq, PartialEq)]
 pub enum Literal {
@@ -10,7 +10,7 @@ impl std::fmt::Display for Literal {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Literal::Number(n) => write!(f, "{}", n),
-            Literal::String(s) => write!(f, "{}", s),
+            Literal::String(s) => write!(f, "\"{}\"", s),
         }
     }
 }
@@ -63,12 +63,25 @@ impl std::fmt::Display for LogicalExpression {
     }
 }
 
+#[derive(Debug, PartialEq, Eq)]
+pub struct AssignmentExpression {
+    pub identifier: String,
+    pub expression: Box<Expression>,
+}
+
+impl std::fmt::Display for AssignmentExpression {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} = {}", self.identifier, self.expression)
+    }
+}
+
 #[derive(Debug, PartialEq, Eq)]
 pub enum Expression {
     Literal(Literal),
     Negation(Box<Expression>),
     BinaryExpression(BinaryExpression),
     LogicalExpression(LogicalExpression),
+    AssignmentExpression(AssignmentExpression),
 }
 
 impl std::fmt::Display for Expression {
@@ -78,6 +91,7 @@ impl std::fmt::Display for Expression {
             Expression::Negation(e) => write!(f, "(-{})", e),
             Expression::BinaryExpression(e) => write!(f, "{}", e),
             Expression::LogicalExpression(e) => write!(f, "{}", e),
+            Expression::AssignmentExpression(e) => write!(f, "{}", e),
         }
     }
 }
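These `Display` changes are what the updated parser tests assert against: string literals are requoted on output, and assignments print as `identifier = expression`. A small hand-built illustration, assuming the tree_node types from this diff:

    // Illustration only: exercises the Display impls from this diff by hand.
    let assign = AssignmentExpression {
        identifier: "y".to_owned(),
        expression: Box::new(Expression::Literal(Literal::String("testing".to_owned()))),
    };
    // Literal::String now prints with surrounding quotes, so this passes.
    assert_eq!("y = \"testing\"", format!("{}", Expression::AssignmentExpression(assign)));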
@@ -459,13 +459,6 @@ where
         Ok(token)
     }
 
-    fn seek_from_start(&mut self, pos: usize) -> Result<(), TokenizerError> {
-        // if pos
-
-        Ok(())
-    }
-
     fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
         // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer
         // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history)
@@ -506,9 +499,9 @@ where
     /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream
     pub fn seek(&mut self, from: SeekFrom) -> Result<(), TokenizerError> {
         Ok(match from {
-            SeekFrom::Start(pos) => self.seek_from_start(pos as usize)?,
             SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?,
             SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"),
+            SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"),
         })
     }
 }
@@ -528,30 +521,22 @@ mod tests {
     "#;
 
     #[test]
-    fn test_tokenizer_buffer_seek_from_start() -> Result<()> {
+    fn test_tokenizer_buffer_seek_from_current() -> Result<()> {
         let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
         let mut buffer = TokenizerBuffer::new(tokenizer);
 
         let token = buffer.next()?;
         assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
 
-        let token = buffer.next()?;
-        assert_eq!(
-            token.unwrap().token_type,
-            TokenType::Identifier(String::from("test"))
-        );
-
-        buffer.seek(SeekFrom::Start(0))?;
-
-        let token = buffer.next()?;
-
-        assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Fn));
-
-        buffer.seek(SeekFrom::Start(16))?;
-
-        let token = buffer.next()?;
-
-        assert_eq!(token.unwrap().token_type, TokenType::Keyword(Keyword::Let));
+        buffer.seek(SeekFrom::Current(1))?;
+
+        let token = buffer.next()?;
+        assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
+
+        buffer.seek(SeekFrom::Current(-1))?;
+
+        let token = buffer.next()?;
+        assert_eq!(token.unwrap().token_type, TokenType::Symbol(Symbol::LParen));
 
         Ok(())
     }
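The seek comments above describe a two-stack scheme: consumed tokens are pushed onto a history stack, and seeking backwards moves them to the front of the pending buffer. A simplified model of that idea, with assumed shapes (the real `TokenizerBuffer` also pulls tokens lazily from the `Tokenizer` when seeking forward):

    use std::collections::VecDeque;

    // Simplified model of relative seeking over an already-tokenized stream.
    struct BufferModel<T> {
        history: Vec<T>,      // tokens already consumed; SeekFrom::Current(-n) pops from here
        pending: VecDeque<T>, // tokens not yet consumed; next() pops from the front
    }

    impl<T: Clone> BufferModel<T> {
        fn next(&mut self) -> Option<T> {
            let token = self.pending.pop_front()?;
            self.history.push(token.clone());
            Some(token)
        }

        // Move backwards: re-queue the most recently consumed tokens.
        fn seek_back(&mut self, n: usize) -> Result<(), &'static str> {
            for _ in 0..n {
                let token = self.history.pop().ok_or("reached the front of the history")?;
                self.pending.push_front(token);
            }
            Ok(())
        }
    }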