Binary expressions are working with PEMDAS

This commit is contained in:
2024-11-22 01:57:50 -07:00
parent bf13987449
commit 5ec0a79e6c
4 changed files with 244 additions and 71 deletions

View File

@@ -17,8 +17,8 @@ pub enum ParseError {
DuplicateIdentifier { token: Token }, DuplicateIdentifier { token: Token },
#[error("Invalid Syntax\n\nLine: {0}, Column: {1}\nReason: {reason}", token.line, token.column)] #[error("Invalid Syntax\n\nLine: {0}, Column: {1}\nReason: {reason}", token.line, token.column)]
InvalidSyntax { token: Token, reason: String }, InvalidSyntax { token: Token, reason: String },
#[error("This keyword is either not supported or not yet implemented\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)] #[error("This keyword is not yet implemented\n\nLine: {0}, Column: {1}\nToken: {2}\n", token.line, token.column, token.token_type)]
UnsupportedKeyword { token: Token}, UnsupportedKeyword { token: Token },
#[error("Unexpected EOF")] #[error("Unexpected EOF")]
UnexpectedEOF, UnexpectedEOF,
} }
@@ -140,12 +140,10 @@ impl Parser {
}; };
} }
let Some(current_token) = self.current_token.as_ref() else { let Some(current_token) = self.current_token.as_ref() else {
return Ok(None); return Ok(None);
}; };
if token_matches!(current_token, TokenType::EOF) { if token_matches!(current_token, TokenType::EOF) {
return Ok(None); return Ok(None);
} }
@@ -153,11 +151,19 @@ impl Parser {
let expr = Some(match current_token.token_type { let expr = Some(match current_token.token_type {
// match unsupported keywords // match unsupported keywords
TokenType::Keyword(e) TokenType::Keyword(e)
if matches_keyword!(e, Keyword::Import, Keyword::Export, Keyword::Enum, Keyword::If, Keyword::Else) => { if matches_keyword!(
e,
Keyword::Import,
Keyword::Export,
Keyword::Enum,
Keyword::If,
Keyword::Else
) =>
{
return Err(ParseError::UnsupportedKeyword { return Err(ParseError::UnsupportedKeyword {
token: current_token.clone(), token: current_token.clone(),
}) })
}, }
// match declarations with a `let` keyword // match declarations with a `let` keyword
TokenType::Keyword(Keyword::Let) => self.declaration()?, TokenType::Keyword(Keyword::Let) => self.declaration()?,
@@ -179,9 +185,7 @@ impl Parser {
TokenType::Symbol(Symbol::LBrace) => Expression::BlockExpression(self.block()?), TokenType::Symbol(Symbol::LBrace) => Expression::BlockExpression(self.block()?),
// match literal expressions with a semi-colon afterwards // match literal expressions with a semi-colon afterwards
TokenType::Number(_) | TokenType::String(_) => { TokenType::Number(_) | TokenType::String(_) => Expression::Literal(self.literal()?),
Expression::Literal(self.literal()?)
}
// match priority expressions with a left parenthesis // match priority expressions with a left parenthesis
TokenType::Symbol(Symbol::LParen) => Expression::PriorityExpression(self.priority()?), TokenType::Symbol(Symbol::LParen) => Expression::PriorityExpression(self.priority()?),
@@ -198,7 +202,7 @@ impl Parser {
}; };
if self_matches_peek!(self, TokenType::Symbol(s) if s.is_operator()) { if self_matches_peek!(self, TokenType::Symbol(s) if s.is_operator()) {
return Ok(Some(self.binary(expr)?)); return Ok(Some(Expression::BinaryExpression(self.binary(expr)?)));
} }
// step 2: check if the next token is an operator and if we should parse a binary expression with the previous expression // step 2: check if the next token is an operator and if we should parse a binary expression with the previous expression
@@ -206,15 +210,58 @@ impl Parser {
Ok(Some(expr)) Ok(Some(expr))
} }
fn binary(&mut self, previous: Expression) -> Result<tree_node::Expression, ParseError> { fn get_binary_child_node(&mut self) -> Result<tree_node::Expression, ParseError> {
let current_token = token_from_option!(self.get_next()?).clone(); let current_token = token_from_option!(self.current_token);
match current_token.token_type {
// A literal number
TokenType::Number(_) => self.literal().map(Expression::Literal),
// A plain variable
TokenType::Identifier(ident)
if !self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) =>
{
Ok(Expression::Variable(ident))
}
// A priority expression ( -> (1 + 2) <- + 3 )
TokenType::Symbol(Symbol::LParen) => {
self.priority().map(Expression::PriorityExpression)
}
// A function invocation
TokenType::Identifier(_)
if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) =>
{
self.invocation().map(Expression::InvocationExpression)
}
_ => Err(ParseError::UnexpectedToken {
token: current_token.clone(),
}),
}
}
/// Handles mathematical expressions in the explicit order of PEMDAS
fn binary(&mut self, previous: Expression) -> Result<BinaryExpression, ParseError> {
// Yields the smaller of two values of the same ordered type.
//
// Delegates to `std::cmp::min` so that each argument expression is evaluated
// exactly once; a hand-expanded `if $a < $b { $a } else { $b }` would run the
// chosen argument a second time, which matters for arguments with side effects.
macro_rules! min {
    ($a:expr, $b:expr) => {
        ::std::cmp::min($a, $b)
    };
}
// We cannot use recursion here, as we need to handle the precedence of the operators
// We need to use a loop to parse the binary expressions.
let mut current_token = token_from_option!(self.get_next()?).clone();
// first, make sure the previous expression supports binary expressions // first, make sure the previous expression supports binary expressions
match previous { match previous {
Expression::BinaryExpression(_) // 1 + 2 + 3 Expression::BinaryExpression(_) // 1 + 2 + 3
| Expression::InvocationExpression(_) // add() + 3 | Expression::InvocationExpression(_) // add() + 3
| Expression::PriorityExpression(_) // (1 + 2) + 3 | Expression::PriorityExpression(_) // (1 + 2) + 3
| Expression::Literal(_) // 1 + 2 | Expression::Literal(Literal::Number(_)) // 1 + 2 (no addition of strings)
| Expression::Variable(_) // x + 2 | Expression::Variable(_) // x + 2
| Expression::Negation(_) // -1 + 2 | Expression::Negation(_) // -1 + 2
=> {} => {}
@@ -226,33 +273,125 @@ impl Parser {
} }
} }
// now check the operator. If we have certain operators, we need to wrap in a priority expression let mut expressions = vec![previous]; // 1, 2, 3
// Example: subtraction and division. Order of operations is important
let operator = extract_token_data!( // operators Vec should be `expressions.len() - 1`
current_token, let mut operators = Vec::<Symbol>::new(); // +, +
TokenType::Symbol(ref s),
s.clone()
);
let expr = match operator { // build the expressions and operators vectors
Symbol::Plus => { while token_matches!(current_token, TokenType::Symbol(s) if s.is_operator()) {
let right = self.expression()?.ok_or(ParseError::UnexpectedEOF)?; println!(
Expression::BinaryExpression(BinaryExpression::Add(Box::new(previous), Box::new(right))) "Looped: expressions len: {}, operators len: {}",
}, expressions.len(),
Symbol::Minus => { operators.len()
let right = self.expression()?.ok_or(ParseError::UnexpectedEOF)?; );
Expression::PriorityExpression(Box::new(Expression::BinaryExpression(BinaryExpression::Subtract(Box::new(previous), Box::new(right))))) // We are guaranteed to have an operator symbol here as we checked in the while loop
}, let operator = extract_token_data!(current_token, TokenType::Symbol(ref s), s.clone());
_ => { operators.push(operator);
return Err(ParseError::InvalidSyntax { self.assign_next()?;
token: current_token.clone(), expressions.push(self.get_binary_child_node()?);
reason: "Invalid operator for binary operation".to_owned(), current_token = token_from_option!(self.get_next()?).clone();
}) }
// validate the vectors and make sure operators.len() == expressions.len() - 1
if operators.len() != expressions.len() - 1 {
return Err(ParseError::InvalidSyntax {
token: current_token.clone(),
reason: "Invalid number of operators".to_owned(),
});
}
// Loop through operators, and build the binary expressions for exponential operators only
for (i, operator) in operators.iter().enumerate() {
if operator == &Symbol::Caret {
let left = expressions.remove(min!(i, expressions.len() - 1));
let right = expressions.remove(min!(i, expressions.len() - 1));
expressions.insert(
min!(i, expressions.len()),
Expression::BinaryExpression(BinaryExpression::Exponent(
Box::new(left),
Box::new(right),
)),
);
} }
}; }
Ok(expr) // remove all the exponential operators from the operators vector
operators.retain(|symbol| symbol != &Symbol::Caret);
// Loop through operators, and build the binary expressions for multiplication and division operators
for (i, operator) in operators.iter().enumerate() {
if operator == &Symbol::Asterisk || operator == &Symbol::Slash {
let left = expressions.remove(min!(i, expressions.len() - 1));
let right = expressions.remove(min!(i, expressions.len() - 1));
match operator {
Symbol::Asterisk => expressions.insert(
min!(i, expressions.len()),
Expression::BinaryExpression(BinaryExpression::Multiply(
Box::new(left),
Box::new(right),
)),
),
Symbol::Slash => expressions.insert(
min!(i, expressions.len()),
Expression::BinaryExpression(BinaryExpression::Divide(
Box::new(left),
Box::new(right),
)),
),
// safety: we have already checked for the operator
_ => unreachable!(),
}
}
}
// remove all the multiplication and division operators from the operators vector
operators.retain(|symbol| symbol != &Symbol::Asterisk && symbol != &Symbol::Slash);
// Loop through operators, and build the binary expressions for addition and subtraction operators
for (i, operator) in operators.iter().enumerate() {
if operator == &Symbol::Plus || operator == &Symbol::Minus {
let left = expressions.remove(i);
let right = expressions.remove(min!(i, expressions.len() - 1));
match operator {
Symbol::Plus => expressions.insert(
min!(i, expressions.len()),
Expression::BinaryExpression(BinaryExpression::Add(
Box::new(left),
Box::new(right),
)),
),
Symbol::Minus => expressions.insert(
min!(i, expressions.len()),
Expression::BinaryExpression(BinaryExpression::Subtract(
Box::new(left),
Box::new(right),
)),
),
// safety: we have already checked for the operator
_ => unreachable!(),
}
}
}
// remove all the addition and subtraction operators from the operators vector
operators.retain(|symbol| symbol != &Symbol::Plus && symbol != &Symbol::Minus);
// Ensure there is only one expression left in the expressions vector, and no operators left
if expressions.len() != 1 || !operators.is_empty() {
return Err(ParseError::InvalidSyntax {
token: current_token.clone(),
reason: "Invalid number of operators".to_owned(),
});
}
// Ensure the last expression is a binary expression
match expressions.pop().unwrap() {
Expression::BinaryExpression(binary) => Ok(binary),
_ => unreachable!(),
}
} }
fn priority(&mut self) -> Result<Box<Expression>, ParseError> { fn priority(&mut self) -> Result<Box<Expression>, ParseError> {
@@ -490,6 +629,32 @@ mod tests {
use super::*; use super::*;
use anyhow::Result; use anyhow::Result;
/// Test helper: wraps the given source text in a `Tokenizer` and hands that
/// tokenizer to `Parser::new`.
///
/// The argument is converted with `.to_owned()` before being passed to
/// `Tokenizer::from`.
macro_rules! parser {
    ($source:expr) => {{
        let tokenizer = Tokenizer::from($source.to_owned());
        Parser::new(tokenizer)
    }};
}
/// Parsing input that starts with `import`, `export`, `enum`, `if`, or `else`
/// must produce an error.
#[test]
fn test_unsupported_keywords() -> Result<()> {
    let sources = ["import x;", "export x;", "enum x;", "if x {}", "else {}"];

    // A fresh parser per snippet, in the same order as the list above.
    for source in sources {
        let mut parser = parser!(source);
        assert!(parser.parse().is_err());
    }

    Ok(())
}
#[test] #[test]
fn test_declarations() -> Result<()> { fn test_declarations() -> Result<()> {
let input = r#" let input = r#"
@@ -583,16 +748,12 @@ mod tests {
#[test] #[test]
fn test_binary() -> Result<()> { fn test_binary() -> Result<()> {
let input = r#" let expr = parser!("1 + 3 ^ 5").parse()?.unwrap();
let x = 1 + 2; assert_eq!("(1 + (3 ^ 5))", expr.to_string());
"#;
let tokenizer = Tokenizer::from(input.to_owned()); let expr = parser!("12 - 1 + 3 * 5").parse()?.unwrap();
let mut parser = Parser::new(tokenizer);
let expression = parser.parse()?.unwrap(); assert_eq!("((12 - 1) + (3 * 5))", expr.to_string());
assert_eq!("(let x = (1 + 2))", expression.to_string());
Ok(()) Ok(())
} }

View File

@@ -21,6 +21,7 @@ pub enum BinaryExpression {
Multiply(Box<Expression>, Box<Expression>), Multiply(Box<Expression>, Box<Expression>),
Divide(Box<Expression>, Box<Expression>), Divide(Box<Expression>, Box<Expression>),
Subtract(Box<Expression>, Box<Expression>), Subtract(Box<Expression>, Box<Expression>),
Exponent(Box<Expression>, Box<Expression>),
} }
impl std::fmt::Display for BinaryExpression { impl std::fmt::Display for BinaryExpression {
@@ -30,6 +31,7 @@ impl std::fmt::Display for BinaryExpression {
BinaryExpression::Multiply(l, r) => write!(f, "({} * {})", l, r), BinaryExpression::Multiply(l, r) => write!(f, "({} * {})", l, r),
BinaryExpression::Divide(l, r) => write!(f, "({} / {})", l, r), BinaryExpression::Divide(l, r) => write!(f, "({} / {})", l, r),
BinaryExpression::Subtract(l, r) => write!(f, "({} - {})", l, r), BinaryExpression::Subtract(l, r) => write!(f, "({} - {})", l, r),
BinaryExpression::Exponent(l, r) => write!(f, "({} ^ {})", l, r),
} }
} }
} }

View File

@@ -1,6 +1,7 @@
pub mod token; pub mod token;
use std::{ use std::{
cmp::Ordering,
collections::VecDeque, collections::VecDeque,
fs::File, fs::File,
io::{BufReader, Cursor, Read, Seek, SeekFrom}, io::{BufReader, Cursor, Read, Seek, SeekFrom},
@@ -210,6 +211,7 @@ impl Tokenizer {
'/' => symbol!(Slash), '/' => symbol!(Slash),
'*' => symbol!(Asterisk), '*' => symbol!(Asterisk),
'.' => symbol!(Dot), '.' => symbol!(Dot),
'^' => symbol!(Caret),
// multi-character symbols // multi-character symbols
'<' if self.peek_next_char()? == Some('=') => { '<' if self.peek_next_char()? == Some('=') => {
@@ -451,37 +453,42 @@ impl TokenizerBuffer {
} }
fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> { fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
use Ordering::*;
// if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer
// if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history) // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history)
// if seek_to == 0 then we don't need to do anything // if seek_to == 0 then we don't need to do anything
if seek_to > 0 { match seek_to.cmp(&0) {
let mut tokens = Vec::with_capacity(seek_to as usize); Greater => {
for _ in 0..seek_to { let mut tokens = Vec::with_capacity(seek_to as usize);
if let Some(token) = self.tokenizer.next_token()? { for _ in 0..seek_to {
tokens.push(token); if let Some(token) = self.tokenizer.next_token()? {
} else { tokens.push(token);
return Err(TokenizerError::IOError(std::io::Error::new( } else {
std::io::ErrorKind::UnexpectedEof, return Err(TokenizerError::IOError(std::io::Error::new(
"Unexpected EOF", std::io::ErrorKind::UnexpectedEof,
))); "Unexpected EOF",
)));
}
} }
self.history.extend(tokens);
} }
self.history.extend(tokens); Less => {
} else if seek_to < 0 { let seek_to = seek_to.unsigned_abs() as usize;
let seek_to = seek_to.unsigned_abs() as usize; let mut tokens = Vec::with_capacity(seek_to);
let mut tokens = Vec::with_capacity(seek_to); for _ in 0..seek_to {
for _ in 0..seek_to { if let Some(token) = self.history.pop_back() {
if let Some(token) = self.history.pop_back() { tokens.push(token);
tokens.push(token); } else {
} else { return Err(TokenizerError::IOError(std::io::Error::new(
return Err(TokenizerError::IOError(std::io::Error::new( std::io::ErrorKind::UnexpectedEof,
std::io::ErrorKind::UnexpectedEof, "Unexpected EOF",
"Unexpected EOF", )));
))); }
} }
self.buffer.extend(tokens.into_iter().rev());
} }
self.buffer.extend(tokens.into_iter().rev()); _ => {}
} }
Ok(()) Ok(())
@@ -667,10 +674,11 @@ This is a skippable line"#,
#[test] #[test]
fn test_symbol_parse() -> Result<()> { fn test_symbol_parse() -> Result<()> {
let mut tokenizer = Tokenizer::from(String::from( let mut tokenizer = Tokenizer::from(String::from(
"! () [] {} , . ; : + - * / < > = != && || >= <=", "^ ! () [] {} , . ; : + - * / < > = != && || >= <=",
)); ));
let expected_tokens = vec![ let expected_tokens = vec![
TokenType::Symbol(Symbol::Caret),
TokenType::Symbol(Symbol::LogicalNot), TokenType::Symbol(Symbol::LogicalNot),
TokenType::Symbol(Symbol::LParen), TokenType::Symbol(Symbol::LParen),
TokenType::Symbol(Symbol::RParen), TokenType::Symbol(Symbol::RParen),

View File

@@ -106,6 +106,8 @@ pub enum Symbol {
LogicalNot, LogicalNot,
/// Represents the `.` symbol /// Represents the `.` symbol
Dot, Dot,
/// Represents the `^` symbol
Caret,
// Double Character Symbols // Double Character Symbols
/// Represents the `==` symbol /// Represents the `==` symbol
@@ -125,7 +127,7 @@ pub enum Symbol {
impl Symbol { impl Symbol {
pub fn is_operator(&self) -> bool { pub fn is_operator(&self) -> bool {
match self { match self {
Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash => true, Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Caret => true,
_ => false, _ => false,
} }
} }