diff --git a/src/main.rs b/src/main.rs
index fa831b9..d384416 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+mod parser;
 mod tokenizer;
 
 use clap::Parser;
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
new file mode 100644
index 0000000..34dd372
--- /dev/null
+++ b/src/parser/mod.rs
@@ -0,0 +1,166 @@
+use thiserror::Error;
+
+use crate::tokenizer::{
+    token::{Keyword, Number, Symbol, Token, TokenType},
+    Tokenizer, TokenizerError,
+};
+use std::io::{Read, Seek};
+
+/// Errors produced by [`Parser`].
+#[derive(Debug, Error)]
+pub enum ParseError {
+    #[error("{0}")]
+    TokenizerError(#[from] TokenizerError),
+    #[error("Unexpected EOF\n\nLine: {0}, Column: {1}", token.line, token.column)]
+    UnexpectedEOF { token: Token },
+    #[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}", token.line, token.column, token.token_type)]
+    UnexpectedToken { token: Token },
+    #[error("An unknown error has occurred")]
+    UnknownError,
+}
+
+/// A literal value.
+///
+/// Public because it is used in the fields of the public [`Expression`].
+#[derive(Debug)]
+pub enum Literal {
+    Number(Number),
+    String(String),
+    Boolean(bool),
+}
+
+/// The name carried by an identifier token.
+///
+/// Public because it is used in the fields of the public [`Expression`].
+#[derive(Debug)]
+pub struct Identifier(String);
+
+/// A node of the tree returned by [`Parser::parse`].
+#[derive(Debug)]
+pub enum Expression {
+    Declaration {
+        identifier: Identifier,
+        value: Box<Expression>,
+    },
+    Assignment {
+        identifier: Identifier,
+        value: Box<Expression>,
+    },
+    /// `left operator right`, for example `1 + 2`.
+    Binary {
+        left: Box<Expression>,
+        operator: Symbol,
+        right: Box<Expression>,
+    },
+    Literal(Literal),
+}
+
+/// Turns the tokens of a [`Tokenizer`] into [`Expression`]s.
+pub struct Parser<T>
+where
+    T: Read + Seek,
+{
+    tokenizer: Tokenizer<T>,
+}
+
+impl<T> Parser<T>
+where
+    T: Read + Seek,
+{
+    /// Creates a parser that takes its tokens from `tokenizer`.
+    pub fn new(tokenizer: Tokenizer<T>) -> Self {
+        Self { tokenizer }
+    }
+
+    /// Parses one expression from the token stream.
+    ///
+    /// Only a number literal, optionally followed by an operator and a further
+    /// expression, is recognised. The right-hand side is parsed recursively, so
+    /// `1 - 2 - 3` is grouped as `1 - (2 - 3)`.
+    ///
+    /// # Errors
+    ///
+    /// * [`ParseError::UnexpectedToken`] if the first token is not a number.
+    /// * [`ParseError::UnknownError`] if the tokenizer has no token left where
+    ///   an expression has to start.
+    /// * [`ParseError::TokenizerError`] if the tokenizer fails.
+    pub fn parse(&mut self) -> Result<Option<Expression>, ParseError> {
+        let token = match self.tokenizer.next_token()? {
+            Some(token) => token,
+            None => return Err(ParseError::UnknownError),
+        };
+
+        let left = match token.token_type {
+            TokenType::Number(n) => Expression::Literal(Literal::Number(n)),
+            _ => return Err(ParseError::UnexpectedToken { token }),
+        };
+
+        // Only an operator continues the expression. Anything else is left
+        // unconsumed and the number is returned on its own rather than dropped.
+        let operator = match self.tokenizer.peek_next()? {
+            Some(Token {
+                token_type: TokenType::Symbol(s),
+                ..
+            }) if s.is_operator() => s,
+            _ => return Ok(Some(left)),
+        };
+        // Consume the operator that was only peeked at above.
+        self.tokenizer.next_token()?;
+
+        let right = self.parse()?.ok_or(ParseError::UnknownError)?;
+        Ok(Some(Expression::Binary {
+            left: Box::new(left),
+            operator,
+            right: Box::new(right),
+        }))
+    }
+
+    /// Parses an identifier followed by an expression into a declaration.
+    fn parse_declaration(&mut self) -> Result<Expression, ParseError> {
+        let identifier = match self.tokenizer.next_token()? {
+            Some(token) => match token.token_type {
+                TokenType::Identifier(i) => Identifier(i),
+                _ => return Err(ParseError::UnexpectedToken { token }),
+            },
+            None => return Err(ParseError::UnknownError),
+        };
+
+        Ok(Expression::Declaration {
+            identifier,
+            value: Box::new(self.parse()?.ok_or(ParseError::UnknownError)?),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use anyhow::Result;
+
+    #[test]
+    fn test_parser() -> Result<()> {
+        let input = r#"
+        5.3245 + 5
+
+
+
+
+        45 - 2
+        "#;
+
+        let tokenizer = Tokenizer::from(input.to_owned());
+        let mut parser = Parser::new(tokenizer);
+
+        let expr = parser.parse()?;
+
+        println!("{:?}", expr);
+        assert!(matches!(expr, Some(Expression::Binary { .. })));
+
+        let expr = parser.parse()?;
+
+        println!("{:?}", expr);
+        assert!(matches!(expr, Some(Expression::Binary { .. })));
+
+        Ok(())
+    }
+}
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index c913933..ed6ef1c 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -1,4 +1,4 @@
-mod token;
+pub mod token;
 
 use std::{
     fs::File,
@@ -167,6 +167,24 @@ where
         }
     }
 
+    /// Returns the next token without consuming it.
+    ///
+    /// The reader position and the line/column counters are saved before the
+    /// token is read and restored afterwards, also when reading it fails.
+    pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
+        let current_pos = self.reader.stream_position()?;
+        let column = self.column.clone();
+        let line = self.line.clone();
+
+        // Hold on to the result until the saved state is back in place;
+        // propagating an error right away would skip the restore below.
+        let token = self.next_token();
+        self.reader.seek(SeekFrom::Start(current_pos))?;
+        self.column = column;
+        self.line = line;
+        token
+    }
+
     /// Tokenizes a symbol
     fn tokenize_symbol(&mut self, first_symbol: char) -> Result<Token, TokenizerError> {
         /// Helper macro to create a symbol token
@@ -668,4 +686,32 @@ This is a skippable line"#,
 
         Ok(())
     }
+
+    #[test]
+    fn test_peek_next() -> Result<()> {
+        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
+
+        let column = tokenizer.column.clone();
+        let line = tokenizer.line.clone();
+
+        let peeked_token = tokenizer.peek_next()?;
+
+        assert_eq!(
+            peeked_token.unwrap().token_type,
+            TokenType::Keyword(Keyword::Fn)
+        );
+        assert_eq!(tokenizer.column, column);
+        assert_eq!(tokenizer.line, line);
+
+        let next_token = tokenizer.next_token()?;
+
+        assert_eq!(
+            next_token.unwrap().token_type,
+            TokenType::Keyword(Keyword::Fn)
+        );
+        assert_ne!(tokenizer.column, column);
+        assert_ne!(tokenizer.line, line);
+
+        Ok(())
+    }
 }
diff --git a/src/tokenizer/token.rs b/src/tokenizer/token.rs
index e7fe73c..58db8db 100644
--- a/src/tokenizer/token.rs
+++ b/src/tokenizer/token.rs
@@ -36,6 +36,21 @@ pub enum TokenType {
     EOF,
 }
 
+/// Writes the token's payload; keywords and symbols use their `Debug` form.
+impl std::fmt::Display for TokenType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            TokenType::String(s) => write!(f, "{}", s),
+            TokenType::Number(n) => write!(f, "{}", n),
+            TokenType::Boolean(b) => write!(f, "{}", b),
+            TokenType::Keyword(k) => write!(f, "{:?}", k),
+            TokenType::Identifier(i) => write!(f, "{}", i),
+            TokenType::Symbol(s) => write!(f, "{:?}", s),
+            TokenType::EOF => write!(f, "EOF"),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq, Hash, Eq)]
 pub enum Number {
     /// Represents an integer number
@@ -44,6 +59,16 @@ pub enum Number {
     Decimal(u64, u64),
 }
 
+/// Writes an integer as-is and a decimal as its two parts joined by `.`.
+impl std::fmt::Display for Number {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Number::Integer(i) => write!(f, "{}", i),
+            Number::Decimal(i, d) => write!(f, "{}.{}", i, d),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq, Hash, Eq)]
 pub enum Symbol {
     // Single Character Symbols
@@ -99,6 +124,37 @@ pub enum Symbol {
     GreaterThanOrEqual,
 }
 
+impl Symbol {
+    /// Returns `true` for `Plus`, `Minus`, `Asterisk` and `Slash`.
+    pub fn is_operator(&self) -> bool {
+        matches!(self, Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash)
+    }
+
+    /// Returns `true` for `LessThan`, `GreaterThan`, `Equal`, `NotEqual`,
+    /// `LessThanOrEqual` and `GreaterThanOrEqual`.
+    pub fn is_comparison(&self) -> bool {
+        matches!(
+            self,
+            Symbol::LessThan
+                | Symbol::GreaterThan
+                | Symbol::Equal
+                | Symbol::NotEqual
+                | Symbol::LessThanOrEqual
+                | Symbol::GreaterThanOrEqual
+        )
+    }
+
+    /// Returns `true` for `LogicalAnd` and `LogicalOr`.
+    pub fn is_logical(&self) -> bool {
+        matches!(self, Symbol::LogicalAnd | Symbol::LogicalOr)
+    }
+
+    /// Returns `true` for `Assign`.
+    pub fn is_assignment(&self) -> bool {
+        matches!(self, Symbol::Assign)
+    }
+}
+
 #[derive(Debug, PartialEq, Hash, Eq)]
 pub enum Keyword {
     /// Represents the `let` keyword