This commit is contained in:
2024-11-20 01:16:46 -07:00
parent 66064a21d7
commit 7cff659275
4 changed files with 236 additions and 1 deletions

View File

@@ -1,3 +1,4 @@
mod parser;
mod tokenizer;
use clap::Parser;

136
src/parser/mod.rs Normal file
View File

@@ -0,0 +1,136 @@
use thiserror::Error;
use crate::tokenizer::{
token::{Keyword, Number, Symbol, Token, TokenType},
Tokenizer, TokenizerError,
};
use std::io::{Read, Seek};
/// Errors that can be produced while turning tokens into expressions.
#[derive(Debug, Error)]
pub enum ParseError {
/// A failure reported by the underlying tokenizer; the message is the
/// tokenizer error's own `Display` output. Created automatically by `?`
/// through the `#[from]` conversion.
#[error("{0}")]
TokenizerError(#[from] TokenizerError),
/// The input ended at a point where more tokens were required. The carried
/// token supplies the line and column shown in the message.
#[error("Unexpected EOF\n\nLine: {0}, Column: {1}", token.line, token.column)]
UnexpectedEOF { token: Token },
/// A token was read that the parser cannot handle at that position. The
/// message reports the token's line, column and its `Display` form.
#[error("Unexpected token\n\nLine: {0}, Column: {1}\nToken: {2}", token.line, token.column, token.token_type)]
UnexpectedToken { token: Token },
/// Fallback used when no more specific error applies, e.g. the token stream
/// ran out where an expression or identifier was expected.
#[error("An unknown error has occurred")]
UnknownError,
}
/// A literal value appearing directly in the source.
#[derive(Debug)]
enum Literal {
/// A numeric literal, stored as the tokenizer's `Number`.
Number(Number),
/// A string literal.
String(String),
/// A boolean literal.
Boolean(bool),
}
/// Newtype around the name carried by a `TokenType::Identifier` token.
#[derive(Debug)]
struct Identifier(String);
/// A node of the expression tree produced by the parser.
///
/// Child expressions are boxed because the type is recursive.
#[derive(Debug)]
pub enum Expression {
/// Introduces `identifier` and binds it to `value`.
Declaration {
identifier: Identifier,
value: Box<Expression>,
},
/// Stores `value` into `identifier`.
Assignment {
identifier: Identifier,
value: Box<Expression>,
},
/// A binary operation: `left operator right`.
Binary {
left: Box<Expression>,
operator: Symbol,
right: Box<Expression>,
},
/// A literal value.
Literal(Literal),
}
/// Builds `Expression`s from the tokens produced by a `Tokenizer`.
///
/// `T` is the tokenizer's input source; it must be both readable and seekable.
pub struct Parser<T>
where
T: Read + Seek,
{
// Source of tokens; the parser owns it.
tokenizer: Tokenizer<T>,
}
impl<T> Parser<T>
where
    T: Read + Seek,
{
    /// Creates a parser that pulls its tokens from `tokenizer`.
    pub fn new(tokenizer: Tokenizer<T>) -> Self {
        Self { tokenizer }
    }

    /// Parses the next expression from the token stream.
    ///
    /// Only numeric expressions are handled so far: a number on its own, or
    /// `<number> <operator> <expression>` where the operator satisfies
    /// `Symbol::is_operator`. The right-hand side is parsed recursively, so
    /// chains nest to the right and no operator precedence is applied.
    ///
    /// A number followed by anything that is not an operator (another token,
    /// a non-operator symbol, or nothing) is returned as a literal; the
    /// following token is only peeked and is left in the stream.
    ///
    /// # Errors
    ///
    /// * `ParseError::TokenizerError` if the tokenizer fails.
    /// * `ParseError::UnexpectedToken` if the next token is not a number.
    /// * `ParseError::UnknownError` if the tokenizer has no more tokens, or an
    ///   operator is not followed by an expression.
    pub fn parse(&mut self) -> Result<Option<Expression>, ParseError> {
        let token = match self.tokenizer.next_token()? {
            Some(token) => token,
            // NOTE(review): the `Option` in the return type suggests `Ok(None)`
            // may be the intended end-of-input result; the existing behaviour
            // is kept here so callers see no change.
            None => return Err(ParseError::UnknownError),
        };

        match token.token_type {
            TokenType::Number(n) => {
                let literal = Expression::Literal(Literal::Number(n));

                // Only an operator turns the number into the left operand of a
                // binary expression. Everything else ends the expression here,
                // so the literal is returned instead of being dropped.
                let operator = match self.tokenizer.peek_next()? {
                    Some(Token {
                        token_type: TokenType::Symbol(s),
                        ..
                    }) if s.is_operator() => s,
                    _ => return Ok(Some(literal)),
                };

                // The operator was only peeked; consume it before parsing the
                // right-hand side.
                self.tokenizer.next_token()?;
                let right = self.parse()?.ok_or(ParseError::UnknownError)?;

                Ok(Some(Expression::Binary {
                    left: Box::new(literal),
                    operator,
                    right: Box::new(right),
                }))
            }
            _ => Err(ParseError::UnexpectedToken { token }),
        }
    }

    /// Parses the remainder of a declaration: an identifier token followed
    /// directly by the expression bound to it.
    ///
    /// No token between the identifier and the value is consumed here.
    ///
    /// # Errors
    ///
    /// * `ParseError::UnexpectedToken` if the next token is not an identifier.
    /// * `ParseError::UnknownError` if the token stream has ended or no value
    ///   expression follows.
    /// * Any error produced by the tokenizer or by [`Parser::parse`].
    fn parse_declaration(&mut self) -> Result<Expression, ParseError> {
        let identifier = match self.tokenizer.next_token()? {
            Some(token) => match token.token_type {
                TokenType::Identifier(name) => Identifier(name),
                _ => return Err(ParseError::UnexpectedToken { token }),
            },
            None => return Err(ParseError::UnknownError),
        };

        let value = self.parse()?.ok_or(ParseError::UnknownError)?;

        Ok(Expression::Declaration {
            identifier,
            value: Box::new(value),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;

    /// Smoke test: parses two expressions from a two-line source and prints
    /// each result. It only fails if `parse` returns an error.
    #[test]
    fn test_parser() -> Result<()> {
        let source = r#"
5.3245 + 5
45 - 2
"#;

        let mut parser = Parser::new(Tokenizer::from(source.to_owned()));

        // One `parse` call per expression in the source.
        for _ in 0..2 {
            let parsed = parser.parse()?;
            println!("{:?}", parsed);
        }

        Ok(())
    }
}

View File

@@ -1,4 +1,4 @@
mod token;
pub mod token;
use std::{
fs::File,
@@ -167,6 +167,18 @@ where
}
}
/// Returns the next token without consuming it.
///
/// The reader position, line and column are recorded, the token is read with
/// `next_token`, and the recorded state is then put back so that the following
/// `next_token` call yields the same token again.
///
/// The state is restored even when reading the token fails, so a failed peek
/// does not leave the tokenizer part-way through the input.
///
/// # Errors
///
/// Returns the error from `next_token`, or an I/O error if querying or
/// restoring the reader position fails (a failed restore takes precedence).
pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
    let start_pos = self.reader.stream_position()?;
    let saved_column = self.column.clone();
    let saved_line = self.line.clone();

    // Deliberately no `?` here: the rewind below must run on failure too.
    let peeked = self.next_token();

    self.reader.seek(SeekFrom::Start(start_pos))?;
    self.column = saved_column;
    self.line = saved_line;

    peeked
}
/// Tokenizes a symbol
fn tokenize_symbol(&mut self, first_symbol: char) -> Result<Token, TokenizerError> {
/// Helper macro to create a symbol token
@@ -668,4 +680,32 @@ This is a skippable line"#,
Ok(())
}
/// Peeking must return the first token without moving the tokenizer's
/// line/column, and the following `next_token` must return that same token
/// while advancing both.
#[test]
fn test_peek_next() -> Result<()> {
    let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
    let start_column = tokenizer.column.clone();
    let start_line = tokenizer.line.clone();

    // Peek: token is visible, position is untouched.
    let peeked = tokenizer.peek_next()?.unwrap();
    assert_eq!(peeked.token_type, TokenType::Keyword(Keyword::Fn));
    assert_eq!(tokenizer.column, start_column);
    assert_eq!(tokenizer.line, start_line);

    // Consume: same token, position has moved on.
    let consumed = tokenizer.next_token()?.unwrap();
    assert_eq!(consumed.token_type, TokenType::Keyword(Keyword::Fn));
    assert_ne!(tokenizer.column, start_column);
    assert_ne!(tokenizer.line, start_line);

    Ok(())
}
}

View File

@@ -36,6 +36,20 @@ pub enum TokenType {
EOF,
}
impl std::fmt::Display for TokenType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TokenType::String(s) => write!(f, "{}", s),
TokenType::Number(n) => write!(f, "{}", n),
TokenType::Boolean(b) => write!(f, "{}", b),
TokenType::Keyword(k) => write!(f, "{:?}", k),
TokenType::Identifier(i) => write!(f, "{}", i),
TokenType::Symbol(s) => write!(f, "{:?}", s),
TokenType::EOF => write!(f, "EOF"),
}
}
}
#[derive(Debug, PartialEq, Hash, Eq)]
pub enum Number {
/// Represents an integer number
@@ -44,6 +58,15 @@ pub enum Number {
Decimal(u64, u64),
}
impl std::fmt::Display for Number {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Number::Integer(i) => write!(f, "{}", i),
Number::Decimal(i, d) => write!(f, "{}.{}", i, d),
}
}
}
#[derive(Debug, PartialEq, Hash, Eq)]
pub enum Symbol {
// Single Character Symbols
@@ -99,6 +122,41 @@ pub enum Symbol {
GreaterThanOrEqual,
}
impl Symbol {
    /// Returns `true` for the arithmetic operators `Plus`, `Minus`,
    /// `Asterisk` and `Slash`.
    pub fn is_operator(&self) -> bool {
        matches!(
            self,
            Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash
        )
    }

    /// Returns `true` for the comparison symbols `LessThan`, `GreaterThan`,
    /// `Equal`, `NotEqual`, `LessThanOrEqual` and `GreaterThanOrEqual`.
    pub fn is_comparison(&self) -> bool {
        matches!(
            self,
            Symbol::LessThan
                | Symbol::GreaterThan
                | Symbol::Equal
                | Symbol::NotEqual
                | Symbol::LessThanOrEqual
                | Symbol::GreaterThanOrEqual
        )
    }

    /// Returns `true` for the logical symbols `LogicalAnd` and `LogicalOr`.
    pub fn is_logical(&self) -> bool {
        matches!(self, Symbol::LogicalAnd | Symbol::LogicalOr)
    }

    /// Returns `true` only for `Assign`.
    pub fn is_assignment(&self) -> bool {
        matches!(self, Symbol::Assign)
    }
}
#[derive(Debug, PartialEq, Hash, Eq)]
pub enum Keyword {
/// Represents the `let` keyword