tokenizer
.gitignore (vendored, new file, 1 line)
@@ -0,0 +1 @@
/target
Cargo.lock (generated, new file, 265 lines)
@@ -0,0 +1,265 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "anstream"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
dependencies = [
 "anstyle",
 "anstyle-parse",
 "anstyle-query",
 "anstyle-wincon",
 "colorchoice",
 "is_terminal_polyfill",
 "utf8parse",
]

[[package]]
name = "anstyle"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"

[[package]]
name = "anstyle-parse"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
dependencies = [
 "utf8parse",
]

[[package]]
name = "anstyle-query"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
 "windows-sys",
]

[[package]]
name = "anstyle-wincon"
version = "3.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125"
dependencies = [
 "anstyle",
 "windows-sys",
]

[[package]]
name = "anyhow"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775"

[[package]]
name = "clap"
version = "4.5.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f"
dependencies = [
 "clap_builder",
 "clap_derive",
]

[[package]]
name = "clap_builder"
version = "4.5.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec"
dependencies = [
 "anstream",
 "anstyle",
 "clap_lex",
 "strsim",
]

[[package]]
name = "clap_derive"
version = "4.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
dependencies = [
 "heck",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "clap_lex"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7"

[[package]]
name = "colorchoice"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"

[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"

[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"

[[package]]
name = "proc-macro2"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "quote"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "stationlang"
version = "0.1.0"
dependencies = [
 "anyhow",
 "clap",
 "thiserror",
]

[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"

[[package]]
name = "syn"
version = "2.0.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "thiserror"
version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "unicode-ident"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"

[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"

[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
 "windows-targets",
]

[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
 "windows_aarch64_gnullvm",
 "windows_aarch64_msvc",
 "windows_i686_gnu",
 "windows_i686_gnullvm",
 "windows_i686_msvc",
 "windows_x86_64_gnu",
 "windows_x86_64_gnullvm",
 "windows_x86_64_msvc",
]

[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"

[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"

[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"

[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"

[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"

[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"

[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"

[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
Cargo.toml (new file, 14 lines)
@@ -0,0 +1,14 @@
[package]
name = "stationlang"
version = "0.1.0"
edition = "2021"

[profile.dev]
panic = "unwind"

[dependencies]
clap = { version = "^4.5", features = ["derive"] }
thiserror = { version = "^2.0" }

[dev-dependencies]
anyhow = { version = "^1.0" }
src/main.rs (new file, 43 lines)
@@ -0,0 +1,43 @@
mod tokenizer;

use clap::Parser;
use tokenizer::{Tokenizer, TokenizerError};

#[derive(Debug, thiserror::Error)]
enum StationlangError {
    #[error("{0}")]
    TokenizerError(#[from] TokenizerError),
}

#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
    /// What file should be compiled
    #[arg(short, long)]
    input_file: String,
    /// The default stack size for the program
    #[arg(short, long, default_value_t = 512)]
    stack_size: usize,
    /// The output file for the compiled program. If not set, output will go to stdout
    #[arg(short, long)]
    output_file: Option<String>,
}
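
// NOTE: `stack_size` and `output_file` are parsed but not consumed anywhere yet;
// this commit only wires the tokenizer up and prints the tokens.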

fn run_logic() -> Result<(), StationlangError> {
    let args = Args::parse();
    let input_file = args.input_file;

    let mut tokenizer = Tokenizer::from_path(&input_file)?;

    while let Some(token) = tokenizer.next_token()? {
        println!("{:?}", token);
    }

    Ok(())
}

fn main() {
    if let Err(e) = run_logic() {
        eprintln!("\n\n{}", e);
    }
}
src/tokenizer/mod.rs (new file, 671 lines)
@@ -0,0 +1,671 @@
mod token;

use std::{
    fs::File,
    io::{BufReader, Cursor, Read, Seek, SeekFrom},
    path::PathBuf,
};
use thiserror::Error;
use token::{Keyword, Number, Symbol, Token, TokenType};

#[derive(Error, Debug)]
pub enum TokenizerError {
    #[error("IO Error: {0}")]
    IOError(#[from] std::io::Error),
    #[error("Number Parse Error \"{0}\"\nLine: {1}, Column: {2}")]
    NumberParseError(std::num::ParseIntError, usize, usize),
    #[error("Unknown Symbol \"{0}\"\nLine: {1}, Column: {2}")]
    UnknownSymbolError(char, usize, usize),
    #[error("Unknown Keyword or Identifier \"{0}\"\nLine: {1}, Column: {2}")]
    UnknownKeywordOrIdentifierError(String, usize, usize),
}

pub(crate) struct Tokenizer<T>
where
    T: Read + Seek,
{
    reader: BufReader<T>,
    char_buffer: [u8; 1],
    line: usize,
    column: usize,
    returned_eof: bool,
}

impl From<String> for Tokenizer<Cursor<Vec<u8>>> {
    fn from(input: String) -> Self {
        let cursor = Cursor::new(input.into_bytes());
        let reader = BufReader::new(cursor);

        Self {
            reader,
            line: 1,
            column: 1,
            char_buffer: [0],
            returned_eof: false,
        }
    }
}

impl Tokenizer<File> {
    pub fn from_path(input_file: impl Into<PathBuf>) -> Result<Self, TokenizerError> {
        let file = std::fs::File::open(input_file.into())?;
        let reader = BufReader::new(file);

        Ok(Self {
            reader,
            line: 1,
            column: 1,
            char_buffer: [0],
            returned_eof: false,
        })
    }
}

impl<T> Tokenizer<T>
where
    T: Read + Seek,
{
    /// Reads and consumes the next character in the stream
    /// If there are no more characters in the stream, this function returns None
    /// If there is an error reading the stream, this function returns an error
    ///
    /// # Important
    /// This function will increment the line and column counters
    fn next_char(&mut self) -> Result<Option<char>, TokenizerError> {
        let bytes_read = self.reader.read(&mut self.char_buffer)?;

        if bytes_read == 0 {
            return Ok(None);
        }

        // The buffer holds exactly one byte since it is initialized with a size of 1;
        // note that casting that byte to `char` is only correct for ASCII input
        let c = self.char_buffer[0] as char;
        if c == '\n' {
            self.line += 1;
            self.column = 1;
        } else {
            self.column += 1;
        }

        Ok(Some(c))
    }

    /// Peeks the next character in the stream without consuming it
    ///
    /// # Important
    /// This does not increment the line or column counters
    fn peek_next_char(&mut self) -> Result<Option<char>, TokenizerError> {
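        // Remember the current position so the probing read below can be undone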
        let current_pos = self.reader.stream_position()?;

        let to_return = if self.reader.read(&mut self.char_buffer)? == 0 {
            None
        } else {
            self.reader.seek(SeekFrom::Start(current_pos))?;

            // The buffer holds exactly one byte; see `next_char` for the ASCII caveat
            Some(self.char_buffer[0] as char)
        };

        Ok(to_return)
    }

    /// Skips the current line in the stream.
    /// Useful for skipping comments or empty lines
    ///
    /// # Important
    /// This function will increment the line and column counters
    fn skip_line(&mut self) -> Result<(), TokenizerError> {
        while let Some(next_char) = self.next_char()? {
            if next_char == '\n' {
                break;
            }
        }
        Ok(())
    }

    pub fn next_token(&mut self) -> Result<Option<Token>, TokenizerError> {
        while let Some(next_char) = self.next_char()? {
            // skip whitespace
            if next_char.is_whitespace() {
                continue;
            }
            // skip comments
            if next_char == '/' && self.peek_next_char()? == Some('/') {
                self.skip_line()?;
                continue;
            }

            match next_char {
                // numbers
                '0'..='9' => {
                    return self.tokenize_number(next_char).map(Some);
                }
                // strings
                '"' | '\'' => return self.tokenize_string(next_char).map(Some),
                // symbols excluding `"` and `'`
                char if !char.is_alphanumeric() && char != '"' && char != '\'' => {
                    return self.tokenize_symbol(next_char).map(Some)
                }
                // keywords and identifiers
                char if char.is_alphabetic() => {
                    return self.tokenize_keyword_or_identifier(next_char).map(Some)
                }
                _ => {
                    return Err(TokenizerError::UnknownSymbolError(
                        next_char,
                        self.line,
                        self.column,
                    ))
                }
            }
        }
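        // The stream is exhausted; emit a single EOF token the first time, then None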
        if self.returned_eof {
            Ok(None)
        } else {
            self.returned_eof = true;
            Ok(Some(Token::new(TokenType::EOF, self.line, self.column)))
        }
    }

    /// Tokenizes a symbol
    fn tokenize_symbol(&mut self, first_symbol: char) -> Result<Token, TokenizerError> {
        /// Helper macro to create a symbol token
        macro_rules! symbol {
            ($symbol:ident) => {
                Ok(Token::new(
                    TokenType::Symbol(Symbol::$symbol),
                    self.line,
                    self.column,
                ))
            };
        }

        match first_symbol {
            // single character symbols
            '(' => symbol!(LParen),
            ')' => symbol!(RParen),
            '{' => symbol!(LBrace),
            '}' => symbol!(RBrace),
            '[' => symbol!(LBracket),
            ']' => symbol!(RBracket),
            ';' => symbol!(Semicolon),
            ':' => symbol!(Colon),
            ',' => symbol!(Comma),
            '+' => symbol!(Plus),
            '-' => symbol!(Minus),
            '/' => symbol!(Slash),
            '*' => symbol!(Asterisk),
            '.' => symbol!(Dot),

            // multi-character symbols
            '<' if self.peek_next_char()? == Some('=') => {
                self.next_char()?;
                symbol!(LessThanOrEqual)
            }
            '<' => symbol!(LessThan),

            '>' if self.peek_next_char()? == Some('=') => {
                self.next_char()?;
                symbol!(GreaterThanOrEqual)
            }
            '>' => symbol!(GreaterThan),

            '=' if self.peek_next_char()? == Some('=') => {
                self.next_char()?;
                symbol!(Equal)
            }
            '=' => symbol!(Assign),

            '!' if self.peek_next_char()? == Some('=') => {
                self.next_char()?;
                symbol!(NotEqual)
            }
            '!' => symbol!(LogicalNot),

            '&' if self.peek_next_char()? == Some('&') => {
                self.next_char()?;
                symbol!(LogicalAnd)
            }
            '|' if self.peek_next_char()? == Some('|') => {
                self.next_char()?;
                symbol!(LogicalOr)
            }
            _ => Err(TokenizerError::UnknownSymbolError(
                first_symbol,
                self.line,
                self.column,
            )),
        }
    }

    /// Tokenizes a number literal
    fn tokenize_number(&mut self, first_char: char) -> Result<Token, TokenizerError> {
        let mut primary = String::with_capacity(16);
        let mut decimal: Option<String> = None;
        let mut reading_decimal = false;

        let column = self.column;
        let line = self.line;

        primary.push(first_char);

        while let Some(next_char) = self.peek_next_char()? {
            if next_char.is_whitespace() {
                break;
            }

            if next_char == '.' {
                reading_decimal = true;
                self.next_char()?;
                continue;
            }

            // This is for the times when we have a number followed by a symbol (like a semicolon or =)
            if !next_char.is_numeric() {
                break;
            }

            if reading_decimal {
                decimal.get_or_insert_with(String::new).push(next_char);
            } else {
                primary.push(next_char);
            }
            self.next_char()?;
        }
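        // Note: the fractional digits are parsed as a plain integer, so leading zeros
        // in the fraction are lost ("1.05" and "1.5" both become Decimal(1, 5))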

        if let Some(decimal) = decimal {
            Ok(Token::new(
                TokenType::Number(Number::Decimal(
                    primary
                        .parse()
                        .map_err(|e| TokenizerError::NumberParseError(e, line, column))?,
                    decimal
                        .parse()
                        .map_err(|e| TokenizerError::NumberParseError(e, line, column))?,
                )),
                line,
                column,
            ))
        } else {
            Ok(Token::new(
                TokenType::Number(Number::Integer(
                    primary
                        .parse()
                        .map_err(|e| TokenizerError::NumberParseError(e, line, column))?,
                )),
                line,
                column,
            ))
        }
    }

    /// Tokenizes a string literal
    fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, TokenizerError> {
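        // Read until the matching closing quote; escape sequences are not handled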
        let mut buffer = String::with_capacity(16);

        let column = self.column;
        let line = self.line;

        while let Some(next_char) = self.next_char()? {
            if next_char == beginning_quote {
                break;
            }

            buffer.push(next_char);
        }

        Ok(Token::new(TokenType::String(buffer), line, column))
    }

    /// Tokenizes a keyword or an identifier. Also handles boolean literals
    fn tokenize_keyword_or_identifier(
        &mut self,
        first_char: char,
    ) -> Result<Token, TokenizerError> {
        macro_rules! keyword {
            ($keyword:ident) => {{
                return Ok(Token::new(
                    TokenType::Keyword(Keyword::$keyword),
                    self.line,
                    self.column,
                ));
            }};
        }

        /// Helper macro to check whether the next character ends the current word:
        /// whitespace, a non-alphanumeric symbol, or end of input
        macro_rules! next_ws {
            () => {
                self.peek_next_char()?
                    .map_or(true, |x| x.is_whitespace() || !x.is_alphanumeric())
            };
        }

        let mut buffer = String::with_capacity(16);
        let line = self.line;
        let column = self.column;

        let mut looped_char = Some(first_char);

        while let Some(next_char) = looped_char {
            if next_char.is_whitespace() {
                break;
            }

            if !next_char.is_alphanumeric() {
                break;
            }
            buffer.push(next_char);
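
            // Check after every appended character whether the buffer now spells a
            // complete keyword or literal that ends at a word boundary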
            match buffer.as_str() {
                "let" if next_ws!() => keyword!(Let),
                "fn" if next_ws!() => keyword!(Fn),
                "if" if next_ws!() => keyword!(If),
                "else" if next_ws!() => keyword!(Else),
                "return" if next_ws!() => keyword!(Return),
                "enum" if next_ws!() => keyword!(Enum),
                "import" if next_ws!() => keyword!(Import),
                "export" if next_ws!() => keyword!(Export),

                // boolean literals
                "true" if next_ws!() => {
                    return Ok(Token::new(TokenType::Boolean(true), self.line, self.column))
                }
                "false" if next_ws!() => {
                    return Ok(Token::new(
                        TokenType::Boolean(false),
                        self.line,
                        self.column,
                    ))
                }
                // if the next character is whitespace or not alphanumeric, then we have an identifier
                // this is because keywords are checked first
                val if next_ws!() => {
                    return Ok(Token::new(
                        TokenType::Identifier(val.to_string()),
                        line,
                        column,
                    ));
                }
                _ => {}
            }

            looped_char = self.next_char()?;
        }
        Err(TokenizerError::UnknownKeywordOrIdentifierError(
            buffer, line, column,
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;

    const TEST_FILE: &str = "tests/file.stlg";

    const TEST_STRING: &str = r#"
    fn test() {
        let x = 10;
        return x + 2;
    }
    "#;

    #[test]
    fn test_tokenizer_from_path_ok() {
        let tokenizer = Tokenizer::from_path(TEST_FILE);
        assert!(tokenizer.is_ok());
    }

    #[test]
    fn test_tokenizer_from_path_err() {
        let tokenizer = Tokenizer::from_path("non_existent_file.stlg");
        assert!(tokenizer.is_err());
    }

    #[test]
    fn test_next_char() -> Result<()> {
        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());

        let char = tokenizer.next_char()?;

        assert_eq!(char, Some('\n'));
        assert_eq!(tokenizer.line, 2);
        assert_eq!(tokenizer.column, 1);

        let mut tokenizer = Tokenizer::from(String::from("fn"));

        let char = tokenizer.next_char()?;

        assert_eq!(char, Some('f'));
        assert_eq!(tokenizer.line, 1);
        assert_eq!(tokenizer.column, 2);

        Ok(())
    }

    #[test]
    fn test_peek_next_char() -> Result<()> {
        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());

        let char = tokenizer.peek_next_char()?;

        assert_eq!(char, Some('\n'));
        assert_eq!(tokenizer.line, 1);
        assert_eq!(tokenizer.column, 1);

        let char = tokenizer.next_char()?;
        assert_eq!(char, Some('\n'));
        assert_eq!(tokenizer.line, 2);
        assert_eq!(tokenizer.column, 1);

        let char = tokenizer.peek_next_char()?;
        assert_eq!(char, Some(' '));
        assert_eq!(tokenizer.line, 2);
        assert_eq!(tokenizer.column, 1);

        Ok(())
    }

    #[test]
    fn test_skip_line() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from(
            r#"
This is a skippable line"#,
        ));

        tokenizer.skip_line()?;

        assert_eq!(tokenizer.line, 2);
        assert_eq!(tokenizer.column, 1);

        let next_char = tokenizer.next_char()?;
        assert_eq!(next_char, Some('T'));

        Ok(())
    }

    #[test]
    fn test_parse_integer() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("10"));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));

        Ok(())
    }

    #[test]
    fn test_parse_decimal() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("10.5"));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(token.token_type, TokenType::Number(Number::Decimal(10, 5)));

        Ok(())
    }

    #[test]
    fn test_parse_number_with_symbol() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("10;"));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(token.token_type, TokenType::Number(Number::Integer(10)));

        let next_char = tokenizer.next_char()?;

        assert_eq!(next_char, Some(';'));

        Ok(())
    }

    #[test]
    fn test_string_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from(r#""Hello, World!""#));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(
            token.token_type,
            TokenType::String(String::from("Hello, World!"))
        );

        let mut tokenizer = Tokenizer::from(String::from(r#"'Hello, World!'"#));

        let token = tokenizer.next_token()?.unwrap();

        assert_eq!(
            token.token_type,
            TokenType::String(String::from("Hello, World!"))
        );

        Ok(())
    }

    #[test]
    fn test_symbol_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from(
            "! () [] {} , . ; : + - * / < > = != && || >= <=",
        ));

        let expected_tokens = vec![
            TokenType::Symbol(Symbol::LogicalNot),
            TokenType::Symbol(Symbol::LParen),
            TokenType::Symbol(Symbol::RParen),
            TokenType::Symbol(Symbol::LBracket),
            TokenType::Symbol(Symbol::RBracket),
            TokenType::Symbol(Symbol::LBrace),
            TokenType::Symbol(Symbol::RBrace),
            TokenType::Symbol(Symbol::Comma),
            TokenType::Symbol(Symbol::Dot),
            TokenType::Symbol(Symbol::Semicolon),
            TokenType::Symbol(Symbol::Colon),
            TokenType::Symbol(Symbol::Plus),
            TokenType::Symbol(Symbol::Minus),
            TokenType::Symbol(Symbol::Asterisk),
            TokenType::Symbol(Symbol::Slash),
            TokenType::Symbol(Symbol::LessThan),
            TokenType::Symbol(Symbol::GreaterThan),
            TokenType::Symbol(Symbol::Assign),
            TokenType::Symbol(Symbol::NotEqual),
            TokenType::Symbol(Symbol::LogicalAnd),
            TokenType::Symbol(Symbol::LogicalOr),
            TokenType::Symbol(Symbol::GreaterThanOrEqual),
            TokenType::Symbol(Symbol::LessThanOrEqual),
        ];

        for expected_token in expected_tokens {
            let token = tokenizer.next_token()?.unwrap();

            assert_eq!(token.token_type, expected_token);
        }

        Ok(())
    }

    #[test]
    fn test_keyword_parse() -> Result<()> {
        let mut tokenizer =
            Tokenizer::from(String::from("let fn if else return enum import export"));

        let expected_tokens = vec![
            TokenType::Keyword(Keyword::Let),
            TokenType::Keyword(Keyword::Fn),
            TokenType::Keyword(Keyword::If),
            TokenType::Keyword(Keyword::Else),
            TokenType::Keyword(Keyword::Return),
            TokenType::Keyword(Keyword::Enum),
            TokenType::Keyword(Keyword::Import),
            TokenType::Keyword(Keyword::Export),
        ];

        for expected_token in expected_tokens {
            let token = tokenizer.next_token()?.unwrap();

            assert_eq!(token.token_type, expected_token);
        }

        Ok(())
    }

    #[test]
    fn test_identifier_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("fn test"));

        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(
            token.token_type,
            TokenType::Identifier(String::from("test"))
        );

        Ok(())
    }

    #[test]
    fn test_boolean_parse() -> Result<()> {
        let mut tokenizer = Tokenizer::from(String::from("true false"));

        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(token.token_type, TokenType::Boolean(true));
        let token = tokenizer.next_token()?.unwrap();
        assert_eq!(token.token_type, TokenType::Boolean(false));

        Ok(())
    }

    #[test]
    fn test_full_source() -> Result<()> {
        let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());

        let expected_tokens = vec![
            TokenType::Keyword(Keyword::Fn),
            TokenType::Identifier(String::from("test")),
            TokenType::Symbol(Symbol::LParen),
            TokenType::Symbol(Symbol::RParen),
            TokenType::Symbol(Symbol::LBrace),
            TokenType::Keyword(Keyword::Let),
            TokenType::Identifier(String::from("x")),
            TokenType::Symbol(Symbol::Assign),
            TokenType::Number(Number::Integer(10)),
            TokenType::Symbol(Symbol::Semicolon),
            TokenType::Keyword(Keyword::Return),
            TokenType::Identifier(String::from("x")),
            TokenType::Symbol(Symbol::Plus),
            TokenType::Number(Number::Integer(2)),
            TokenType::Symbol(Symbol::Semicolon),
            TokenType::Symbol(Symbol::RBrace),
        ];

        for expected_token in expected_tokens {
            let token = tokenizer.next_token()?.unwrap();

            assert_eq!(token.token_type, expected_token);
        }

        Ok(())
    }
}
src/tokenizer/token.rs (new file, 120 lines)
@@ -0,0 +1,120 @@
#[derive(Debug, PartialEq)]
pub struct Token {
    /// The type of the token
    pub token_type: TokenType,
    /// The line where the token was found
    pub line: usize,
    /// The column where the token was found
    pub column: usize,
}

impl Token {
    pub fn new(token_type: TokenType, line: usize, column: usize) -> Self {
        Self {
            token_type,
            line,
            column,
        }
    }
}

#[derive(Debug, PartialEq, Hash, Eq)]
pub enum TokenType {
    /// Represents a string token
    String(String),
    /// Represents a number token
    Number(Number),
    /// Represents a boolean token
    Boolean(bool),
    /// Represents a keyword token
    Keyword(Keyword),
    /// Represents an identifier token
    Identifier(String),
    /// Represents a symbol token
    Symbol(Symbol),
    /// Represents an end of file token
    EOF,
}

#[derive(Debug, PartialEq, Hash, Eq)]
pub enum Number {
    /// Represents an integer number
    Integer(u64),
    /// Represents a decimal number stored as its integer part and its fractional
    /// digits, each held in a 64-bit integer
    Decimal(u64, u64),
}

#[derive(Debug, PartialEq, Hash, Eq)]
pub enum Symbol {
    // Single Character Symbols
    /// Represents the `(` symbol
    LParen,
    /// Represents the `)` symbol
    RParen,
    /// Represents the `{` symbol
    LBrace,
    /// Represents the `}` symbol
    RBrace,
    /// Represents the `[` symbol
    LBracket,
    /// Represents the `]` symbol
    RBracket,
    /// Represents the `;` symbol
    Semicolon,
    /// Represents the `:` symbol
    Colon,
    /// Represents the `,` symbol
    Comma,
    /// Represents the `+` symbol
    Plus,
    /// Represents the `-` symbol
    Minus,
    /// Represents the `*` symbol
    Asterisk,
    /// Represents the `/` symbol
    Slash,
    /// Represents the `<` symbol
    LessThan,
    /// Represents the `>` symbol
    GreaterThan,
    /// Represents the `=` symbol
    Assign,
    /// Represents the `!` symbol
    LogicalNot,
    /// Represents the `.` symbol
    Dot,

    // Double Character Symbols
    /// Represents the `==` symbol
    Equal,
    /// Represents the `!=` symbol
    NotEqual,
    /// Represents the `&&` symbol
    LogicalAnd,
    /// Represents the `||` symbol
    LogicalOr,
    /// Represents the `<=` symbol
    LessThanOrEqual,
    /// Represents the `>=` symbol
    GreaterThanOrEqual,
}

#[derive(Debug, PartialEq, Hash, Eq)]
pub enum Keyword {
    /// Represents the `let` keyword
    Let,
    /// Represents the `fn` keyword
    Fn,
    /// Represents the `if` keyword
    If,
    /// Represents the `else` keyword
    Else,
    /// Represents the `return` keyword
    Return,
    /// Represents the `enum` keyword
    Enum,
    /// Represents the `import` keyword
    Import,
    /// Represents the `export` keyword
    Export,
}
tests/file.stlg (new file, 3 lines)
@@ -0,0 +1,3 @@
export fn doThings() {
    power.myPowerItem(12.45 + 5);
}