From 245567c35463a10d4f5cf9bed779a7482be28fe8 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Thu, 12 Jun 2025 20:52:39 -0700 Subject: [PATCH 1/6] workspace libs --- Cargo.lock | 311 +++++--- Cargo.toml | 11 +- libs/tokenizer/Cargo.toml | 11 + libs/tokenizer/src/lib.rs | 897 ++++++++++++++++++++++ libs/tokenizer/src/token.rs | 221 ++++++ {tests => libs/tokenizer/tests}/file.stlg | 2 +- src/tokenizer/mod.rs | 5 +- 7 files changed, 1346 insertions(+), 112 deletions(-) create mode 100644 libs/tokenizer/Cargo.toml create mode 100644 libs/tokenizer/src/lib.rs create mode 100644 libs/tokenizer/src/token.rs rename {tests => libs/tokenizer/tests}/file.stlg (96%) diff --git a/Cargo.lock b/Cargo.lock index 62124d4..94811fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "adler2" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "ahash" @@ -30,9 +30,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.18" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" dependencies = [ "anstyle", "anstyle-parse", @@ -45,43 +45,44 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" dependencies = [ "windows-sys", ] [[package]] name = "anstyle-wincon" -version = "3.0.6" +version = "3.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" dependencies = [ "anstyle", + "once_cell_polyfill", "windows-sys", ] [[package]] name = "anyhow" -version = "1.0.93" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" dependencies = [ "backtrace", ] @@ -100,9 +101,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.74" +version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = 
"6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ "addr2line", "cfg-if", @@ -127,9 +128,9 @@ dependencies = [ [[package]] name = "borsh" -version = "1.5.3" +version = "1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2506947f73ad44e344215ccd6403ac2ae18cd8e046e581a441bf8d199f257f03" +checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" dependencies = [ "borsh-derive", "cfg_aliases", @@ -137,17 +138,23 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.5.3" +version = "1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2593a3b8b938bd68373196c9832f516be11fa487ef4ae745eb282e6a56a7244" +checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" dependencies = [ "once_cell", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.102", ] +[[package]] +name = "bumpalo" +version = "3.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" + [[package]] name = "bytecheck" version = "0.6.12" @@ -170,23 +177,17 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "bytes" -version = "1.8.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "cfg_aliases" @@ -196,9 +197,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "clap" -version = "4.5.21" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" dependencies = [ "clap_builder", "clap_derive", @@ -206,9 +207,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.21" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" dependencies = [ "anstream", "anstyle", @@ -218,33 +219,33 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.18" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.102", ] [[package]] name = "clap_lex" -version = "0.7.3" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" +checksum = 
"b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "funty" @@ -254,9 +255,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", @@ -280,9 +281,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" [[package]] name = "heck" @@ -292,12 +293,12 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "indexmap" -version = "2.6.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown 0.15.4", ] [[package]] @@ -308,27 +309,43 @@ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] [[package]] name = "libc" -version = "0.2.164" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", ] @@ -344,42 +361,48 @@ dependencies = [ [[package]] name = "object" -version = "0.36.5" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro-crate" -version = "3.2.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" dependencies = [ "toml_edit", ] [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -412,9 +435,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" [[package]] name = "quote" -version = "1.0.37" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -495,9 +518,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.36.0" +version = "1.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b082d80e3e3cc52b2ed634388d436fe1f4de6af5786cc2de9ba9737527bdf555" +checksum = "faa7de2ba56ac291bd90c6b9bece784a52ae1411f9506544b3eae36dd2356d50" dependencies = [ "arrayvec", "borsh", @@ -511,15 +534,21 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "seahash" @@ -529,29 +558,29 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.102", ] [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -594,9 +623,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.87" +version = "2.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462" dependencies = [ "proc-macro2", "quote", @@ -611,9 +640,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tinyvec" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" dependencies = [ "tinyvec_macros", ] @@ -624,17 +653,26 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tokenizer" +version = "0.1.0" +dependencies = [ + "anyhow", + "quick-error", + "rust_decimal", +] + [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" [[package]] name = "toml_edit" -version = "0.22.22" +version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap", "toml_datetime", @@ -643,9 +681,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "utf8parse" @@ -655,9 +693,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = 
"1.11.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +dependencies = [ + "js-sys", + "wasm-bindgen", +] [[package]] name = "version_check" @@ -667,9 +709,67 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn 2.0.102", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.102", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "windows-sys" @@ -746,9 +846,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.20" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" dependencies = [ "memchr", ] @@ -764,21 +864,20 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ - "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.102", ] diff --git a/Cargo.toml b/Cargo.toml index 32ae60f..b7059ca 100644 --- a/Cargo.toml +++ 
b/Cargo.toml
@@ -3,14 +3,21 @@ name = "stationlang"
 version = "0.1.0"
 edition = "2021"
 
+[workspace]
+members = ["libs/*"]
+
+[workspace.dependencies]
+quick-error = "2"
+rust_decimal = "1"
+
 [[bin]]
 name = "slang"
 path = "src/main.rs"
 
 [dependencies]
 clap = { version = "^4.5", features = ["derive"] }
-quick-error = "2.0.1"
-rust_decimal = "1.36.0"
+quick-error = { workspace = true }
+rust_decimal = { workspace = true }
 
 [dev-dependencies]
 anyhow = { version = "^1.0", features = ["backtrace"] }
diff --git a/libs/tokenizer/Cargo.toml b/libs/tokenizer/Cargo.toml
new file mode 100644
index 0000000..100b2b7
--- /dev/null
+++ b/libs/tokenizer/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "tokenizer"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+rust_decimal = { workspace = true }
+quick-error = { workspace = true }
+
+[dev-dependencies]
+anyhow = { version = "^1" }
diff --git a/libs/tokenizer/src/lib.rs b/libs/tokenizer/src/lib.rs
new file mode 100644
index 0000000..7410c14
--- /dev/null
+++ b/libs/tokenizer/src/lib.rs
@@ -0,0 +1,897 @@
+pub mod token;
+
+use quick_error::quick_error;
+use rust_decimal::Decimal;
+use std::{
+    cmp::Ordering,
+    collections::VecDeque,
+    io::{BufReader, Cursor, Read, Seek, SeekFrom},
+    path::PathBuf,
+};
+use token::{Keyword, Number, Symbol, Temperature, Token, TokenType};
+
+quick_error! {
+    #[derive(Debug)]
+    pub enum TokenizerError {
+        IOError(err: std::io::Error) {
+            from()
+            display("IO Error: {}", err)
+            source(err)
+        }
+        NumberParseError(err: std::num::ParseIntError, line: usize, column: usize) {
+            display("Number Parse Error: {}\nLine: {}, Column: {}", err, line, column)
+            source(err)
+        }
+        DecimalParseError(err: rust_decimal::Error, line: usize, column: usize) {
+            display("Decimal Parse Error: {}\nLine: {}, Column: {}", err, line, column)
+            source(err)
+        }
+        UnknownSymbolError(char: char, line: usize, column: usize) {
+            display("Unknown Symbol: {}\nLine: {}, Column: {}", char, line, column)
+        }
+        UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize) {
+            display("Unknown Keyword or Identifier: {}\nLine: {}, Column: {}", val, line, column)
+        }
+    }
+}
+
+pub trait Tokenize: Read + Seek {}
+
+impl<T> Tokenize for T where T: Read + Seek {}
+
+pub(crate) struct Tokenizer {
+    reader: BufReader<Box<dyn Tokenize>>,
+    char_buffer: [u8; 1],
+    line: usize,
+    column: usize,
+    returned_eof: bool,
+}
+
+impl Tokenizer {
+    pub fn from_path(input_file: impl Into<PathBuf>) -> Result<Self, TokenizerError> {
+        let file = std::fs::File::open(input_file.into())?;
+        let reader = BufReader::new(Box::new(file) as Box<dyn Tokenize>);
+
+        Ok(Self {
+            reader,
+            line: 1,
+            column: 1,
+            char_buffer: [0],
+            returned_eof: false,
+        })
+    }
+}
+
+impl From<String> for Tokenizer {
+    fn from(input: String) -> Self {
+        let reader = BufReader::new(Box::new(Cursor::new(input)) as Box<dyn Tokenize>);
+
+        Self {
+            reader,
+            line: 1,
+            column: 1,
+            char_buffer: [0],
+            returned_eof: false,
+        }
+    }
+}
+
+impl Tokenizer {
+    /// Reads and returns the next character in the stream
+    /// If there are no more characters in the stream, this function returns None
+    /// If there is an error reading the stream, this function returns an error
+    ///
+    /// # Important
+    /// This function will increment the line and column counters
+    fn next_char(&mut self) -> Result<Option<char>, TokenizerError> {
+        let bytes_read = self.reader.read(&mut self.char_buffer)?;
+
+        if bytes_read == 0 {
+            return Ok(None);
+        }
+
+        // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
+        let c = self.char_buffer[0] as char;
+        if c == '\n' {
+            self.line += 1;
+            self.column = 1;
+        } else {
+            self.column += 1;
+        }
+
+        Ok(Some(c))
+    }
+
+    /// Peeks the next character in the stream without consuming it
+    ///
+    /// # Important
+    /// This does not increment the line or column counters
+    fn peek_next_char(&mut self) -> Result<Option<char>, TokenizerError> {
+        let current_pos = self.reader.stream_position()?;
+
+        let to_return = if self.reader.read(&mut self.char_buffer)? == 0 {
+            None
+        } else {
+            self.reader.seek(SeekFrom::Start(current_pos))?;
+
+            // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1
+            Some(self.char_buffer[0] as char)
+        };
+
+        Ok(to_return)
+    }
+
+    /// Skips the current line in the stream.
+    /// Useful for skipping comments or empty lines
+    ///
+    /// # Important
+    /// This function will increment the line and column counters
+    fn skip_line(&mut self) -> Result<(), TokenizerError> {
+        while let Some(next_char) = self.next_char()? {
+            if next_char == '\n' {
+                break;
+            }
+        }
+        Ok(())
+    }
+
+    /// Reads and returns the next token in the stream
+    /// If there are no more tokens in the stream, this function returns None
+    pub fn next_token(&mut self) -> Result<Option<Token>, TokenizerError> {
+        while let Some(next_char) = self.next_char()? {
+            // skip whitespace
+            if next_char.is_whitespace() {
+                continue;
+            }
+            // skip comments
+            if next_char == '/' && self.peek_next_char()? == Some('/') {
+                self.skip_line()?;
+                continue;
+            }
+
+            match next_char {
+                // numbers
+                '0'..='9' => {
+                    return self.tokenize_number(next_char).map(Some);
+                }
+                // strings
+                '"' | '\'' => return self.tokenize_string(next_char).map(Some),
+                // symbols excluding `"` and `'`
+                char if !char.is_alphanumeric() && char != '"' && char != '\'' => {
+                    return self.tokenize_symbol(next_char).map(Some);
+                }
+                // keywords and identifiers
+                char if char.is_alphabetic() => {
+                    return self.tokenize_keyword_or_identifier(next_char).map(Some);
+                }
+                _ => {
+                    return Err(TokenizerError::UnknownSymbolError(
+                        next_char,
+                        self.line,
+                        self.column,
+                    ));
+                }
+            }
+        }
+        if self.returned_eof {
+            Ok(None)
+        } else {
+            self.returned_eof = true;
+            Ok(Some(Token::new(TokenType::EOF, self.line, self.column)))
+        }
+    }
+
+    /// Peeks the next token in the stream without consuming it
+    /// If there are no more tokens in the stream, this function returns None
+    pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
+        let current_pos = self.reader.stream_position()?;
+        let column = self.column.clone();
+        let line = self.line.clone();
+
+        let token = self.next_token()?;
+        self.reader.seek(SeekFrom::Start(current_pos))?;
+        self.column = column;
+        self.line = line;
+        Ok(token)
+    }
+
+    /// Tokenizes a symbol
+    fn tokenize_symbol(&mut self, first_symbol: char) -> Result<Token, TokenizerError> {
+        /// Helper macro to create a symbol token
+        macro_rules! symbol {
+            ($symbol:ident) => {
+                Ok(Token::new(
+                    TokenType::Symbol(Symbol::$symbol),
+                    self.line,
+                    self.column,
+                ))
+            };
+        }
+
+        match first_symbol {
+            // single character symbols
+            '(' => symbol!(LParen),
+            ')' => symbol!(RParen),
+            '{' => symbol!(LBrace),
+            '}' => symbol!(RBrace),
+            '[' => symbol!(LBracket),
+            ']' => symbol!(RBracket),
+            ';' => symbol!(Semicolon),
+            ':' => symbol!(Colon),
+            ',' => symbol!(Comma),
+            '+' => symbol!(Plus),
+            '-' => symbol!(Minus),
+            '/' => symbol!(Slash),
+
+            '.' => symbol!(Dot),
+            '^' => symbol!(Caret),
+
+            // multi-character symbols
+            '<' if self.peek_next_char()? == Some('=') => {
+                self.next_char()?;
+                symbol!(LessThanOrEqual)
+            }
+            '<' => symbol!(LessThan),
+
+            '>' if self.peek_next_char()? == Some('=') => {
+                self.next_char()?;
+                symbol!(GreaterThanOrEqual)
+            }
+            '>' => symbol!(GreaterThan),
+
+            '=' if self.peek_next_char()? == Some('=') => {
+                self.next_char()?;
+                symbol!(Equal)
+            }
+            '=' => symbol!(Assign),
+
+            '!' if self.peek_next_char()? == Some('=') => {
+                self.next_char()?;
+                symbol!(NotEqual)
+            }
+            '!' => symbol!(LogicalNot),
+
+            '*' if self.peek_next_char()? == Some('*') => {
+                self.next_char()?;
+                symbol!(Exp)
+            }
+            '*' => symbol!(Asterisk),
+
+            '&' if self.peek_next_char()? == Some('&') => {
+                self.next_char()?;
+                symbol!(LogicalAnd)
+            }
+            '|' if self.peek_next_char()? == Some('|') => {
+                self.next_char()?;
+                symbol!(LogicalOr)
+            }
+
+            _ => Err(TokenizerError::UnknownSymbolError(
+                first_symbol,
+                self.line,
+                self.column,
+            )),
+        }
+    }
+
+    /// Tokenizes a number literal. Also handles temperatures with a suffix of `c`, `f`, or `k`.
+    fn tokenize_number(&mut self, first_char: char) -> Result<Token, TokenizerError> {
+        let mut primary = String::with_capacity(16);
+        let mut decimal: Option<String> = None;
+        let mut reading_decimal = false;
+
+        let column = self.column.clone();
+        let line = self.line.clone();
+
+        primary.push(first_char);
+
+        while let Some(next_char) = self.peek_next_char()? {
+            if next_char.is_whitespace() {
+                break;
+            }
+
+            if next_char == '.' {
+                reading_decimal = true;
+                self.next_char()?;
+                continue;
+            }
+
+            // support underscores in numbers for readability
+            if next_char == '_' {
+                self.next_char()?;
+                continue;
+            }
+
+            // This is for the times when we have a number followed by a symbol (like a semicolon or =)
+            if !next_char.is_numeric() {
+                break;
+            }
+
+            if reading_decimal {
+                decimal.get_or_insert_with(String::new).push(next_char);
+            } else {
+                primary.push(next_char);
+            }
+            self.next_char()?;
+        }
+
+        let number: Number = if let Some(decimal) = decimal {
+            let decimal_scale = decimal.len() as u32;
+            let number = format!("{}{}", primary, decimal)
+                .parse::<i128>()
+                .map_err(|e| TokenizerError::NumberParseError(e, line, column))?;
+            Number::Decimal(
+                Decimal::try_from_i128_with_scale(number, decimal_scale)
+                    .map_err(|e| TokenizerError::DecimalParseError(e, line, column))?,
+            )
+        } else {
+            Number::Integer(
+                primary
+                    .parse()
+                    .map_err(|e| TokenizerError::NumberParseError(e, line, column))?,
+            )
+        };
+
+        // check if the next char is a temperature suffix
+        if let Some(next_char) = self.peek_next_char()? {
+            let temperature = match next_char {
+                'c' => Temperature::Celsius(number),
+                'f' => Temperature::Fahrenheit(number),
+                'k' => Temperature::Kelvin(number),
+                _ => return Ok(Token::new(TokenType::Number(number), line, column)),
+            }
+            .to_kelvin();
+
+            self.next_char()?;
+            Ok(Token::new(TokenType::Number(temperature), line, column))
+        } else {
+            Ok(Token::new(TokenType::Number(number), line, column))
+        }
+    }
+
+    /// Tokenizes a string literal
+    fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, TokenizerError> {
+        let mut buffer = String::with_capacity(16);
+
+        let column = self.column.clone();
+        let line = self.line.clone();
+
+        while let Some(next_char) = self.next_char()? {
+            if next_char == beginning_quote {
+                break;
+            }
+
+            buffer.push(next_char);
+        }
+
+        Ok(Token::new(TokenType::String(buffer), line, column))
+    }
+
+    /// Tokenizes a keyword or an identifier. Also handles boolean literals
+    fn tokenize_keyword_or_identifier(
+        &mut self,
+        first_char: char,
+    ) -> Result<Token, TokenizerError> {
+        macro_rules! keyword {
+            ($keyword:ident) => {{
+                return Ok(Token::new(
+                    TokenType::Keyword(Keyword::$keyword),
+                    self.line,
+                    self.column,
+                ));
+            }};
+        }
+
+        /// Helper macro to check if the next character is whitespace or not alphanumeric
+        macro_rules! next_ws {
+            () => {
+                matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || matches!(self.peek_next_char()?, None)
+            };
+        }
+
+        let mut buffer = String::with_capacity(16);
+        let line = self.line.clone();
+        let column = self.column.clone();
+
+        let mut looped_char = Some(first_char);
+
+        while let Some(next_char) = looped_char {
+            if next_char.is_whitespace() {
+                break;
+            }
+
+            if !next_char.is_alphanumeric() {
+                break;
+            }
+            buffer.push(next_char);
+
+            match buffer.as_str() {
+                "let" if next_ws!() => keyword!(Let),
+                "fn" if next_ws!() => keyword!(Fn),
+                "if" if next_ws!() => keyword!(If),
+                "else" if next_ws!() => keyword!(Else),
+                "return" if next_ws!() => keyword!(Return),
+                "enum" if next_ws!() => keyword!(Enum),
+                "device" if next_ws!() => keyword!(Device),
+                "loop" if next_ws!() => keyword!(Loop),
+                "break" if next_ws!() => keyword!(Break),
+
+                // boolean literals
+                "true" if next_ws!() => {
+                    return Ok(Token::new(TokenType::Boolean(true), self.line, self.column));
+                }
+                "false" if next_ws!() => {
+                    return Ok(Token::new(
+                        TokenType::Boolean(false),
+                        self.line,
+                        self.column,
+                    ));
+                }
+                // if the next character is whitespace or not alphanumeric, then we have an identifier
+                // this is because keywords are checked first
+                val if next_ws!() => {
+                    return Ok(Token::new(
+                        TokenType::Identifier(val.to_string()),
+                        line,
+                        column,
+                    ));
+                }
+                _ => {}
+            }
+
+            looped_char = self.next_char()?;
+        }
+        Err(TokenizerError::UnknownKeywordOrIdentifierError(
+            buffer, line, column,
+        ))
+    }
+}
+
+pub struct TokenizerBuffer {
+    tokenizer: Tokenizer,
+    buffer: VecDeque<Token>,
+    history: VecDeque<Token>,
+}
+
+impl TokenizerBuffer {
+    pub fn new(tokenizer: Tokenizer) -> Self {
+        Self {
+            tokenizer,
+            buffer: VecDeque::new(),
+            history: VecDeque::with_capacity(128),
+        }
+    }
+
+    /// Reads the next token from the tokenizer, pushing the value to the back of the history
+    /// and returning the token
+    pub fn next(&mut self) -> Result<Option<Token>, TokenizerError> {
+        if let Some(token) = self.buffer.pop_front() {
+            self.history.push_back(token.clone());
+            return Ok(Some(token));
+        }
+
+        let token = self.tokenizer.next_token()?;
+        if let Some(ref token) = token {
+            self.history.push_back(token.clone());
+        }
+        Ok(token)
+    }
+
+    /// Peeks the next token in the stream without adding to the history stack
+    pub fn peek(&mut self) -> Result<Option<Token>, TokenizerError> {
+        if let Some(token) = self.buffer.front() {
+            return Ok(Some(token.clone()));
+        }
+
+        let token = self.tokenizer.peek_next()?;
+        Ok(token)
+    }
+
+    fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> {
+        use Ordering::*;
+        // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer
+        // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history)
+        // if seek_to == 0 then we don't need to do anything
+
+        match seek_to.cmp(&0) {
+            Greater => {
+                let mut tokens = Vec::with_capacity(seek_to as usize);
+                for _ in 0..seek_to {
+                    if let Some(token) = self.tokenizer.next_token()?
{ + tokens.push(token); + } else { + return Err(TokenizerError::IOError(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EOF", + ))); + } + } + self.history.extend(tokens); + } + Less => { + let seek_to = seek_to.unsigned_abs() as usize; + let mut tokens = Vec::with_capacity(seek_to); + for _ in 0..seek_to { + if let Some(token) = self.history.pop_back() { + tokens.push(token); + } else { + return Err(TokenizerError::IOError(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EOF", + ))); + } + } + self.buffer.extend(tokens.into_iter().rev()); + } + _ => {} + } + + Ok(()) + } + + /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream + pub fn seek(&mut self, from: SeekFrom) -> Result<(), TokenizerError> { + match from { + SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?, + SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"), + SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"), + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use anyhow::Result; + use rust_decimal::Decimal; + + const TEST_FILE: &str = "tests/file.stlg"; + + const TEST_STRING: &str = r#" + fn test() { + let x = 10; + return x + 2; + } + "#; + + #[test] + fn test_seek_from_current() -> Result<()> { + let tokenizer = Tokenizer::from(TEST_STRING.to_owned()); + let mut buffer = TokenizerBuffer::new(tokenizer); + + let token = buffer.next()?.unwrap(); + assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn)); + + buffer.seek(SeekFrom::Current(1))?; + + let token = buffer.next()?.unwrap(); + + assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen)); + + Ok(()) + } + + #[test] + fn test_tokenizer_from_path_ok() { + let tokenizer = Tokenizer::from_path(TEST_FILE); + assert!(tokenizer.is_ok()); + } + + #[test] + fn test_tokenizer_from_path_err() { + let tokenizer = Tokenizer::from_path("non_existent_file.stlg"); + assert!(tokenizer.is_err()); + } + + #[test] + fn test_next_char() -> Result<()> { + let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); + + let char = tokenizer.next_char()?; + + assert_eq!(char, Some('\n')); + assert_eq!(tokenizer.line, 2); + assert_eq!(tokenizer.column, 1); + + let mut tokenizer = Tokenizer::from(String::from("fn")); + + let char = tokenizer.next_char()?; + + assert_eq!(char, Some('f')); + assert_eq!(tokenizer.line, 1); + assert_eq!(tokenizer.column, 2); + + Ok(()) + } + + #[test] + fn test_peek_next_char() -> Result<()> { + let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); + + let char = tokenizer.peek_next_char()?; + + assert_eq!(char, Some('\n')); + assert_eq!(tokenizer.line, 1); + assert_eq!(tokenizer.column, 1); + + let char = tokenizer.next_char()?; + assert_eq!(char, Some('\n')); + assert_eq!(tokenizer.line, 2); + assert_eq!(tokenizer.column, 1); + + let char = tokenizer.peek_next_char()?; + assert_eq!(char, Some(' ')); + assert_eq!(tokenizer.line, 2); + assert_eq!(tokenizer.column, 1); + + Ok(()) + } + + #[test] + fn test_temperature_unit() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("10c 14f 10k")); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!( + token.token_type, + TokenType::Number(Number::Decimal(Decimal::new(28315, 2))) + ); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!( + token.token_type, + TokenType::Number(Number::Decimal(Decimal::new(26315, 2))) + ); + + let token = tokenizer.next_token()?.unwrap(); + + 
assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); + + Ok(()) + } + + #[test] + fn test_parse_integer() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("10")); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); + + Ok(()) + } + + #[test] + fn test_parse_integer_with_underscore() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("1_000")); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!(token.token_type, TokenType::Number(Number::Integer(1000))); + + Ok(()) + } + + #[test] + fn test_parse_decimal() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("10.5")); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!( + token.token_type, + TokenType::Number(Number::Decimal(Decimal::new(105, 1))) // 10.5 + ); + + Ok(()) + } + + #[test] + fn test_parse_decimal_with_underscore() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("1_000.000_6")); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!( + token.token_type, + TokenType::Number(Number::Decimal(Decimal::new(10000006, 4))) // 1000.0006 + ); + + Ok(()) + } + + #[test] + fn test_parse_number_with_symbol() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("10;")); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); + + let next_char = tokenizer.next_char()?; + + assert_eq!(next_char, Some(';')); + + Ok(()) + } + + #[test] + fn test_string_parse() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from(r#""Hello, World!""#)); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!( + token.token_type, + TokenType::String(String::from("Hello, World!")) + ); + + let mut tokenizer = Tokenizer::from(String::from(r#"'Hello, World!'"#)); + + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!( + token.token_type, + TokenType::String(String::from("Hello, World!")) + ); + + Ok(()) + } + + #[test] + fn test_symbol_parse() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from( + "^ ! () [] {} , . 
; : + - * / < > = != && || >= <=**", + )); + + let expected_tokens = vec![ + TokenType::Symbol(Symbol::Caret), + TokenType::Symbol(Symbol::LogicalNot), + TokenType::Symbol(Symbol::LParen), + TokenType::Symbol(Symbol::RParen), + TokenType::Symbol(Symbol::LBracket), + TokenType::Symbol(Symbol::RBracket), + TokenType::Symbol(Symbol::LBrace), + TokenType::Symbol(Symbol::RBrace), + TokenType::Symbol(Symbol::Comma), + TokenType::Symbol(Symbol::Dot), + TokenType::Symbol(Symbol::Semicolon), + TokenType::Symbol(Symbol::Colon), + TokenType::Symbol(Symbol::Plus), + TokenType::Symbol(Symbol::Minus), + TokenType::Symbol(Symbol::Asterisk), + TokenType::Symbol(Symbol::Slash), + TokenType::Symbol(Symbol::LessThan), + TokenType::Symbol(Symbol::GreaterThan), + TokenType::Symbol(Symbol::Assign), + TokenType::Symbol(Symbol::NotEqual), + TokenType::Symbol(Symbol::LogicalAnd), + TokenType::Symbol(Symbol::LogicalOr), + TokenType::Symbol(Symbol::GreaterThanOrEqual), + TokenType::Symbol(Symbol::LessThanOrEqual), + TokenType::Symbol(Symbol::Exp), + ]; + + for expected_token in expected_tokens { + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!(token.token_type, expected_token); + } + + Ok(()) + } + + #[test] + fn test_keyword_parse() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("let fn if else return enum")); + + let expected_tokens = vec![ + TokenType::Keyword(Keyword::Let), + TokenType::Keyword(Keyword::Fn), + TokenType::Keyword(Keyword::If), + TokenType::Keyword(Keyword::Else), + TokenType::Keyword(Keyword::Return), + TokenType::Keyword(Keyword::Enum), + ]; + + for expected_token in expected_tokens { + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!(token.token_type, expected_token); + } + + Ok(()) + } + + #[test] + fn test_identifier_parse() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("fn test")); + + let token = tokenizer.next_token()?.unwrap(); + assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn)); + let token = tokenizer.next_token()?.unwrap(); + assert_eq!( + token.token_type, + TokenType::Identifier(String::from("test")) + ); + + Ok(()) + } + + #[test] + fn test_boolean_parse() -> Result<()> { + let mut tokenizer = Tokenizer::from(String::from("true false")); + + let token = tokenizer.next_token()?.unwrap(); + assert_eq!(token.token_type, TokenType::Boolean(true)); + let token = tokenizer.next_token()?.unwrap(); + assert_eq!(token.token_type, TokenType::Boolean(false)); + + Ok(()) + } + + #[test] + fn test_full_source() -> Result<()> { + let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); + + let expected_tokens = vec![ + TokenType::Keyword(Keyword::Fn), + TokenType::Identifier(String::from("test")), + TokenType::Symbol(Symbol::LParen), + TokenType::Symbol(Symbol::RParen), + TokenType::Symbol(Symbol::LBrace), + TokenType::Keyword(Keyword::Let), + TokenType::Identifier(String::from("x")), + TokenType::Symbol(Symbol::Assign), + TokenType::Number(Number::Integer(10)), + TokenType::Symbol(Symbol::Semicolon), + TokenType::Keyword(Keyword::Return), + TokenType::Identifier(String::from("x")), + TokenType::Symbol(Symbol::Plus), + TokenType::Number(Number::Integer(2)), + TokenType::Symbol(Symbol::Semicolon), + TokenType::Symbol(Symbol::RBrace), + ]; + + for expected_token in expected_tokens { + let token = tokenizer.next_token()?.unwrap(); + + assert_eq!(token.token_type, expected_token); + } + + Ok(()) + } + + #[test] + fn test_peek_next() -> Result<()> { + let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); + + let 
column = tokenizer.column.clone(); + let line = tokenizer.line.clone(); + + let peeked_token = tokenizer.peek_next()?; + + assert_eq!( + peeked_token.unwrap().token_type, + TokenType::Keyword(Keyword::Fn) + ); + assert_eq!(tokenizer.column, column); + assert_eq!(tokenizer.line, line); + + let next_token = tokenizer.next_token()?; + + assert_eq!( + next_token.unwrap().token_type, + TokenType::Keyword(Keyword::Fn) + ); + assert_ne!(tokenizer.column, column); + assert_ne!(tokenizer.line, line); + + Ok(()) + } +} + diff --git a/libs/tokenizer/src/token.rs b/libs/tokenizer/src/token.rs new file mode 100644 index 0000000..5e1c970 --- /dev/null +++ b/libs/tokenizer/src/token.rs @@ -0,0 +1,221 @@ +use rust_decimal::Decimal; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Token { + /// The type of the token + pub token_type: TokenType, + /// The line where the token was found + pub line: usize, + /// The column where the token was found + pub column: usize, +} + +impl Token { + pub fn new(token_type: TokenType, line: usize, column: usize) -> Self { + Self { + token_type, + line, + column, + } + } +} + +#[derive(Debug, PartialEq, Hash, Eq, Clone)] +pub enum Temperature { + Celsius(Number), + Fahrenheit(Number), + Kelvin(Number), +} + +impl std::fmt::Display for Temperature { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Temperature::Celsius(n) => write!(f, "{}°C", n), + Temperature::Fahrenheit(n) => write!(f, "{}°F", n), + Temperature::Kelvin(n) => write!(f, "{}K", n), + } + } +} + +impl Temperature { + pub fn to_kelvin(self) -> Number { + match self { + Temperature::Celsius(n) => { + let n = match n { + Number::Integer(i) => Decimal::new(i as i64, 0), + Number::Decimal(d) => d, + }; + Number::Decimal(n + Decimal::new(27315, 2)) + } + Temperature::Fahrenheit(n) => { + let n = match n { + Number::Integer(i) => Decimal::new(i as i64, 0), + Number::Decimal(d) => d, + }; + + let a = n - Decimal::new(32, 0); + let b = Decimal::new(5, 0) / Decimal::new(9, 0); + Number::Decimal(a * b + Decimal::new(27315, 2)) + } + Temperature::Kelvin(n) => n, + } + } +} + +#[derive(Debug, PartialEq, Hash, Eq, Clone)] +pub enum TokenType { + /// Represents a string token + String(String), + /// Represents a number token + Number(Number), + /// Represents a boolean token + Boolean(bool), + /// Represents a keyword token + Keyword(Keyword), + /// Represents an identifier token + Identifier(String), + /// Represents a symbol token + Symbol(Symbol), + /// Represents an end of file token + EOF, +} + +impl std::fmt::Display for TokenType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TokenType::String(s) => write!(f, "{}", s), + TokenType::Number(n) => write!(f, "{}", n), + TokenType::Boolean(b) => write!(f, "{}", b), + TokenType::Keyword(k) => write!(f, "{:?}", k), + TokenType::Identifier(i) => write!(f, "{}", i), + TokenType::Symbol(s) => write!(f, "{:?}", s), + TokenType::EOF => write!(f, "EOF"), + } + } +} + +#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)] +pub enum Number { + /// Represents an integer number + Integer(u128), + /// Represents a decimal type number with a precision of 64 bits + Decimal(Decimal), +} + +impl std::fmt::Display for Number { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Number::Integer(i) => write!(f, "{}", i), + Number::Decimal(d) => write!(f, "{}", d.to_string()), + } + } +} + +#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)] +pub enum Symbol { + // Single 
Character Symbols
+    /// Represents the `(` symbol
+    LParen,
+    /// Represents the `)` symbol
+    RParen,
+    /// Represents the `{` symbol
+    LBrace,
+    /// Represents the `}` symbol
+    RBrace,
+    /// Represents the `[` symbol
+    LBracket,
+    /// Represents the `]` symbol
+    RBracket,
+    /// Represents the `;` symbol
+    Semicolon,
+    /// Represents the `:` symbol
+    Colon,
+    /// Represents the `,` symbol
+    Comma,
+    /// Represents the `+` symbol
+    Plus,
+    /// Represents the `-` symbol
+    Minus,
+    /// Represents the `*` symbol
+    Asterisk,
+    /// Represents the `/` symbol
+    Slash,
+    /// Represents the `<` symbol
+    LessThan,
+    /// Represents the `>` symbol
+    GreaterThan,
+    /// Represents the `=` symbol
+    Assign,
+    /// Represents the `!` symbol
+    LogicalNot,
+    /// Represents the `.` symbol
+    Dot,
+    /// Represents the `^` symbol
+    Caret,
+
+    // Double Character Symbols
+    /// Represents the `==` symbol
+    Equal,
+    /// Represents the `!=` symbol
+    NotEqual,
+    /// Represents the `&&` symbol
+    LogicalAnd,
+    /// Represents the `||` symbol
+    LogicalOr,
+    /// Represents the `<=` symbol
+    LessThanOrEqual,
+    /// Represents the `>=` symbol
+    GreaterThanOrEqual,
+    /// Represents the `**` symbol
+    Exp,
+}
+
+impl Symbol {
+    pub fn is_operator(&self) -> bool {
+        match self {
+            Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp => true,
+            _ => false,
+        }
+    }
+
+    pub fn is_comparison(&self) -> bool {
+        match self {
+            Symbol::LessThan
+            | Symbol::GreaterThan
+            | Symbol::Equal
+            | Symbol::NotEqual
+            | Symbol::LessThanOrEqual
+            | Symbol::GreaterThanOrEqual => true,
+            _ => false,
+        }
+    }
+
+    pub fn is_logical(&self) -> bool {
+        match self {
+            Symbol::LogicalAnd | Symbol::LogicalOr => true,
+            _ => false,
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)]
+pub enum Keyword {
+    /// Represents the `let` keyword
+    Let,
+    /// Represents the `fn` keyword
+    Fn,
+    /// Represents the `if` keyword
+    If,
+    /// Represents the `device` keyword. Useful for defining a device at a specific address (ex. d0, d1, d2, etc.)
+    Device,
+    /// Represents the `else` keyword
+    Else,
+    /// Represents the `return` keyword
+    Return,
+    /// Represents the `enum` keyword
+    Enum,
+    /// Represents the `loop` keyword
+    Loop,
+    /// Represents the `break` keyword
+    Break,
+}
diff --git a/tests/file.stlg b/libs/tokenizer/tests/file.stlg
similarity index 96%
rename from tests/file.stlg
rename to libs/tokenizer/tests/file.stlg
index 1532af2..dd186bf 100644
--- a/tests/file.stlg
+++ b/libs/tokenizer/tests/file.stlg
@@ -6,4 +6,4 @@ let roomTemperatureMin = 20c;
 
 let roomTemperatureMax = 30c;
 
-let averageTemperature = (roomTemperatureMax + roomTemperatureMin) / 2;
\ No newline at end of file
+let averageTemperature = (roomTemperatureMax + roomTemperatureMin) / 2;
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 643dd6a..a2578e5 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -1,6 +1,5 @@
 pub mod token;
 
-use crate::boxed;
 use rust_decimal::Decimal;
 use std::{
     cmp::Ordering,
@@ -50,7 +49,7 @@ pub(crate) struct Tokenizer {
 impl Tokenizer {
     pub fn from_path(input_file: impl Into<PathBuf>) -> Result<Self, TokenizerError> {
         let file = std::fs::File::open(input_file.into())?;
-        let reader = BufReader::new(boxed!(file) as Box<dyn Tokenize>);
+        let reader = BufReader::new(Box::new(file) as Box<dyn Tokenize>);
 
         Ok(Self {
             reader,
@@ -64,7 +63,7 @@ impl Tokenizer {
 
 impl From<String> for Tokenizer {
     fn from(input: String) -> Self {
-        let reader = BufReader::new(boxed!(Cursor::new(input)) as Box<dyn Tokenize>);
+        let reader = BufReader::new(Box::new(Cursor::new(input)) as Box<dyn Tokenize>);
 
         Self {
             reader,

From a28c70add7be61164d5f98a39b3a4637be2cd85e Mon Sep 17 00:00:00 2001
From: Devin Bidwell
Date: Thu, 12 Jun 2025 21:22:58 -0700
Subject: [PATCH 2/6] wip

---
 Cargo.lock                  |   1 +
 Cargo.toml                  |   1 +
 libs/tokenizer/src/lib.rs   |  31 +-
 libs/tokenizer/src/token.rs |  31 +-
 src/compiler/mod.rs         |  42 +-
 src/main.rs                 |   6 +-
 src/parser/mod.rs           |  83 ++--
 src/parser/sys_call.rs      |  30 +-
 src/parser/tree_node.rs     |  53 +--
 src/tokenizer/mod.rs        | 895 ------------------------------------
 src/tokenizer/token.rs      | 221 ---------
 11 files changed, 131 insertions(+), 1263 deletions(-)
 delete mode 100644 src/tokenizer/mod.rs
 delete mode 100644 src/tokenizer/token.rs

diff --git a/Cargo.lock b/Cargo.lock
index 94811fd..a6b511d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -602,6 +602,7 @@ dependencies = [
  "clap",
  "quick-error",
  "rust_decimal",
+ "tokenizer",
 ]
 
diff --git a/Cargo.toml b/Cargo.toml
index b7059ca..198c80a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@ path = "src/main.rs"
 clap = { version = "^4.5", features = ["derive"] }
 quick-error = { workspace = true }
 rust_decimal = { workspace = true }
+tokenizer = { path = "libs/tokenizer" }
 
 [dev-dependencies]
 anyhow = { version = "^1.0", features = ["backtrace"] }
diff --git a/libs/tokenizer/src/lib.rs b/libs/tokenizer/src/lib.rs
index 7410c14..47b2c17 100644
--- a/libs/tokenizer/src/lib.rs
+++ b/libs/tokenizer/src/lib.rs
@@ -39,7 +39,7 @@ pub trait Tokenize: Read + Seek {}
 
 impl<T> Tokenize for T where T: Read + Seek {}
 
-pub(crate) struct Tokenizer {
+pub struct Tokenizer {
     reader: BufReader<Box<dyn Tokenize>>,
     char_buffer: [u8; 1],
     line: usize,
@@ -185,8 +185,8 @@ impl Tokenizer {
     /// If there are no more tokens in the stream, this function returns None
     pub fn peek_next(&mut self) -> Result<Option<Token>, TokenizerError> {
         let current_pos = self.reader.stream_position()?;
-        let column = self.column.clone();
-        let line = self.line.clone();
+        let column = self.column;
+        let line = self.line;
 
         let token = self.next_token()?;
         self.reader.seek(SeekFrom::Start(current_pos))?;
@@ -280,8 +280,8 @@ impl Tokenizer {
         let mut decimal: Option<String> = None;
         let mut reading_decimal = false;
 
-        let column = self.column.clone();
-        let line = self.line.clone();
+        let column = self.column;
+        let line = self.line;
 
         primary.push(first_char);
 
@@ -353,8 +353,8 @@ impl Tokenizer {
     fn tokenize_string(&mut self, beginning_quote: char) -> Result<Token, TokenizerError> {
         let mut buffer = String::with_capacity(16);
 
-        let column = self.column.clone();
-        let line = self.line.clone();
+        let column = self.column;
+        let line = self.line;
 
         while let Some(next_char) = self.next_char()? {
             if next_char == beginning_quote {
@@ -385,13 +385,13 @@ impl Tokenizer {
         /// Helper macro to check if the next character is whitespace or not alphanumeric
         macro_rules! next_ws {
             () => {
-                matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || matches!(self.peek_next_char()?, None)
+                matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || self.peek_next_char()?.is_none()
             };
         }
 
         let mut buffer = String::with_capacity(16);
-        let line = self.line.clone();
-        let column = self.column.clone();
+        let line = self.line;
+        let column = self.column;
 
         let mut looped_char = Some(first_char);
 
         while let Some(next_char) = looped_char {
@@ -464,7 +464,7 @@ impl TokenizerBuffer {
     /// Reads the next token from the tokenizer, pushing the value to the back of the history
     /// and returning the token
-    pub fn next(&mut self) -> Result<Option<Token>, TokenizerError> {
+    pub fn next_token(&mut self) -> Result<Option<Token>, TokenizerError> {
         if let Some(token) = self.buffer.pop_front() {
             self.history.push_back(token.clone());
             return Ok(Some(token));
@@ -561,12 +561,12 @@ mod tests {
         let tokenizer = Tokenizer::from(TEST_STRING.to_owned());
         let mut buffer = TokenizerBuffer::new(tokenizer);
 
-        let token = buffer.next()?.unwrap();
+        let token = buffer.next_token()?.unwrap();
         assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn));
 
         buffer.seek(SeekFrom::Current(1))?;
 
-        let token = buffer.next()?.unwrap();
+        let token = buffer.next_token()?.unwrap();
 
         assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen));
 
@@ -870,8 +870,8 @@ mod tests {
     fn test_peek_next() -> Result<()> {
         let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned());
 
-        let column = tokenizer.column.clone();
-        let line = tokenizer.line.clone();
+        let column = tokenizer.column;
+        let line = tokenizer.line;
 
         let peeked_token = tokenizer.peek_next()?;
 
@@ -894,4 +894,3 @@ mod tests {
         Ok(())
     }
 }
-
diff --git a/libs/tokenizer/src/token.rs b/libs/tokenizer/src/token.rs
index 5e1c970..9e31ca5 100644
--- a/libs/tokenizer/src/token.rs
+++ b/libs/tokenizer/src/token.rs
@@ -106,7 +106,7 @@ impl std::fmt::Display for Number {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Number::Integer(i) => write!(f, "{}", i),
-            Number::Decimal(d) => write!(f, "{}", d.to_string()),
+            Number::Decimal(d) => write!(f, "{}", d),
         }
     }
 }
@@ -172,29 +172,26 @@ pub enum Symbol {
 impl Symbol {
     pub fn is_operator(&self) -> bool {
-        match self {
-            Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp => true,
-            _ => false,
-        }
+        matches!(
+            self,
+            Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp
+        )
     }
 
     pub fn is_comparison(&self) -> bool {
-        match self {
+        matches!(
+            self,
             Symbol::LessThan
-            | Symbol::GreaterThan
-            | Symbol::Equal
-            | Symbol::NotEqual
-            | Symbol::LessThanOrEqual
-            | Symbol::GreaterThanOrEqual => true,
-            _ => false,
-        }
+            | Symbol::GreaterThan
+            | Symbol::Equal
+            | Symbol::NotEqual
+            | Symbol::LessThanOrEqual
+            | Symbol::GreaterThanOrEqual,
+        )
     }
 
     pub fn
is_logical(&self) -> bool { - match self { - Symbol::LogicalAnd | Symbol::LogicalOr => true, - _ => false, - } + matches!(self, Symbol::LogicalAnd | Symbol::LogicalOr) } } diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index f547ba0..b7ba4a1 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -127,17 +127,14 @@ impl<'a> Compiler<'a> { fn expression(&mut self, expression: Expression) -> Result<(), CompileError> { match expression { - Expression::FunctionExpression(expr) => self.function_expression(expr)?, - Expression::BlockExpression(expr) => self.block_expression(expr)?, - Expression::InvocationExpression(expr) => self.invocation_expression(expr)?, - Expression::BinaryExpression(expr) => self.binary_expression(expr)?, - Expression::DeclarationExpression(var_name, expr) => { + Expression::Function(expr) => self.function_expression(expr)?, + Expression::Block(expr) => self.block_expression(expr)?, + Expression::Invocation(expr) => self.invocation_expression(expr)?, + Expression::Binary(expr) => self.binary_expression(expr)?, + Expression::Declaration(var_name, expr) => { self.declaration_expression(&var_name, *expr)? } - Expression::DeviceDeclarationExpression(DeviceDeclarationExpression { - name, - device, - }) => { + Expression::DeviceDeclaration(DeviceDeclarationExpression { name, device }) => { self.devices.insert(name, device); } _ => todo!("{:?}", expression), @@ -156,11 +153,11 @@ impl<'a> Compiler<'a> { self.push_stack(var_name)?; self.write_output(format!("push {num}"))?; } - Expression::BinaryExpression(expr) => { + Expression::Binary(expr) => { self.binary_expression(expr)?; self.push_stack(var_name)?; } - Expression::SyscallExpression(expr) => { + Expression::Syscall(expr) => { self.syscall_declaration_expression(expr)?; self.push_stack(var_name)?; } @@ -172,6 +169,7 @@ impl<'a> Compiler<'a> { fn syscall_declaration_expression(&mut self, expr: SysCall) -> Result<(), CompileError> { use crate::parser::sys_call::System; + #[allow(clippy::collapsible_match)] match expr { SysCall::System(ref sys) => match sys { System::LoadFromDevice(LiteralOrVariable::Variable(device), value) => { @@ -212,12 +210,12 @@ impl<'a> Compiler<'a> { compiler.write_output("push r15")?; compiler.push_stack(&format!("{op}ExpressionLeft"))?; } - Expression::BinaryExpression(expr) => { + Expression::Binary(expr) => { compiler.binary_expression(expr)?; compiler.push_stack(&format!("{op}ExpressionLeft"))?; } - Expression::PriorityExpression(expr) => match *expr { - Expression::BinaryExpression(expr) => { + Expression::Priority(expr) => match *expr { + Expression::Binary(expr) => { compiler.binary_expression(expr)?; compiler.push_stack(&format!("{op}ExpressionLeft"))?; } @@ -238,12 +236,12 @@ impl<'a> Compiler<'a> { compiler.write_output("push r15")?; compiler.push_stack(&format!("{op}ExpressionRight"))?; } - Expression::BinaryExpression(expr) => { + Expression::Binary(expr) => { compiler.binary_expression(expr)?; compiler.push_stack(&format!("{op}ExpressionRight"))?; } - Expression::PriorityExpression(expr) => match *expr { - Expression::BinaryExpression(expr) => { + Expression::Priority(expr) => match *expr { + Expression::Binary(expr) => { compiler.binary_expression(expr)?; compiler.push_stack(&format!("{op}ExpressionRight"))?; } @@ -304,7 +302,7 @@ impl<'a> Compiler<'a> { to_write.push_str("get r15 db r15\n"); to_write.push_str("push r15\n"); } - Expression::BinaryExpression(expr) => { + Expression::Binary(expr) => { self.binary_expression(expr)?; to_write.push_str("push r0\n"); } @@ -353,11 
+351,9 @@ impl<'a> Compiler<'a> { // hoist functions to the top of the block expression.0.sort_by(|a, b| { - if matches!(a, Expression::FunctionExpression(_)) - && matches!(b, Expression::FunctionExpression(_)) - { + if matches!(a, Expression::Function(_)) && matches!(b, Expression::Function(_)) { Ordering::Equal - } else if matches!(a, Expression::FunctionExpression(_)) { + } else if matches!(a, Expression::Function(_)) { Ordering::Less } else { Ordering::Greater @@ -366,7 +362,7 @@ impl<'a> Compiler<'a> { for expr in expression.0 { // if we haven't declared main yet and we have already declared all the function expressions, declare main - if !self.declared_main && !matches!(expr, Expression::FunctionExpression(_)) { + if !self.declared_main && !matches!(expr, Expression::Function(_)) { self.write_output("main:")?; self.declared_main = true; } diff --git a/src/main.rs b/src/main.rs index 4c20682..04f12ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,6 @@ extern crate quick_error; mod compiler; mod parser; -mod tokenizer; use clap::Parser; use compiler::Compiler; @@ -11,6 +10,7 @@ use parser::Parser as ASTParser; use std::{ fs::File, io::{BufWriter, Read, Write}, + path::PathBuf, }; use tokenizer::{Tokenizer, TokenizerError}; @@ -49,10 +49,10 @@ quick_error! { struct Args { /// What file should be compiled. If not set, input will be read from stdin. #[arg(short, long)] - input_file: Option, + input_file: Option, /// The output file for the compiled program. If not set, output will go to stdout. #[arg(short, long)] - output_file: Option, + output_file: Option, } fn run_logic() -> Result<(), StationlangError> { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 99d4e3e..fbf3124 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,15 +1,13 @@ pub mod sys_call; pub mod tree_node; -use crate::{ - boxed, - tokenizer::{ - token::{Keyword, Symbol, Token, TokenType}, - Tokenizer, TokenizerBuffer, TokenizerError, - }, -}; +use crate::boxed; use std::io::SeekFrom; use sys_call::SysCall; +use tokenizer::{ + token::{Keyword, Symbol, Token, TokenType}, + Tokenizer, TokenizerBuffer, TokenizerError, +}; use tree_node::*; quick_error! { @@ -119,9 +117,7 @@ impl Parser { expressions.push(expression); } - Ok(Some(Expression::BlockExpression(BlockExpression( - expressions, - )))) + Ok(Some(Expression::Block(BlockExpression(expressions)))) } /// Parses the input from the tokenizer buffer and returns the resulting expression @@ -138,7 +134,7 @@ impl Parser { /// Assigns the next token in the tokenizer buffer to the current token fn assign_next(&mut self) -> Result<(), ParseError> { - self.current_token = self.tokenizer.next()?; + self.current_token = self.tokenizer.next_token()?; Ok(()) } @@ -174,43 +170,41 @@ impl Parser { // match declarations with a `let` keyword TokenType::Keyword(Keyword::Let) => self.declaration()?, - TokenType::Keyword(Keyword::Device) => { - Expression::DeviceDeclarationExpression(self.device()?) - } + TokenType::Keyword(Keyword::Device) => Expression::DeviceDeclaration(self.device()?), // match functions with a `fn` keyword - TokenType::Keyword(Keyword::Fn) => Expression::FunctionExpression(self.function()?), + TokenType::Keyword(Keyword::Fn) => Expression::Function(self.function()?), // match syscalls with a `syscall` keyword TokenType::Identifier(ref id) if SysCall::is_syscall(id) => { - Expression::SyscallExpression(self.syscall()?) + Expression::Syscall(self.syscall()?) 
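                // `SysCall::is_syscall` gates this arm, so built-in names such as
                // `sleep`, `HASH`, or `loadFromDevice` (see the list in sys_call.rs
                // further down this patch) parse as syscall expressions here, while
                // any other identifier falls through to the invocation, assignment,
                // and variable arms below.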
} // match a variable expression with opening parenthesis TokenType::Identifier(_) if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => { - Expression::InvocationExpression(self.invocation()?) + Expression::Invocation(self.invocation()?) } // match a variable expression with an assignment TokenType::Identifier(_) if self_matches_peek!(self, TokenType::Symbol(Symbol::Assign)) => { - Expression::AssignmentExpression(self.assignment()?) + Expression::Assignment(self.assignment()?) } // match variable expressions with an identifier TokenType::Identifier(ref id) => Expression::Variable(id.clone()), // match block expressions with a `{` symbol - TokenType::Symbol(Symbol::LBrace) => Expression::BlockExpression(self.block()?), + TokenType::Symbol(Symbol::LBrace) => Expression::Block(self.block()?), // match literal expressions with a semi-colon afterwards TokenType::Number(_) | TokenType::String(_) => Expression::Literal(self.literal()?), // match priority expressions with a left parenthesis - TokenType::Symbol(Symbol::LParen) => Expression::PriorityExpression(self.priority()?), + TokenType::Symbol(Symbol::LParen) => Expression::Priority(self.priority()?), _ => { return Err(ParseError::UnexpectedToken(current_token.clone())); @@ -223,13 +217,13 @@ impl Parser { // check if the next or current token is an operator if self_matches_peek!(self, TokenType::Symbol(s) if s.is_operator()) { - return Ok(Some(Expression::BinaryExpression(self.binary(expr)?))); + return Ok(Some(Expression::Binary(self.binary(expr)?))); } // This is an edge case. We need to move back one token if the current token is an operator // so the binary expression can pick up the operator else if self_matches_current!(self, TokenType::Symbol(s) if s.is_operator()) { self.tokenizer.seek(SeekFrom::Current(-1))?; - return Ok(Some(Expression::BinaryExpression(self.binary(expr)?))); + return Ok(Some(Expression::Binary(self.binary(expr)?))); } Ok(Some(expr)) @@ -248,14 +242,12 @@ impl Parser { Ok(Expression::Variable(ident)) } // A priority expression ( -> (1 + 2) <- + 3 ) - TokenType::Symbol(Symbol::LParen) => { - self.priority().map(Expression::PriorityExpression) - } + TokenType::Symbol(Symbol::LParen) => self.priority().map(Expression::Priority), // A function invocation TokenType::Identifier(_) if self_matches_peek!(self, TokenType::Symbol(Symbol::LParen)) => { - self.invocation().map(Expression::InvocationExpression) + self.invocation().map(Expression::Invocation) } _ => Err(ParseError::UnexpectedToken(current_token.clone())), } @@ -322,9 +314,9 @@ impl Parser { // first, make sure the previous expression supports binary expressions match previous { - Expression::BinaryExpression(_) // 1 + 2 + 3 - | Expression::InvocationExpression(_) // add() + 3 - | Expression::PriorityExpression(_) // (1 + 2) + 3 + Expression::Binary(_) // 1 + 2 + 3 + | Expression::Invocation(_) // add() + 3 + | Expression::Priority(_) // (1 + 2) + 3 | Expression::Literal(Literal::Number(_)) // 1 + 2 (no addition of strings) | Expression::Variable(_) // x + 2 | Expression::Negation(_) // -1 + 2 @@ -371,10 +363,7 @@ impl Parser { let right = expressions.remove(index); expressions.insert( index, - Expression::BinaryExpression(BinaryExpression::Exponent( - boxed!(left), - boxed!(right), - )), + Expression::Binary(BinaryExpression::Exponent(boxed!(left), boxed!(right))), ); current_iteration += 1; } @@ -394,17 +383,11 @@ impl Parser { match operator { Symbol::Asterisk => expressions.insert( index, - Expression::BinaryExpression(BinaryExpression::Multiply( 
- boxed!(left), - boxed!(right), - )), + Expression::Binary(BinaryExpression::Multiply(boxed!(left), boxed!(right))), ), Symbol::Slash => expressions.insert( index, - Expression::BinaryExpression(BinaryExpression::Divide( - boxed!(left), - boxed!(right), - )), + Expression::Binary(BinaryExpression::Divide(boxed!(left), boxed!(right))), ), // safety: we have already checked for the operator _ => unreachable!(), @@ -427,17 +410,11 @@ impl Parser { match operator { Symbol::Plus => expressions.insert( index, - Expression::BinaryExpression(BinaryExpression::Add( - boxed!(left), - boxed!(right), - )), + Expression::Binary(BinaryExpression::Add(boxed!(left), boxed!(right))), ), Symbol::Minus => expressions.insert( index, - Expression::BinaryExpression(BinaryExpression::Subtract( - boxed!(left), - boxed!(right), - )), + Expression::Binary(BinaryExpression::Subtract(boxed!(left), boxed!(right))), ), // safety: we have already checked for the operator _ => unreachable!(), @@ -467,7 +444,7 @@ impl Parser { // Ensure the last expression is a binary expression match expressions.pop().unwrap() { - Expression::BinaryExpression(binary) => Ok(binary), + Expression::Binary(binary) => Ok(binary), _ => unreachable!(), } } @@ -512,7 +489,7 @@ impl Parser { let current_token = token_from_option!(self.current_token); let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?; - if let Expression::BlockExpression(_) = expression { + if let Expression::Block(_) = expression { return Err(ParseError::InvalidSyntax( current_token, String::from("Block expressions are not allowed in function invocations"), @@ -568,7 +545,7 @@ impl Parser { if token_matches!(current_token, TokenType::Keyword(Keyword::Return)) { self.assign_next()?; let expression = self.expression()?.ok_or(ParseError::UnexpectedEOF)?; - let return_expr = Expression::ReturnExpression(boxed!(expression)); + let return_expr = Expression::Return(boxed!(expression)); expressions.push(return_expr); self.assign_next()?; } @@ -604,7 +581,7 @@ impl Parser { return Err(ParseError::UnexpectedToken(current_token.clone())); } - Ok(Expression::DeclarationExpression( + Ok(Expression::Declaration( identifier, boxed!(assignment_expression), )) diff --git a/src/parser/sys_call.rs b/src/parser/sys_call.rs index 8d6e688..eec5ade 100644 --- a/src/parser/sys_call.rs +++ b/src/parser/sys_call.rs @@ -151,11 +151,29 @@ impl std::fmt::Display for SysCall { impl SysCall { pub fn is_syscall(identifier: &str) -> bool { - match identifier { - "yield" | "sleep" | "HASH" | "loadFromDevice" | "setOnDevice" => true, - "acos" | "asin" | "atan" | "atan2" | "abs" | "ceil" | "cos" | "floor" | "log" - | "max" | "min" | "rand" | "sin" | "sqrt" | "tan" | "trunc" => true, - _ => false, - } + matches!( + identifier, + "yield" + | "sleep" + | "HASH" + | "loadFromDevice" + | "setOnDevice" + | "acos" + | "asin" + | "atan" + | "atan2" + | "abs" + | "ceil" + | "cos" + | "floor" + | "log" + | "max" + | "min" + | "rand" + | "sin" + | "sqrt" + | "tan" + | "trunc" + ) } } diff --git a/src/parser/tree_node.rs b/src/parser/tree_node.rs index 0940a83..4cfc112 100644 --- a/src/parser/tree_node.rs +++ b/src/parser/tree_node.rs @@ -1,6 +1,5 @@ -use crate::tokenizer::token::Number; - use super::sys_call::SysCall; +use tokenizer::token::Number; #[derive(Debug, Eq, PartialEq, Clone)] pub enum Literal { @@ -92,11 +91,7 @@ impl std::fmt::Display for FunctionExpression { f, "(fn {}({}) {{ {} }})", self.name, - self.arguments - .iter() - .cloned() - .collect::>() - .join(", "), + 
self.arguments.to_vec().join(", "), self.body ) } @@ -171,20 +166,20 @@ impl std::fmt::Display for DeviceDeclarationExpression { #[derive(Debug, PartialEq, Eq)] pub enum Expression { - AssignmentExpression(AssignmentExpression), - BinaryExpression(BinaryExpression), - BlockExpression(BlockExpression), - DeclarationExpression(String, Box), - FunctionExpression(FunctionExpression), - InvocationExpression(InvocationExpression), + Assignment(AssignmentExpression), + Binary(BinaryExpression), + Block(BlockExpression), + Declaration(String, Box), + Function(FunctionExpression), + Invocation(InvocationExpression), Literal(Literal), - LogicalExpression(LogicalExpression), + Logical(LogicalExpression), Negation(Box), - PriorityExpression(Box), - ReturnExpression(Box), + Priority(Box), + Return(Box), Variable(String), - DeviceDeclarationExpression(DeviceDeclarationExpression), - SyscallExpression(SysCall), + DeviceDeclaration(DeviceDeclarationExpression), + Syscall(SysCall), } impl std::fmt::Display for Expression { @@ -192,18 +187,18 @@ impl std::fmt::Display for Expression { match self { Expression::Literal(l) => write!(f, "{}", l), Expression::Negation(e) => write!(f, "(-{})", e), - Expression::BinaryExpression(e) => write!(f, "{}", e), - Expression::LogicalExpression(e) => write!(f, "{}", e), - Expression::AssignmentExpression(e) => write!(f, "{}", e), - Expression::DeclarationExpression(id, e) => write!(f, "(let {} = {})", id, e), - Expression::FunctionExpression(e) => write!(f, "{}", e), - Expression::BlockExpression(e) => write!(f, "{}", e), - Expression::InvocationExpression(e) => write!(f, "{}", e), + Expression::Binary(e) => write!(f, "{}", e), + Expression::Logical(e) => write!(f, "{}", e), + Expression::Assignment(e) => write!(f, "{}", e), + Expression::Declaration(id, e) => write!(f, "(let {} = {})", id, e), + Expression::Function(e) => write!(f, "{}", e), + Expression::Block(e) => write!(f, "{}", e), + Expression::Invocation(e) => write!(f, "{}", e), Expression::Variable(id) => write!(f, "{}", id), - Expression::PriorityExpression(e) => write!(f, "({})", e), - Expression::ReturnExpression(e) => write!(f, "(return {})", e), - Expression::DeviceDeclarationExpression(e) => write!(f, "{}", e), - Expression::SyscallExpression(e) => write!(f, "{}", e), + Expression::Priority(e) => write!(f, "({})", e), + Expression::Return(e) => write!(f, "(return {})", e), + Expression::DeviceDeclaration(e) => write!(f, "{}", e), + Expression::Syscall(e) => write!(f, "{}", e), } } } diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs deleted file mode 100644 index a2578e5..0000000 --- a/src/tokenizer/mod.rs +++ /dev/null @@ -1,895 +0,0 @@ -pub mod token; - -use rust_decimal::Decimal; -use std::{ - cmp::Ordering, - collections::VecDeque, - io::{BufReader, Cursor, Read, Seek, SeekFrom}, - path::PathBuf, -}; -use token::{Keyword, Number, Symbol, Temperature, Token, TokenType}; - -quick_error! 
{ - #[derive(Debug)] - pub enum TokenizerError { - IOError(err: std::io::Error) { - from() - display("IO Error: {}", err) - source(err) - } - NumberParseError(err: std::num::ParseIntError, line: usize, column: usize) { - display("Number Parse Error: {}\nLine: {}, Column: {}", err, line, column) - source(err) - } - DecimalParseError(err: rust_decimal::Error, line: usize, column: usize) { - display("Decimal Parse Error: {}\nLine: {}, Column: {}", err, line, column) - source(err) - } - UnknownSymbolError(char: char, line: usize, column: usize) { - display("Unknown Symbol: {}\nLine: {}, Column: {}", char, line, column) - } - UnknownKeywordOrIdentifierError(val: String, line: usize, column: usize) { - display("Unknown Keyword or Identifier: {}\nLine: {}, Column: {}", val, line, column) - } - } -} - -pub trait Tokenize: Read + Seek {} - -impl Tokenize for T where T: Read + Seek {} - -pub(crate) struct Tokenizer { - reader: BufReader>, - char_buffer: [u8; 1], - line: usize, - column: usize, - returned_eof: bool, -} - -impl Tokenizer { - pub fn from_path(input_file: impl Into) -> Result { - let file = std::fs::File::open(input_file.into())?; - let reader = BufReader::new(Box::new(file) as Box); - - Ok(Self { - reader, - line: 1, - column: 1, - char_buffer: [0], - returned_eof: false, - }) - } -} - -impl From for Tokenizer { - fn from(input: String) -> Self { - let reader = BufReader::new(Box::new(Cursor::new(input)) as Box); - - Self { - reader, - line: 1, - column: 1, - char_buffer: [0], - returned_eof: false, - } - } -} - -impl Tokenizer { - /// Consumes the tokenizer and returns the next token in the stream - /// If there are no more tokens in the stream, this function returns None - /// If there is an error reading the stream, this function returns an error - /// - /// # Important - /// This function will increment the line and column counters - fn next_char(&mut self) -> Result, TokenizerError> { - let bytes_read = self.reader.read(&mut self.char_buffer)?; - - if bytes_read == 0 { - return Ok(None); - } - - // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1 - let c = self.char_buffer[0] as char; - if c == '\n' { - self.line += 1; - self.column = 1; - } else { - self.column += 1; - } - - Ok(Some(c)) - } - - /// Peeks the next character in the stream without consuming it - /// - /// # Important - /// This does not increment the line or column counters - fn peek_next_char(&mut self) -> Result, TokenizerError> { - let current_pos = self.reader.stream_position()?; - - let to_return = if self.reader.read(&mut self.char_buffer)? == 0 { - None - } else { - self.reader.seek(SeekFrom::Start(current_pos))?; - - // Safety: The buffer is guaranteed to have 1 value as it is initialized with a size of 1 - Some(self.char_buffer[0] as char) - }; - - Ok(to_return) - } - - /// Skips the current line in the stream. - /// Useful for skipping comments or empty lines - /// - /// # Important - /// This function will increment the line and column counters - fn skip_line(&mut self) -> Result<(), TokenizerError> { - while let Some(next_char) = self.next_char()? { - if next_char == '\n' { - break; - } - } - Ok(()) - } - - /// Consumes the tokenizer and returns the next token in the stream - /// If there are no more tokens in the stream, this function returns None - pub fn next_token(&mut self) -> Result, TokenizerError> { - while let Some(next_char) = self.next_char()? 
{ - // skip whitespace - if next_char.is_whitespace() { - continue; - } - // skip comments - if next_char == '/' && self.peek_next_char()? == Some('/') { - self.skip_line()?; - continue; - } - - match next_char { - // numbers - '0'..='9' => { - return self.tokenize_number(next_char).map(Some); - } - // strings - '"' | '\'' => return self.tokenize_string(next_char).map(Some), - // symbols excluding `"` and `'` - char if !char.is_alphanumeric() && char != '"' && char != '\'' => { - return self.tokenize_symbol(next_char).map(Some) - } - // keywords and identifiers - char if char.is_alphabetic() => { - return self.tokenize_keyword_or_identifier(next_char).map(Some) - } - _ => { - return Err(TokenizerError::UnknownSymbolError( - next_char, - self.line, - self.column, - )) - } - } - } - if self.returned_eof { - Ok(None) - } else { - self.returned_eof = true; - Ok(Some(Token::new(TokenType::EOF, self.line, self.column))) - } - } - - /// Peeks the next token in the stream without consuming it - /// If there are no more tokens in the stream, this function returns None - pub fn peek_next(&mut self) -> Result, TokenizerError> { - let current_pos = self.reader.stream_position()?; - let column = self.column.clone(); - let line = self.line.clone(); - - let token = self.next_token()?; - self.reader.seek(SeekFrom::Start(current_pos))?; - self.column = column; - self.line = line; - Ok(token) - } - - /// Tokenizes a symbol - fn tokenize_symbol(&mut self, first_symbol: char) -> Result { - /// Helper macro to create a symbol token - macro_rules! symbol { - ($symbol:ident) => { - Ok(Token::new( - TokenType::Symbol(Symbol::$symbol), - self.line, - self.column, - )) - }; - } - - match first_symbol { - // single character symbols - '(' => symbol!(LParen), - ')' => symbol!(RParen), - '{' => symbol!(LBrace), - '}' => symbol!(RBrace), - '[' => symbol!(LBracket), - ']' => symbol!(RBracket), - ';' => symbol!(Semicolon), - ':' => symbol!(Colon), - ',' => symbol!(Comma), - '+' => symbol!(Plus), - '-' => symbol!(Minus), - '/' => symbol!(Slash), - - '.' => symbol!(Dot), - '^' => symbol!(Caret), - - // multi-character symbols - '<' if self.peek_next_char()? == Some('=') => { - self.next_char()?; - symbol!(LessThanOrEqual) - } - '<' => symbol!(LessThan), - - '>' if self.peek_next_char()? == Some('=') => { - self.next_char()?; - symbol!(GreaterThanOrEqual) - } - '>' => symbol!(GreaterThan), - - '=' if self.peek_next_char()? == Some('=') => { - self.next_char()?; - symbol!(Equal) - } - '=' => symbol!(Assign), - - '!' if self.peek_next_char()? == Some('=') => { - self.next_char()?; - symbol!(NotEqual) - } - '!' => symbol!(LogicalNot), - - '*' if self.peek_next_char()? == Some('*') => { - self.next_char()?; - symbol!(Exp) - } - '*' => symbol!(Asterisk), - - '&' if self.peek_next_char()? == Some('&') => { - self.next_char()?; - symbol!(LogicalAnd) - } - '|' if self.peek_next_char()? == Some('|') => { - self.next_char()?; - symbol!(LogicalOr) - } - - _ => Err(TokenizerError::UnknownSymbolError( - first_symbol, - self.line, - self.column, - )), - } - } - - /// Tokenizes a number literal. Also handles temperatures with a suffix of `c`, `f`, or `k`. - fn tokenize_number(&mut self, first_char: char) -> Result { - let mut primary = String::with_capacity(16); - let mut decimal: Option = None; - let mut reading_decimal = false; - - let column = self.column.clone(); - let line = self.line.clone(); - - primary.push(first_char); - - while let Some(next_char) = self.peek_next_char()? 
{ - if next_char.is_whitespace() { - break; - } - - if next_char == '.' { - reading_decimal = true; - self.next_char()?; - continue; - } - - // support underscores in numbers for readability - if next_char == '_' { - self.next_char()?; - continue; - } - - // This is for the times when we have a number followed by a symbol (like a semicolon or =) - if !next_char.is_numeric() { - break; - } - - if reading_decimal { - decimal.get_or_insert_with(String::new).push(next_char); - } else { - primary.push(next_char); - } - self.next_char()?; - } - - let number: Number = if let Some(decimal) = decimal { - let decimal_scale = decimal.len() as u32; - let number = format!("{}{}", primary, decimal) - .parse::() - .map_err(|e| TokenizerError::NumberParseError(e, self.line, self.column))?; - Number::Decimal( - Decimal::try_from_i128_with_scale(number, decimal_scale) - .map_err(|e| TokenizerError::DecimalParseError(e, line, column))?, - ) - } else { - Number::Integer( - primary - .parse() - .map_err(|e| TokenizerError::NumberParseError(e, line, column))?, - ) - }; - - // check if the next char is a temperature suffix - if let Some(next_char) = self.peek_next_char()? { - let temperature = match next_char { - 'c' => Temperature::Celsius(number), - 'f' => Temperature::Fahrenheit(number), - 'k' => Temperature::Kelvin(number), - _ => return Ok(Token::new(TokenType::Number(number), line, column)), - } - .to_kelvin(); - - self.next_char()?; - Ok(Token::new(TokenType::Number(temperature), line, column)) - } else { - Ok(Token::new(TokenType::Number(number), line, column)) - } - } - - /// Tokenizes a string literal - fn tokenize_string(&mut self, beginning_quote: char) -> Result { - let mut buffer = String::with_capacity(16); - - let column = self.column.clone(); - let line = self.line.clone(); - - while let Some(next_char) = self.next_char()? { - if next_char == beginning_quote { - break; - } - - buffer.push(next_char); - } - - Ok(Token::new(TokenType::String(buffer), line, column)) - } - - /// Tokenizes a keyword or an identifier. Also handles boolean literals - fn tokenize_keyword_or_identifier( - &mut self, - first_char: char, - ) -> Result { - macro_rules! keyword { - ($keyword:ident) => {{ - return Ok(Token::new( - TokenType::Keyword(Keyword::$keyword), - self.line, - self.column, - )); - }}; - } - - /// Helper macro to check if the next character is whitespace or not alphanumeric - macro_rules! 
next_ws { - () => { - matches!(self.peek_next_char()?, Some(x) if x.is_whitespace() || !x.is_alphanumeric()) || matches!(self.peek_next_char()?, None) - }; - } - - let mut buffer = String::with_capacity(16); - let line = self.line.clone(); - let column = self.column.clone(); - - let mut looped_char = Some(first_char); - - while let Some(next_char) = looped_char { - if next_char.is_whitespace() { - break; - } - - if !next_char.is_alphanumeric() { - break; - } - buffer.push(next_char); - - match buffer.as_str() { - "let" if next_ws!() => keyword!(Let), - "fn" if next_ws!() => keyword!(Fn), - "if" if next_ws!() => keyword!(If), - "else" if next_ws!() => keyword!(Else), - "return" if next_ws!() => keyword!(Return), - "enum" if next_ws!() => keyword!(Enum), - "device" if next_ws!() => keyword!(Device), - "loop" if next_ws!() => keyword!(Loop), - "break" if next_ws!() => keyword!(Break), - - // boolean literals - "true" if next_ws!() => { - return Ok(Token::new(TokenType::Boolean(true), self.line, self.column)) - } - "false" if next_ws!() => { - return Ok(Token::new( - TokenType::Boolean(false), - self.line, - self.column, - )) - } - // if the next character is whitespace or not alphanumeric, then we have an identifier - // this is because keywords are checked first - val if next_ws!() => { - return Ok(Token::new( - TokenType::Identifier(val.to_string()), - line, - column, - )); - } - _ => {} - } - - looped_char = self.next_char()?; - } - Err(TokenizerError::UnknownKeywordOrIdentifierError( - buffer, line, column, - )) - } -} - -pub struct TokenizerBuffer { - tokenizer: Tokenizer, - buffer: VecDeque, - history: VecDeque, -} - -impl TokenizerBuffer { - pub fn new(tokenizer: Tokenizer) -> Self { - Self { - tokenizer, - buffer: VecDeque::new(), - history: VecDeque::with_capacity(128), - } - } - - /// Reads the next token from the tokenizer, pushing the value to the back of the history - /// and returning the token - pub fn next(&mut self) -> Result, TokenizerError> { - if let Some(token) = self.buffer.pop_front() { - self.history.push_back(token.clone()); - return Ok(Some(token)); - } - - let token = self.tokenizer.next_token()?; - if let Some(ref token) = token { - self.history.push_back(token.clone()); - } - Ok(token) - } - - /// Peeks the next token in the stream without adding to the history stack - pub fn peek(&mut self) -> Result, TokenizerError> { - if let Some(token) = self.buffer.front() { - return Ok(Some(token.clone())); - } - - let token = self.tokenizer.peek_next()?; - Ok(token) - } - - fn seek_from_current(&mut self, seek_to: i64) -> Result<(), TokenizerError> { - use Ordering::*; - // if seek_to > 0 then we need to check if the buffer has enough tokens to pop, otherwise we need to read from the tokenizer - // if seek_to < 0 then we need to pop from the history and push to the front of the buffer. If not enough, then we throw (we reached the front of the history) - // if seek_to == 0 then we don't need to do anything - - match seek_to.cmp(&0) { - Greater => { - let mut tokens = Vec::with_capacity(seek_to as usize); - for _ in 0..seek_to { - if let Some(token) = self.tokenizer.next_token()? 
{ - tokens.push(token); - } else { - return Err(TokenizerError::IOError(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EOF", - ))); - } - } - self.history.extend(tokens); - } - Less => { - let seek_to = seek_to.unsigned_abs() as usize; - let mut tokens = Vec::with_capacity(seek_to); - for _ in 0..seek_to { - if let Some(token) = self.history.pop_back() { - tokens.push(token); - } else { - return Err(TokenizerError::IOError(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EOF", - ))); - } - } - self.buffer.extend(tokens.into_iter().rev()); - } - _ => {} - } - - Ok(()) - } - - /// Adds to or removes from the History stack, allowing the user to move back and forth in the stream - pub fn seek(&mut self, from: SeekFrom) -> Result<(), TokenizerError> { - match from { - SeekFrom::Current(seek_to) => self.seek_from_current(seek_to)?, - SeekFrom::End(_) => unimplemented!("SeekFrom::End will not be implemented"), - SeekFrom::Start(_) => unimplemented!("SeekFrom::Start will not be implemented"), - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use anyhow::Result; - use rust_decimal::Decimal; - - const TEST_FILE: &str = "tests/file.stlg"; - - const TEST_STRING: &str = r#" - fn test() { - let x = 10; - return x + 2; - } - "#; - - #[test] - fn test_seek_from_current() -> Result<()> { - let tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - let mut buffer = TokenizerBuffer::new(tokenizer); - - let token = buffer.next()?.unwrap(); - assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn)); - - buffer.seek(SeekFrom::Current(1))?; - - let token = buffer.next()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Symbol(Symbol::LParen)); - - Ok(()) - } - - #[test] - fn test_tokenizer_from_path_ok() { - let tokenizer = Tokenizer::from_path(TEST_FILE); - assert!(tokenizer.is_ok()); - } - - #[test] - fn test_tokenizer_from_path_err() { - let tokenizer = Tokenizer::from_path("non_existent_file.stlg"); - assert!(tokenizer.is_err()); - } - - #[test] - fn test_next_char() -> Result<()> { - let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - - let char = tokenizer.next_char()?; - - assert_eq!(char, Some('\n')); - assert_eq!(tokenizer.line, 2); - assert_eq!(tokenizer.column, 1); - - let mut tokenizer = Tokenizer::from(String::from("fn")); - - let char = tokenizer.next_char()?; - - assert_eq!(char, Some('f')); - assert_eq!(tokenizer.line, 1); - assert_eq!(tokenizer.column, 2); - - Ok(()) - } - - #[test] - fn test_peek_next_char() -> Result<()> { - let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - - let char = tokenizer.peek_next_char()?; - - assert_eq!(char, Some('\n')); - assert_eq!(tokenizer.line, 1); - assert_eq!(tokenizer.column, 1); - - let char = tokenizer.next_char()?; - assert_eq!(char, Some('\n')); - assert_eq!(tokenizer.line, 2); - assert_eq!(tokenizer.column, 1); - - let char = tokenizer.peek_next_char()?; - assert_eq!(char, Some(' ')); - assert_eq!(tokenizer.line, 2); - assert_eq!(tokenizer.column, 1); - - Ok(()) - } - - #[test] - fn test_temperature_unit() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("10c 14f 10k")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::Number(Number::Decimal(Decimal::new(28315, 2))) - ); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::Number(Number::Decimal(Decimal::new(26315, 2))) - ); - - let token = tokenizer.next_token()?.unwrap(); - - 
assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); - - Ok(()) - } - - #[test] - fn test_parse_integer() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("10")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); - - Ok(()) - } - - #[test] - fn test_parse_integer_with_underscore() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("1_000")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Number(Number::Integer(1000))); - - Ok(()) - } - - #[test] - fn test_parse_decimal() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("10.5")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::Number(Number::Decimal(Decimal::new(105, 1))) // 10.5 - ); - - Ok(()) - } - - #[test] - fn test_parse_decimal_with_underscore() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("1_000.000_6")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::Number(Number::Decimal(Decimal::new(10000006, 4))) // 1000.0006 - ); - - Ok(()) - } - - #[test] - fn test_parse_number_with_symbol() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("10;")); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, TokenType::Number(Number::Integer(10))); - - let next_char = tokenizer.next_char()?; - - assert_eq!(next_char, Some(';')); - - Ok(()) - } - - #[test] - fn test_string_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from(r#""Hello, World!""#)); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::String(String::from("Hello, World!")) - ); - - let mut tokenizer = Tokenizer::from(String::from(r#"'Hello, World!'"#)); - - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!( - token.token_type, - TokenType::String(String::from("Hello, World!")) - ); - - Ok(()) - } - - #[test] - fn test_symbol_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from( - "^ ! () [] {} , . 
; : + - * / < > = != && || >= <=**", - )); - - let expected_tokens = vec![ - TokenType::Symbol(Symbol::Caret), - TokenType::Symbol(Symbol::LogicalNot), - TokenType::Symbol(Symbol::LParen), - TokenType::Symbol(Symbol::RParen), - TokenType::Symbol(Symbol::LBracket), - TokenType::Symbol(Symbol::RBracket), - TokenType::Symbol(Symbol::LBrace), - TokenType::Symbol(Symbol::RBrace), - TokenType::Symbol(Symbol::Comma), - TokenType::Symbol(Symbol::Dot), - TokenType::Symbol(Symbol::Semicolon), - TokenType::Symbol(Symbol::Colon), - TokenType::Symbol(Symbol::Plus), - TokenType::Symbol(Symbol::Minus), - TokenType::Symbol(Symbol::Asterisk), - TokenType::Symbol(Symbol::Slash), - TokenType::Symbol(Symbol::LessThan), - TokenType::Symbol(Symbol::GreaterThan), - TokenType::Symbol(Symbol::Assign), - TokenType::Symbol(Symbol::NotEqual), - TokenType::Symbol(Symbol::LogicalAnd), - TokenType::Symbol(Symbol::LogicalOr), - TokenType::Symbol(Symbol::GreaterThanOrEqual), - TokenType::Symbol(Symbol::LessThanOrEqual), - TokenType::Symbol(Symbol::Exp), - ]; - - for expected_token in expected_tokens { - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, expected_token); - } - - Ok(()) - } - - #[test] - fn test_keyword_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("let fn if else return enum")); - - let expected_tokens = vec![ - TokenType::Keyword(Keyword::Let), - TokenType::Keyword(Keyword::Fn), - TokenType::Keyword(Keyword::If), - TokenType::Keyword(Keyword::Else), - TokenType::Keyword(Keyword::Return), - TokenType::Keyword(Keyword::Enum), - ]; - - for expected_token in expected_tokens { - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, expected_token); - } - - Ok(()) - } - - #[test] - fn test_identifier_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("fn test")); - - let token = tokenizer.next_token()?.unwrap(); - assert_eq!(token.token_type, TokenType::Keyword(Keyword::Fn)); - let token = tokenizer.next_token()?.unwrap(); - assert_eq!( - token.token_type, - TokenType::Identifier(String::from("test")) - ); - - Ok(()) - } - - #[test] - fn test_boolean_parse() -> Result<()> { - let mut tokenizer = Tokenizer::from(String::from("true false")); - - let token = tokenizer.next_token()?.unwrap(); - assert_eq!(token.token_type, TokenType::Boolean(true)); - let token = tokenizer.next_token()?.unwrap(); - assert_eq!(token.token_type, TokenType::Boolean(false)); - - Ok(()) - } - - #[test] - fn test_full_source() -> Result<()> { - let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - - let expected_tokens = vec![ - TokenType::Keyword(Keyword::Fn), - TokenType::Identifier(String::from("test")), - TokenType::Symbol(Symbol::LParen), - TokenType::Symbol(Symbol::RParen), - TokenType::Symbol(Symbol::LBrace), - TokenType::Keyword(Keyword::Let), - TokenType::Identifier(String::from("x")), - TokenType::Symbol(Symbol::Assign), - TokenType::Number(Number::Integer(10)), - TokenType::Symbol(Symbol::Semicolon), - TokenType::Keyword(Keyword::Return), - TokenType::Identifier(String::from("x")), - TokenType::Symbol(Symbol::Plus), - TokenType::Number(Number::Integer(2)), - TokenType::Symbol(Symbol::Semicolon), - TokenType::Symbol(Symbol::RBrace), - ]; - - for expected_token in expected_tokens { - let token = tokenizer.next_token()?.unwrap(); - - assert_eq!(token.token_type, expected_token); - } - - Ok(()) - } - - #[test] - fn test_peek_next() -> Result<()> { - let mut tokenizer = Tokenizer::from(TEST_STRING.to_owned()); - - let 
column = tokenizer.column.clone(); - let line = tokenizer.line.clone(); - - let peeked_token = tokenizer.peek_next()?; - - assert_eq!( - peeked_token.unwrap().token_type, - TokenType::Keyword(Keyword::Fn) - ); - assert_eq!(tokenizer.column, column); - assert_eq!(tokenizer.line, line); - - let next_token = tokenizer.next_token()?; - - assert_eq!( - next_token.unwrap().token_type, - TokenType::Keyword(Keyword::Fn) - ); - assert_ne!(tokenizer.column, column); - assert_ne!(tokenizer.line, line); - - Ok(()) - } -} diff --git a/src/tokenizer/token.rs b/src/tokenizer/token.rs deleted file mode 100644 index 5e1c970..0000000 --- a/src/tokenizer/token.rs +++ /dev/null @@ -1,221 +0,0 @@ -use rust_decimal::Decimal; - -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct Token { - /// The type of the token - pub token_type: TokenType, - /// The line where the token was found - pub line: usize, - /// The column where the token was found - pub column: usize, -} - -impl Token { - pub fn new(token_type: TokenType, line: usize, column: usize) -> Self { - Self { - token_type, - line, - column, - } - } -} - -#[derive(Debug, PartialEq, Hash, Eq, Clone)] -pub enum Temperature { - Celsius(Number), - Fahrenheit(Number), - Kelvin(Number), -} - -impl std::fmt::Display for Temperature { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Temperature::Celsius(n) => write!(f, "{}°C", n), - Temperature::Fahrenheit(n) => write!(f, "{}°F", n), - Temperature::Kelvin(n) => write!(f, "{}K", n), - } - } -} - -impl Temperature { - pub fn to_kelvin(self) -> Number { - match self { - Temperature::Celsius(n) => { - let n = match n { - Number::Integer(i) => Decimal::new(i as i64, 0), - Number::Decimal(d) => d, - }; - Number::Decimal(n + Decimal::new(27315, 2)) - } - Temperature::Fahrenheit(n) => { - let n = match n { - Number::Integer(i) => Decimal::new(i as i64, 0), - Number::Decimal(d) => d, - }; - - let a = n - Decimal::new(32, 0); - let b = Decimal::new(5, 0) / Decimal::new(9, 0); - Number::Decimal(a * b + Decimal::new(27315, 2)) - } - Temperature::Kelvin(n) => n, - } - } -} - -#[derive(Debug, PartialEq, Hash, Eq, Clone)] -pub enum TokenType { - /// Represents a string token - String(String), - /// Represents a number token - Number(Number), - /// Represents a boolean token - Boolean(bool), - /// Represents a keyword token - Keyword(Keyword), - /// Represents an identifier token - Identifier(String), - /// Represents a symbol token - Symbol(Symbol), - /// Represents an end of file token - EOF, -} - -impl std::fmt::Display for TokenType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - TokenType::String(s) => write!(f, "{}", s), - TokenType::Number(n) => write!(f, "{}", n), - TokenType::Boolean(b) => write!(f, "{}", b), - TokenType::Keyword(k) => write!(f, "{:?}", k), - TokenType::Identifier(i) => write!(f, "{}", i), - TokenType::Symbol(s) => write!(f, "{:?}", s), - TokenType::EOF => write!(f, "EOF"), - } - } -} - -#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)] -pub enum Number { - /// Represents an integer number - Integer(u128), - /// Represents a decimal type number with a precision of 64 bits - Decimal(Decimal), -} - -impl std::fmt::Display for Number { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Number::Integer(i) => write!(f, "{}", i), - Number::Decimal(d) => write!(f, "{}", d.to_string()), - } - } -} - -#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)] -pub enum Symbol { - // Single Character 
Symbols - /// Represents the `(` symbol - LParen, - /// Represents the `)` symbol - RParen, - /// Represents the `{` symbol - LBrace, - /// Represents the `}` symbol - RBrace, - /// Represents the `[` symbol - LBracket, - /// Represents the `]` symbol - RBracket, - /// Represents the `;` symbol - Semicolon, - /// Represents the `:` symbol - Colon, - /// Represents the `,` symbol - Comma, - /// Represents the `+` symbol - Plus, - /// Represents the `-` symbol - Minus, - /// Represents the `*` symbol - Asterisk, - /// Represents the `/` symbol - Slash, - /// Represents the `<` symbol - LessThan, - /// Represents the `>` symbol - GreaterThan, - /// Represents the `=` symbol - Assign, - /// Represents the `!` symbol - LogicalNot, - /// Represents the `.` symbol - Dot, - /// Represents the `^` symbol - Caret, - - // Double Character Symbols - /// Represents the `==` symbol - Equal, - /// Represents the `!=` symbol - NotEqual, - /// Represents the `&&` Symbol - LogicalAnd, - // Represents the `||` Symbol - LogicalOr, - /// Represents the `<=` symbol - LessThanOrEqual, - /// Represents the `>=` symbol - GreaterThanOrEqual, - /// Represents the `**` symbol - Exp, -} - -impl Symbol { - pub fn is_operator(&self) -> bool { - match self { - Symbol::Plus | Symbol::Minus | Symbol::Asterisk | Symbol::Slash | Symbol::Exp => true, - _ => false, - } - } - - pub fn is_comparison(&self) -> bool { - match self { - Symbol::LessThan - | Symbol::GreaterThan - | Symbol::Equal - | Symbol::NotEqual - | Symbol::LessThanOrEqual - | Symbol::GreaterThanOrEqual => true, - _ => false, - } - } - - pub fn is_logical(&self) -> bool { - match self { - Symbol::LogicalAnd | Symbol::LogicalOr => true, - _ => false, - } - } -} - -#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy)] -pub enum Keyword { - /// Represents the `let` keyword - Let, - /// Represents the `fn` keyword - Fn, - /// Represents the `if` keyword - If, - /// Represents the `device` keyword. Useful for defining a device at a specific address (ex. d0, d1, d2, etc.) 
- Device, - /// Represents the `else` keyword - Else, - /// Represents the `return` keyword - Return, - /// Represents the `enum` keyword - Enum, - /// Represents the `loop` keyword - Loop, - /// Represents the `break` keyword - Break, -} From 8280d4536694f6a6df86e50b8d50536dbf279ff1 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 15 Jun 2025 22:12:58 -0700 Subject: [PATCH 3/6] update cargo dependencies --- Cargo.lock | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6b511d..ccbb6a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,7 +146,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.103", ] [[package]] @@ -226,7 +226,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.103", ] [[package]] @@ -325,9 +325,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.172" +version = "0.2.173" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "d8cfeafaffdbc32176b64fb251369d52ea9f0a8fbc6f8759edffef7b525d64bb" [[package]] name = "log" @@ -518,9 +518,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.37.1" +version = "1.37.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faa7de2ba56ac291bd90c6b9bece784a52ae1411f9506544b3eae36dd2356d50" +checksum = "b203a6425500a03e0919c42d3c47caca51e79f1132046626d2c8871c5092035d" dependencies = [ "arrayvec", "borsh", @@ -573,7 +573,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.103", ] [[package]] @@ -624,9 +624,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.102" +version = "2.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462" +checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" dependencies = [ "proc-macro2", "quote", @@ -736,7 +736,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.103", "wasm-bindgen-shared", ] @@ -758,7 +758,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.103", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -880,5 +880,5 @@ checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.103", ] From e32e941e14fd0170c12cca7252054a2becb71d0c Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Sun, 15 Jun 2025 22:24:30 -0700 Subject: [PATCH 4/6] break out the parser and compiler into their own libraries --- Cargo.lock | 19 +++++++++++++++++++ Cargo.toml | 2 ++ libs/compiler/Cargo.toml | 8 ++++++++ .../mod.rs => libs/compiler/src/lib.rs | 11 ++++++----- libs/parser/Cargo.toml | 12 ++++++++++++ src/parser/mod.rs => libs/parser/src/lib.rs | 14 +++++++++++--- {src/parser => libs/parser/src}/sys_call.rs | 0 {src/parser => libs/parser/src}/tree_node.rs | 0 src/main.rs | 15 ++------------- 9 files changed, 60 insertions(+), 21 deletions(-) create mode 100644 libs/compiler/Cargo.toml rename src/compiler/mod.rs => libs/compiler/src/lib.rs (98%) create mode 100644 libs/parser/Cargo.toml rename src/parser/mod.rs => libs/parser/src/lib.rs (99%) rename {src/parser => 
libs/parser/src}/sys_call.rs (100%) rename {src/parser => libs/parser/src}/tree_node.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index ccbb6a7..9b7942c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -241,6 +241,14 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "compiler" +version = "0.1.0" +dependencies = [ + "parser", + "quick-error", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -380,6 +388,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +[[package]] +name = "parser" +version = "0.1.0" +dependencies = [ + "anyhow", + "quick-error", + "tokenizer", +] + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -600,6 +617,8 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", + "compiler", + "parser", "quick-error", "rust_decimal", "tokenizer", diff --git a/Cargo.toml b/Cargo.toml index 198c80a..e268c1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,8 @@ clap = { version = "^4.5", features = ["derive"] } quick-error = { workspace = true } rust_decimal = { workspace = true } tokenizer = { path = "libs/tokenizer" } +parser = { path = "libs/parser" } +compiler = { path = "libs/compiler" } [dev-dependencies] anyhow = { version = "^1.0", features = ["backtrace"] } diff --git a/libs/compiler/Cargo.toml b/libs/compiler/Cargo.toml new file mode 100644 index 0000000..edf4fd3 --- /dev/null +++ b/libs/compiler/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "compiler" +version = "0.1.0" +edition = "2024" + +[dependencies] +quick-error = { workspace = true } +parser = { path = "../parser" } diff --git a/src/compiler/mod.rs b/libs/compiler/src/lib.rs similarity index 98% rename from src/compiler/mod.rs rename to libs/compiler/src/lib.rs index b7ba4a1..95df7c0 100644 --- a/src/compiler/mod.rs +++ b/libs/compiler/src/lib.rs @@ -1,6 +1,7 @@ -use crate::parser::sys_call::SysCall; -use crate::parser::tree_node::*; -use crate::parser::Parser as ASTParser; +use parser::Parser as ASTParser; +use parser::sys_call::SysCall; +use parser::tree_node::*; +use quick_error::quick_error; use std::cmp::Ordering; use std::collections::HashMap; use std::io::{BufWriter, Write}; @@ -8,7 +9,7 @@ use std::io::{BufWriter, Write}; quick_error! 
{ #[derive(Debug)] pub enum CompileError { - ParseError(err: crate::parser::ParseError) { + ParseError(err: parser::ParseError) { from() display("Parse error: {}", err) } @@ -168,7 +169,7 @@ impl<'a> Compiler<'a> { } fn syscall_declaration_expression(&mut self, expr: SysCall) -> Result<(), CompileError> { - use crate::parser::sys_call::System; + use parser::sys_call::System; #[allow(clippy::collapsible_match)] match expr { SysCall::System(ref sys) => match sys { diff --git a/libs/parser/Cargo.toml b/libs/parser/Cargo.toml new file mode 100644 index 0000000..2d1639a --- /dev/null +++ b/libs/parser/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "parser" +version = "0.1.0" +edition = "2024" + +[dependencies] +quick-error = { workspace = true } +tokenizer = { path = "../tokenizer" } + + +[dev-dependencies] +anyhow = { version = "1" } diff --git a/src/parser/mod.rs b/libs/parser/src/lib.rs similarity index 99% rename from src/parser/mod.rs rename to libs/parser/src/lib.rs index fbf3124..ece1be2 100644 --- a/src/parser/mod.rs +++ b/libs/parser/src/lib.rs @@ -1,15 +1,23 @@ pub mod sys_call; pub mod tree_node; -use crate::boxed; +use quick_error::quick_error; use std::io::SeekFrom; use sys_call::SysCall; use tokenizer::{ - token::{Keyword, Symbol, Token, TokenType}, Tokenizer, TokenizerBuffer, TokenizerError, + token::{Keyword, Symbol, Token, TokenType}, }; use tree_node::*; +#[macro_export] +/// A macro to create a boxed value. +macro_rules! boxed { + ($e:expr) => { + Box::new($e) + }; +} + quick_error! { #[derive(Debug)] pub enum ParseError { @@ -164,7 +172,7 @@ impl Parser { TokenType::Keyword(e) if matches_keyword!(e, Keyword::Enum, Keyword::If, Keyword::Else) => { - return Err(ParseError::UnsupportedKeyword(current_token.clone())) + return Err(ParseError::UnsupportedKeyword(current_token.clone())); } // match declarations with a `let` keyword diff --git a/src/parser/sys_call.rs b/libs/parser/src/sys_call.rs similarity index 100% rename from src/parser/sys_call.rs rename to libs/parser/src/sys_call.rs diff --git a/src/parser/tree_node.rs b/libs/parser/src/tree_node.rs similarity index 100% rename from src/parser/tree_node.rs rename to libs/parser/src/tree_node.rs diff --git a/src/main.rs b/src/main.rs index 04f12ea..a549747 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,6 @@ #[macro_use] extern crate quick_error; -mod compiler; -mod parser; - use clap::Parser; use compiler::Compiler; use parser::Parser as ASTParser; @@ -14,14 +11,6 @@ use std::{ }; use tokenizer::{Tokenizer, TokenizerError}; -#[macro_export] -/// A macro to create a boxed value. -macro_rules! boxed { - ($e:expr) => { - Box::new($e) - }; -} - quick_error! 
{ #[derive(Debug)] enum StationlangError { @@ -78,8 +67,8 @@ fn run_logic() -> Result<(), StationlangError> { let parser = ASTParser::new(tokenizer); let mut writer: BufWriter> = match args.output_file { - Some(output_file) => BufWriter::new(boxed!(File::create(output_file)?)), - None => BufWriter::new(boxed!(std::io::stdout())), + Some(output_file) => BufWriter::new(Box::new(File::create(output_file)?)), + None => BufWriter::new(Box::new(std::io::stdout())), }; let compiler = Compiler::new(parser, &mut writer); From 94dfd5ec838143d33f8e8a3353b5a5a3008684b2 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Mon, 16 Jun 2025 15:32:04 -0700 Subject: [PATCH 5/6] wip --- Cargo.lock | 206 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 10 +++ src/lib.rs | 14 ++++ 3 files changed, 230 insertions(+) create mode 100644 src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 9b7942c..0581efa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -255,6 +255,41 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "ext-trait" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d772df1c1a777963712fb68e014235e80863d6a91a85c4e06ba2d16243a310e5" +dependencies = [ + "ext-trait-proc_macros", +] + +[[package]] +name = "ext-trait-proc_macros" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab7934152eaf26aa5aa9f7371408ad5af4c31357073c9e84c3b9d7f11ad639a" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "extension-traits" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a296e5a895621edf9fa8329c83aa1cb69a964643e36cf54d8d7a69b789089537" +dependencies = [ + "ext-trait", +] + +[[package]] +name = "extern-c" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320bea982e85d42441eb25c49b41218e7eaa2657e8f90bc4eca7437376751e23" + [[package]] name = "funty" version = "2.0.0" @@ -309,6 +344,15 @@ dependencies = [ "hashbrown 0.15.4", ] +[[package]] +name = "inventory" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab08d7cd2c5897f2c949e5383ea7c7db03fb19130ffcfbf7eda795137ae3cb83" +dependencies = [ + "rustversion", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -343,6 +387,22 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +[[package]] +name = "macro_rules_attribute" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf0c9b980bf4f3a37fd7b1c066941dd1b1d0152ce6ee6e8fe8c49b9f6810d862" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d" + [[package]] name = "memchr" version = "2.7.5" @@ -397,6 +457,12 @@ dependencies = [ "tokenizer", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -406,6 +472,16 
@@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" +dependencies = [ + "proc-macro2", + "syn 1.0.109", +] + [[package]] name = "proc-macro-crate" version = "3.3.0" @@ -555,6 +631,15 @@ version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustversion" version = "1.0.21" @@ -567,12 +652,56 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "safer-ffi" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435fdd58b61a6f1d8545274c1dfa458e905ff68c166e65e294a0130ef5e675bd" +dependencies = [ + "extern-c", + "inventory", + "libc", + "macro_rules_attribute", + "paste", + "safer_ffi-proc_macros", + "scopeguard", + "stabby", + "uninit", + "unwind_safe", + "with_builtin_macros", +] + +[[package]] +name = "safer_ffi-proc_macros" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f25be5ba5f319542edb31925517e0380245ae37df50a9752cdbc05ef948156" +dependencies = [ + "macro_rules_attribute", + "prettyplease", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "seahash" version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + [[package]] name = "serde" version = "1.0.219" @@ -605,12 +734,53 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2-const-stable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f179d4e11094a893b82fff208f74d448a7512f99f5a0acbd5c679b705f83ed9" + [[package]] name = "simdutf8" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "stabby" +version = "36.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b7e94eaf470c2e76b5f15fb2fb49714471a36cc512df5ee231e62e82ec79f8" +dependencies = [ + "rustversion", + "stabby-abi", +] + +[[package]] +name = "stabby-abi" +version = "36.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc7a63b8276b54e51bfffe3d85da56e7906b2dcfcb29018a8ab666c06734c1a" +dependencies = [ + "rustc_version", + "rustversion", + "sha2-const-stable", + "stabby-macros", +] + +[[package]] +name = "stabby-macros" +version = "36.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"eecb7ec5611ec93ec79d120fbe55f31bea234dc1bed1001d4a071bb688651615" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "rand", + "syn 1.0.109", +] + [[package]] name = "stationlang" version = "0.1.0" @@ -621,6 +791,7 @@ dependencies = [ "parser", "quick-error", "rust_decimal", + "safer-ffi", "tokenizer", ] @@ -705,6 +876,21 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "uninit" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e130f2ed46ca5d8ec13c7ff95836827f92f5f5f37fd2b2bf16f33c408d98bb6" +dependencies = [ + "extension-traits", +] + +[[package]] +name = "unwind_safe" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0976c77def3f1f75c4ef892a292c31c0bbe9e3d0702c63044d7c76db298171a3" + [[package]] name = "utf8parse" version = "0.2.2" @@ -873,6 +1059,26 @@ dependencies = [ "memchr", ] +[[package]] +name = "with_builtin_macros" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a59d55032495429b87f9d69954c6c8602e4d3f3e0a747a12dea6b0b23de685da" +dependencies = [ + "with_builtin_macros-proc_macros", +] + +[[package]] +name = "with_builtin_macros-proc_macros" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15bd7679c15e22924f53aee34d4e448c45b674feb6129689af88593e129f8f42" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "wyz" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index e268c1e..f5275d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,11 +9,17 @@ members = ["libs/*"] [workspace.dependencies] quick-error = "2" rust_decimal = "1" +safer-ffi = { version = "^0.1" } [[bin]] name = "slang" path = "src/main.rs" +[lib] +name = "slanglib" +path = "src/lib.rs" +crate-type = ["staticlib"] + [dependencies] clap = { version = "^4.5", features = ["derive"] } quick-error = { workspace = true } @@ -21,6 +27,10 @@ rust_decimal = { workspace = true } tokenizer = { path = "libs/tokenizer" } parser = { path = "libs/parser" } compiler = { path = "libs/compiler" } +safer-ffi = { workspace = true } + +[features] +headers = ["safer-ffi/headers"] [dev-dependencies] anyhow = { version = "^1.0", features = ["backtrace"] } diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..1958da1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,14 @@ +use std::io::{BufWriter, Write}; + +use compiler::Compiler; +use parser::Parser; +use safer_ffi::ffi_export; +use tokenizer::Tokenizer; + +#[ffi_export] +fn compile_from_string( + input: &safer_ffi::string::String, + output: &mut safer_ffi::string::String, +) -> i32 { + todo!() +} From 006e3cdf02db45b3d6dfd944ed6a01ce3eee9128 Mon Sep 17 00:00:00 2001 From: Devin Bidwell Date: Tue, 17 Jun 2025 10:06:55 -0700 Subject: [PATCH 6/6] wip --- Cargo.toml | 2 +- libs/compiler/src/lib.rs | 24 +++++++++++++++++++++++- libs/compiler/src/test/mod.rs | 0 libs/compiler/test_files/math.slang | 7 +++++++ 4 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 libs/compiler/src/test/mod.rs create mode 100644 libs/compiler/test_files/math.slang diff --git a/Cargo.toml b/Cargo.toml index f5275d2..2898524 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ name = "slang" path = "src/main.rs" [lib] -name = "slanglib" +name = "slang" path = "src/lib.rs" crate-type = ["staticlib"] diff 
diff --git a/libs/compiler/src/lib.rs b/libs/compiler/src/lib.rs
index 95df7c0..de54a6d 100644
--- a/libs/compiler/src/lib.rs
+++ b/libs/compiler/src/lib.rs
@@ -1,3 +1,6 @@
+#[cfg(test)]
+mod test;
+
 use parser::Parser as ASTParser;
 use parser::sys_call::SysCall;
 use parser::tree_node::*;
@@ -104,6 +107,19 @@ impl<'a> Compiler<'a> {
         Ok(())
     }
 
+    /// Pop the given variable from the current stack. Errors if the variable is not found in the
+    /// current scope.
+    fn pop_current(&mut self, var_name: &str) -> Result<usize, CompileError> {
+        let last_scope = self
+            .variable_scope
+            .last_mut()
+            .ok_or(CompileError::ScopeError)?;
+
+        last_scope
+            .remove(var_name)
+            .ok_or(CompileError::VariableNotFound(var_name.to_string()))
+    }
+
     fn write_output(&mut self, output: impl Into<String>) -> Result<(), CompileError> {
         self.output.write_all(output.into().as_bytes())?;
         self.output.write_all(b"\n")?;
@@ -281,6 +297,7 @@ impl<'a> Compiler<'a> {
 
     fn invocation_expression(&mut self, expr: InvocationExpression) -> Result<(), CompileError> {
         let function_name = expr.name;
+        let args_count = expr.arguments.len();
 
         let function_line = *self
             .function_locations
@@ -307,7 +324,7 @@
                     self.binary_expression(expr)?;
                     to_write.push_str("push r0\n");
                 }
-                _ => todo!("something is up with the arguments"),
+                _ => todo!("something is up with the arguments: {arg:?}"),
             }
             self.push_stack(&format!("{function_name}Invocation{iter_index}"))?;
         }
@@ -320,6 +337,11 @@
 
         self.write_output(format!("j {function_line}"))?;
 
+        self.pop_current(&format!("{function_name}ReturnAddress"))?;
+        for i in 0..args_count {
+            self.pop_current(&format!("{function_name}Invocation{i}"))?;
+        }
+
         Ok(())
     }
 
diff --git a/libs/compiler/src/test/mod.rs b/libs/compiler/src/test/mod.rs
new file mode 100644
index 0000000..e69de29
diff --git a/libs/compiler/test_files/math.slang b/libs/compiler/test_files/math.slang
new file mode 100644
index 0000000..2fa5adc
--- /dev/null
+++ b/libs/compiler/test_files/math.slang
@@ -0,0 +1,7 @@
+fn addTemperatures(temp1, temp2) {
+    let toReturn = temp1 + temp2;
+};
+
+
+addTemperatures(15c, 120c);
+addTemperatures(1500f, 20c);
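
The compile_from_string export added in patch 5/6 is still a todo!(). A minimal sketch of how it could reuse the tokenizer -> parser -> compiler pipeline from run_logic, compiling into an in-memory buffer instead of a file or stdout. Tokenizer::from_str and Compiler::compile are assumed names here, not APIs confirmed by these patches, and the safer-ffi string conversions are left to the exported wrapper:

    use std::io::BufWriter;

    use compiler::Compiler;
    use parser::Parser;
    use tokenizer::Tokenizer;

    // Sketch only: the exported wrapper would copy the Ok(String) into the
    // `output` parameter and return 0 on success, a nonzero code on failure.
    fn compile_to_string(source: &str) -> Result<String, i32> {
        // Assumed constructor; the tokenizer crate may expose a different API.
        let tokenizer = Tokenizer::from_str(source);
        let parser = Parser::new(tokenizer);

        // Mirror run_logic's BufWriter setup, but back it with a Vec<u8> so
        // nothing crosses the C boundary except the final string.
        let mut writer = BufWriter::new(Vec::<u8>::new());
        Compiler::new(parser, &mut writer)
            .compile() // assumed entry point
            .map_err(|_| 1)?;

        let bytes = writer.into_inner().map_err(|_| 2)?;
        String::from_utf8(bytes).map_err(|_| 3)
    }

Keeping the compilation behind a plain &str -> Result<String, i32> helper also keeps the staticlib's FFI surface thin, which matters once the headers feature starts generating C headers via safer-ffi.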
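Patch 6/6 creates libs/compiler/src/test/mod.rs empty next to the math.slang fixture. One shape a first test could take, under the same assumptions (Tokenizer::from_file and Compiler::compile are hypothetical names; only ASTParser::new and Compiler::new appear in the patches themselves):

    use std::io::BufWriter;

    use parser::Parser as ASTParser;
    use tokenizer::Tokenizer;

    use crate::Compiler;

    #[test]
    fn compiles_math_fixture() {
        // Hypothetical constructor taking a path to a .slang source file.
        let tokenizer = Tokenizer::from_file("test_files/math.slang");
        let parser = ASTParser::new(tokenizer);

        // The fixture defines and invokes a function twice, so it exercises
        // the new pop_current bookkeeping for arguments and return addresses.
        let mut out = BufWriter::new(Vec::<u8>::new());
        Compiler::new(parser, &mut out)
            .compile() // assumed entry point
            .expect("math.slang should compile");

        assert!(!out.into_inner().unwrap().is_empty());
    }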