commit 3d4fd06bbaff1c3be3c8388a1e42dd0c51e27c9b
Author: TudbuT
Date:   Sun Jan 14 22:38:04 2024 +0100

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..d5766ad
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,25 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aeapa"
+version = "0.1.0"
+dependencies = [
+ "deborrow",
+]
+
+[[package]]
+name = "deborrow"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5f99cc7a6632788aab2c734a6e1e1d8302658f70fb45d99a6e32154a24cc2a2"
+dependencies = [
+ "deborrow-macro",
+]
+
+[[package]]
+name = "deborrow-macro"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "551a13c0871ba8964b30d2407fdfd4c9b8e5f289950c152ff3d0d8de5be6b948"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..d0f51e4
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "aeapa"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+deborrow = "0.3.1"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e7090f9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# Advanced Easily Parsable Language
+
+/æepa/
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..ad5ee4b
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,40 @@
+//! Command-line entry point of the `aeapa` prototype.
+//!
+//! Tokenizes `std.aea` from the current working directory and prints the tokens.
+
+use std::{fs, process::ExitCode};
+
+use crate::tokenizer::Tokenizer;
+
+pub mod parser;
+pub mod tokenizer;
+pub mod value;
+pub mod word;
+
+/// Source file that is tokenized on start-up, relative to the working directory.
+const SOURCE_PATH: &str = "std.aea";
+
+/// Reads [`SOURCE_PATH`], tokenizes it and prints the token list in `Debug` form.
+///
+/// Reports an unreadable file or a tokenizer error on stderr and returns
+/// [`ExitCode::FAILURE`] instead of panicking.
+fn main() -> ExitCode {
+    let source = match fs::read_to_string(SOURCE_PATH) {
+        Ok(source) => source,
+        Err(err) => {
+            eprintln!("failed to read {SOURCE_PATH}: {err}");
+            return ExitCode::FAILURE;
+        }
+    };
+
+    match Tokenizer::tokenize(source) {
+        Ok(tokens) => {
+            println!("{tokens:?}");
+            ExitCode::SUCCESS
+        }
+        Err(err) => {
+            eprintln!("failed to tokenize {SOURCE_PATH}: {err:?}");
+            ExitCode::FAILURE
+        }
+    }
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..03317ff
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,22 @@
+/// Location in the source text that a [`ParseError`] refers to.
+#[derive(Debug)]
+pub struct CodePos {
+    /// Source file name.
+    // NOTE(review): the type parameter is an assumption; only `None` is ever stored here.
+    pub file: Option<String>,
+    /// Byte offset from the start of the input.
+    pub char_in_file: Option<usize>,
+    /// 1-based line number.
+    pub line: Option<usize>,
+}
+
+/// Errors reported while tokenizing.
+#[derive(Debug)]
+pub enum ParseError {
+    /// A backslash inside a string literal was followed by an unsupported character.
+    InvalidToken(CodePos, char),
+    /// NOTE(review): never constructed in this commit; intended meaning unconfirmed.
+    StringInIdent(CodePos),
+    /// The input ended before a string literal was closed.
+    UnterminatedString(CodePos),
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
new file mode 100644
index 0000000..0ea2f8c
--- /dev/null
+++ b/src/tokenizer.rs
@@ -0,0 +1,137 @@
+use std::{mem, str::Chars};
+
+use crate::{
+    parser::{CodePos, ParseError},
+    word::Token,
+};
+
+/// Splits source text into [`Token`]s.
+pub struct Tokenizer<'a> {
+    /// Characters that have not been consumed yet.
+    char_iter: Chars<'a>,
+    /// The complete input; its length is the reference point for byte offsets.
+    string: &'a str,
+}
+
+impl<'a> Tokenizer<'a> {
+    /// Tokenizes `string` and returns the tokens in source order.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParseError::InvalidToken`] when a backslash inside a string literal
+    /// is followed by anything other than `n`, `r`, `\` or `"`, and
+    /// [`ParseError::UnterminatedString`] when the input ends inside a string literal.
+    pub fn tokenize(string: String) -> Result<Vec<Token>, ParseError> {
+        // Borrowing `string` for the duration of this call needs no `unsafe`: the
+        // tokenizer is dropped before `string`, and the returned tokens are owned.
+        let mut tokenizer = Tokenizer {
+            char_iter: string.chars(),
+            string: &string,
+        };
+        tokenizer.read_tokens()
+    }
+
+    /// Describes the current read position for an error report.
+    ///
+    /// `line` is the number of line breaks consumed so far; it is reported 1-based.
+    fn tpos(&self, line: usize) -> CodePos {
+        CodePos {
+            file: None,
+            // Byte offset just past the most recently consumed character.
+            char_in_file: Some(self.string.len() - self.char_iter.as_str().len()),
+            line: Some(line + 1),
+        }
+    }
+
+    /// Consumes the remaining input and groups it into tokens.
+    ///
+    /// Spaces, tabs and carriage returns only separate identifiers and are never
+    /// emitted. Every line break outside a string literal becomes a
+    /// [`Token::Newline`] holding the number of line breaks read so far, counting
+    /// those inside string literals as well.
+    fn read_tokens(&mut self) -> Result<Vec<Token>, ParseError> {
+        let mut tokens = Vec::new();
+        // Text of the identifier or string literal currently being read.
+        let mut accum = String::new();
+        let mut in_string = false;
+        let mut in_escape = false;
+        let mut line = 0;
+
+        // `while let` instead of `for`: `tpos` inspects `char_iter` inside the loop.
+        while let Some(c) = self.char_iter.next() {
+            if in_string {
+                if in_escape {
+                    accum.push(match c {
+                        'n' => '\n',
+                        'r' => '\r',
+                        '\\' | '"' => c,
+                        _ => return Err(ParseError::InvalidToken(self.tpos(line), c)),
+                    });
+                    in_escape = false;
+                    continue;
+                }
+                match c {
+                    '"' => {
+                        in_string = false;
+                        tokens.push(Token::String(mem::take(&mut accum)));
+                    }
+                    '\\' => in_escape = true,
+                    _ => {
+                        // Keep error positions correct after multi-line literals.
+                        if c == '\n' {
+                            line += 1;
+                        }
+                        accum.push(c);
+                    }
+                }
+                continue;
+            }
+
+            let symbol = match c {
+                // `\r` is whitespace so CRLF input tokenizes like LF input.
+                ' ' | '\t' | '\r' => Token::Space,
+                '|' => Token::Bar,
+                '!' => Token::Exclam,
+                ';' => Token::Semicolon,
+                ':' => Token::Colon,
+                '(' => Token::Open,
+                ')' => Token::Close,
+                '{' => Token::Begin,
+                '}' => Token::End,
+                '^' => Token::Return,
+                '\n' => {
+                    line += 1;
+                    Token::Newline(line)
+                }
+                '"' => {
+                    in_string = true;
+                    // A quote directly after identifier characters ends the identifier.
+                    if !accum.is_empty() {
+                        tokens.push(Token::Ident(mem::take(&mut accum)));
+                    }
+                    continue;
+                }
+                _ => {
+                    accum.push(c);
+                    continue;
+                }
+            };
+
+            // Any symbol, including whitespace, ends the identifier read so far.
+            if !accum.is_empty() {
+                tokens.push(Token::Ident(mem::take(&mut accum)));
+            }
+            if symbol != Token::Space {
+                tokens.push(symbol);
+            }
+        }
+
+        if in_string {
+            return Err(ParseError::UnterminatedString(self.tpos(line)));
+        }
+        if !accum.is_empty() {
+            tokens.push(Token::Ident(accum));
+        }
+        Ok(tokens)
+    }
+}
diff --git a/src/value.rs b/src/value.rs
new file mode 100644
index 0000000..b5c9e70
--- /dev/null
+++ b/src/value.rs
@@ -0,0 +1,17 @@
+/// A typed value: null, an integer or float of a fixed width, or a string.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Value {
+    Null,
+    U8(u8),
+    I32(i32),
+    I64(i64),
+    I128(i128),
+    U32(u32),
+    U64(u64),
+    U128(u128),
+    F32(f32),
+    F64(f64),
+    //Func(AFunc),
+    //Array(Vec),
+    Str(String),
+}
diff --git a/src/word.rs b/src/word.rs
new file mode 100644
index 0000000..7475169
--- /dev/null
+++ b/src/word.rs
@@ -0,0 +1,43 @@
+use crate::value::Value;
+
+// NOTE(review): nothing in this commit constructs a `Word`; the variant meanings
+// are left undocumented because no code here establishes them.
+#[derive(Debug, PartialEq)]
+pub enum Word {
+    Const(Value),
+    StartCall(String),
+    StartObjCall(String),
+    ConfirmCall,
+}
+
+/// A lexical token produced by the tokenizer.
+#[derive(Debug, PartialEq, Eq)]
+pub enum Token {
+    /// Space, tab or carriage return; separates identifiers and is never emitted.
+    Space,
+    /// `|`
+    Bar,
+    /// `!`
+    Exclam,
+    /// `;`
+    Semicolon,
+    /// `:`
+    Colon,
+    /// `(`
+    Open,
+    /// `)`
+    Close,
+    /// `{`
+    Begin,
+    /// `}`
+    End,
+    /// `^`
+    Return,
+    /// A string literal with its escape sequences resolved.
+    String(String),
+    /// A run of characters that is not whitespace, a symbol or a string literal.
+    Ident(String),
+
+    /// A line break, numbered by how many line breaks precede and include it.
+    Newline(usize),
+}
diff --git a/std.aea b/std.aea
new file mode 100644
index 0000000..d8db540
--- /dev/null
+++ b/std.aea
@@ -0,0 +1,24 @@
+!aea:exclam_mode!
+
+fn main | args:Array:String ! ret:int {
+    print "Hello world!" ;
+    if args:length :== 0 !! {
+        ret= 0 ^
+    }
+    print "I got " :+ String:of args:length ! ! :+ " arguments." ! ;
+    ret= 0
+}
+
+!aea:paren_mode!
+
+fn main(args:Array:String) ret:int {
+    print("Hello world!");
+    if(args:length :== (0)) {
+        ret=(0) ^
+    }
+    print("I got " :+(String:of(args:length)) :+(" arguments."));
+    ret=(0)
+}
+
+
+