implement basic parser (TODO: more keywords)

This commit is contained in:
Daniella / Tove 2024-01-15 01:20:01 +01:00
parent 3d4fd06bba
commit 0b1ded01a6
Signed by: TudbuT
GPG key ID: B3CF345217F202D3
6 changed files with 292 additions and 34 deletions

View file

@ -1,13 +1,41 @@
use std::fs; use std::fs;
use crate::tokenizer::Tokenizer; use crate::{parser::Parser, tokenizer::Tokenizer, value::Value, word::Word};
pub mod parser; pub mod parser;
pub mod tokenizer; pub mod tokenizer;
pub mod ty;
pub mod value; pub mod value;
pub mod word; pub mod word;
fn main() { fn main() {
let tokens = Tokenizer::tokenize(fs::read_to_string("std.aea").unwrap()).unwrap(); let tokens = Tokenizer::tokenize(fs::read_to_string("std.aea").unwrap()).unwrap();
println!("{tokens:?}"); println!("{tokens:?}");
let words = Parser::parse(&tokens).unwrap();
println!("{words:?}");
let mut stack = Vec::new();
let mut call_stack = Vec::new();
for word in words {
match word {
Word::Const(Value::Str(v)) => stack.push(v),
Word::StartCall(n, o) => {
println!("Starting call to {n} with objcall = {o}");
call_stack.push((n, o))
}
Word::ConfirmCall(p) => {
let dat = call_stack.pop().unwrap();
println!(
"Confirmed call to {} with return popping = {p} and objcall = {}",
dat.0, dat.1
);
if dat.0 == "print" {
println!("{}", stack.pop().unwrap());
}
}
Word::Function(name, args, ret, words) => {
println!("If i was an interpreter I would define function {name}({args:?}) {ret:?} {{{words:?}}}")
}
_ => todo!(),
}
}
} }

View file

@ -1,12 +1,207 @@
use std::mem;
use crate::{
value::Value,
word::{Argdef, Token, TypePat, Word},
};
#[derive(Debug)] #[derive(Debug)]
pub struct CodePos { pub struct CodePos {
pub file: Option<String>, pub file: Option<String>,
pub char_in_file: Option<usize>, pub char_in_file: Option<usize>,
pub token: Option<usize>,
pub line: Option<usize>, pub line: Option<usize>,
} }
#[derive(Debug)] #[derive(Debug)]
pub enum ParseError { pub enum ParseError {
InvalidToken(CodePos, char), InvalidToken(CodePos, String),
StringInIdent(CodePos), StringInIdent(CodePos),
UnexpectedEOF,
UnexpectedOpenParen(CodePos),
FunctionCallWithoutFn(CodePos),
UnexpectedToken(CodePos, Token, String),
ExpectedIdentifierAfterColon(CodePos),
}
/// Cursor over a borrowed token slice that turns [`Token`]s into [`Word`]s.
pub struct Parser<'a> {
/// The tokens being parsed; borrowed from the caller for the parser's lifetime.
slice: &'a [Token],
/// Index into `slice` of the token that the next read will return.
index: usize,
/// File name copied into the `CodePos` values built by `pos`; `None` until
/// the parser records one from a `Token::Filename`.
file: Option<String>,
}
impl<'a> Parser<'a> {
pub fn new(tokens: &'a [Token]) -> Parser<'a> {
Parser {
slice: tokens,
index: 0,
file: None,
}
}
/// Convenience entry point: parses the whole of `tokens`, starting at the
/// first token, into a list of words.
///
/// # Errors
/// Returns whatever [`ParseError`] `read_level_at` reports.
pub fn parse(tokens: &'a [Token]) -> Result<Vec<Word>, ParseError> {
    Parser::new(tokens).read_level_at(0)
}
/// Returns a clone of the token under the cursor and advances past it.
///
/// The cursor only moves when a token was actually read, so it never runs
/// past the end of the slice; `pos` slices `slice[..index]` and depends on
/// that.
///
/// # Errors
/// Returns [`ParseError::UnexpectedEOF`] when no tokens are left.
fn next(&mut self) -> Result<Token, ParseError> {
    let token = self
        .slice
        .get(self.index)
        .cloned()
        .ok_or(ParseError::UnexpectedEOF)?;
    self.index += 1;
    Ok(token)
}
/// Consumes the next token and returns its name if it is an identifier.
///
/// # Errors
/// * [`ParseError::UnexpectedEOF`] when no tokens are left.
/// * [`ParseError::UnexpectedToken`] (expecting `"Ident"`) for any other
///   token. The token has already been consumed at that point, so the
///   reported position is the cursor just after it.
fn next_as_ident(&mut self) -> Result<String, ParseError> {
    match self.next()? {
        // `next` hands out an owned clone, so the string can be moved out
        // directly without copying it a second time.
        Token::Ident(name) => Ok(name),
        other => Err(ParseError::UnexpectedToken(
            self.pos(),
            other,
            "Ident".to_owned(),
        )),
    }
}
/// Consumes the next token and checks that it equals `token`.
///
/// Returns the consumed token on a match. On a mismatch the error carries
/// the token that was found and the `Debug` rendering of the expected one.
///
/// # Errors
/// * [`ParseError::UnexpectedEOF`] when no tokens are left.
/// * [`ParseError::UnexpectedToken`] when the token differs from `token`.
fn assert_next_eq(&mut self, token: Token) -> Result<Token, ParseError> {
    let found = self.next()?;
    if found != token {
        let expected = format!("{token:?}");
        return Err(ParseError::UnexpectedToken(self.pos(), found, expected));
    }
    Ok(found)
}
/// Peeks at the token under the cursor and consumes it only if `f` accepts it.
///
/// Returns `true` when a token was present, satisfied `f`, and was consumed.
/// Returns `false`, leaving the cursor untouched, when the predicate
/// rejects the token or there is no token left.
fn next_matches(&mut self, f: impl FnOnce(&Token) -> bool) -> bool {
    let accepted = self.slice.get(self.index).is_some_and(f);
    if accepted {
        self.index += 1;
    }
    accepted
}
/// Builds a [`CodePos`] describing the current cursor.
///
/// `token` is the cursor index and `line` is the number of
/// `Token::Newline` tokens that lie before the cursor. The character
/// offset is not filled in by the parser.
fn pos(&self) -> CodePos {
    let newlines_before_cursor = self.slice[..self.index]
        .iter()
        .filter(|tok| matches!(tok, Token::Newline(_)))
        .count();
    CodePos {
        file: self.file.clone(),
        char_in_file: None,
        token: Some(self.index),
        line: Some(newlines_before_cursor),
    }
}
/// Parses one nesting level of the token stream into words, starting at
/// token `index`.
///
/// Reading stops after a `Token::End` has been consumed or when the tokens
/// run out. A level that is never closed is therefore returned as-is
/// rather than reported as an error.
///
/// Token handling:
/// * `Space` and `Newline` are skipped.
/// * `!` and `;` emit `ConfirmCall(false)` and `ConfirmCall(true)`.
/// * `:` marks the next call as an object call. It first emits
///   `ConfirmCall(false)` unless the previous word is a constant or there
///   is no previous word.
/// * `(` is only accepted after at least one call has been started in this
///   level.
/// * `)` emits `ConfirmCall(b)`, where `b` tells whether a `;` followed
///   directly (that `;` is consumed too).
/// * `{` parses a nested level into a `Block`.
/// * An identifier is either a keyword handled by `try_match_keyword` or
///   the start of a call.
/// * `Filename` updates the file name used in error positions.
///
/// # Errors
/// * [`ParseError::UnexpectedOpenParen`] for a `(` with no started call.
/// * Any error produced while parsing a nested block or a keyword.
pub fn read_level_at(&mut self, index: usize) -> Result<Vec<Word>, ParseError> {
    self.index = index;
    let mut words = Vec::new();
    // Set by `:`; consumed by the next started call.
    let mut is_objcall = false;
    // One entry per started call; flipped to `true` once that call's `(`
    // has been seen.
    let mut paren_mode: Vec<bool> = Vec::new();

    while let Ok(token) = self.next() {
        match token {
            Token::Space | Token::Newline(_) => (),
            Token::Exclam => words.push(Word::ConfirmCall(false)),
            Token::Semicolon => words.push(Word::ConfirmCall(true)),
            Token::Colon => {
                if words.last().is_some_and(|w| !matches!(w, Word::Const(_))) {
                    words.push(Word::ConfirmCall(false));
                }
                is_objcall = true;
            }
            Token::Open => match paren_mode.last_mut() {
                Some(mode) => *mode = true,
                // The position is only computed on the error path; it
                // costs a scan over all preceding tokens.
                None => return Err(ParseError::UnexpectedOpenParen(self.pos())),
            },
            Token::Close => {
                let followed_by_semicolon = self.next_matches(|t| *t == Token::Semicolon);
                words.push(Word::ConfirmCall(followed_by_semicolon));
            }
            Token::Begin => {
                // `next` has already moved the cursor past `{`, so the
                // block body starts exactly at the cursor. Starting one
                // further would drop the block's first token.
                let block = self.read_level_at(self.index)?;
                words.push(Word::Block(block));
            }
            Token::End => break,
            Token::Return => words.push(Word::Return),
            Token::String(s) => words.push(Word::Const(Value::Str(s))),
            Token::Ident(ident) => {
                if let Some(keyword) = self.try_match_keyword(&ident)? {
                    words.push(keyword);
                } else {
                    words.push(Word::StartCall(ident, is_objcall));
                    is_objcall = false;
                    paren_mode.push(false);
                }
            }
            // Stored on the parser itself so nested levels neither erase
            // the current file name nor lose one they encounter.
            Token::Filename(f) => self.file = Some(f),
        }
    }
    Ok(words)
}
/// Parses the construct introduced by `ident` if `ident` is a keyword.
///
/// Returns `Ok(None)` when `ident` is not a keyword, so the caller can
/// treat it as an ordinary call. Currently the only keyword is `fn`.
///
/// # Errors
/// Propagates any error raised while parsing the keyword's construct.
fn try_match_keyword(&mut self, ident: &str) -> Result<Option<Word>, ParseError> {
    match ident {
        "fn" => self.read_function().map(Some),
        _ => Ok(None),
    }
}

/// Parses a function definition; the cursor must be just after the `fn`
/// keyword.
///
/// Expected shape: a name, `(`, argument definitions, `)`, return
/// definitions, `{`, then the body up to its closing `}`. A definition is
/// an identifier, optionally followed by `:` and a type identifier.
/// Definitions read before `)` become the arguments, those read between
/// `)` and `{` become the returns.
///
/// # Errors
/// * [`ParseError::UnexpectedEOF`] if the tokens end inside the header.
/// * [`ParseError::UnexpectedToken`] for a missing name or `(`, for a `:`
///   with no preceding definition, or for any token that is not allowed
///   in the header.
/// * Any error produced while parsing the body.
fn read_function(&mut self) -> Result<Word, ParseError> {
    let name = self.next_as_ident()?;
    self.assert_next_eq(Token::Open)?;

    // Definitions collected since the last section boundary.
    let mut pending = Vec::new();
    let mut args = Vec::new();
    loop {
        match self.next()? {
            Token::Ident(arg_name) => pending.push(Argdef::Untyped(arg_name)),
            Token::Colon => {
                let Some(previous) = pending.pop() else {
                    return Err(ParseError::UnexpectedToken(
                        self.pos(),
                        Token::Colon,
                        "Ident".to_owned(),
                    ));
                };
                // TODO: make this error instead?
                // A second `:` currently replaces the earlier type.
                let arg_name = match previous {
                    Argdef::Untyped(a) | Argdef::Typed(a, _) => a,
                };
                let pattern = TypePat::new(self.next_as_ident()?);
                pending.push(Argdef::Typed(arg_name, pattern));
            }
            // `)` ends the argument list; what follows are the returns.
            Token::Close => args = mem::take(&mut pending),
            Token::Begin => break,
            Token::Newline(_) => (),
            // Keep the file name up to date for error positions.
            Token::Filename(f) => self.file = Some(f),
            token => {
                return Err(ParseError::UnexpectedToken(
                    self.pos(),
                    token,
                    "Ident".to_owned(),
                ))
            }
        }
    }

    // `{` has been consumed, so the body starts at the cursor.
    let body = self.read_level_at(self.index)?;
    Ok(Word::Function(name, args, pending, body))
}
} }

View file

@ -26,6 +26,7 @@ impl<'a> Tokenizer<'a> {
CodePos { CodePos {
file: None, file: None,
char_in_file: Some(self.string.len() - self.char_iter.as_str().len()), char_in_file: Some(self.string.len() - self.char_iter.as_str().len()),
token: Some(tokens.len()),
line: Some( line: Some(
tokens tokens
.iter() .iter()
@ -51,7 +52,9 @@ impl<'a> Tokenizer<'a> {
'r' => accum.push('\r'), 'r' => accum.push('\r'),
'\\' => accum.push('\\'), '\\' => accum.push('\\'),
'"' => accum.push('"'), '"' => accum.push('"'),
char => return Err(ParseError::InvalidToken(self.tpos(&ret), char)), char => {
return Err(ParseError::InvalidToken(self.tpos(&ret), char.to_string()))
}
} }
in_escape = false; in_escape = false;
continue; continue;
@ -71,7 +74,6 @@ impl<'a> Tokenizer<'a> {
let symbol = match char { let symbol = match char {
' ' => Some(Token::Space), ' ' => Some(Token::Space),
'\t' => Some(Token::Space), '\t' => Some(Token::Space),
'|' => Some(Token::Bar),
'!' => Some(Token::Exclam), '!' => Some(Token::Exclam),
';' => Some(Token::Semicolon), ';' => Some(Token::Semicolon),
':' => Some(Token::Colon), ':' => Some(Token::Colon),

24
src/ty.rs Normal file
View file

@ -0,0 +1,24 @@
use std::sync::Arc;
use crate::word::TypePat;
/// A reference to a type: a shared base [`AEAType`] plus a list of further
/// shared [`AEAType`]s.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AEATypeInfo {
/// The type being referred to.
base: Arc<AEAType>,
/// NOTE(review): presumably the type arguments applied to `base`
/// (as in `Array:String`) — confirm against the code that builds this.
args: Vec<Arc<AEAType>>,
}
/// Definition of a type. Equality is decided by `uname` alone (see the
/// `PartialEq` impl), not by `name` or `fields`.
#[derive(Debug)]
pub struct AEAType {
/// NOTE(review): presumably the name as written in source — confirm.
name: String,
/// The identity key: two `AEAType`s compare equal exactly when their `uname`s match.
uname: String,
/// Field names paired with the type pattern of each field.
fields: Vec<(String, TypePat)>,
}
/// Types are identified by `uname` only; `name` and `fields` play no part
/// in the comparison.
impl PartialEq for AEAType {
    fn eq(&self, rhs: &Self) -> bool {
        let (ours, theirs) = (&self.uname, &rhs.uname);
        ours == theirs
    }
}

// String equality is a full equivalence relation, so `Eq` holds as well.
impl Eq for AEAType {}

View file

@ -1,17 +1,35 @@
use crate::value::Value; use crate::{ty::AEATypeInfo, value::Value};
#[derive(Debug, PartialEq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum Word { pub struct TypePat {
Const(Value), origin: String,
StartCall(String), ty: Option<AEATypeInfo>,
StartObjCall(String), }
ConfirmCall, impl TypePat {
pub fn new(origin: String) -> TypePat {
TypePat { origin, ty: None }
}
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum Argdef {
Untyped(String),
Typed(String, TypePat),
}
#[derive(Clone, Debug, PartialEq)]
pub enum Word {
Const(Value),
StartCall(String, bool),
ConfirmCall(bool),
Block(Vec<Word>),
Function(String, Vec<Argdef>, Vec<Argdef>, Vec<Word>),
Return,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token { pub enum Token {
Space, Space,
Bar,
Exclam, Exclam,
Semicolon, Semicolon,
Colon, Colon,
@ -24,4 +42,5 @@ pub enum Token {
Ident(String), Ident(String),
Newline(usize), Newline(usize),
Filename(String),
} }

32
std.aea
View file

@ -1,24 +1,14 @@
!aea:exclam_mode! print "Hi 1" ;
fn main | args:Array:String ! ret:int { "Hi 1" print;
print "Hello world!" ;
if args:length :== 0 !! { print("Hi 2");
ret= 0 ^
} "Hi 2" print();
print "I got " :+ String:of args:length ! ! :+ " arguments." ! ; "hi":print;
ret= 0
fn test(a) ret {
ret= print(a) !
} }
!aea:paren_mode! "hiiii" print;
fn main(args:Array:String) ret:int {
print("Hello world!");
if(args:length :== (0)) {
ret=(0) ^
}
print("I got " :+(String:of(args:length)) :+(" arguments."));
ret=(0)
}