From 5f669059a8466dc98ad35563e61fdf93b9bdf1df Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Sat, 9 Nov 2024 15:01:55 +0700 Subject: [PATCH 01/15] skip trivial first tokens in parsing --- crates/parser/src/grammar.rs | 12 ++ crates/parser/src/input.rs | 48 ++++- crates/parser/src/parser.rs | 2 +- crates/parser/src/token_kind.rs | 4 +- crates/syntax/src/lib.rs | 2 + crates/syntax/src/syntax.rs | 330 ++++++++++++++++++----------- crates/syntax/src/test_programs.rs | 135 ++++++++++++ 7 files changed, 402 insertions(+), 131 deletions(-) create mode 100644 crates/syntax/src/test_programs.rs diff --git a/crates/parser/src/grammar.rs b/crates/parser/src/grammar.rs index 63d498f..98832cc 100644 --- a/crates/parser/src/grammar.rs +++ b/crates/parser/src/grammar.rs @@ -18,10 +18,22 @@ mod template; pub mod entry { + use crate::token_kind::TokenKind; + use super::*; pub fn circom_program(p: &mut Parser) { let m = p.open(); + + while p.at_any(&[ + TokenKind::BlockComment, + TokenKind::CommentLine, + TokenKind::EndLine, + TokenKind::WhiteSpace, + ]) { + p.skip(); + } + pragma::pragma(p); while !p.eof() { match p.current() { diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index 0819515..c8e17c8 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -4,7 +4,7 @@ use logos::Lexer; use crate::token_kind::TokenKind; -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct Input<'a> { kind: Vec, source: &'a str, @@ -71,7 +71,9 @@ impl<'a> Input<'a> { #[cfg(test)] mod tests { - use std::cmp::min; + // use std::cmp::min; + + use crate::token_kind::TokenKind; use super::Input; @@ -83,11 +85,45 @@ mod tests { "# .to_string(); + let expected_input = Input { + kind: vec![ + TokenKind::EndLine, + TokenKind::WhiteSpace, + TokenKind::BlockComment, + TokenKind::EndLine, + TokenKind::WhiteSpace, + TokenKind::Identifier, + TokenKind::WhiteSpace, + TokenKind::Add, + TokenKind::WhiteSpace, + TokenKind::Number, + TokenKind::EndLine, + TokenKind::WhiteSpace + ], + source: &source, + position: vec![ + {0..1}, + {1..9}, + {9..24}, + {24..25}, + {25..33}, + {33..34}, + {34..35}, + {35..36}, + {36..37}, + {37..39}, + {39..40}, + {40..44}, + ] + }; + let input = Input::new(&source); - for i in 0..min(input.size(), 10) { - println!("kind = {:?}", input.kind[i]); - println!("position {:?}", input.position[i]); - } + assert_eq!(expected_input, input, "Tokens extract from source code are not correct"); + + // for i in 0..min(input.size(), 10) { + // println!("kind = {:?}", input.kind[i]); + // println!("position {:?}", input.position[i]); + // } } } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index e3d461c..23c4dde 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -113,7 +113,7 @@ impl<'a> Parser<'a> { let mut kind: TokenKind; loop { kind = self.input.kind_of(self.pos); - if !kind.is_travial() { + if !kind.is_trivial() { break; } diff --git a/crates/parser/src/token_kind.rs b/crates/parser/src/token_kind.rs index 300028f..26145bf 100644 --- a/crates/parser/src/token_kind.rs +++ b/crates/parser/src/token_kind.rs @@ -248,10 +248,10 @@ impl TokenKind { pub fn is_declaration_kw(self) -> bool { matches!(self, Self::VarKw | Self::ComponentKw | Self::SignalKw) } - pub fn is_travial(self) -> bool { + pub fn is_trivial(self) -> bool { matches!( self, - Self::WhiteSpace | Self::EndLine | Self::CommentLine | Self::Error + Self::WhiteSpace | Self::EndLine | Self::CommentLine | Self::BlockComment | Self::Error ) } } diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs index 7ca0f92..adeb8b0 100644 --- a/crates/syntax/src/lib.rs +++ b/crates/syntax/src/lib.rs @@ -2,3 +2,5 @@ pub mod syntax; pub mod syntax_node; pub mod abstract_syntax_tree; + +pub mod test_programs; \ No newline at end of file diff --git a/crates/syntax/src/syntax.rs b/crates/syntax/src/syntax.rs index 78c0ab2..3483b61 100644 --- a/crates/syntax/src/syntax.rs +++ b/crates/syntax/src/syntax.rs @@ -45,8 +45,10 @@ impl<'a> SyntaxTreeBuilder<'a> { pub fn syntax_tree(source: &str) -> SyntaxNode { let input = Input::new(source); - let mut builder = SyntaxTreeBuilder::new(&input); + let output = Parser::parsing(&input); + + let mut builder = SyntaxTreeBuilder::new(&input); builder.build(output); let green = builder.finish(); SyntaxNode::new_root(green) @@ -55,35 +57,127 @@ impl<'a> SyntaxTreeBuilder<'a> { #[cfg(test)] mod tests { - + use parser::token_kind::TokenKind::{self, *}; use std::hash::{DefaultHasher, Hash, Hasher}; - use rowan::ast::AstNode; + use rowan::{ast::AstNode, TextRange}; - use crate::abstract_syntax_tree::AstCircomProgram; + use crate::{abstract_syntax_tree::AstCircomProgram, test_programs}; use super::SyntaxTreeBuilder; + fn generate_expected_token_kind(ast: &AstCircomProgram) { + let children = ast + .syntax() + .first_child() + .unwrap() + .siblings(rowan::Direction::Next); + + println!("vec!["); + for child in children { + println!("{:?},", child.kind()); + } + println!("];"); + } + + fn generate_expected_token_range(ast: &AstCircomProgram) { + let children = ast + .syntax() + .first_child() + .unwrap() + .siblings(rowan::Direction::Next); + + println!("vec!["); + for child in children { + println!( + "TextRange::new({:?}.into(), {:?}.into()), ", + child.text_range().start(), + child.text_range().end() + ); + } + println!("];"); + } + + fn check_ast_children( + ast: &AstCircomProgram, + expected_kinds: &Vec, + expected_ranges: &Vec, + ) { + let children = ast + .syntax() + .first_child() + .unwrap() + .siblings(rowan::Direction::Next); + + let mut kind_iterator = expected_kinds.iter(); + let mut range_iterator = expected_ranges.iter(); + + for child in children { + if let (Some(expected_kind), Some(expected_range)) = + (kind_iterator.next(), range_iterator.next()) + { + assert_eq!(child.kind(), *expected_kind); + assert_eq!(child.text_range(), *expected_range); + } else { + panic!("Mismatched number of children and expected values"); + } + } + println!(); + } + #[test] - fn other_parser_test() { - let source: String = r#"pragma circom 2.0.0; + fn parser_test_1() { + let source: &str = test_programs::PARSER_TEST_1; + + let expected_pragma = "pragma circom 2.0.0;".to_string(); + let expected_kinds = vec![ + Pragma, + EndLine, + EndLine, + WhiteSpace, + EndLine, + WhiteSpace, + TemplateDef, + EndLine, + WhiteSpace, + TemplateDef, + WhiteSpace, + EndLine, + WhiteSpace, + ]; + let expected_ranges = vec![ + TextRange::new(0.into(), 20.into()), + TextRange::new(20.into(), 21.into()), + TextRange::new(21.into(), 22.into()), + TextRange::new(22.into(), 26.into()), + TextRange::new(26.into(), 27.into()), + TextRange::new(27.into(), 31.into()), + TextRange::new(31.into(), 57.into()), + TextRange::new(57.into(), 58.into()), + TextRange::new(58.into(), 62.into()), + TextRange::new(62.into(), 88.into()), + TextRange::new(88.into(), 89.into()), + TextRange::new(89.into(), 90.into()), + TextRange::new(90.into(), 94.into()), + ]; + + let syntax = SyntaxTreeBuilder::syntax_tree(source); - template Multiplier2 () {} - template Multiplier2 () {} - "# - .to_string(); + if let Some(ast) = AstCircomProgram::cast(syntax) { + check_ast_children(&ast, &expected_kinds, &expected_ranges); - let syntax = SyntaxTreeBuilder::syntax_tree(&source); + // check pragma + let pragma = ast.pragma().unwrap().syntax().text().to_string(); + assert_eq!(pragma, expected_pragma, "Pragma is not correct!"); - if let Some(ast) = AstCircomProgram::cast(syntax) { + // check ast hash let mut hasher = DefaultHasher::default(); ast.syntax().hash(&mut hasher); - // println!("{:#?}", syntax); - println!("{:?}", hasher.finish()); + let _ast_hash = hasher.finish(); + // check template hash let mut h1 = DefaultHasher::default(); - let mut h2 = DefaultHasher::default(); let template = ast.template_list(); @@ -91,124 +185,116 @@ mod tests { template[0].syntax().hash(&mut h1); template[1].syntax().hash(&mut h2); - println!("{}", h1.finish()); - println!("{}", h2.finish()); - println!("{:?}", template[0].syntax().text()); - println!("{:?}", template[1].syntax().text()); - println!("{}", template[0].syntax() == template[0].syntax()); - println!( - "{}", - template[0].syntax().green() == template[1].syntax().green() + assert_ne!( + h1.finish(), + h2.finish(), + "Templates with same syntax should have different hashes!" + ); + + // check template syntax (text & green node) + assert_eq!( + template[0].syntax().text(), + template[1].syntax().text(), + "The syntax (as text) of template 1 and 2 must be the same!" + ); + assert_eq!( + template[0].syntax().green(), + template[1].syntax().green(), + "The syntax (as green node) of template 1 and 2 must be the same!!" ); } + } + + #[test] + fn parser_test_2() { + let source = test_programs::PARSER_TEST_2; + + let syntax = SyntaxTreeBuilder::syntax_tree(source); + + if let Some(ast) = AstCircomProgram::cast(syntax) { + // print_ast_children(&ast); + + println!("Pragma: {:?}", ast.pragma().unwrap().syntax().text()); - // find token + print!("Templates: "); + let templates = ast.template_list(); + for template in templates.iter() { + // print!("{:?} ", template.name().unwrap().name().unwrap().syntax().text()); + print!("{:?} ", template.name().unwrap().syntax().text()); // leading whitespaces + // print!("{:?} ", template.syntax().text()); // leading whitespaces + } + println!(); + + print!("Functions: "); + let functions = ast.function_list(); + for function in functions.iter() { + print!("{:?} ", function.function_name().unwrap().syntax().text()); + // leading whitespaces + // print!("{:?} ", function.syntax().text()); // leading whitespaces + } + println!(); + } } #[test] - fn parser_test() { - let source = r#"/* - Copyright 2018 0KIMS association. - - This file is part of circom (Zero Knowledge Circuit Compiler). - - circom is a free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - circom is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - You should have received a copy of the GNU General Public License - along with circom. If not, see . - */ - /* - - Binary Sum - ========== - - This component creates a binary sum componet of ops operands and n bits each operand. - - e is Number of carries: Depends on the number of operands in the input. - - Main Constraint: - in[0][0] * 2^0 + in[0][1] * 2^1 + ..... + in[0][n-1] * 2^(n-1) + - + in[1][0] * 2^0 + in[1][1] * 2^1 + ..... + in[1][n-1] * 2^(n-1) + - + .. - + in[ops-1][0] * 2^0 + in[ops-1][1] * 2^1 + ..... + in[ops-1][n-1] * 2^(n-1) + - === - out[0] * 2^0 + out[1] * 2^1 + + out[n+e-1] *2(n+e-1) - - To waranty binary outputs: - - out[0] * (out[0] - 1) === 0 - out[1] * (out[0] - 1) === 0 - . - . - . - out[n+e-1] * (out[n+e-1] - 1) == 0 - - */ - - - /* - This function calculates the number of extra bits in the output to do the full sum. - */ - pragma circom 2.0.0; - - function nbits(a) { - var n = 1; - var r = 0; - while (n-1> k) & 1; - - // Ensure out is binary - out[k] * (out[k] - 1) === 0; - - lout += out[k] * e2; - - e2 = e2+e2; + } + + #[test] + fn parser_test_5() { + let source = test_programs::PARSER_TEST_5; + + let syntax = SyntaxTreeBuilder::syntax_tree(source); + + if let Some(ast) = AstCircomProgram::cast(syntax) { + // print_ast_children(&ast); + + println!("{:?}", ast.pragma()); + // assert!(ast.pragma().is_none(), "No pragma in source code"); } - - // Ensure the sum; - - lin === lout; } - "#; - let _syntax = SyntaxTreeBuilder::syntax_tree(source); + #[test] + fn parser_test_6() { + let source = test_programs::PARSER_TEST_6; + + let syntax = SyntaxTreeBuilder::syntax_tree(source); + + if let Some(ast) = AstCircomProgram::cast(syntax) { + // print_ast_children(&ast); + + println!("{:?}", ast.pragma()); + // assert!(ast.pragma().is_none(), "No pragma in source code"); + } } } diff --git a/crates/syntax/src/test_programs.rs b/crates/syntax/src/test_programs.rs new file mode 100644 index 0000000..0879679 --- /dev/null +++ b/crates/syntax/src/test_programs.rs @@ -0,0 +1,135 @@ +pub const PARSER_TEST_1: &str = r#"pragma circom 2.0.0; + + + template Multiplier2 () {} + template Multiplier2 () {} + "#; + +pub const PARSER_TEST_2: &str = r#"/* + Copyright 2018 0KIMS association. + + This file is part of circom (Zero Knowledge Circuit Compiler). + + circom is a free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + circom is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with circom. If not, see . +*/ +/* + +Binary Sum +========== + +This component creates a binary sum componet of ops operands and n bits each operand. + +e is Number of carries: Depends on the number of operands in the input. + +Main Constraint: + in[0][0] * 2^0 + in[0][1] * 2^1 + ..... + in[0][n-1] * 2^(n-1) + + + in[1][0] * 2^0 + in[1][1] * 2^1 + ..... + in[1][n-1] * 2^(n-1) + + + .. + + in[ops-1][0] * 2^0 + in[ops-1][1] * 2^1 + ..... + in[ops-1][n-1] * 2^(n-1) + + === + out[0] * 2^0 + out[1] * 2^1 + + out[n+e-1] *2(n+e-1) + +To waranty binary outputs: + + out[0] * (out[0] - 1) === 0 + out[1] * (out[0] - 1) === 0 + . + . + . + out[n+e-1] * (out[n+e-1] - 1) == 0 + + */ + + +/* + This function calculates the number of extra bits in the output to do the full sum. + */ + pragma circom 2.0.0; + +function nbits(a) { + var n = 1; + var r = 0; + while (n-1> k) & 1; + + // Ensure out is binary + out[k] * (out[k] - 1) === 0; + + lout += out[k] * e2; + + e2 = e2+e2; + } + + // Ensure the sum; + + lin === lout; + } + "#; + +pub const PARSER_TEST_3: &str = r#" + +// comment :> + + pragma circom 2.0.0; + + "#; + +pub const PARSER_TEST_4: &str = r#" + +/* +comment +blocks +*/ +pragma circom 2.0.0; + "#; + +pub const PARSER_TEST_5: &str = r#" +// no pragma here + template Multiplier2 () {} + "#; + +pub const PARSER_TEST_6: &str = r#" +/* T _ T */ + template Multiplier2 () {} + "#; + \ No newline at end of file From 6913c55cf23105829d5cf8f4b956f38775d17546 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Wed, 13 Nov 2024 13:20:34 +0700 Subject: [PATCH 02/15] remove comment in input test --- crates/parser/src/input.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index c8e17c8..cdfd5d8 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -120,10 +120,5 @@ mod tests { let input = Input::new(&source); assert_eq!(expected_input, input, "Tokens extract from source code are not correct"); - - // for i in 0..min(input.size(), 10) { - // println!("kind = {:?}", input.kind[i]); - // println!("position {:?}", input.position[i]); - // } } } From caac481857e878c4dd3113c2ca2e8c1d345d1dd7 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Mon, 18 Nov 2024 19:55:59 +0700 Subject: [PATCH 03/15] manage out-of-bound case, update input test --- crates/parser/src/input.rs | 297 +++++++++++++++++++++---------------- 1 file changed, 173 insertions(+), 124 deletions(-) diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index cdfd5d8..bb37613 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -1,124 +1,173 @@ -use std::ops::Range; - -use logos::Lexer; - -use crate::token_kind::TokenKind; - -#[derive(Debug, PartialEq)] -pub struct Input<'a> { - kind: Vec, - source: &'a str, - position: Vec>, -} - -impl<'a> Input<'a> { - pub fn new(source: &'a str) -> Self { - let mut input = Input { - source, - kind: Vec::new(), - position: Vec::new(), - }; - - let mut lex = Lexer::::new(source); - - while let Some(tk) = lex.next() { - if tk == TokenKind::CommentBlockOpen { - let mut closed = false; - let mut join_span = lex.span(); - while let Some(t) = lex.next() { - join_span.end = lex.span().end; - if t == TokenKind::CommentBlockClose { - closed = true; - break; - } - } - - if closed { - input.kind.push(TokenKind::BlockComment); - } else { - input.kind.push(TokenKind::Error); - } - input.position.push(join_span); - } else { - input.kind.push(tk); - input.position.push(lex.span()); - } - } - - input - } - - pub fn token_value(&self, index: usize) -> &'a str { - &self.source[self.position[index].start..self.position[index].end] - } - - pub fn kind_of(&self, index: usize) -> TokenKind { - if index < self.kind.len() { - self.kind[index] - } else { - TokenKind::EOF - } - } - - pub fn position_of(&self, index: usize) -> Range { - self.position[index].clone() - } - - pub fn size(&self) -> usize { - self.kind.len() - } -} - -#[cfg(test)] -mod tests { - // use std::cmp::min; - - use crate::token_kind::TokenKind; - - use super::Input; - - #[test] - fn test_input() { - let source = r#" - /*a + b == 10*/ - a + 10 - "# - .to_string(); - - let expected_input = Input { - kind: vec![ - TokenKind::EndLine, - TokenKind::WhiteSpace, - TokenKind::BlockComment, - TokenKind::EndLine, - TokenKind::WhiteSpace, - TokenKind::Identifier, - TokenKind::WhiteSpace, - TokenKind::Add, - TokenKind::WhiteSpace, - TokenKind::Number, - TokenKind::EndLine, - TokenKind::WhiteSpace - ], - source: &source, - position: vec![ - {0..1}, - {1..9}, - {9..24}, - {24..25}, - {25..33}, - {33..34}, - {34..35}, - {35..36}, - {36..37}, - {37..39}, - {39..40}, - {40..44}, - ] - }; - - let input = Input::new(&source); - - assert_eq!(expected_input, input, "Tokens extract from source code are not correct"); - } -} +use std::ops::Range; + +use logos::Lexer; + +use crate::token_kind::TokenKind; + +#[derive(Debug, PartialEq)] +pub struct Input<'a> { + kind: Vec, + source: &'a str, + position: Vec>, +} + +impl<'a> Input<'a> { + pub fn new(source: &'a str) -> Self { + let mut input = Input { + source, + kind: Vec::new(), + position: Vec::new(), + }; + + let mut lex = Lexer::::new(source); + + while let Some(tk) = lex.next() { + if tk == TokenKind::CommentBlockOpen { + let mut closed = false; + let mut join_span = lex.span(); + while let Some(t) = lex.next() { + join_span.end = lex.span().end; + if t == TokenKind::CommentBlockClose { + closed = true; + break; + } + } + + if closed { + input.kind.push(TokenKind::BlockComment); + } else { + input.kind.push(TokenKind::Error); + } + input.position.push(join_span); + } else { + input.kind.push(tk); + input.position.push(lex.span()); + } + } + + input + } + + pub fn token_value(&self, index: usize) -> &'a str { + if index < self.kind.len() { + &self.source[self.position[index].start..self.position[index].end] + } else { + // return error for out of bound index + "" + } + } + + pub fn kind_of(&self, index: usize) -> TokenKind { + if index < self.kind.len() { + self.kind[index] + } else { + TokenKind::EOF + } + } + + pub fn position_of(&self, index: usize) -> Range { + if index < self.kind.len() { + self.position[index].clone() + } else { + // return error for out of bound index + 0..0 + } + + } + + pub fn size(&self) -> usize { + self.kind.len() + } +} + +#[cfg(test)] +mod tests { + use crate::token_kind::TokenKind; + + use super::Input; + + #[test] + fn test_input() { + let source = r#" + /*a + b == 10*/ + a + 10 + "# + .to_string(); + + let expected_input = Input { + kind: vec![ + TokenKind::EndLine, + TokenKind::WhiteSpace, + TokenKind::BlockComment, + TokenKind::EndLine, + TokenKind::WhiteSpace, + TokenKind::Identifier, + TokenKind::WhiteSpace, + TokenKind::Add, + TokenKind::WhiteSpace, + TokenKind::Number, + TokenKind::EndLine, + TokenKind::WhiteSpace + ], + source: &source, + position: vec![ + {0..1}, + {1..9}, + {9..24}, + {24..25}, + {25..33}, + {33..34}, + {34..35}, + {35..36}, + {36..37}, + {37..39}, + {39..40}, + {40..44}, + ] + }; + + let input = Input::new(&source); + + assert_eq!(expected_input, input, "Tokens extract from source code are not correct"); + + // test size method + let expected_size = input.kind.len(); + let size = input.size(); + assert_eq!(expected_size, size, "size method failed"); + + // test methods with index out of bound + let index = input.kind.len(); + + let expected_token_value = ""; + let token_value = input.token_value(index); + assert_eq!(expected_token_value, token_value, "token_value failed (case: index out of bound)"); + + let expected_kind = TokenKind::EOF; + let kind = input.kind_of(index); + assert_eq!(expected_kind, kind, "kind_of failed (case: index out of bound)"); + + let expected_position = 0..0; + let position = input.position_of(index); + assert_eq!(expected_position, position, "position_of failed (case: index out of bound)"); + + // test methods with index in bound + if input.size() == 0 { + return; + } + + let index = input.size() / 2; // a valid index if input size > 0 + + let expected_token_value = &input.source[input.position[index].clone()]; + let token_value = input.token_value(index); + assert_eq!(expected_token_value, token_value, "token_value failed"); + + let expected_kind = input.kind[index]; + let kind = input.kind_of(index); + assert_eq!(expected_kind, kind, "kind_of failed"); + + let expected_position = input.position[index].clone(); + let position = input.position_of(index); + assert_eq!(expected_position, position, "position_of failed"); + + } +} From 8efa4f70d57a52e24b92ed4c5002ec339ed77095 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Mon, 18 Nov 2024 21:30:38 +0700 Subject: [PATCH 04/15] update input test --- crates/parser/src/input.rs | 145 +++++++++++++++++++++++++++++++------ 1 file changed, 123 insertions(+), 22 deletions(-) diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index bb37613..ebc1b9e 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -72,7 +72,6 @@ impl<'a> Input<'a> { // return error for out of bound index 0..0 } - } pub fn size(&self) -> usize { @@ -87,7 +86,7 @@ mod tests { use super::Input; #[test] - fn test_input() { + fn test_input_1() { let source = r#" /*a + b == 10*/ a + 10 @@ -107,27 +106,121 @@ mod tests { TokenKind::WhiteSpace, TokenKind::Number, TokenKind::EndLine, - TokenKind::WhiteSpace + TokenKind::WhiteSpace, ], source: &source, position: vec![ - {0..1}, - {1..9}, - {9..24}, - {24..25}, - {25..33}, - {33..34}, - {34..35}, - {35..36}, - {36..37}, - {37..39}, - {39..40}, - {40..44}, - ] + { 0..1 }, + { 1..9 }, + { 9..24 }, + { 24..25 }, + { 25..33 }, + { 33..34 }, + { 34..35 }, + { 35..36 }, + { 36..37 }, + { 37..39 }, + { 39..40 }, + { 40..44 }, + ], }; let input = Input::new(&source); + assert_eq!( + expected_input, input, + "Tokens extract from source code are not correct" + ); + + // test size method + let expected_size = input.kind.len(); + let size = input.size(); + assert_eq!(expected_size, size, "size method failed"); + + // test methods with index out of bound + let index = input.kind.len(); + + let expected_token_value = ""; + let token_value = input.token_value(index); + assert_eq!( + expected_token_value, token_value, + "token_value failed (case: index out of bound)" + ); + + let expected_kind = TokenKind::EOF; + let kind = input.kind_of(index); + assert_eq!( + expected_kind, kind, + "kind_of failed (case: index out of bound)" + ); + + let expected_position = 0..0; + let position = input.position_of(index); + assert_eq!( + expected_position, position, + "position_of failed (case: index out of bound)" + ); + + // test methods with index in bound + if input.size() == 0 { + return; + } + + let index = input.size() / 2; // a valid index if input size > 0 + + let expected_token_value = &input.source[input.position[index].clone()]; + let token_value = input.token_value(index); + assert_eq!(expected_token_value, token_value, "token_value failed"); + + let expected_kind = input.kind[index]; + let kind = input.kind_of(index); + assert_eq!(expected_kind, kind, "kind_of failed"); + + let expected_position = input.position[index].clone(); + let position = input.position_of(index); + assert_eq!(expected_position, position, "position_of failed"); + } + + #[test] + fn test_input_2() { + let source = r#" + pragma 2.1.1; + /*a + b == 10* + a + 10 + template + + /* + "# + .to_string(); + + let expected_input = Input { + kind: vec![ + TokenKind::EndLine, + TokenKind::WhiteSpace, + TokenKind::Pragma, + TokenKind::WhiteSpace, + TokenKind::Version, + TokenKind::Semicolon, + TokenKind::EndLine, + TokenKind::WhiteSpace, + TokenKind::Error, + ], + source: &source, + position: vec![ + 0..1, + 1..9, + 9..15, + 15..16, + 16..21, + 21..22, + 22..23, + 23..31, + 31..94, + ], + }; + + let input = Input::new(&source); + assert_eq!(expected_input, input, "Tokens extract from source code are not correct"); // test size method @@ -140,15 +233,24 @@ mod tests { let expected_token_value = ""; let token_value = input.token_value(index); - assert_eq!(expected_token_value, token_value, "token_value failed (case: index out of bound)"); + assert_eq!( + expected_token_value, token_value, + "token_value failed (case: index out of bound)" + ); let expected_kind = TokenKind::EOF; let kind = input.kind_of(index); - assert_eq!(expected_kind, kind, "kind_of failed (case: index out of bound)"); + assert_eq!( + expected_kind, kind, + "kind_of failed (case: index out of bound)" + ); let expected_position = 0..0; let position = input.position_of(index); - assert_eq!(expected_position, position, "position_of failed (case: index out of bound)"); + assert_eq!( + expected_position, position, + "position_of failed (case: index out of bound)" + ); // test methods with index in bound if input.size() == 0 { @@ -156,7 +258,7 @@ mod tests { } let index = input.size() / 2; // a valid index if input size > 0 - + let expected_token_value = &input.source[input.position[index].clone()]; let token_value = input.token_value(index); assert_eq!(expected_token_value, token_value, "token_value failed"); @@ -164,10 +266,9 @@ mod tests { let expected_kind = input.kind[index]; let kind = input.kind_of(index); assert_eq!(expected_kind, kind, "kind_of failed"); - + let expected_position = input.position[index].clone(); let position = input.position_of(index); assert_eq!(expected_position, position, "position_of failed"); - } } From c78761e2ae8378241608491964f93f81ac160b9b Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Fri, 6 Dec 2024 21:25:09 +0700 Subject: [PATCH 05/15] update input test --- crates/parser/src/input.rs | 285 ++++++++++++++++++++++---------- crates/parser/src/token_kind.rs | 3 + crates/syntax/src/syntax.rs | 12 +- 3 files changed, 209 insertions(+), 91 deletions(-) diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index ebc1b9e..4b1cc55 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -81,50 +81,11 @@ impl<'a> Input<'a> { #[cfg(test)] mod tests { - use crate::token_kind::TokenKind; + use crate::token_kind::TokenKind::{self, *}; use super::Input; - #[test] - fn test_input_1() { - let source = r#" - /*a + b == 10*/ - a + 10 - "# - .to_string(); - - let expected_input = Input { - kind: vec![ - TokenKind::EndLine, - TokenKind::WhiteSpace, - TokenKind::BlockComment, - TokenKind::EndLine, - TokenKind::WhiteSpace, - TokenKind::Identifier, - TokenKind::WhiteSpace, - TokenKind::Add, - TokenKind::WhiteSpace, - TokenKind::Number, - TokenKind::EndLine, - TokenKind::WhiteSpace, - ], - source: &source, - position: vec![ - { 0..1 }, - { 1..9 }, - { 9..24 }, - { 24..25 }, - { 25..33 }, - { 33..34 }, - { 34..35 }, - { 35..36 }, - { 36..37 }, - { 37..39 }, - { 39..40 }, - { 40..44 }, - ], - }; - + fn test(source: &str, expected_input: Input) { let input = Input::new(&source); assert_eq!( @@ -182,7 +143,49 @@ mod tests { } #[test] - fn test_input_2() { + fn test_comment_block() { + let source = r#" + /*a + b == 10*/ + a + 10 + "#; + + let expected_input = Input { + kind: vec![ + TokenKind::EndLine, + TokenKind::WhiteSpace, + TokenKind::BlockComment, + TokenKind::EndLine, + TokenKind::WhiteSpace, + TokenKind::Identifier, + TokenKind::WhiteSpace, + TokenKind::Add, + TokenKind::WhiteSpace, + TokenKind::Number, + TokenKind::EndLine, + TokenKind::WhiteSpace, + ], + source: &source, + position: vec![ + { 0..1 }, + { 1..9 }, + { 9..24 }, + { 24..25 }, + { 25..33 }, + { 33..34 }, + { 34..35 }, + { 35..36 }, + { 36..37 }, + { 37..39 }, + { 39..40 }, + { 40..44 }, + ], + }; + + test(source, expected_input); + } + + #[test] + fn test_comment_error() { let source = r#" pragma 2.1.1; /*a + b == 10* @@ -190,8 +193,7 @@ mod tests { template /* - "# - .to_string(); + "#; let expected_input = Input { kind: vec![ @@ -219,56 +221,169 @@ mod tests { ], }; - let input = Input::new(&source); + test(source, expected_input); + } - assert_eq!(expected_input, input, "Tokens extract from source code are not correct"); + #[test] + fn test_pragma() { + let source = r#" + /* test pragma token kinds */ - // test size method - let expected_size = input.kind.len(); - let size = input.size(); - assert_eq!(expected_size, size, "size method failed"); + pragma circom 2.0.0; - // test methods with index out of bound - let index = input.kind.len(); + "#; - let expected_token_value = ""; - let token_value = input.token_value(index); - assert_eq!( - expected_token_value, token_value, - "token_value failed (case: index out of bound)" - ); - - let expected_kind = TokenKind::EOF; - let kind = input.kind_of(index); - assert_eq!( - expected_kind, kind, - "kind_of failed (case: index out of bound)" - ); + let expected_input = Input { + kind: vec![ + EndLine, + WhiteSpace, + BlockComment, + EndLine, + EndLine, + WhiteSpace, + Pragma, + WhiteSpace, + Circom, + WhiteSpace, + Version, + Semicolon, + EndLine, + EndLine, + WhiteSpace, + ], + source: &source, + position: vec![ + 0..1, + 1..9, + 9..38, + 38..39, + 39..40, + 40..44, + 44..50, + 50..51, + 51..57, + 57..58, + 58..63, + 63..64, + 64..65, + 65..66, + 66..70, + ], + }; - let expected_position = 0..0; - let position = input.position_of(index); - assert_eq!( - expected_position, position, - "position_of failed (case: index out of bound)" - ); + test(source, expected_input); + } - // test methods with index in bound - if input.size() == 0 { - return; + #[test] + fn test_function() { + let source = r#" + function nbits(a) { + var n = 1; + var r = 0; + while (n-1 0 + let expected_input = Input { + kind: vec![ + EndLine, WhiteSpace, FunctionKw, WhiteSpace, Identifier, LParen, Identifier, + RParen, WhiteSpace, LCurly, EndLine, WhiteSpace, VarKw, WhiteSpace, Identifier, + WhiteSpace, Assign, WhiteSpace, Number, Semicolon, EndLine, WhiteSpace, VarKw, + WhiteSpace, Identifier, WhiteSpace, Assign, WhiteSpace, Number, Semicolon, EndLine, + WhiteSpace, WhileKw, WhiteSpace, LParen, Identifier, Sub, Number, LessThan, + Identifier, RParen, WhiteSpace, LCurly, EndLine, WhiteSpace, Identifier, Add, Add, + Semicolon, EndLine, WhiteSpace, Identifier, WhiteSpace, Mul, Assign, WhiteSpace, + Number, Semicolon, EndLine, WhiteSpace, RCurly, EndLine, WhiteSpace, ReturnKw, + WhiteSpace, Identifier, Semicolon, EndLine, WhiteSpace, RCurly, + ], + source: &source, + position: vec![ + 0..1, + 1..5, + 5..13, + 13..14, + 14..19, + 19..20, + 20..21, + 21..22, + 22..23, + 23..24, + 24..25, + 25..33, + 33..36, + 36..37, + 37..38, + 38..39, + 39..40, + 40..41, + 41..42, + 42..43, + 43..44, + 44..52, + 52..55, + 55..56, + 56..57, + 57..58, + 58..59, + 59..60, + 60..61, + 61..62, + 62..63, + 63..71, + 71..76, + 76..77, + 77..78, + 78..79, + 79..80, + 80..81, + 81..82, + 82..83, + 83..84, + 84..85, + 85..86, + 86..87, + 87..99, + 99..100, + 100..101, + 101..102, + 102..103, + 103..104, + 104..116, + 116..117, + 117..118, + 118..119, + 119..120, + 120..121, + 121..122, + 122..123, + 123..124, + 124..132, + 132..133, + 133..134, + 134..142, + 142..148, + 148..149, + 149..150, + 150..151, + 151..152, + 152..156, + 156..157, + ], + }; - let expected_token_value = &input.source[input.position[index].clone()]; - let token_value = input.token_value(index); - assert_eq!(expected_token_value, token_value, "token_value failed"); + test(source, expected_input); + } - let expected_kind = input.kind[index]; - let kind = input.kind_of(index); - assert_eq!(expected_kind, kind, "kind_of failed"); + // #[test] + // fn test_gen() { + // let source = r#" + // "#; - let expected_position = input.position[index].clone(); - let position = input.position_of(index); - assert_eq!(expected_position, position, "position_of failed"); - } + // let input = Input::new(&source); + // println!("{:?}", input.kind); + // println!("{:?}", input.position); + // } } diff --git a/crates/parser/src/token_kind.rs b/crates/parser/src/token_kind.rs index 26145bf..779191f 100644 --- a/crates/parser/src/token_kind.rs +++ b/crates/parser/src/token_kind.rs @@ -229,6 +229,7 @@ impl TokenKind { _ => None, } } + pub fn prefix(self) -> Option { match self { Self::Sub => Some(100), @@ -245,9 +246,11 @@ impl TokenKind { _ => None, } } + pub fn is_declaration_kw(self) -> bool { matches!(self, Self::VarKw | Self::ComponentKw | Self::SignalKw) } + pub fn is_trivial(self) -> bool { matches!( self, diff --git a/crates/syntax/src/syntax.rs b/crates/syntax/src/syntax.rs index 3483b61..21b9734 100644 --- a/crates/syntax/src/syntax.rs +++ b/crates/syntax/src/syntax.rs @@ -126,7 +126,7 @@ mod tests { } #[test] - fn parser_test_1() { + fn syntax_test_1() { let source: &str = test_programs::PARSER_TEST_1; let expected_pragma = "pragma circom 2.0.0;".to_string(); @@ -206,7 +206,7 @@ mod tests { } #[test] - fn parser_test_2() { + fn syntax_test_2() { let source = test_programs::PARSER_TEST_2; let syntax = SyntaxTreeBuilder::syntax_tree(source); @@ -237,7 +237,7 @@ mod tests { } #[test] - fn parser_test_3() { + fn syntax_test_3() { let source = test_programs::PARSER_TEST_3; let syntax = SyntaxTreeBuilder::syntax_tree(source); @@ -254,7 +254,7 @@ mod tests { } #[test] - fn parser_test_4() { + fn syntax_test_4() { let source = test_programs::PARSER_TEST_4; let syntax = SyntaxTreeBuilder::syntax_tree(source); @@ -271,7 +271,7 @@ mod tests { } #[test] - fn parser_test_5() { + fn syntax_test_5() { let source = test_programs::PARSER_TEST_5; let syntax = SyntaxTreeBuilder::syntax_tree(source); @@ -285,7 +285,7 @@ mod tests { } #[test] - fn parser_test_6() { + fn syntax_test_6() { let source = test_programs::PARSER_TEST_6; let syntax = SyntaxTreeBuilder::syntax_tree(source); From bfd1f1519bba52be08a0b9032bdaa28edad5dff5 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Sun, 8 Dec 2024 15:38:34 +0700 Subject: [PATCH 06/15] make test programs private --- crates/syntax/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs index adeb8b0..d1300bb 100644 --- a/crates/syntax/src/lib.rs +++ b/crates/syntax/src/lib.rs @@ -3,4 +3,4 @@ pub mod syntax_node; pub mod abstract_syntax_tree; -pub mod test_programs; \ No newline at end of file +mod test_programs; \ No newline at end of file From e054b574fa54e0862006da06e0072a11789aa780 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Sun, 8 Dec 2024 15:41:47 +0700 Subject: [PATCH 07/15] remove comments in syntax tests --- crates/syntax/src/syntax.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/syntax/src/syntax.rs b/crates/syntax/src/syntax.rs index 21b9734..58d7194 100644 --- a/crates/syntax/src/syntax.rs +++ b/crates/syntax/src/syntax.rs @@ -277,9 +277,8 @@ mod tests { let syntax = SyntaxTreeBuilder::syntax_tree(source); if let Some(ast) = AstCircomProgram::cast(syntax) { - // print_ast_children(&ast); - - println!("{:?}", ast.pragma()); + println!("pragma: {:?}", ast.pragma()); + println!("template list: {:?}", ast.template_list()); // assert!(ast.pragma().is_none(), "No pragma in source code"); } } @@ -294,6 +293,8 @@ mod tests { // print_ast_children(&ast); println!("{:?}", ast.pragma()); + + println!("template list: {:?}", ast.template_list()); // assert!(ast.pragma().is_none(), "No pragma in source code"); } } From 15c9e5f7dfb801952f358f8e2d97036cb7928a67 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Sun, 8 Dec 2024 15:45:28 +0700 Subject: [PATCH 08/15] format --- crates/parser/src/grammar.rs | 5 +++-- crates/parser/src/grammar/block.rs | 1 + crates/parser/src/grammar/declaration.rs | 3 +++ crates/parser/src/grammar/expression.rs | 1 + crates/parser/src/grammar/function.rs | 8 +++++--- crates/parser/src/grammar/include.rs | 2 +- crates/parser/src/grammar/list_identity.rs | 1 + crates/parser/src/grammar/main_component.rs | 7 +++++++ crates/parser/src/grammar/template.rs | 2 +- crates/parser/src/output.rs | 1 + crates/parser/src/token_kind.rs | 2 +- crates/syntax/src/syntax.rs | 10 ---------- 12 files changed, 25 insertions(+), 18 deletions(-) diff --git a/crates/parser/src/grammar.rs b/crates/parser/src/grammar.rs index 98832cc..6b3791a 100644 --- a/crates/parser/src/grammar.rs +++ b/crates/parser/src/grammar.rs @@ -35,14 +35,15 @@ pub mod entry { } pragma::pragma(p); + while !p.eof() { match p.current() { TemplateKw => { template::template(p); - } + }, IncludeKw => { include::include(p); - } + }, ComponentKw => main_component::main_component(p), FunctionKw => function::function_parse(p), _ => { diff --git a/crates/parser/src/grammar/block.rs b/crates/parser/src/grammar/block.rs index 7b0f89a..5a40865 100644 --- a/crates/parser/src/grammar/block.rs +++ b/crates/parser/src/grammar/block.rs @@ -2,6 +2,7 @@ use super::*; pub fn block(p: &mut Parser) { p.inc_rcurly(); + if !p.at(LCurly) { p.advance_with_error("Miss {"); } else { diff --git a/crates/parser/src/grammar/declaration.rs b/crates/parser/src/grammar/declaration.rs index 1ebbb70..8022f81 100644 --- a/crates/parser/src/grammar/declaration.rs +++ b/crates/parser/src/grammar/declaration.rs @@ -3,6 +3,9 @@ use super::{ *, }; +// "signal" --> None +// "signal input" --> Some(true) +// "signal output" --> Some(false) fn signal_header(p: &mut Parser) -> Option { let mut res = None; let m = p.open(); diff --git a/crates/parser/src/grammar/expression.rs b/crates/parser/src/grammar/expression.rs index 8932828..8c7fad6 100644 --- a/crates/parser/src/grammar/expression.rs +++ b/crates/parser/src/grammar/expression.rs @@ -1,6 +1,7 @@ use crate::parser::Marker; use super::*; + pub(super) fn expression(p: &mut Parser) { let m = p.open(); circom_expression(p); diff --git a/crates/parser/src/grammar/function.rs b/crates/parser/src/grammar/function.rs index 24b60d3..fad1d44 100644 --- a/crates/parser/src/grammar/function.rs +++ b/crates/parser/src/grammar/function.rs @@ -1,12 +1,15 @@ use crate::grammar::*; +// fucntion name() pub fn function_parse(p: &mut Parser) { let m = p.open(); + p.expect(FunctionKw); - let fn_name_marker = p.open(); + let fn_name_marker = p.open(); p.expect(Identifier); p.close(fn_name_marker, FunctionName); + p.expect(LParen); let arg_marker = p.open(); while !p.at(RParen) && !p.eof() { @@ -15,11 +18,10 @@ pub fn function_parse(p: &mut Parser) { p.expect(Comma); } } - p.close(arg_marker, ParameterList); - p.expect(RParen); block::block(p); + p.close(m, FunctionDef); } diff --git a/crates/parser/src/grammar/include.rs b/crates/parser/src/grammar/include.rs index 7269995..d8f0ac7 100644 --- a/crates/parser/src/grammar/include.rs +++ b/crates/parser/src/grammar/include.rs @@ -2,7 +2,7 @@ use super::*; pub(super) fn include(p: &mut Parser) { // assert!(p.at(IncludeKw)); - + let m = p.open(); p.expect(IncludeKw); p.expect(CircomString); diff --git a/crates/parser/src/grammar/list_identity.rs b/crates/parser/src/grammar/list_identity.rs index 6a4effa..73f85c9 100644 --- a/crates/parser/src/grammar/list_identity.rs +++ b/crates/parser/src/grammar/list_identity.rs @@ -1,5 +1,6 @@ use super::*; +// a, b, c, d pub fn parse(p: &mut Parser) { while p.at(Identifier) && !p.eof() { p.expect(Identifier); diff --git a/crates/parser/src/grammar/main_component.rs b/crates/parser/src/grammar/main_component.rs index 538a73b..3f6f862 100644 --- a/crates/parser/src/grammar/main_component.rs +++ b/crates/parser/src/grammar/main_component.rs @@ -1,13 +1,20 @@ use super::*; +/* +component main {public [signal_list]} = tempid(v1,...,vn); + +{public [signal_list]} is optional +*/ pub fn main_component(p: &mut Parser) { p.expect(ComponentKw); p.expect(MainKw); + p.expect(LCurly); p.expect(PublicKw); p.expect(LBracket); list_identity::parse(p); p.expect(RBracket); + p.expect(Assign); expression::expression(p); } diff --git a/crates/parser/src/grammar/template.rs b/crates/parser/src/grammar/template.rs index 9973366..9249b1f 100644 --- a/crates/parser/src/grammar/template.rs +++ b/crates/parser/src/grammar/template.rs @@ -1,7 +1,7 @@ use crate::grammar::*; /** * template Identifier() {content} - * + * template Identifier( param_1, ... , param_n ) { content } */ pub fn template(p: &mut Parser) { // assert!(p.at(TemplateKw)); diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs index 0a4e07d..271c2c3 100644 --- a/crates/parser/src/output.rs +++ b/crates/parser/src/output.rs @@ -30,6 +30,7 @@ impl Output { &self.children } } + impl From> for Output { fn from(events: Vec) -> Self { let mut stack = Vec::new(); diff --git a/crates/parser/src/token_kind.rs b/crates/parser/src/token_kind.rs index 779191f..9ebba15 100644 --- a/crates/parser/src/token_kind.rs +++ b/crates/parser/src/token_kind.rs @@ -229,7 +229,7 @@ impl TokenKind { _ => None, } } - + pub fn prefix(self) -> Option { match self { Self::Sub => Some(100), diff --git a/crates/syntax/src/syntax.rs b/crates/syntax/src/syntax.rs index 58d7194..f44b88c 100644 --- a/crates/syntax/src/syntax.rs +++ b/crates/syntax/src/syntax.rs @@ -212,14 +212,11 @@ mod tests { let syntax = SyntaxTreeBuilder::syntax_tree(source); if let Some(ast) = AstCircomProgram::cast(syntax) { - // print_ast_children(&ast); - println!("Pragma: {:?}", ast.pragma().unwrap().syntax().text()); print!("Templates: "); let templates = ast.template_list(); for template in templates.iter() { - // print!("{:?} ", template.name().unwrap().name().unwrap().syntax().text()); print!("{:?} ", template.name().unwrap().syntax().text()); // leading whitespaces // print!("{:?} ", template.syntax().text()); // leading whitespaces } @@ -243,8 +240,6 @@ mod tests { let syntax = SyntaxTreeBuilder::syntax_tree(source); if let Some(ast) = AstCircomProgram::cast(syntax) { - // print_ast_children(&ast); - println!("Pragma: {:?}", ast.pragma().unwrap().syntax().text()); println!( "Pragma version: {:?}", @@ -260,8 +255,6 @@ mod tests { let syntax = SyntaxTreeBuilder::syntax_tree(source); if let Some(ast) = AstCircomProgram::cast(syntax) { - // print_ast_children(&ast); - println!("Pragma: {:?}", ast.pragma().unwrap().syntax().text()); println!( "Pragma version: {:?}", @@ -290,10 +283,7 @@ mod tests { let syntax = SyntaxTreeBuilder::syntax_tree(source); if let Some(ast) = AstCircomProgram::cast(syntax) { - // print_ast_children(&ast); - println!("{:?}", ast.pragma()); - println!("template list: {:?}", ast.template_list()); // assert!(ast.pragma().is_none(), "No pragma in source code"); } From c76246b5251954ffb6e31f34294754eb5efcd327 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Sun, 8 Dec 2024 16:04:32 +0700 Subject: [PATCH 09/15] make Pragma optional, remove ROOT --- crates/parser/src/grammar.rs | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/crates/parser/src/grammar.rs b/crates/parser/src/grammar.rs index 6b3791a..a0c9f53 100644 --- a/crates/parser/src/grammar.rs +++ b/crates/parser/src/grammar.rs @@ -34,21 +34,14 @@ pub mod entry { p.skip(); } - pragma::pragma(p); - while !p.eof() { match p.current() { - TemplateKw => { - template::template(p); - }, - IncludeKw => { - include::include(p); - }, + Pragma => pragma::pragma(p), + TemplateKw => template::template(p), + IncludeKw => include::include(p), ComponentKw => main_component::main_component(p), FunctionKw => function::function_parse(p), - _ => { - p.advance_with_error("invalid token"); - } + _ => p.advance_with_error("invalid token"), } } p.close(m, CircomProgram); @@ -64,20 +57,10 @@ pub mod entry { impl Scope { pub fn parse(self, p: &mut Parser) { match self { - Self::Block => { - let m = p.open(); - block::block(p); - p.close(m, ROOT); - } + Self::Block => block::block(p), Self::CircomProgram => circom_program(p), - Self::Pragma => { - let m = p.open(); - pragma::pragma(p); - p.close(m, ROOT); - } - Self::Template => { - template::template(p); - } + Self::Pragma => pragma::pragma(p), + Self::Template => template::template(p), } } } From cd125abe3dee9663b84f010eeeff6775c10b08a9 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Sun, 8 Dec 2024 16:07:16 +0700 Subject: [PATCH 10/15] rename close() params, use advance() in eat(), fix typo scope --- crates/parser/src/parser.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 23c4dde..a4f0efe 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -52,8 +52,8 @@ impl<'a> Parser<'a> { } } - pub fn close(&mut self, marker_close: Marker, kind: TokenKind) -> Marker { - match marker_close { + pub fn close(&mut self, marker_open: Marker, kind: TokenKind) -> Marker { + match marker_open { Marker::Open(index) => { self.events[index] = Event::Open { kind }; self.events.push(Event::Close); @@ -159,8 +159,7 @@ impl<'a> Parser<'a> { pub fn eat(&mut self, kind: TokenKind) -> bool { if self.at(kind) { - self.events.push(Event::TokenPosition(self.pos)); - self.skip(); + self.advance(); return true; } false @@ -193,7 +192,7 @@ impl<'a> Parser<'a> { } impl Parser<'_> { - pub fn parsing_with_scrope(input: &Input, scope: Scope) -> Output { + pub fn parsing_with_scope(input: &Input, scope: Scope) -> Output { let mut p = Parser::new(input); scope.parse(&mut p); Output::from(p.events) @@ -201,6 +200,6 @@ impl Parser<'_> { pub fn parsing(input: &Input) -> Output { let c = Scope::CircomProgram; - Parser::parsing_with_scrope(input, c) + Parser::parsing_with_scope(input, c) } } From 15470b1035bf20a855ae0be0d83f74ed275e2311 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Sun, 8 Dec 2024 16:08:12 +0700 Subject: [PATCH 11/15] parse params using list_identity --- crates/parser/src/grammar/function.rs | 7 +------ crates/parser/src/grammar/template.rs | 12 +++++------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/crates/parser/src/grammar/function.rs b/crates/parser/src/grammar/function.rs index fad1d44..bca7018 100644 --- a/crates/parser/src/grammar/function.rs +++ b/crates/parser/src/grammar/function.rs @@ -12,12 +12,7 @@ pub fn function_parse(p: &mut Parser) { p.expect(LParen); let arg_marker = p.open(); - while !p.at(RParen) && !p.eof() { - p.expect(Identifier); - if p.at(Comma) { - p.expect(Comma); - } - } + list_identity::parse(p); p.close(arg_marker, ParameterList); p.expect(RParen); diff --git a/crates/parser/src/grammar/template.rs b/crates/parser/src/grammar/template.rs index 9249b1f..be5f17e 100644 --- a/crates/parser/src/grammar/template.rs +++ b/crates/parser/src/grammar/template.rs @@ -6,23 +6,21 @@ use crate::grammar::*; pub fn template(p: &mut Parser) { // assert!(p.at(TemplateKw)); let m = p.open(); + p.expect(TemplateKw); + let name_marker = p.open(); p.expect(Identifier); p.close(name_marker, TemplateName); p.expect(LParen); let arg_marker = p.open(); - while !p.at(RParen) && !p.eof() { - p.expect(Identifier); - if p.at(Comma) { - p.expect(Comma); - } - } - + list_identity::parse(p); p.close(arg_marker, ParameterList); p.expect(RParen); + block::block(p); + p.close(m, TemplateDef); } From 6dbb51a333ccc075c49a609895bce1a32b5c7041 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Tue, 10 Dec 2024 22:40:00 +0700 Subject: [PATCH 12/15] return Option in token_value() and position_of() --- crates/parser/src/input.rs | 22 +++++++++++----------- crates/syntax/src/syntax.rs | 3 ++- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index 4b1cc55..8ddf602 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -48,12 +48,12 @@ impl<'a> Input<'a> { input } - pub fn token_value(&self, index: usize) -> &'a str { + pub fn token_value(&self, index: usize) -> Option<&'a str> { if index < self.kind.len() { - &self.source[self.position[index].start..self.position[index].end] + Some(&self.source[self.position[index].start..self.position[index].end]) } else { - // return error for out of bound index - "" + // return None for out of bound index + None } } @@ -65,12 +65,12 @@ impl<'a> Input<'a> { } } - pub fn position_of(&self, index: usize) -> Range { + pub fn position_of(&self, index: usize) -> Option> { if index < self.kind.len() { - self.position[index].clone() + Some(self.position[index].clone()) } else { // return error for out of bound index - 0..0 + None } } @@ -101,7 +101,7 @@ mod tests { // test methods with index out of bound let index = input.kind.len(); - let expected_token_value = ""; + let expected_token_value = None; let token_value = input.token_value(index); assert_eq!( expected_token_value, token_value, @@ -115,7 +115,7 @@ mod tests { "kind_of failed (case: index out of bound)" ); - let expected_position = 0..0; + let expected_position = None; let position = input.position_of(index); assert_eq!( expected_position, position, @@ -130,7 +130,7 @@ mod tests { let index = input.size() / 2; // a valid index if input size > 0 let expected_token_value = &input.source[input.position[index].clone()]; - let token_value = input.token_value(index); + let token_value = input.token_value(index).unwrap(); assert_eq!(expected_token_value, token_value, "token_value failed"); let expected_kind = input.kind[index]; @@ -138,7 +138,7 @@ mod tests { assert_eq!(expected_kind, kind, "kind_of failed"); let expected_position = input.position[index].clone(); - let position = input.position_of(index); + let position = input.position_of(index).unwrap(); assert_eq!(expected_position, position, "position_of failed"); } diff --git a/crates/syntax/src/syntax.rs b/crates/syntax/src/syntax.rs index f44b88c..32a273b 100644 --- a/crates/syntax/src/syntax.rs +++ b/crates/syntax/src/syntax.rs @@ -23,7 +23,8 @@ impl<'a> SyntaxTreeBuilder<'a> { match child { Child::Token(token_id) => { let token_kind = self.input.kind_of(*token_id); - let token_value = self.input.token_value(*token_id); + // TODO: return Error to replace .unwrap() + let token_value = self.input.token_value(*token_id).unwrap(); self.builder.start_node(token_kind.into()); self.builder.token(token_kind.into(), token_value); self.builder.finish_node(); From 6d5f0c6d44d8d8d15e8bfa66b68659dc4644552a Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Tue, 10 Dec 2024 22:51:11 +0700 Subject: [PATCH 13/15] do not allow <--, <== in var declaration --- crates/parser/src/grammar/declaration.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/parser/src/grammar/declaration.rs b/crates/parser/src/grammar/declaration.rs index 8022f81..c37630c 100644 --- a/crates/parser/src/grammar/declaration.rs +++ b/crates/parser/src/grammar/declaration.rs @@ -38,7 +38,7 @@ pub(super) fn var_declaration(p: &mut Parser) { if p.at(LParen) { tuple(p); - if p.at_any(&[Assign, RAssignSignal, RAssignConstraintSignal]) { + if p.at(Assign) { tuple_init(p); } } else { From fbdc5b9857cd6944c9b0df9cb892d44c00ece8dc Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Tue, 10 Dec 2024 22:54:53 +0700 Subject: [PATCH 14/15] make public signal optional in main component --- crates/parser/src/grammar/main_component.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/parser/src/grammar/main_component.rs b/crates/parser/src/grammar/main_component.rs index 3f6f862..d9c176d 100644 --- a/crates/parser/src/grammar/main_component.rs +++ b/crates/parser/src/grammar/main_component.rs @@ -9,11 +9,13 @@ pub fn main_component(p: &mut Parser) { p.expect(ComponentKw); p.expect(MainKw); - p.expect(LCurly); - p.expect(PublicKw); - p.expect(LBracket); - list_identity::parse(p); - p.expect(RBracket); + if p.at(LCurly) { + p.expect(LCurly); + p.expect(PublicKw); + p.expect(LBracket); + list_identity::parse(p); + p.expect(RBracket); + } p.expect(Assign); expression::expression(p); From 07c4171540a9fbb51df57caa9e2f09671b12b9f6 Mon Sep 17 00:00:00 2001 From: NTTVy03 Date: Fri, 13 Dec 2024 14:32:06 +0700 Subject: [PATCH 15/15] fix format before merge --- crates/parser/src/grammar/block.rs | 2 +- crates/parser/src/grammar/function.rs | 2 +- crates/parser/src/grammar/include.rs | 2 +- crates/parser/src/grammar/main_component.rs | 2 +- crates/parser/src/grammar/template.rs | 2 +- crates/syntax/src/lib.rs | 5 ++--- crates/syntax/src/syntax.rs | 3 +-- crates/syntax/src/test_programs.rs | 1 - 8 files changed, 8 insertions(+), 11 deletions(-) diff --git a/crates/parser/src/grammar/block.rs b/crates/parser/src/grammar/block.rs index 5a40865..03f7ed2 100644 --- a/crates/parser/src/grammar/block.rs +++ b/crates/parser/src/grammar/block.rs @@ -2,7 +2,7 @@ use super::*; pub fn block(p: &mut Parser) { p.inc_rcurly(); - + if !p.at(LCurly) { p.advance_with_error("Miss {"); } else { diff --git a/crates/parser/src/grammar/function.rs b/crates/parser/src/grammar/function.rs index bca7018..4da6276 100644 --- a/crates/parser/src/grammar/function.rs +++ b/crates/parser/src/grammar/function.rs @@ -3,7 +3,7 @@ use crate::grammar::*; // fucntion name() pub fn function_parse(p: &mut Parser) { let m = p.open(); - + p.expect(FunctionKw); let fn_name_marker = p.open(); diff --git a/crates/parser/src/grammar/include.rs b/crates/parser/src/grammar/include.rs index d8f0ac7..7269995 100644 --- a/crates/parser/src/grammar/include.rs +++ b/crates/parser/src/grammar/include.rs @@ -2,7 +2,7 @@ use super::*; pub(super) fn include(p: &mut Parser) { // assert!(p.at(IncludeKw)); - + let m = p.open(); p.expect(IncludeKw); p.expect(CircomString); diff --git a/crates/parser/src/grammar/main_component.rs b/crates/parser/src/grammar/main_component.rs index d9c176d..5129310 100644 --- a/crates/parser/src/grammar/main_component.rs +++ b/crates/parser/src/grammar/main_component.rs @@ -8,7 +8,7 @@ component main {public [signal_list]} = tempid(v1,...,vn); pub fn main_component(p: &mut Parser) { p.expect(ComponentKw); p.expect(MainKw); - + if p.at(LCurly) { p.expect(LCurly); p.expect(PublicKw); diff --git a/crates/parser/src/grammar/template.rs b/crates/parser/src/grammar/template.rs index be5f17e..693fe68 100644 --- a/crates/parser/src/grammar/template.rs +++ b/crates/parser/src/grammar/template.rs @@ -8,7 +8,7 @@ pub fn template(p: &mut Parser) { let m = p.open(); p.expect(TemplateKw); - + let name_marker = p.open(); p.expect(Identifier); p.close(name_marker, TemplateName); diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs index d1300bb..7f4b53c 100644 --- a/crates/syntax/src/lib.rs +++ b/crates/syntax/src/lib.rs @@ -1,6 +1,5 @@ +pub mod abstract_syntax_tree; pub mod syntax; pub mod syntax_node; -pub mod abstract_syntax_tree; - -mod test_programs; \ No newline at end of file +mod test_programs; diff --git a/crates/syntax/src/syntax.rs b/crates/syntax/src/syntax.rs index 32a273b..9b9202f 100644 --- a/crates/syntax/src/syntax.rs +++ b/crates/syntax/src/syntax.rs @@ -164,8 +164,7 @@ mod tests { let syntax = SyntaxTreeBuilder::syntax_tree(source); - - if let Some(ast) = AstCircomProgram::cast(syntax) { + if let Some(ast) = AstCircomProgram::cast(syntax) { check_ast_children(&ast, &expected_kinds, &expected_ranges); // check pragma diff --git a/crates/syntax/src/test_programs.rs b/crates/syntax/src/test_programs.rs index 0879679..655222f 100644 --- a/crates/syntax/src/test_programs.rs +++ b/crates/syntax/src/test_programs.rs @@ -132,4 +132,3 @@ pub const PARSER_TEST_6: &str = r#" /* T _ T */ template Multiplier2 () {} "#; - \ No newline at end of file