From 87b6095b8348c6f512ab73855f049f4ff4ace0c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kevin=20L=C3=A4ufer?=
Date: Thu, 12 Dec 2024 09:53:58 -0500
Subject: [PATCH] smt: implement lexer

---
 patronus/src/smt.rs        |   1 +
 patronus/src/smt/parser.rs | 105 +++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 patronus/src/smt/parser.rs

diff --git a/patronus/src/smt.rs b/patronus/src/smt.rs
index 201c351..a8c7957 100644
--- a/patronus/src/smt.rs
+++ b/patronus/src/smt.rs
@@ -4,6 +4,7 @@
 // author: Kevin Laeufer
 
 mod parse;
+mod parser;
 mod serialize;
 mod solver;
 
diff --git a/patronus/src/smt/parser.rs b/patronus/src/smt/parser.rs
new file mode 100644
index 0000000..36f4454
--- /dev/null
+++ b/patronus/src/smt/parser.rs
@@ -0,0 +1,105 @@
+// Copyright 2024 Cornell University
+// released under BSD 3-Clause License
+// author: Kevin Laeufer
+
+use std::fmt::{Debug, Formatter};
+use std::io::{BufRead, Read};
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum SmtParserError {
+    #[error("I/O operation failed")]
+    Io(#[from] std::io::Error),
+}
+
+type Result<T> = std::result::Result<T, SmtParserError>;
+
+enum Token<'a> {
+    Open,
+    Close,
+    Value(&'a [u8]),
+}
+
+impl<'a> Debug for Token<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Token::Open => write!(f, "("),
+            Token::Close => write!(f, ")"),
+            Token::Value(v) => write!(f, "{}", String::from_utf8_lossy(v)),
+        }
+    }
+}
+
+/// lex SMTLib
+fn lex(input: &mut impl BufRead, mut on_token: impl FnMut(Token) -> Result<()>) -> Result<()> {
+    let mut in_escaped = false;
+    let mut token_buf = Vec::with_capacity(128);
+
+    for c in input.bytes() {
+        let c = c?;
+        if in_escaped {
+            if c == b'|' {
+                on_token(Token::Value(&token_buf))?;
+                token_buf.clear();
+                in_escaped = false;
+            } else {
+                token_buf.push(c);
+            }
+        } else {
+            match c {
+                b'|' => {
+                    in_escaped = true;
+                }
+                b'(' => {
+                    if !token_buf.is_empty() {
+                        on_token(Token::Value(&token_buf))?;
+                        token_buf.clear();
+                    }
+                    on_token(Token::Open)?;
+                }
+                b')' => {
+                    if !token_buf.is_empty() {
+                        on_token(Token::Value(&token_buf))?;
+                        token_buf.clear();
+                    }
+                    on_token(Token::Close)?;
+                }
+                b' ' | b'\n' | b'\r' => {
+                    if !token_buf.is_empty() {
+                        on_token(Token::Value(&token_buf))?;
+                        token_buf.clear();
+                    }
+                }
+                other => {
+                    token_buf.push(other);
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn lex_to_token_str(input: &str) -> String {
+        let mut tokens = vec![];
+        lex(&mut input.as_bytes(), |token| {
+            tokens.push(format!("{token:?}"));
+            Ok(())
+        })
+        .unwrap();
+
+        tokens.join(" ")
+    }
+
+    #[test]
+    fn test_lexer() {
+        let inp = "(+ a b)";
+        assert_eq!(lex_to_token_str(inp), "( + a b )");
+        let inp = "(+ |a| b ( )";
+        assert_eq!(lex_to_token_str(inp), "( + a b ( )");
+    }
+}
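
Usage note (not part of the patch): below is a minimal sketch of how a caller
might consume the callback-based lex function, tracking parenthesis depth and
collecting atom tokens. It is written as an extra test inside the same tests
module, since lex and Token are module-private; the test name and the input
string are made up for illustration.

    #[test]
    fn lexer_usage_sketch() {
        // hypothetical solver output; the |...| quoting is stripped by the lexer
        let inp = "(define-fun |x!0| () Bool true)";
        let mut depth = 0i32;
        let mut values = Vec::new();
        lex(&mut inp.as_bytes(), |token| {
            match token {
                Token::Open => depth += 1,
                Token::Close => depth -= 1,
                Token::Value(v) => values.push(String::from_utf8_lossy(v).into_owned()),
            }
            Ok(())
        })
        .unwrap();
        // "(" and ")" arrive as Open/Close tokens, so only the atoms show up as values
        assert_eq!(values, ["define-fun", "x!0", "Bool", "true"]);
        assert_eq!(depth, 0);
    }

The callback style lets the lexer reuse a single byte buffer and hand out
borrowed slices, so nothing is allocated per token unless the caller copies
the bytes out, as this sketch does.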