Skip to content

Commit a98023e

Browse files
committed
update with new user function lex_formula
1 parent 9e1ef87 commit a98023e

File tree

4 files changed

+79
-0
lines changed

4 files changed

+79
-0
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# Changelog
2+
3+
## [0.2.2] - 2025-09-05
4+
5+
Added `lex_formula`, allowing users to inspect raw lexer output.
6+
17
## [0.2.1] - 2025-09-04
28

39
Version bump only, to trigger a new release.

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,28 @@ let metadata = parse_formula(formula).unwrap();
247247
println!("{}", serde_json::to_string_pretty(&metadata).unwrap());
248248
```
249249

250+
### Inspect Tokens: `lex_formula`
251+
252+
If you want to inspect how the lexer tokenizes a formula (useful when debugging parse errors
253+
or understanding how functions and interactions are split), use `lex_formula`, which returns a
254+
JSON array of token objects with `token` and `lexeme` fields.
255+
256+
```rust
257+
use fiasto::lex_formula;
258+
259+
let input = "mpg ~ cyl + wt*hp + poly(disp, 4) - 1";
260+
let tokens = lex_formula(input).unwrap();
261+
println!("{}", serde_json::to_string_pretty(&tokens).unwrap());
262+
```
263+
264+
This prints objects like:
265+
266+
```json
267+
{ "token": "ColumnName", "lexeme": "mpg" }
268+
{ "token": "Tilde", "lexeme": "~" }
269+
{ "token": "Plus", "lexeme": "+" }
270+
```
271+
250272
### Basic Formula
251273
```rust
252274
use fiasto::parse_formula;

examples/lex_formula.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
use fiasto::lex_formula;
2+
3+
fn main() -> Result<(), Box<dyn std::error::Error>> {
4+
let input = "mpg ~ cyl + wt*hp + poly(disp, 4) - 1";
5+
let tokens = lex_formula(input)?;
6+
println!("{}", serde_json::to_string_pretty(&tokens)?);
7+
Ok(())
8+
}

src/lib.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,3 +299,46 @@ pub fn parse_formula(formula: &str) -> Result<Value, Box<dyn std::error::Error>>
299299

300300
Ok(serde_json::to_value(meta)?)
301301
}
302+
303+
/// Lex a formula and return JSON describing each token.
304+
///
305+
/// The output is an array of objects with fields:
306+
/// - `token`: token name (enum debug)
307+
/// - `lexeme`: the original slice from the input
308+
///
309+
/// # Example
310+
///
311+
/// ```rust
312+
/// use fiasto::lex_formula;
313+
///
314+
/// let formula = "mpg ~ cyl + wt*hp + poly(disp, 4) - 1";
315+
/// let tokens = lex_formula(formula).unwrap();
316+
/// // tokens is a serde_json::Value::Array of objects like:
317+
/// // { "token": "ColumnName", "lexeme": "mpg" }
318+
/// // { "token": "Tilde", "lexeme": "~" }
319+
/// // { "token": "Plus", "lexeme": "+" }
320+
/// println!("{}", serde_json::to_string_pretty(&tokens).unwrap());
321+
/// ```
322+
pub fn lex_formula(formula: &str) -> Result<Value, Box<dyn std::error::Error>> {
323+
use logos::Logos;
324+
use crate::internal::lexer::Token;
325+
326+
let mut lex = Token::lexer(formula);
327+
let mut tokens = Vec::new();
328+
while let Some(item) = lex.next() {
329+
match item {
330+
Ok(tok) => {
331+
let slice = lex.slice();
332+
let obj = serde_json::json!({
333+
"token": format!("{:?}", tok),
334+
"lexeme": slice,
335+
});
336+
tokens.push(obj);
337+
}
338+
Err(()) => {
339+
return Err(Box::new(crate::internal::errors::ParseError::Lex(lex.slice().to_string())));
340+
}
341+
}
342+
}
343+
Ok(serde_json::Value::Array(tokens))
344+
}

0 commit comments

Comments
 (0)