@@ -299,3 +299,46 @@ pub fn parse_formula(formula: &str) -> Result<Value, Box<dyn std::error::Error>>
299299
300300 Ok ( serde_json:: to_value ( meta) ?)
301301}
302+
303+ /// Lex a formula and return JSON describing each token.
304+ ///
305+ /// The output is an array of objects with fields:
306+ /// - `token`: token name (enum debug)
307+ /// - `lexeme`: the original slice from the input
308+ ///
309+ /// # Example
310+ ///
311+ /// ```rust
312+ /// use fiasto::lex_formula;
313+ ///
314+ /// let formula = "mpg ~ cyl + wt*hp + poly(disp, 4) - 1";
315+ /// let tokens = lex_formula(formula).unwrap();
316+ /// // tokens is a serde_json::Value::Array of objects like:
317+ /// // { "token": "ColumnName", "lexeme": "mpg" }
318+ /// // { "token": "Tilde", "lexeme": "~" }
319+ /// // { "token": "Plus", "lexeme": "+" }
320+ /// println!("{}", serde_json::to_string_pretty(&tokens).unwrap());
321+ /// ```
322+ pub fn lex_formula ( formula : & str ) -> Result < Value , Box < dyn std:: error:: Error > > {
323+ use logos:: Logos ;
324+ use crate :: internal:: lexer:: Token ;
325+
326+ let mut lex = Token :: lexer ( formula) ;
327+ let mut tokens = Vec :: new ( ) ;
328+ while let Some ( item) = lex. next ( ) {
329+ match item {
330+ Ok ( tok) => {
331+ let slice = lex. slice ( ) ;
332+ let obj = serde_json:: json!( {
333+ "token" : format!( "{:?}" , tok) ,
334+ "lexeme" : slice,
335+ } ) ;
336+ tokens. push ( obj) ;
337+ }
338+ Err ( ( ) ) => {
339+ return Err ( Box :: new ( crate :: internal:: errors:: ParseError :: Lex ( lex. slice ( ) . to_string ( ) ) ) ) ;
340+ }
341+ }
342+ }
343+ Ok ( serde_json:: Value :: Array ( tokens) )
344+ }
0 commit comments