Skip to content

Commit 9e1ef87

Browse files
committed
get working star interaction
1 parent aaa8577 commit 9e1ef87

File tree

6 files changed

+163
-126
lines changed

6 files changed

+163
-126
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,29 @@
22

33
just to trigger a new release
44

5+
## [0.2.2] - 2025-09-05
6+
7+
### Added
8+
9+
- Multiplication/interaction parsing: Support for `*` (full interaction) in formulas was added so expressions like `wt*hp` are parsed correctly and represented as interaction terms in the AST and metadata output.
10+
11+
### Changed
12+
13+
- Parser internals: Improved term parsing and interaction handling to correctly parse chained interactions (`a*b*c`) and mixed interaction operators (`:` and `*`). The implementation centralises interaction handling to avoid double-consuming tokens and to make chaining robust.
14+
15+
- Files changed:
16+
- `src/internal/parse_term.rs` — refactored and documented to parse atomic terms (columns/functions), then build interaction chains by consuming `:` and `*` tokens and constructing `Term::Interaction` nodes.
17+
- `src/internal/parse_rhs.rs` — adjusted plus-separated term handling to avoid double token consumption when iterating `+`-separated terms.
18+
19+
### Added (debug)
20+
21+
- Temporary example `examples/print_tokens.rs` used to inspect lexer output while debugging interaction token ordering. This can be removed after verification.
22+
23+
### Notes
24+
25+
- The changes include extra inline documentation in the modified files. I ran the `examples/mtcars` example to validate behavior and confirmed the output now includes the `wt*hp` interaction and correct generated columns from `poly(disp, 4)`.
26+
27+
528
## [0.2.0] - 2025-09-04
629

730
### Added

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/mtcars.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
use fiasto::parse_formula;
2+
3+
fn main() -> Result<(), Box<dyn std::error::Error>> {
4+
let input = "mpg ~ cyl + wt*hp + poly(disp, 4) - 1";
5+
let result = parse_formula(input)?;
6+
println!("{}", serde_json::to_string_pretty(&result)?);
7+
Ok(())
8+
}
9+
10+
11+

examples/print_tokens.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
use fiasto::internal::lexer::Token;
2+
use logos::Logos;
3+
4+
fn main() {
5+
let input = "mpg ~ cyl + wt*hp + poly(disp, 4) - 1";
6+
let mut lexer = Token::lexer(input);
7+
while let Some(tok) = lexer.next() {
8+
println!("{:?}", tok);
9+
}
10+
}

src/internal/parse_rhs.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ pub fn parse_rhs<'a>(
6666
let mut terms = Vec::new();
6767
let mut has_intercept = true;
6868

69-
// if the next token is not a comma or plus then it is pushed to the parse_term function
69+
// Parse the first term if present (not a comma or plus)
7070
if crate::internal::peek::peek(tokens, *pos).is_some()
7171
&& !matches!(
7272
crate::internal::peek::peek(tokens, *pos).unwrap().0,
@@ -75,7 +75,7 @@ pub fn parse_rhs<'a>(
7575
{
7676
terms.push(crate::internal::parse_term::parse_term(tokens, pos)?);
7777
}
78-
// If the token is a plus then it is pushed to the parse_term function
78+
// Parse additional terms separated by plus signs
7979
while crate::internal::matches::matches(tokens, pos, |t| matches!(t, Token::Plus)) {
8080
terms.push(crate::internal::parse_term::parse_term(tokens, pos)?);
8181
}

src/internal/parse_term.rs

Lines changed: 116 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -106,133 +106,126 @@ pub fn parse_term<'a>(tokens: &'a [(Token, &'a str)], pos: &mut usize) -> Result
106106
}
107107
}
108108

109-
// If the token is a function token or column name then it will parse with `tok`
110-
let (tok, name_slice) = crate::internal::expect::expect(
111-
tokens,
112-
pos,
113-
|t| {
114-
matches!(
115-
t,
116-
Token::Poly
117-
| Token::ColumnName
118-
| Token::Log
119-
| Token::Offset
120-
| Token::Factor
121-
| Token::Scale
122-
| Token::Standardize
123-
| Token::Center
124-
| Token::BSplines
125-
| Token::GaussianProcess
126-
| Token::Monotonic
127-
| Token::MeasurementError
128-
| Token::MissingValues
129-
| Token::ForwardFill
130-
| Token::BackwardFill
131-
| Token::Diff
132-
| Token::Lag
133-
| Token::Lead
134-
| Token::Trunc
135-
| Token::Weights
136-
| Token::Trials
137-
| Token::Censored
138-
| Token::Gr
139-
| Token::Mm
140-
| Token::Mmc
141-
| Token::Cs
142-
| Token::FunctionStart
143-
)
144-
},
145-
"Function token or ColumnName",
146-
)?;
147-
// `tok` is matched to see if it is a function start
148-
// if it is a function start then it will check to see if the token is poly or a column name
149-
// if it is a poly then it will return "poly" else it will return the column name
150-
if crate::internal::matches::matches(tokens, pos, |t| matches!(t, Token::FunctionStart)) {
151-
let fname = match tok {
152-
Token::Poly => "poly".to_string(),
153-
Token::Log => "log".to_string(),
154-
Token::Offset => "offset".to_string(),
155-
Token::Factor => "factor".to_string(),
156-
Token::Scale => "scale".to_string(),
157-
Token::Standardize => "standardize".to_string(),
158-
Token::Center => "center".to_string(),
159-
Token::BSplines => "bs".to_string(),
160-
Token::GaussianProcess => "gp".to_string(),
161-
Token::Monotonic => "mono".to_string(),
162-
Token::MeasurementError => "me".to_string(),
163-
Token::MissingValues => "mi".to_string(),
164-
Token::ForwardFill => "forward_fill".to_string(),
165-
Token::BackwardFill => "backward_fill".to_string(),
166-
Token::Diff => "diff".to_string(),
167-
Token::Lag => "lag".to_string(),
168-
Token::Lead => "lead".to_string(),
169-
Token::Trunc => "trunc".to_string(),
170-
Token::Weights => "weights".to_string(),
171-
Token::Trials => "trials".to_string(),
172-
Token::Censored => "cens".to_string(),
173-
Token::ColumnName => name_slice.to_string(),
174-
_ => unreachable!(),
175-
};
176-
// `parse_arg_list` is defined below
177-
// it returns the argument if followed by a function_end.
178-
// for example if poly(x, 3) is the input then we look for ")" and say that 3 is the argument
179-
let args = crate::internal::parse_arg_list::parse_arg_list(tokens, pos)?;
180-
crate::internal::expect::expect(tokens, pos, |t| matches!(t, Token::FunctionEnd), ")")?;
181-
Ok(Term::Function { name: fname, args })
182-
} else {
183-
// If the token is a column name then it will parse the column name
184-
// If the token is a function token then it will return an error (functions require parentheses)
185-
match tok {
186-
Token::ColumnName => {
187-
// Check if this is followed by an interaction
188-
if crate::internal::matches::matches(tokens, pos, |t| {
189-
matches!(t, Token::InteractionOnly | Token::InteractionAndEffect)
190-
}) {
191-
let _interaction_type = &crate::internal::peek::peek(tokens, *pos).unwrap().0;
192-
*pos += 1; // Skip the interaction token
193-
194-
let right_term = parse_term(tokens, pos)?;
195-
Ok(Term::Interaction {
196-
left: Box::new(Term::Column(name_slice.to_string())),
197-
right: Box::new(right_term),
198-
})
199-
} else {
200-
Ok(Term::Column(name_slice.to_string()))
109+
// Parse the leftmost atomic term (column, function, etc.)
110+
let atomic_term = {
111+
let (tok, name_slice) = crate::internal::expect::expect(
112+
tokens,
113+
pos,
114+
|t| {
115+
matches!(
116+
t,
117+
Token::Poly
118+
| Token::ColumnName
119+
| Token::Log
120+
| Token::Offset
121+
| Token::Factor
122+
| Token::Scale
123+
| Token::Standardize
124+
| Token::Center
125+
| Token::BSplines
126+
| Token::GaussianProcess
127+
| Token::Monotonic
128+
| Token::MeasurementError
129+
| Token::MissingValues
130+
| Token::ForwardFill
131+
| Token::BackwardFill
132+
| Token::Diff
133+
| Token::Lag
134+
| Token::Lead
135+
| Token::Trunc
136+
| Token::Weights
137+
| Token::Trials
138+
| Token::Censored
139+
| Token::Gr
140+
| Token::Mm
141+
| Token::Mmc
142+
| Token::Cs
143+
| Token::FunctionStart
144+
)
145+
},
146+
"Function token or ColumnName",
147+
)?;
148+
if crate::internal::matches::matches(tokens, pos, |t| matches!(t, Token::FunctionStart)) {
149+
let fname = match tok {
150+
Token::Poly => "poly".to_string(),
151+
Token::Log => "log".to_string(),
152+
Token::Offset => "offset".to_string(),
153+
Token::Factor => "factor".to_string(),
154+
Token::Scale => "scale".to_string(),
155+
Token::Standardize => "standardize".to_string(),
156+
Token::Center => "center".to_string(),
157+
Token::BSplines => "bs".to_string(),
158+
Token::GaussianProcess => "gp".to_string(),
159+
Token::Monotonic => "mono".to_string(),
160+
Token::MeasurementError => "me".to_string(),
161+
Token::MissingValues => "mi".to_string(),
162+
Token::ForwardFill => "forward_fill".to_string(),
163+
Token::BackwardFill => "backward_fill".to_string(),
164+
Token::Diff => "diff".to_string(),
165+
Token::Lag => "lag".to_string(),
166+
Token::Lead => "lead".to_string(),
167+
Token::Trunc => "trunc".to_string(),
168+
Token::Weights => "weights".to_string(),
169+
Token::Trials => "trials".to_string(),
170+
Token::Censored => "cens".to_string(),
171+
Token::ColumnName => name_slice.to_string(),
172+
_ => unreachable!(),
173+
};
174+
let args = crate::internal::parse_arg_list::parse_arg_list(tokens, pos)?;
175+
crate::internal::expect::expect(tokens, pos, |t| matches!(t, Token::FunctionEnd), ")")?;
176+
Term::Function { name: fname, args }
177+
} else {
178+
match tok {
179+
Token::ColumnName => {
180+
// Return the atomic column name; interactions ('*' or ':') are
181+
// handled by the loop after atomic term parsing to support
182+
// chained interactions like `a*b*c`.
183+
Term::Column(name_slice.to_string())
201184
}
185+
Token::Poly => return Err(ParseError::Syntax("expected '(' after 'poly'".into())),
186+
Token::Log => return Err(ParseError::Syntax("expected '(' after 'log'".into())),
187+
Token::Offset => return Err(ParseError::Syntax("expected '(' after 'offset'".into())),
188+
Token::Factor => return Err(ParseError::Syntax("expected '(' after 'factor'".into())),
189+
Token::Scale => return Err(ParseError::Syntax("expected '(' after 'scale'".into())),
190+
Token::Standardize => return Err(ParseError::Syntax("expected '(' after 'standardize'".into())),
191+
Token::Center => return Err(ParseError::Syntax("expected '(' after 'center'".into())),
192+
Token::BSplines => return Err(ParseError::Syntax("expected '(' after 'bs'".into())),
193+
Token::GaussianProcess => return Err(ParseError::Syntax("expected '(' after 'gp'".into())),
194+
Token::Monotonic => return Err(ParseError::Syntax("expected '(' after 'mono'".into())),
195+
Token::MeasurementError => return Err(ParseError::Syntax("expected '(' after 'me'".into())),
196+
Token::MissingValues => return Err(ParseError::Syntax("expected '(' after 'mi'".into())),
197+
Token::ForwardFill => return Err(ParseError::Syntax("expected '(' after 'forward_fill'".into())),
198+
Token::BackwardFill => return Err(ParseError::Syntax("expected '(' after 'backward_fill'".into())),
199+
Token::Diff => return Err(ParseError::Syntax("expected '(' after 'diff'".into())),
200+
Token::Lag => return Err(ParseError::Syntax("expected '(' after 'lag'".into())),
201+
Token::Lead => return Err(ParseError::Syntax("expected '(' after 'lead'".into())),
202+
Token::Trunc => return Err(ParseError::Syntax("expected '(' after 'trunc'".into())),
203+
Token::Weights => return Err(ParseError::Syntax("expected '(' after 'weights'".into())),
204+
Token::Trials => return Err(ParseError::Syntax("expected '(' after 'trials'".into())),
205+
Token::Censored => return Err(ParseError::Syntax("expected '(' after 'cens'".into())),
206+
_ => return Err(ParseError::Unexpected {
207+
expected: "term",
208+
found: Some(tok),
209+
}),
202210
}
203-
Token::Poly => Err(ParseError::Syntax("expected '(' after 'poly'".into())),
204-
Token::Log => Err(ParseError::Syntax("expected '(' after 'log'".into())),
205-
Token::Offset => Err(ParseError::Syntax("expected '(' after 'offset'".into())),
206-
Token::Factor => Err(ParseError::Syntax("expected '(' after 'factor'".into())),
207-
Token::Scale => Err(ParseError::Syntax("expected '(' after 'scale'".into())),
208-
Token::Standardize => Err(ParseError::Syntax(
209-
"expected '(' after 'standardize'".into(),
210-
)),
211-
Token::Center => Err(ParseError::Syntax("expected '(' after 'center'".into())),
212-
Token::BSplines => Err(ParseError::Syntax("expected '(' after 'bs'".into())),
213-
Token::GaussianProcess => Err(ParseError::Syntax("expected '(' after 'gp'".into())),
214-
Token::Monotonic => Err(ParseError::Syntax("expected '(' after 'mono'".into())),
215-
Token::MeasurementError => Err(ParseError::Syntax("expected '(' after 'me'".into())),
216-
Token::MissingValues => Err(ParseError::Syntax("expected '(' after 'mi'".into())),
217-
Token::ForwardFill => Err(ParseError::Syntax(
218-
"expected '(' after 'forward_fill'".into(),
219-
)),
220-
Token::BackwardFill => Err(ParseError::Syntax(
221-
"expected '(' after 'backward_fill'".into(),
222-
)),
223-
Token::Diff => Err(ParseError::Syntax("expected '(' after 'diff'".into())),
224-
Token::Lag => Err(ParseError::Syntax("expected '(' after 'lag'".into())),
225-
Token::Lead => Err(ParseError::Syntax("expected '(' after 'lead'".into())),
226-
Token::Trunc => Err(ParseError::Syntax("expected '(' after 'trunc'".into())),
227-
Token::Weights => Err(ParseError::Syntax("expected '(' after 'weights'".into())),
228-
Token::Trials => Err(ParseError::Syntax("expected '(' after 'trials'".into())),
229-
Token::Censored => Err(ParseError::Syntax("expected '(' after 'cens'".into())),
230-
_ => Err(ParseError::Unexpected {
231-
expected: "term",
232-
found: Some(tok),
233-
}),
211+
}
212+
};
213+
214+
// Now check for multiplication (interaction) tokens and build up the interaction chain
215+
let mut term = atomic_term;
216+
loop {
217+
if crate::internal::matches::matches(tokens, pos, |t| matches!(t, Token::InteractionAndEffect | Token::InteractionOnly)) {
218+
// `matches` already consumed the interaction token, so parse the right-hand term now
219+
let right = parse_term(tokens, pos)?;
220+
term = Term::Interaction {
221+
left: Box::new(term),
222+
right: Box::new(right),
223+
};
224+
} else {
225+
break;
234226
}
235227
}
228+
Ok(term)
236229
}
237230

238231
#[cfg(test)]

0 commit comments

Comments
 (0)