@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
164
164
165
165
/**
 * Debugging helper: writes one line to stderr containing the name of the
 * token's type followed by the token's source text in double quotes. The text
 * is the run of bytes from token->start up to token->end, printed with an
 * explicit length rather than relying on a terminator.
 */
PRISM_ATTRIBUTE_UNUSED static void
166
166
debug_token(pm_token_t * token) {
167
- fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str (token->type), (int) (token->end - token->start), token->start);
167
+ fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human (token->type), (int) (token->end - token->start), token->start);
168
168
}
169
169
170
170
#endif
@@ -6719,21 +6719,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6719
6719
return token->type == PM_TOKEN_BRACE_RIGHT;
6720
6720
case PM_CONTEXT_PREDICATE:
6721
6721
return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
6722
+ case PM_CONTEXT_NONE:
6723
+ return false;
6722
6724
}
6723
6725
6724
6726
return false;
6725
6727
}
6726
6728
6727
- static bool
6728
- context_recoverable(pm_parser_t *parser, pm_token_t *token) {
6729
+ /**
6730
+ * Walks the parser's context list, starting at parser->current_context and following the prev links, and returns the
6731
+ * first context for which context_terminator reports that the given token terminates it. Returns PM_CONTEXT_NONE if no context in the list is terminated by the token.
6732
+ */
6733
+ static pm_context_t
6734
+ context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
6729
6735
pm_context_node_t *context_node = parser->current_context;
6730
6736
6731
6737
while (context_node != NULL) {
6732
- if (context_terminator(context_node->context, token)) return true ;
6738
+ if (context_terminator(context_node->context, token)) return context_node->context ;
6733
6739
context_node = context_node->prev;
6734
6740
}
6735
6741
6736
- return false ;
6742
+ return PM_CONTEXT_NONE ;
6737
6743
}
6738
6744
6739
6745
static bool
@@ -6761,7 +6767,7 @@ context_pop(pm_parser_t *parser) {
6761
6767
}
6762
6768
6763
6769
static bool
6764
- context_p(pm_parser_t *parser, pm_context_t context) {
6770
+ context_p(const pm_parser_t *parser, pm_context_t context) {
6765
6771
pm_context_node_t *context_node = parser->current_context;
6766
6772
6767
6773
while (context_node != NULL) {
@@ -6773,7 +6779,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
6773
6779
}
6774
6780
6775
6781
static bool
6776
- context_def_p(pm_parser_t *parser) {
6782
+ context_def_p(const pm_parser_t *parser) {
6777
6783
pm_context_node_t *context_node = parser->current_context;
6778
6784
6779
6785
while (context_node != NULL) {
@@ -6796,6 +6802,55 @@ context_def_p(pm_parser_t *parser) {
6796
6802
return false;
6797
6803
}
6798
6804
6805
+ /**
6806
+ * Returns a human-readable description of the given context, used in error messages. Several contexts share a
6807
+ * description (for example both ensure contexts map to "'ensure' clause"). PM_CONTEXT_NONE is not a valid argument: it trips an assertion, and yields "" if assertions are disabled.
6808
+ */
6809
+ static const char *
6810
+ context_human(pm_context_t context) {
6811
+ switch (context) {
6812
+ case PM_CONTEXT_NONE:
6813
+ assert(false && "unreachable");
6814
+ return "";
6815
+ case PM_CONTEXT_BEGIN: return "begin statement";
6816
+ case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
6817
+ case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
6818
+ case PM_CONTEXT_CASE_WHEN: return "'when' clause";
6819
+ case PM_CONTEXT_CASE_IN: return "'in' clause";
6820
+ case PM_CONTEXT_CLASS: return "class definition";
6821
+ case PM_CONTEXT_DEF: return "method definition";
6822
+ case PM_CONTEXT_DEF_PARAMS: return "method parameters";
6823
+ case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
6824
+ case PM_CONTEXT_ELSE: return "'else' clause";
6825
+ case PM_CONTEXT_ELSIF: return "'elsif' clause";
6826
+ case PM_CONTEXT_EMBEXPR: return "embedded expression";
6827
+ case PM_CONTEXT_ENSURE: return "'ensure' clause";
6828
+ case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
6829
+ case PM_CONTEXT_FOR: return "for loop";
6830
+ case PM_CONTEXT_FOR_INDEX: return "for loop index";
6831
+ case PM_CONTEXT_IF: return "if statement";
6832
+ case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
6833
+ case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
6834
+ case PM_CONTEXT_MAIN: return "top level context";
6835
+ case PM_CONTEXT_MODULE: return "module definition";
6836
+ case PM_CONTEXT_PARENS: return "parentheses";
6837
+ case PM_CONTEXT_POSTEXE: return "'END' block";
6838
+ case PM_CONTEXT_PREDICATE: return "predicate";
6839
+ case PM_CONTEXT_PREEXE: return "'BEGIN' block";
6840
+ case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
6841
+ case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
6842
+ case PM_CONTEXT_RESCUE: return "'rescue' clause";
6843
+ case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
6844
+ case PM_CONTEXT_SCLASS: return "singleton class definition";
6845
+ case PM_CONTEXT_UNLESS: return "unless statement";
6846
+ case PM_CONTEXT_UNTIL: return "until statement";
6847
+ case PM_CONTEXT_WHILE: return "while statement";
6848
+ }
6849
+
6850
+ assert(false && "unreachable");
6851
+ return "";
6852
+ }
6853
+
6799
6854
/******************************************************************************/
6800
6855
/* Specific token lexers */
6801
6856
/******************************************************************************/
@@ -10385,8 +10440,8 @@ parser_lex(pm_parser_t *parser) {
10385
10440
typedef enum {
10386
10441
PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10387
10442
PM_BINDING_POWER_STATEMENT = 2,
10388
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10389
- PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10443
+ PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
10444
+ PM_BINDING_POWER_MODIFIER = 6, // if unless until while
10390
10445
PM_BINDING_POWER_COMPOSITION = 8, // and or
10391
10446
PM_BINDING_POWER_NOT = 10, // not
10392
10447
PM_BINDING_POWER_MATCH = 12, // => in
@@ -10440,15 +10495,15 @@ typedef struct {
10440
10495
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
10441
10496
10442
10497
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10498
+ // rescue
10499
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10500
+
10443
10501
// if unless until while
10444
10502
[PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10445
10503
[PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10446
10504
[PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10447
10505
[PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10448
10506
10449
- // rescue
10450
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10451
-
10452
10507
// and or
10453
10508
[PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
10454
10509
[PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
@@ -14177,7 +14232,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14177
14232
* Parse an expression that begins with the previous node that we just lexed.
14178
14233
*/
14179
14234
static inline pm_node_t *
14180
- parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
14235
+ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id ) {
14181
14236
switch (parser->current.type) {
14182
14237
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
14183
14238
parser_lex(parser);
@@ -14595,30 +14650,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14595
14650
14596
14651
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
14597
14652
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14598
- }
14599
- else {
14653
+ } else {
14600
14654
// Check if `it` is not going to be assigned.
14601
14655
switch (parser->current.type) {
14602
- case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
14603
- case PM_TOKEN_AMPERSAND_EQUAL:
14604
- case PM_TOKEN_CARET_EQUAL:
14605
- case PM_TOKEN_EQUAL:
14606
- case PM_TOKEN_GREATER_GREATER_EQUAL:
14607
- case PM_TOKEN_LESS_LESS_EQUAL:
14608
- case PM_TOKEN_MINUS_EQUAL:
14609
- case PM_TOKEN_PARENTHESIS_RIGHT:
14610
- case PM_TOKEN_PERCENT_EQUAL:
14611
- case PM_TOKEN_PIPE_EQUAL:
14612
- case PM_TOKEN_PIPE_PIPE_EQUAL:
14613
- case PM_TOKEN_PLUS_EQUAL:
14614
- case PM_TOKEN_SLASH_EQUAL:
14615
- case PM_TOKEN_STAR_EQUAL:
14616
- case PM_TOKEN_STAR_STAR_EQUAL:
14617
- break;
14618
- default:
14619
- // Once we know it's neither a method call nor an assignment,
14620
- // we can finally create `it` default parameter.
14621
- node = pm_node_check_it(parser, node);
14656
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
14657
+ case PM_TOKEN_AMPERSAND_EQUAL:
14658
+ case PM_TOKEN_CARET_EQUAL:
14659
+ case PM_TOKEN_EQUAL:
14660
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
14661
+ case PM_TOKEN_LESS_LESS_EQUAL:
14662
+ case PM_TOKEN_MINUS_EQUAL:
14663
+ case PM_TOKEN_PARENTHESIS_RIGHT:
14664
+ case PM_TOKEN_PERCENT_EQUAL:
14665
+ case PM_TOKEN_PIPE_EQUAL:
14666
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
14667
+ case PM_TOKEN_PLUS_EQUAL:
14668
+ case PM_TOKEN_SLASH_EQUAL:
14669
+ case PM_TOKEN_STAR_EQUAL:
14670
+ case PM_TOKEN_STAR_STAR_EQUAL:
14671
+ break;
14672
+ default:
14673
+ // Once we know it's neither a method call nor an
14674
+ // assignment, we can finally create `it` default
14675
+ // parameter.
14676
+ node = pm_node_check_it(parser, node);
14622
14677
}
14623
14678
}
14624
14679
@@ -14656,6 +14711,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14656
14711
// If we get here, then we tried to find something in the
14657
14712
// heredoc but couldn't actually parse anything, so we'll just
14658
14713
// return a missing node.
14714
+ //
14715
+ // parse_string_part handles its own errors, so there is no need
14716
+ // for us to add one here.
14659
14717
node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
14660
14718
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14661
14719
// If we get here, then the part that we parsed was plain string
@@ -16301,6 +16359,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16301
16359
// context of a multiple assignment. We enforce that here. We'll
16302
16360
// still lex past it though and create a missing node in its place.
16303
16361
if (binding_power != PM_BINDING_POWER_STATEMENT) {
16362
+ pm_parser_err_previous(parser, diag_id);
16304
16363
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16305
16364
}
16306
16365
@@ -16487,12 +16546,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16487
16546
16488
16547
return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
16489
16548
}
16490
- default:
16491
- if (context_recoverable(parser, &parser->current)) {
16549
+ default: {
16550
+ pm_context_t recoverable = context_recoverable(parser, &parser->current);
16551
+
16552
+ if (recoverable != PM_CONTEXT_NONE) {
16492
16553
parser->recovering = true;
16554
+
16555
+ // If the given error is not the generic one, then we'll add it
16556
+ // here because it will provide more context in addition to the
16557
+ // recoverable error that we will also add.
16558
+ if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
16559
+ pm_parser_err_previous(parser, diag_id);
16560
+ }
16561
+
16562
+ // If we get here, then we are assuming this token is closing a
16563
+ // parent context, so we'll indicate that to the user so that
16564
+ // they know how we behaved.
16565
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
16566
+ } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
16567
+ // We're going to make a special case here, because "cannot
16568
+ // parse expression" is pretty generic, and we know here that we
16569
+ // have an unexpected token.
16570
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
16571
+ } else {
16572
+ pm_parser_err_previous(parser, diag_id);
16493
16573
}
16494
16574
16495
16575
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16576
+ }
16496
16577
}
16497
16578
}
16498
16579
@@ -17455,15 +17536,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17455
17536
*/
17456
17537
static pm_node_t *
17457
17538
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
17458
- pm_token_t recovery = parser->previous;
17459
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
17539
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
17460
17540
17461
17541
switch (PM_NODE_TYPE(node)) {
17462
17542
case PM_MISSING_NODE:
17463
17543
// If we found a syntax error, then the type of node returned by
17464
- // parse_expression_prefix is going to be a missing node. In that
17465
- // case we need to add the error message to the parser's error list.
17466
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
17544
+ // parse_expression_prefix is going to be a missing node.
17467
17545
return node;
17468
17546
case PM_PRE_EXECUTION_NODE:
17469
17547
case PM_POST_EXECUTION_NODE:
@@ -17472,7 +17550,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17472
17550
case PM_UNDEF_NODE:
17473
17551
// These expressions are statements, and cannot be followed by
17474
17552
// operators (except modifiers).
17475
- if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE ) {
17553
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER ) {
17476
17554
return node;
17477
17555
}
17478
17556
break;
0 commit comments