Skip to content

Importing the same file twice breaks the parser #1971

@svallory

Description

@svallory

Langium version: 3.5.0
Package name: (mmm... langium, I guess?)

Steps To Reproduce

  1. Uncomment the `import "./core/primitives";` line in rcl.langium
  2. Rebuild everything
  3. Try to parse a file containing the line `agent Test Agent`

Files

rcl.langium

grammar Rcl

// import "./core/primitives";
import "./core/sections";

// ======== Entry Point ========
// Entry rule: zero or more groups of (optional INDENT, optional WS, a required NL,
// optional DEDENT), followed by exactly one Section stored in `agentSection`.
entry RclFile:
    (INDENT? WS? NL DEDENT?)* // these are here just so these tokens don't get removed
    agentSection=(Section);

./core/sections.langium

import "../data-types/type-system";

// Section-type keyword. Either one of the single-word types, or one of the
// message kinds followed by required spaces (`__`) and the word 'message'.
// NOTE(review): 'agent', 'flow' and 'messages' are also matched by COMMON_NOUN
// (/[a-z][a-zA-Z0-9_-]*/), and 'agentConfig' / 'agentDefaults' are also declared as
// AGENT_CONFIG_KEYWORD / AGENT_DEFAULTS_KEYWORD in core/primitives. Which terminal
// wins presumably depends on terminal ordering, which the import order may
// influence -- TODO confirm against the Langium lexer documentation.
terminal SECTION_TYPE: 
  'agent'
  | 'agentConfig'
  | 'agentDefaults'
  | 'flow'
  | 'messages'
  | ((
      'authentication'
      | 'transaction'
      | 'promotion'
      | 'servicerequest'
      | 'acknowledge'
    )
    __ 'message');

// Reserved section names that have implicit types
// Data type rule (returns string) accepting exactly one of the three keywords below.
ReservedSectionName returns string:
    'Config' | 'Defaults' | 'Messages';

// Parameter that may follow a section header (see `sectionParam` in Section);
// delegates entirely to TypedValue.
// NOTE(review): the grammar shown here does not itself restrict this to message
// sections; presumably that is enforced elsewhere (e.g. validation) -- confirm.
SectionParameter:
    // Only for message sections - expiration parameter
    TypedValue; // Either timestamp or duration string

// A section header. Two alternatives:
//   1. a sectionType, required spaces, and a PROPER_NOUN sectionName; or
//   2. an optional sectionType, required spaces, and a reserved name.
// Either form may be followed by optional spaces, an optional SectionParameter,
// optional spaces, and an optional NL or EOF.
// Note: alternative 2 still requires the `__` spaces when sectionType is omitted.
Section:
    (sectionType=SECTION_TYPE __ sectionName=PROPER_NOUN | sectionType=SECTION_TYPE? __ reservedName=ReservedSectionName) __? sectionParam=SectionParameter? __? (NL | EOF)?;

./data-types/type-system.langium

import "../core/primitives";

// ======== Type Tag Terminals ========
// Terminals used by the `<type value | modifier>` tag rules below.
// NOTE(review): the two *_CONTENT patterns accept almost any run of characters
// (anything except '|'/'>' and anything except '>' respectively), so they can
// overlap with other terminals; how the lexer resolves that is not visible here.
terminal TYPE_TAG_NAME: /(date|datetime|time|email|phone|msisdn|url|zipcode|zip)/; // Specific type keywords
terminal TYPE_TAG_VALUE_CONTENT: /[^\|>]+/;             // Raw string content of tag's value
terminal TYPE_TAG_MODIFIER_CONTENT: /[^>]+/;            // Raw string content of tag's modifier

// ======== Type Conversion ========
// Value accepted inside a type conversion: either a literal or an identifier.
TypeConversionValue infers TypeConversionValue:
    LiteralValue | Identifier;

// Type conversion of the form `<type value>` or `<type value | "modifier">`:
// LT, optional spaces, a TYPE_TAG_NAME, required spaces, the value, an optional
// '|'-separated STRING modifier, optional spaces, GT.
TypeConversion infers TypeConversion:
    LT __? type=TYPE_TAG_NAME __ value=TypeConversionValue (__? '|' __? modifier=STRING)? __? GT;

// ======== Type Tag Rules ========
// Same shape as TypeConversion, but the value and modifier are captured as raw
// text via TYPE_TAG_VALUE_CONTENT / TYPE_TAG_MODIFIER_CONTENT.
// Not referenced by any other rule in the grammar files shown here.
TypeTag infers TypeTag:
    LT __? type=TYPE_TAG_NAME __ value=TYPE_TAG_VALUE_CONTENT (__? '|' __? modifier=TYPE_TAG_MODIFIER_CONTENT)? __? GT;

// A value that is either a type conversion (`<type value>`) or a plain literal.
TypedValue infers TypedValue:
    TypeConversion | LiteralValue; 

./core/primitives.langium

// ======== Basic Primitive Types and Terminals ========
// This file contains basic primitive types and literals


// ======== Hidden Terminals ========
// Line comment: '#' followed by everything up to (not including) the line break.
// Declared `hidden`, so it does not have to be mentioned in parser rules.
hidden terminal SL_COMMENT: /#[^\r\n]*/;

// ==== Required space ====
// One or more literal space characters (tabs are not included).
// NOTE(review): every match of this pattern is also a match of WS (/[ \t]+/);
// confirm which of the two the lexer actually emits.
terminal __: /[ ]+/;

// ======== Core Terminals ========
// ATOM:   ':' followed by an identifier, or by a double-quoted string that
//         contains no quotes or backslashes.
// STRING: double-quoted string; backslash escapes are allowed inside.
// NUMBER: digits either grouped in comma-separated thousands or plain, with an
//         optional fractional part and an optional exponent.
terminal ATOM: /:([_a-zA-Z][\w_]*|\"[^\"\\]*\")/;
terminal STRING: /\"(\\.|[^\"\\])*\"/;
terminal NUMBER returns number: /[0-9]{1,3}(,[0-9]{3})*(\.[0-9]+)?([eE][-+]?[0-9]+)?|[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/;
// terminal REGEXP_LITERAL: /\/([^\/\\]|\\\\.)*\/[gimyusv]*/;
terminal NL: /[\r\n]+/;                         // One or more CR/LF characters (the `$` anchor was removed)

// ======== Native Schema Terminals ========
// Keyword terminals for the native schema sections.
// NOTE(review): 'agentConfig' and 'agentDefaults' are also alternatives of the
// SECTION_TYPE terminal in core/sections, so the same text is claimed by two
// terminals -- confirm this is intended.
terminal AGENT_CONFIG_KEYWORD: 'agentConfig';
terminal AGENT_DEFAULTS_KEYWORD: 'agentDefaults';
terminal AGENT_MESSAGES_KEYWORD: 'agentMessages';

// ======== Indentation Terminals ========
// These terminals are REQUIRED by Langium.
// DO NOT REMOVE OR CHANGE THESE TERMINALS.
terminal INDENT: 'synthetic:indent';                 // Indentation increase (will be overridden)
terminal DEDENT: 'synthetic:dedent';                 // Indentation decrease (will be overridden)
terminal WS: /[ \t]+/;                               // Spaces and tabs only; newlines are matched by NL

// Single-character punctuation terminals.
terminal LT: '<';
terminal GT: '>';
terminal SLASH: '/';

// Boolean keywords; matched as whole words (`\b` on both sides) with the exact
// capitalisation shown, since the patterns carry no case-insensitive flag.
terminal TRUE_KW: /\b(True|On|Yes|Active|Enabled)\b/;
terminal FALSE_KW: /\b(False|Off|No|Inactive|Disabled)\b/;

// ======== Basic Shared Types & Rules ========

// Simplified Boolean and Null types
// Boolean literal: `value` holds the matched TRUE_KW or FALSE_KW text.
// The inferred AST type is named BooleanValue (not BooleanLiteral).
BooleanLiteral infers BooleanValue:
    value=(TRUE_KW | FALSE_KW);

// Null literal: the keyword 'Null', stored in `value`.
NullValue infers NullValue:
    value='Null';

// A literal value. Exactly one alternative matches, so exactly one of the
// val_* properties is assigned: string, number, atom, boolean or null.
LiteralValue infers LiteralValue:
    val_str=STRING
    | val_num=NUMBER
    | val_atom=ATOM
    | val_bool=BooleanLiteral
    | val_null=NullValue;
//    | val_regexp=REGEXP_LITERAL;

// ======== Identifiers & Qualified Names ========
// COMMON_NOUN: a lowercase letter followed by letters, digits, '_' or '-'.
//   NOTE(review): this also matches section keywords such as 'agent'; whether
//   the keyword or COMMON_NOUN is emitted presumably depends on terminal order -- confirm.
// PROPER_NOUN: one or more words, each starting with an uppercase letter and
//   separated by spaces/tabs. The two negative lookaheads (not '/', not `as`)
//   sit directly before `[A-Z]`, which already excludes both cases.
terminal COMMON_NOUN: /[a-z][a-zA-Z0-9_-]*/;
terminal PROPER_NOUN: /[A-Z][\w-]*(?:[ \t]+(?![\/])(?!as\b)[A-Z][\w-]*)*/;

// Identifier can be a PROPER_NOUN or a COMMON_NOUN; the matched text is stored in `value`.
Identifier infers Identifier:
    value=(PROPER_NOUN | COMMON_NOUN);

// Declared AST type for attribute paths: the list of path segments, in order.
interface AttributePath {
    parts: string[];
}

// Rule for attribute access path like attName or attName.subAttName
// Each COMMON_NOUN segment is appended to `parts`; spaces around '.' are optional.
AttributePath returns AttributePath:
    parts+=COMMON_NOUN (__? '.' __? parts+=COMMON_NOUN)*;

// One or more PROPER_NOUNs joined by '/'. Required spaces (`__`) must follow each
// '/', and the rule allows none before it. Returns the matched text as a string.
// NOTE(review): uses the inline keyword '/' although a SLASH terminal with the
// same text is declared in this file -- confirm the two do not conflict.
QualifiedName returns string:
    PROPER_NOUN ('/' __ PROPER_NOUN)*;

The current behavior

If I import the primitives file in the rcl.langium file, I get the following parser errors:

➜ rcl check examples/minimal.rcl
Checking file: examples/minimal.rcl

❌ Parser Errors:
  Line 1: Expecting: one of these possible Token sequences:
  1. [SECTION_TYPE, __, PROPER_NOUN]
  2. [__]
  3. [SECTION_TYPE, __, Config]
  4. [SECTION_TYPE, __, Defaults]
  5. [SECTION_TYPE, __, Messages]
but found: 'agent'

❌ Validation Errors:
  Line 1:1: Expecting: one of these possible Token sequences:
  1. [SECTION_TYPE, __, PROPER_NOUN]
  2. [__]
  3. [SECTION_TYPE, __, Config]
  4. [SECTION_TYPE, __, Defaults]
  5. [SECTION_TYPE, __, Messages]
but found: 'agent'
    └─ "agent"

📊 Summary: 1 error(s), 0 warning(s)

The expected behavior

The behavior of the parser should not change, no matter how many times I import the same file, even if using different paths to import it

Metadata

Metadata

Assignees

No one assigned

    Labels

    parser (Parser related issue)

    Type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions