Introduce the "is_comment" trivia annotation

pmderodat · pmderodat · commit e2f68bb6104d · 2024-02-19T12:19:08.000Z
This new annotation will allow unparsers to determine which trivia
should be preserved during unparsing (no trivia is preserved right now).
diff --git a/langkit/lexer/__init__.py b/langkit/lexer/__init__.py
@@ -240,6 +240,10 @@ def c_name(self) -> str:
         prefixed_name = get_context().lang_name + self.base_name
         return prefixed_name.upper
 
+    @property
+    def is_comment(self) -> bool:
+        return isinstance(self, WithTrivia) and self._is_comment
+
     def __repr__(self) -> str:
         assert self.name is not None
         return '<{} {}>'.format(type(self).__name__,
@@ -271,6 +275,19 @@ class MyToken(LexerToken):
     """
     is_trivia: bool = True
 
+    def __init__(
+        self,
+        start_ignore_layout: bool = False,
+        end_ignore_layout: bool = False,
+        comment: bool = False,
+    ):
+        """
+        :param comment: Whether unparsing must treat this token as a comment,
+            i.e. a trivia to preserve in unparsed sources.
+        """
+        super().__init__(start_ignore_layout, end_ignore_layout)
+        self._is_comment = comment
+
 
 class WithSymbol(TokenAction):
     """
diff --git a/langkit/lkt_lowering.py b/langkit/lkt_lowering.py
@@ -846,27 +846,37 @@ def interpret(
         scope: Scope,
     ) -> Any:
         check_source_language(not args, 'No positional argument allowed')
+        result: dict[str, Any] = {}
 
         try:
             expr = kwargs.pop('start_ignore_layout')
         except KeyError:
-            start_ignore_layout = False
+            result["start_ignore_layout"] = False
         else:
-            start_ignore_layout = parse_static_bool(ctx, expr)
+            result["start_ignore_layout"] = parse_static_bool(ctx, expr)
 
         try:
             expr = kwargs.pop('end_ignore_layout')
         except KeyError:
-            end_ignore_layout = False
+            result["end_ignore_layout"] = False
         else:
-            end_ignore_layout = parse_static_bool(ctx, expr)
+            result["end_ignore_layout"] = parse_static_bool(ctx, expr)
+
+        # The "comment" argument is valid for trivia tokens only
+        if self.name == "trivia":
+            try:
+                expr = kwargs.pop("comment")
+            except KeyError:
+                result["comment"] = False
+            else:
+                result["comment"] = parse_static_bool(ctx, expr)
 
         check_source_language(
             not kwargs,
             'Invalid arguments: {}'.format(', '.join(sorted(kwargs)))
         )
 
-        return (start_ignore_layout, end_ignore_layout)
+        return result
 
 
 class WithLexerAnnotationSpec(AnnotationSpec):
@@ -1452,15 +1462,17 @@ def process_token_rule(
             # Gather token action info from the annotations. If absent,
             # fallback to WithText.
             token_cons = None
-            start_ignore_layout = False
-            end_ignore_layout = False
+            cons_kwargs = {
+                "start_ignore_layout": False,
+                "end_ignore_layout": False,
+            }
             if rule_annot.ignore:
                 token_cons = ignore_constructor
             for name in ('text', 'trivia', 'symbol'):
                 annot = getattr(rule_annot, name)
                 if not annot:
                     continue
-                start_ignore_layout, end_ignore_layout = annot
+                cons_kwargs.update(annot)
 
                 check_source_language(token_cons is None,
                                       'At most one token action allowed')
@@ -1486,7 +1498,7 @@ def process_token_rule(
             check_source_language(token_name not in tokens,
                                   'Duplicate token name')
 
-            token = token_cons(start_ignore_layout, end_ignore_layout)
+            token = token_cons(**cons_kwargs)
             if token_name is not None:
                 tokens[token_name] = token
             if isinstance(token, TokenAction):
diff --git a/langkit/support/langkit_support-generic_api.adb b/langkit/support/langkit_support-generic_api.adb
@@ -148,6 +148,16 @@ package body Langkit_Support.Generic_API is
       return Create_Name (Kind.Id.Token_Kinds (Kind.Index).Name.all);
    end Token_Kind_Name;
 
+   ----------------
+   -- Is_Comment --
+   ----------------
+
+   function Is_Comment (Kind : Token_Kind_Ref) return Boolean is
+   begin
+      Check_Token_Kind (Kind);
+      return Kind.Id.Token_Kinds (Kind.Index).Is_Comment;
+   end Is_Comment;
+
    --------------
    -- To_Index --
    --------------
diff --git a/langkit/support/langkit_support-generic_api.ads b/langkit/support/langkit_support-generic_api.ads
@@ -120,6 +120,11 @@ package Langkit_Support.Generic_API is
    --  Return the name for the given token kind. Raise a
    --  ``Precondition_Failure`` exception if ``Kind`` is ``No_Token_Kind_Ref``.
 
+   function Is_Comment (Kind : Token_Kind_Ref) return Boolean;
+   --  Return whether unparsing must treat the given token kind as a comment,
+   --  i.e. a trivia to preserve in unparsed sources. Raise a
+   --  ``Precondition_Failure`` exception if ``Kind`` is ``No_Token_Kind_Ref``.
+
    type Any_Token_Kind_Index is new Natural;
    subtype Token_Kind_Index is
      Any_Token_Kind_Index range 1 ..  Any_Token_Kind_Index'Last;
diff --git a/langkit/support/langkit_support-internal.ads b/langkit/support/langkit_support-internal.ads
@@ -26,8 +26,9 @@ package Langkit_Support.Internal is
    --  Descriptors for token kinds
 
    type Token_Kind_Descriptor is record
-      Name   : Text_Access;
-      Family : Token_Family_Index;
+      Name       : Text_Access;
+      Family     : Token_Family_Index;
+      Is_Comment : Boolean;
    end record;
    type Token_Kind_Descriptor_Array is
      array (Token_Kind_Index range <>) of Token_Kind_Descriptor;
diff --git a/langkit/templates/pkg_generic_impl_spec_ada.mako b/langkit/templates/pkg_generic_impl_spec_ada.mako
@@ -80,8 +80,9 @@ private package ${ada_lib_name}.Generic_Impl is
          family = ctx.lexer.tokens.token_to_family[token]
          lines += [
             f"{G.token_kind_index(token)} =>",
-            f" (Name   => {name}'Access,",
-            f"  Family => {G.token_family_index(family)})",
+            f" (Name       => {name}'Access,",
+            f"  Family     => {G.token_family_index(family)},",
+            f"  Is_Comment => {token.is_comment})",
          ]
       %>
       ${name} : aliased constant Text_Type :=
diff --git a/testsuite/python_support/lexer_example.lkt b/testsuite/python_support/lexer_example.lkt
@@ -45,7 +45,7 @@ lexer foo_lexer {
 
     family comments {
         @unparse_newline_after
-        @trivia()
+        @trivia(comment=true)
         comment <- p"#(.?)+"
     }
 
diff --git a/testsuite/tests/ada_api/generic_api/analysis.adb b/testsuite/tests/ada_api/generic_api/analysis.adb
@@ -73,8 +73,11 @@ begin
          Family_Name : constant Text_Type :=
            Format_Name (Token_Family_Name (Family), Camel_With_Underscores);
       begin
-         Put_Line
-           ("  " & Image (Kind_Name) & " (" & Image (Family_Name) & ")");
+         Put ("  " & Image (Kind_Name) & " (" & Image (Family_Name));
+         if Is_Comment (Kind) then
+            Put (", is_comment");
+         end if;
+         Put_Line (")");
       end;
    end loop;
    New_Line;
diff --git a/testsuite/tests/ada_api/generic_api/test.out b/testsuite/tests/ada_api/generic_api/test.out
@@ -43,7 +43,7 @@ Token kinds:
   Number (Alphanumericals)
   Identifier (Alphanumericals)
   String (Default_Family)
-  Comment (Comments)
+  Comment (Comments, is_comment)
 
 Token families:
   Alphanumericals
diff --git a/testsuite/tests/grammar/invalid_lexers/test.out b/testsuite/tests/grammar/invalid_lexers/test.out
@@ -169,6 +169,12 @@ token_action_5.lkt:4:5: error: At most one token action allowed
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 
+== token_action_6.lkt ==
+token_action_6.lkt:4:5: error: Invalid arguments: comment
+4 |     @text(comment=false) example <- "example"
+  |     ^^^^^^^^^^^^^^^^^^^^
+
+
 == token_matcher.lkt ==
 token_matcher.lkt:4:16: error: Invalid lexing expression
 4 |     example <- /
diff --git a/testsuite/tests/grammar/invalid_lexers/token_action_6.lkt b/testsuite/tests/grammar/invalid_lexers/token_action_6.lkt
@@ -0,0 +1,5 @@
+import common
+
+lexer foo_lexer {
+    @text(comment=false) example <- "example"
+}

Original file line number	Diff line number	Diff line change
`@@ -45,7 +45,7 @@ lexer foo_lexer {`
`45`	`45`
`46`	`46`	`family comments {`
`47`	`47`	`@unparse_newline_after`
`48`		`- @trivia()`
	`48`	`+ @trivia(comment=true)`
`49`	`49`	`comment <- p"#(.?)+"`
`50`	`50`	`}`
`51`	`51`