strictdoc-project
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 2 additions & 0 deletions
diff --git a/strictdoc/backend/sdoc/models/node.py b/strictdoc/backend/sdoc/models/node.py
Lines changed: 1 addition & 1 deletion
diff --git a/strictdoc/backend/sdoc_source_code/comment_parser/marker_lexer.py b/strictdoc/backend/sdoc_source_code/comment_parser/marker_lexer.py
Lines changed: 59 additions & 0 deletions
diff --git a/strictdoc/backend/sdoc_source_code/constants.py b/strictdoc/backend/sdoc_source_code/constants.py
Lines changed: 4 additions & 0 deletions
diff --git a/strictdoc/backend/sdoc_source_code/helpers/comment_preprocessor.py b/strictdoc/backend/sdoc_source_code/helpers/comment_preprocessor.py
Lines changed: 10 additions & 1 deletion
@@ -49,6 +49,8 @@ classifiers = [
 # @sdoc[SDOC-SRS-89]
 dependencies = [
     "textx >= 4.0.0, == 4.*",
+    "lark >= 1.2.2",
+
     "jinja2 >= 2.11.2",
     # Reading project config from strictdoc.toml file.
     "toml",
 
@@ -487,7 +487,7 @@ def enumerate_meta_fields(
 
             # A field is considered singleline if it goes before the STATEMENT
             # field and vice versa.
-            if field_index > reference_field_index:
+            if field_index >= reference_field_index:
                 is_single_line_field = False
             else:
                 is_single_line_field = True
 
@@ -0,0 +1,59 @@
+from lark import Lark, ParseTree, UnexpectedToken
+
+from strictdoc.backend.sdoc_source_code.constants import (
+    REGEX_REQ,
+    RESERVED_KEYWORDS,
+)
+
+# Lark grammar for source code comments that may carry traceability markers.
+#
+# A comment is parsed as any sequence of:
+#   - relation markers: "@relation(UID, ..., scope=<scope>[, role=<role>])",
+#     delimited by either parentheses or curly braces. At least one UID and
+#     the scope are mandatory, the role is optional;
+#   - node fields: "NAME: value", where NAME consists of uppercase letters
+#     and underscores and does not start with one of RESERVED_KEYWORDS. The
+#     value may continue over several lines, up to a line that starts with
+#     "@relation" or with another "NAME:";
+#   - any other text and whitespace. Their terminals start with "_", which
+#     Lark filters out of the resulting tree.
+#
+# NOTE: This is a non-raw f-string: "{{" and "}}" produce literal braces, and
+# every regex escape is written with a doubled backslash so that the escape
+# sequence itself (and not a raw control character) is handed to Lark. This
+# matters for "\\n" / "\\r" in particular: a raw LF/CR embedded in the grammar
+# text is not guaranteed to survive Lark's own literal evaluation unchanged.
+GRAMMAR = f"""
+start: (relation_marker | node_field | _NORMAL_STRING | _WS)*
+
+relation_marker: "@relation" _BRACE_LEFT _WS? (relation_node_uid _SEP _WS)+ "scope=" relation_scope ("," _WS "role=" relation_role)? _WS? _BRACE_RIGHT
+
+relation_node_uid: /{REGEX_REQ}/
+relation_scope: /file|class|function|line|range_start|range_end/
+relation_role: ALPHANUMERIC_WORD
+
+node_field: node_name ":" _WS_INLINE node_multiline_value
+node_name: /(?!({RESERVED_KEYWORDS}))[A-Z_]+/
+node_multiline_value: (NORMAL_STRING_VALUE _NL)+
+NORMAL_STRING_VALUE.2: /[ ]*(?!\\s*@relation)(?![A-Z_]+:)[^\\n\\r]+/x
+
+NORMAL_STRING: /(?!\\s*@relation)((?![A-Z_]+:)|({RESERVED_KEYWORDS})).+/
+_NORMAL_STRING: NORMAL_STRING
+
+_BRACE_LEFT: /[\\(\\{{]/
+_BRACE_RIGHT: /[\\)\\}}]/
+
+_SEP: ","
+_NL : NL
+_WS : WS
+_WS_INLINE : WS_INLINE
+
+ALPHANUMERIC_WORD: /[a-zA-Z0-9_]+/
+NL: /\\r?\\n/
+
+%import common.WS -> WS
+%import common.WS_INLINE -> WS_INLINE
+"""
+
+
+class MarkerLexer:
+    """
+    Parse preprocessed source code comments using the module-level GRAMMAR.
+    """
+
+    # The Lark parser is built lazily on the first parse() call and reused
+    # afterwards: its arguments never change, so there is no need to load the
+    # grammar again for every single comment.
+    _parser: "Lark | None" = None
+
+    @staticmethod
+    def parse(source_input: str) -> ParseTree:
+        """
+        Parse a single comment and return its Lark parse tree.
+
+        If Lark raises UnexpectedToken, the offending comment is printed and
+        the same exception is re-raised. Any other exception propagates
+        without this diagnostic.
+        """
+        parser = MarkerLexer._parser
+        if parser is None:
+            parser = Lark(
+                GRAMMAR, parser="lalr", cache=True, propagate_positions=True
+            )
+            MarkerLexer._parser = parser
+
+        try:
+            # FIXME: Without rstrip, there is an edge case where the parser
+            #        breaks when resolving conflicts between multiline node
+            #        fields and normal strings.
+            #        See also test: test_31_single_node_field.
+            tree: ParseTree = parser.parse(source_input.rstrip() + "\n")
+        except UnexpectedToken:
+            print(  # noqa: T201
+                "error: could not parse source comment:\n" + source_input
+            )
+            # Bare raise keeps the original exception and traceback intact.
+            raise
+        return tree
@@ -1,5 +1,9 @@
 from enum import Enum
 
+# UID of a node referenced from a "@relation" marker: a letter followed by one
+# or more letters, digits or any of "_", "/", ".", "\", "-". The leading
+# lookahead keeps the "scope=" argument from being read as a UID.
+# NOTE(review): because of the trailing "+", a UID needs at least two
+# characters to match; confirm single-character UIDs are not expected.
+REGEX_REQ = r"(?!scope=)[A-Za-z][A-Za-z0-9_\/\.\\-]+"
+# Role name: a letter followed by one or more letters, digits, "\" or "-".
+# NOTE(review): the doubled backslash in this raw string puts a literal
+# backslash into the character class; confirm this is intended.
+REGEX_ROLE = r"[A-Za-z][A-Za-z0-9\\-]+"
+# Regex alternation of uppercase words that the comment grammar does not
+# accept as the start of a node field name, so that lines such as "TODO: ..."
+# are treated as ordinary comment text.
+RESERVED_KEYWORDS = "FIXME|NOTE|TODO|TBD|WARNING"
+
 
 class FunctionAttribute(Enum):
     STATIC = "static"
 
@@ -7,15 +7,24 @@
 def preprocess_source_code_comment(comment: str) -> str:
     """
     Remove all Doxygen/Python/etc comment markers for processing.
+
+    Markers found at the start of a line ("/**", "*" or "*/" optionally
+    preceded by WS characters, "///", "//", one or more "#") are overwritten
+    with the same number of spaces, so the text that follows a marker keeps
+    its original column. Lines that consist only of spaces and tabs are then
+    emptied.
+
+    FIXME: Maybe there is a more efficient way of doing this with no two
+           re.sub() calls.
     """
 
     def replace_with_spaces(match: Match[str]) -> str:
         # Return a string of spaces with the same length as the matched text.
         return " " * len(match.group(0))
 
-    return re.sub(
+    # First pass: blank out the comment markers, one line at a time
+    # (re.MULTILINE makes "^" match at the start of every line).
+    replacement = re.sub(
         rf"(^/\*\*)|^{WS}*\*/?|(^///)|(^//)|(^#+)",
         replace_with_spaces,
         comment,
         flags=re.MULTILINE,
     )
+    # Second pass: lines that now hold nothing but spaces/tabs become truly
+    # empty lines. The line breaks themselves are kept.
+    return re.sub(
+        r"^[ \t]+$",
+        "",
+        replacement,
+        flags=re.MULTILINE,
+    )