From ddd2dc0a0464fa5830b0129bdec69200aec74c34 Mon Sep 17 00:00:00 2001
From: Jon Shea <jonshea@jonshea.com>
Date: Tue, 4 Mar 2025 11:03:25 -0500
Subject: [PATCH] Support for string escapes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds support for string escapes, as proposed in
https://github.com/tree-sitter/tree-sitter-scala/issues/207

I also slightly changed the definition of `interpolated_string` by
qualifying the open quotes with `token.immediate(…)`. Prior to this
change the rule would incorrectly match `foo ""` as an `interpolated_string_expression`.

I ran these changes against all of the `.scala` files in
https://github.com/scala/scala3 and https://github.com/scala/scala.

The files in `scala/scala3` that newly have errors are:
* `tests/neg/fEscapes.scala`
* `tests/neg/unicodeEscapes-interpolations.scala`
* `tests/pos/multiLineOps.scala`
* `tests/run/i14164.scala`

The first two are tests containing examples of invalid escape
sequences that are expected to fail. `multiLineOps.scala` contains a
line `send_! "!"` that now parses with an error. Previously this
parsed as an `interpolated_string_expression`, which is also entirely
incorrect, so this error is a result of adding
`token.immediate('"')` to the definition of `interpolated_string`, and
I do not think the change is a regression. Similarly, `i14164.scala`
contains a multi-line expression that previously incorrectly parsed to
`interpolated_string_expression`, and now parses more correctly,
though with an error.

The files in `scala/scala` that newly have errors are similar: two
test files with intentionally broken escape sequences, and the same
`multiLineOps.scala`.
---
 grammar.js               | 110 +++++++++++----
 src/scanner.c            | 178 ++++++++++++++++++++-----
 test/corpus/literals.txt | 281 +++++++++++++++++++++++++++++++++++++--
 3 files changed, 505 insertions(+), 64 deletions(-)

diff --git a/grammar.js b/grammar.js
index 1b1184cf..67bb0b4e 100644
--- a/grammar.js
+++ b/grammar.js
@@ -32,19 +32,24 @@ module.exports = grammar({
   externals: $ => [
     $._automatic_semicolon,
     $._indent,
+    $._outdent,
+    $._simple_string_start,
+    $._simple_string_middle,
+    $._simple_multiline_string_start,
     $._interpolated_string_middle,
-    $._interpolated_string_end,
     $._interpolated_multiline_string_middle,
-    $._interpolated_multiline_string_end,
-    $._outdent,
-    $._simple_multiline_string,
-    $._simple_string,
+    $._raw_string_start,
+    $._raw_string_middle,
+    $._raw_string_multiline_middle,
+    $._single_line_string_end,
+    $._multiline_string_end,
     "else",
     "catch",
     "finally",
     "extends",
     "derives",
     "with",
+    $.error_sentinel,
   ],
 
   inline: $ => [
@@ -209,7 +214,7 @@ module.exports = grammar({
           "package",
           field("name", $.package_identifier),
           // This is slightly more permissive than the EBNF in that it allows any
-          // kind of delcaration inside of the package blocks. As we're more
+          // kind of declaration inside of the package blocks. As we're more
           // concerned with the structure rather than the validity of the program
           // we'll allow it.
           field("body", optional($.template_body)),
@@ -677,7 +682,7 @@ module.exports = grammar({
           // In theory structural_type should just be added to simple_type,
           // but doing so increases the state of template_body to 4000
           $._structural_type,
-          // This adds _simple_type, but not the above intentionall/y.
+          // This adds _simple_type, but not the above intentionally.
           seq($._simple_type, field("arguments", $.arguments)),
           seq($._annotated_type, field("arguments", $.arguments)),
           seq($.compound_type, field("arguments", $.arguments)),
@@ -1540,14 +1545,14 @@ module.exports = grammar({
 
     /**
      * Regex patterns created to avoid matching // comments and /* comment starts.
-     * This could technically match illeagal tokens such as val ?// = 1
+     * This could technically match illegal tokens such as val ?// = 1
      */
     operator_identifier: $ =>
       token(
         choice(
           // opchar minus colon, equal, at
           // Technically speaking, Sm (Math symbols https://www.compart.com/en/unicode/category/Sm)
-          // should be allowed as a single-characeter opchar, however, it includes `=`,
+          // should be allowed as a single-character opchar, however, it includes `=`,
           // so we should to avoid that to prevent bad parsing of `=` as infix term or type.
           /[\-!#%&*+\/\\<>?\u005e\u007c~\u00ac\u00b1\u00d7\u00f7\u2190-\u2194\p{So}]/,
           seq(
@@ -1616,7 +1621,7 @@ module.exports = grammar({
             choice(
               seq(
                 "\\",
-                choice(/[^xu]/, /uu?[0-9a-fA-F]{4}/, /x[0-9a-fA-F]{2}/),
+                choice(/[^xu]/, /[uU]+[0-9a-fA-F]{4}/, /x[0-9a-fA-F]{2}/),
               ),
               /[^\\'\n]/,
             ),
@@ -1625,14 +1630,13 @@ module.exports = grammar({
         ),
       ),
 
-    interpolated_string_expression: $ =>
-      seq(field("interpolator", $.identifier), $.interpolated_string),
-
-    _interpolated_string_start: $ => '"',
-
-    _interpolated_multiline_string_start: $ => '"""',
+    interpolated_string_expression: $ => 
+        choice(
+          seq(field("interpolator", alias($._raw_string_start, $.identifier)), alias($._raw_string, $.interpolated_string)),
+          seq(field("interpolator", $.identifier), $.interpolated_string),
+      ),
 
-    _dollar_escape: $ => seq("$", choice("$", '"')),
+    _dollar_escape: $ => alias(token(seq("$", choice("$", '"'))), $.escape_sequence),
 
     _aliased_interpolation_identifier: $ =>
       alias($._interpolation_identifier, $.identifier),
@@ -1643,28 +1647,88 @@ module.exports = grammar({
     interpolated_string: $ =>
       choice(
         seq(
-          $._interpolated_string_start,
+          token.immediate('"'),
           repeat(
             seq(
               $._interpolated_string_middle,
-              choice($._dollar_escape, $.interpolation),
+              choice($._dollar_escape, $.interpolation, $.escape_sequence),
             ),
           ),
-          $._interpolated_string_end,
+          $._single_line_string_end,
         ),
         seq(
-          $._interpolated_multiline_string_start,
+          token.immediate('"""'),
           repeat(
             seq(
               $._interpolated_multiline_string_middle,
+              // Multiline strings ignore escape sequences
               choice($._dollar_escape, $.interpolation),
             ),
           ),
-          $._interpolated_multiline_string_end,
+          $._multiline_string_end,
+        ),
+      ),
+
+    // We need to handle single-line raw strings separately from interpolated strings,
+    // because raw strings are not parsed for escape sequences. For example, raw strings 
+    // are often used for regular expressions, which contain backslashes that would
+    // be invalid if parsed as escape sequences. We do not special case multiline
+    // raw strings, because multiline strings do not parse escape sequences anyway.
+    // Scala handles multiline raw strings identically to other multiline interpolated,
+    // so we could parse them as interpolated strings, but I think the code is cleaner
+    // if we maintain the distinction.
+    _raw_string: $ => 
+      choice(
+        seq(
+          $._simple_string_start,
+          seq(
+            repeat(
+              seq(
+                $._raw_string_middle,
+                choice($._dollar_escape, $.interpolation),
+              ),
+            ),
+            $._single_line_string_end,
+          ), 
+        ),
+        seq(
+          $._simple_multiline_string_start,
+          repeat(
+            seq(
+              $._raw_string_multiline_middle,
+              choice($._dollar_escape, $.interpolation),
+            )
+          ),
+          $._multiline_string_end,
         ),
       ),
 
-    string: $ => choice($._simple_string, $._simple_multiline_string),
+    escape_sequence: _ => token.immediate(seq(
+      '\\',
+      choice(
+        /[tbnrf"'\\]/,
+        // The Java spec allows any number of u's and U's at the start of a unicode escape.
+        /[uU]+[0-9a-fA-F]{4}/,
+        // Octals are not allowed in Scala 3, but are allowed in Scala 2. tree-sitter 
+        // does not have a mechanism for distinguishing between different versions of a
+        // language, so I think it makes sense to allow them. Maybe in the future we
+        // should move them to a `deprecated` syntax node?
+        /[0-3]?[0-7]{1,2}/,
+      ),
+    )),
+
+    string: $ => choice(
+      seq(
+        $._simple_string_start,
+        repeat(seq($._simple_string_middle, $.escape_sequence)),
+        $._single_line_string_end,
+      ),
+      seq(
+        $._simple_multiline_string_start,
+        // Multiline strings ignore escape sequences
+        $._multiline_string_end,
+      ),
+    ),
 
     _semicolon: $ => choice(";", $._automatic_semicolon),
 
diff --git a/src/scanner.c b/src/scanner.c
index a95e5f42..4e653ef9 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -15,19 +15,49 @@
 enum TokenType {
   AUTOMATIC_SEMICOLON,
   INDENT,
+  OUTDENT,
+  SIMPLE_STRING_START,
+  SIMPLE_STRING_MIDDLE,
+  SIMPLE_MULTILINE_STRING_START,
   INTERPOLATED_STRING_MIDDLE,
-  INTERPOLATED_STRING_END,
   INTERPOLATED_MULTILINE_STRING_MIDDLE,
-  INTERPOLATED_MULTILINE_STRING_END,
-  OUTDENT,
-  SIMPLE_MULTILINE_STRING,
-  SIMPLE_STRING,
+  RAW_STRING_START,
+  RAW_STRING_MIDDLE,
+  RAW_STRING_MULTILINE_MIDDLE,
+  SINGLE_LINE_STRING_END,
+  MULTILINE_STRING_END,
   ELSE,
   CATCH,
   FINALLY,
   EXTENDS,
   DERIVES,
   WITH,
+  ERROR_SENTINEL
+};
+
+// Human-readable names for TokenType values, indexed by the enum value (used by
+// the DEBUG logging in the scan function). Keep the order identical to the enum.
+const char* token_name[] = {
+  "AUTOMATIC_SEMICOLON",
+  "INDENT",
+  "OUTDENT",
+  "SIMPLE_STRING_START",
+  "SIMPLE_STRING_MIDDLE",
+  "SIMPLE_MULTILINE_STRING_START",
+  "INTERPOLATED_STRING_MIDDLE",
+  "INTERPOLATED_MULTILINE_STRING_MIDDLE",
+  "RAW_STRING_START",
+  "RAW_STRING_MIDDLE",
+  "RAW_STRING_MULTILINE_MIDDLE",
+  "SINGLE_LINE_STRING_END",
+  "MULTILINE_STRING_END",
+  "ELSE",
+  "CATCH",
+  "FINALLY",
+  "EXTENDS",
+  "DERIVES",
+  "WITH",
+  "ERROR_SENTINEL"
 };
 
 typedef struct {
@@ -107,43 +134,71 @@ static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
 
 static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
 
-static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_interpolation) {
+// We enumerate 3 types of strings that we need to handle differently:
+// 1. Simple strings, `"..."` or `"""..."""`
+// 2. Interpolated strings, `s"..."` or `f"..."` or `foo"..."` or foo"""...""".
+// 3. Raw strings, `raw"..."`
+typedef enum {
+  STRING_MODE_SIMPLE,
+  STRING_MODE_INTERPOLATED,
+  STRING_MODE_RAW
+} StringMode;
+
+static bool scan_string_content(TSLexer *lexer, bool is_multiline, StringMode string_mode) {
+  LOG("scan_string_content(%d, %d, %c)\n", is_multiline, string_mode, lexer->lookahead);
   unsigned closing_quote_count = 0;
   for (;;) {
     if (lexer->lookahead == '"') {
       advance(lexer);
       closing_quote_count++;
       if (!is_multiline) {
-        lexer->result_symbol = has_interpolation ? INTERPOLATED_STRING_END : SIMPLE_STRING;
+        lexer->result_symbol = SINGLE_LINE_STRING_END;
+        lexer->mark_end(lexer);
         return true;
       }
       if (closing_quote_count >= 3 && lexer->lookahead != '"') {
-        lexer->result_symbol = has_interpolation ? INTERPOLATED_MULTILINE_STRING_END : SIMPLE_MULTILINE_STRING;
-        return true;
-      }
-    } else if (lexer->lookahead == '$') {
-      if (is_multiline && has_interpolation) {
-        lexer->result_symbol =  INTERPOLATED_MULTILINE_STRING_MIDDLE;
+        lexer->result_symbol = MULTILINE_STRING_END;
+        lexer->mark_end(lexer);
         return true;
       }
-      if (has_interpolation) {
-        lexer->result_symbol = INTERPOLATED_STRING_MIDDLE;
-        return true;
+    } else if (lexer->lookahead == '$' && string_mode != STRING_MODE_SIMPLE) {
+      switch (string_mode) {
+        case STRING_MODE_INTERPOLATED:
+          lexer->result_symbol = is_multiline ? INTERPOLATED_MULTILINE_STRING_MIDDLE : INTERPOLATED_STRING_MIDDLE;
+          break;
+        case STRING_MODE_RAW:
+          lexer->result_symbol = is_multiline ? RAW_STRING_MULTILINE_MIDDLE : RAW_STRING_MIDDLE;
+          break;
+        default:
+          assert(false);          
       }
-      advance(lexer);
+      lexer->mark_end(lexer);
+      return true;
     } else {
       closing_quote_count = 0;
       if (lexer->lookahead == '\\') {
-        advance(lexer);
-        if (!lexer->eof(lexer)) {
-          advance(lexer);
-        }
-      } else if (lexer->lookahead == '\n') {
-        if (is_multiline) {
+        // Multiline strings ignore escape sequences
+        if (is_multiline || string_mode == STRING_MODE_RAW) {
+          // FIXME: In raw string mode, we have to jump over escaped quotes.
           advance(lexer);
+          // In single-line raw strings, `\"` is not translated to `"`, but it also does
+          // not close the string. Likewise, `\\` is not translated to `\`, but it does
+          // stop the second `\` from stopping a double-quote from closing the string.
+          if (!is_multiline && string_mode == STRING_MODE_RAW && 
+            (lexer->lookahead == '"' || lexer->lookahead == '\\')) {
+            advance(lexer);
+          }
         } else {
-          return false;
+          lexer->result_symbol = string_mode == STRING_MODE_SIMPLE ? SIMPLE_STRING_MIDDLE : INTERPOLATED_STRING_MIDDLE;
+          lexer->mark_end(lexer);
+          return true;
         }
+      // During error recovery and dynamic precedence resolution, the external 
+      // scanner will be invoked with all valid_symbols set to true, which means
+      // we will be asked to scan a string token when we are not actually in a 
+      // string context. Here we detect these cases and return false.
+      } else if (lexer->lookahead == '\n' && !is_multiline) {
+        return false;
       } else if (lexer->eof(lexer)) {
         return false;
       } else {
@@ -185,6 +240,24 @@ static inline void debug_indents(Scanner *scanner) {
 
 bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
                                              const bool *valid_symbols) {
+  #ifdef DEBUG
+  {
+    if (valid_symbols[ERROR_SENTINEL]) {
+      LOG("entering tree_sitter_scala_external_scanner_scan. ERROR_SENTINEL is valid\n");
+    } else {
+      char debug_str[1024] = "entering tree_sitter_scala_external_scanner_scan valid symbols: ";
+      for (unsigned i = 0; i < ERROR_SENTINEL; i++) {
+        if (valid_symbols[i]) {
+          strcat(debug_str, token_name[i]);
+          strcat(debug_str, ", ");
+        }
+      }
+      strcat(debug_str, "\n");
+      LOG("%s", debug_str);
+    }
+  }
+  #endif
+
   Scanner *scanner = (Scanner *)payload;
   int16_t prev = scanner->indents.size > 0 ? *array_back(&scanner->indents) : -1;
   int16_t newline_count = 0;
@@ -249,7 +322,7 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
   }
 
   // This saves the indentation_size and newline_count so it can be used
-  // in subsequent calls for multiple outdent or autosemicolon.
+  // in subsequent calls for multiple outdent or auto-semicolon.
   if (valid_symbols[OUTDENT] &&
       (lexer->lookahead == 0 ||
       (
@@ -388,30 +461,69 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
     skip(lexer);
   }
 
-  if (valid_symbols[SIMPLE_STRING] && lexer->lookahead == '"') {
+  if (valid_symbols[SIMPLE_STRING_START] && lexer->lookahead == '"') {
     advance(lexer);
+    lexer->mark_end(lexer);
 
-    bool is_multiline = false;
     if (lexer->lookahead == '"') {
       advance(lexer);
       if (lexer->lookahead == '"') {
         advance(lexer);
-        is_multiline = true;
-      } else {
-        lexer->result_symbol = SIMPLE_STRING;
+        lexer->result_symbol = SIMPLE_MULTILINE_STRING_START;
+        lexer->mark_end(lexer);
         return true;
       }
     }
 
-    return scan_string_content(lexer, is_multiline, false);
+    lexer->result_symbol = SIMPLE_STRING_START;
+    return true;
+  }
+
+  // We need two tokens of lookahead to determine if we are parsing a raw string,
+  // the `raw` and the `"`, which is why we need to do it in the external scanner.
+  if (valid_symbols[RAW_STRING_START] && lexer->lookahead == 'r') {
+    advance(lexer);
+    if (lexer->lookahead == 'a') {
+      advance(lexer);
+      if (lexer->lookahead == 'w') {
+        advance(lexer);
+        if (lexer->lookahead == '"') {
+          lexer->mark_end(lexer);
+          lexer->result_symbol = RAW_STRING_START;
+          return true;
+        }
+      }
+    }
+  }
+
+  if (valid_symbols[SIMPLE_STRING_MIDDLE]) {
+    return scan_string_content(lexer, false, STRING_MODE_SIMPLE);
   }
 
   if (valid_symbols[INTERPOLATED_STRING_MIDDLE]) {
-    return scan_string_content(lexer, false, true);
+    return scan_string_content(lexer, false, STRING_MODE_INTERPOLATED);
   }
 
+  if (valid_symbols[RAW_STRING_MIDDLE]) {
+    return scan_string_content(lexer, false, STRING_MODE_RAW);
+  }
+
+  if (valid_symbols[RAW_STRING_MULTILINE_MIDDLE]) {
+    return scan_string_content(lexer, true, STRING_MODE_RAW);
+  }  
+
   if (valid_symbols[INTERPOLATED_MULTILINE_STRING_MIDDLE]) {
-    return scan_string_content(lexer, true, true);
+    return scan_string_content(lexer, true, STRING_MODE_INTERPOLATED);
+  }
+
+  // We still need to handle the simple multiline string case, but there is
+  // no `MULTILINE_STRING_MIDDLE` token, and `MULTILINE_STRING_END` is used
+  // by all three of simple, raw, and interpolated multiline strings. So this
+  // check needs to come after the `INTERPOLATED_MULTILINE_STRING_MIDDLE` and
+  // `RAW_STRING_MULTILINE_MIDDLE` check, so that we can be sure we are in a 
+  // simple multiline string context.
+  if (valid_symbols[MULTILINE_STRING_END]) {
+    return scan_string_content(lexer, true, STRING_MODE_SIMPLE);
   }
 
   return false;
diff --git a/test/corpus/literals.txt b/test/corpus/literals.txt
index 8b2d3666..efd308bf 100644
--- a/test/corpus/literals.txt
+++ b/test/corpus/literals.txt
@@ -2,19 +2,56 @@
 Simple strings
 ================================================================================
 
+val emptyString = ""
+
 val oneLineString = "I'm just on one line"
 
+val stringWithCommentLikeContent1 = "// not a comment"
+
+val stringWithCommentLikeContent2 = "/* not a comment */"
+
+val stringWithEscapeSequence = "first line\nsecond line"
+
 val multiLineString = """
   a
   $thisIsntInterpolated
   ${thisEither}
+  no escape codes in multiline strings \uD83D\uDE00 \n
 """
 
-val multiLineString2 = """""""
+val emptyMultilineStringf = """"""
+
+val stringOfOneDoubleQuote = """""""
+
+val multiLineString3 = """\{@inheritDoc\p{Zs}*\}"""
+
+val blackslashDoesNotEscapeClosingQuote = """\"""
 
 --------------------------------------------------------------------------------
 
 (compilation_unit
+  (val_definition
+    (identifier)
+    (string))
+  (val_definition
+    (identifier)
+    (string))
+  (val_definition
+    (identifier)
+    (string))
+  (val_definition
+    (identifier)
+    (string))
+  (val_definition
+    (identifier)
+    (string
+      (escape_sequence)))
+  (val_definition
+    (identifier)
+    (string))
+  (val_definition
+    (identifier)
+    (string))
   (val_definition
     (identifier)
     (string))
@@ -25,15 +62,60 @@ val multiLineString2 = """""""
     (identifier)
     (string)))
 
+================================================================================
+Escape sequences in strings
+================================================================================
+
+val singleEscapeCode = "\n"
+
+val singleCharacterEscapes = "\n\r\t\b\f\'\"\\"
+
+val unicodeEscape = "\uD83D\UDE00"
+
+val repeatedUs = "\uuuuD83D\UUDE00"
+
+--------------------------------------------------------------------------------
+
+(compilation_unit
+  (val_definition
+    (identifier)
+    (string
+      (escape_sequence)))
+  (val_definition
+    (identifier)
+    (string
+      (escape_sequence)
+      (escape_sequence)
+      (escape_sequence)
+      (escape_sequence)
+      (escape_sequence)
+      (escape_sequence)
+      (escape_sequence)
+      (escape_sequence)))
+  (val_definition
+    (identifier)
+    (string
+      (escape_sequence)
+      (escape_sequence)))
+  (val_definition
+    (identifier)
+    (string
+      (escape_sequence)
+      (escape_sequence))))
+
 ================================================================================
 Interpolated strings
 ================================================================================
 
+val empty = s""
+
+val emptyMultiline = s""""""
+
 val string1 = s"a $b ${c}"
 
 val string2 = f"hi $name%s"
 
-val string3 = raw"Not a new line \n${ha}"
+val string3 = raw"Not a really a new line \n${ha}."
 
 val string4 = s"""
 works even in multiline strings, ${name}
@@ -47,9 +129,29 @@ val string7 = s"$$ $a"
 
 val string8 = s"$"$a"
 
+val string9 = s"$"$a\uD83D\UDE00\n"
+
+val multiline = raw"""
+  $$
+  ${interp}
+  \n
+  \x
+  \"
+\"""
+
 --------------------------------------------------------------------------------
 
 (compilation_unit
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string)))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string)))
   (val_definition
     (identifier)
     (interpolated_string_expression
@@ -99,16 +201,25 @@ val string8 = s"$"$a"
     (identifier)
     (interpolated_string_expression
       (identifier)
-        (interpolated_string
-          (interpolation
-            (identifier))
-          (interpolation
-            (identifier)))))
+      (interpolated_string
+        (interpolation
+          (identifier))
+        (interpolation
+          (identifier)))))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string
+        (escape_sequence)
+        (interpolation
+          (identifier)))))
   (val_definition
     (identifier)
     (interpolated_string_expression
       (identifier)
       (interpolated_string
+        (escape_sequence)
         (interpolation
           (identifier)))))
   (val_definition
@@ -116,8 +227,162 @@ val string8 = s"$"$a"
     (interpolated_string_expression
       (identifier)
       (interpolated_string
+        (escape_sequence)
+        (interpolation
+          (identifier))
+        (escape_sequence)
+        (escape_sequence)
+        (escape_sequence))))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string
+        (escape_sequence)
+        (interpolation
+          (block
+            (identifier)))))))
+
+================================================================================
+Raw strings
+================================================================================
+
+val emptyRaw = raw""
+
+val emptyMultilineRaw = raw""""""
+
+val invalidEscapeCodesAllowedAndValidEscapesIgnored = raw"\n\t\x\w\g\k"
+
+val ErasedFunctionN = raw"ErasedFunction(\d+)".r
+
+val escapedAndInterpolated = raw"Not a really a new line \n${ha}."
+
+val blackslashQuoteDoesNotCloseString = raw"\""
+
+val doubleSlashQuoteDoesCloseString = raw"\\"
+
+val dollarEscapeInSingleLine = raw"$$"
+
+val slashDoesNotEscapeDollarSign = raw"\$$"
+
+val multiline = raw"""
+  $$
+  ${interp}
+  \n
+  \x
+  \"
+\"""
+
+val ensureIdentifierNamedRawStillWorks = someFunction(raw)
+
+val raw = raw(raw)
+
+--------------------------------------------------------------------------------
+
+(compilation_unit
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string)))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string)))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string)))
+  (val_definition
+    (identifier)
+    (field_expression
+      (interpolated_string_expression
+        (identifier)
+        (interpolated_string))
+      (identifier)))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string
+        (interpolation
+          (block
+            (identifier))))))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string)))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string)))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string
+        (escape_sequence))))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string
+        (escape_sequence))))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string
+        (escape_sequence)
+        (interpolation
+          (block
+            (identifier))))))
+  (val_definition
+    (identifier)
+    (call_expression
+      (identifier)
+      (arguments
+        (identifier))))
+  (val_definition
+    (identifier)
+    (call_expression
+      (identifier)
+      (arguments
+        (identifier)))))
+
+================================================================================
+Raw and interpolated strings have equivalent parse trees
+================================================================================
+
+val raw = raw"Foo $$ ${bar}"
+
+val raw = s"Foo $$ ${bar}"
+
+--------------------------------------------------------------------------------
+
+(compilation_unit
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string
+        (escape_sequence)
         (interpolation
-          (identifier))))))
+          (block
+            (identifier))))))
+  (val_definition
+    (identifier)
+    (interpolated_string_expression
+      (identifier)
+      (interpolated_string
+        (escape_sequence)
+        (interpolation
+          (block
+            (identifier)))))))
 
 
 ================================================================================