attach token location to error tokens

WebFreak001 · WebFreak001 · commit ad8a9146406c · 2023-02-09T23:47:16.000+01:00
Makes invalid string literals report their error at the proper location instead of at 0,0. See the confusion in https://forum.dlang.org/post/mcaldhecyexsolqoctsy@forum.dlang.org
diff --git a/src/dparse/lexer.d b/src/dparse/lexer.d
@@ -1208,8 +1208,7 @@ private pure nothrow @safe:
         {
             if (range.index >= range.bytes.length)
             {
-                error("Error: unterminated string literal");
-                token = Token(tok!"");
+                error(token, "Error: unterminated string literal");
                 return;
             }
             version (X86_64)
@@ -1254,8 +1253,7 @@ private pure nothrow @safe:
             {
                 if (range.index >= range.bytes.length)
                 {
-                    error("Error: unterminated string literal");
-                    token = Token(tok!"");
+                    error(token, "Error: unterminated string literal");
                     return;
                 }
                 version (X86_64)
@@ -1280,17 +1278,15 @@ private pure nothrow @safe:
             range.popFront();
             if (range.index >= range.bytes.length)
             {
-                error("Error: unterminated string literal");
-                token = Token(tok!"");
+                error(token, "Error: unterminated string literal");
                 return;
             }
             range.popFront();
             while (true)
             {
                 if (range.index >= range.bytes.length)
                 {
-                    error("Error: unterminated string literal");
-                    token = Token(tok!"");
+                    error(token, "Error: unterminated string literal");
                     return;
                 }
                 else if (range.bytes[range.index] == '"')
@@ -1388,8 +1384,7 @@ private pure nothrow @safe:
                     }
                     else
                     {
-                        error("Error: `\"` expected to end delimited string literal");
-                        token = Token(tok!"");
+                        error(token, "Error: `\"` expected to end delimited string literal");
                         return;
                     }
                 }
@@ -1464,6 +1459,13 @@ private pure nothrow @safe:
         }
 
         advance(_front);
+
+        if (range.index >= range.bytes.length)
+        {
+            error(token, "Error: unterminated token string literal");
+            return;
+        }
+
         while (depth > 0 && !empty)
         {
             auto t = front();
@@ -1503,8 +1505,7 @@ private pure nothrow @safe:
         {
             if (range.index >= range.bytes.length)
             {
-                error("Error: unterminated hex string literal");
-                token = Token(tok!"");
+                error(token, "Error: unterminated hex string literal");
                 return;
             }
             else if (isWhitespace())
@@ -1520,8 +1521,7 @@ private pure nothrow @safe:
                 range.popFront();
                 break loop;
             default:
-                error("Error: invalid character in hex string");
-                token = Token(tok!"");
+                error(token, "Error: invalid character in hex string");
                 return;
             }
         }
@@ -1706,8 +1706,7 @@ private pure nothrow @safe:
         else
         {
     err:
-            error("Error: Expected `'` to end character literal");
-            token = Token(tok!"");
+            error(token, "Error: Expected `'` to end character literal");
         }
     }
 
@@ -1848,6 +1847,12 @@ private pure nothrow @safe:
         auto mark = range.mark();
     };
 
+    void error(ref Token token, string message) // flags `token` as an error token, then records `message`
+    {
+        token.type = tok!""; // only the type is overwritten; the token's other fields are left as they are
+        error(message); // forwards to the `error(string)` overload, which records the message
+    }
+
     void error(string message)
     {
         _messages ~= Message(range.line, range.column, message, true);
@@ -2449,3 +2454,56 @@ unittest
     immutable t2 = e2.tok;
     immutable t3 = e3.tok;
 }
+
+/// empty '' is invalid syntax, but should still get parsed properly, with an
+/// error token and proper location info
+unittest
+{
+    import std.conv : to; // NOTE(review): `to` does not appear to be used in this test - confirm and drop
+    import std.exception : enforce; // NOTE(review): `enforce` does not appear to be used in this test - confirm and drop
+
+    static immutable src = `module foo.bar;
+
+void main() {
+    x = '';
+}
+`;
+
+    LexerConfig cf;
+    StringCache ca = StringCache(16);
+
+    const tokens = getTokensForParser(src, cf, &ca); // lex the sample module containing the empty `''` literal
+
+    int i; // index of the next token to check
+    assert(tokens[i++].type == tok!"module");
+    assert(tokens[i++].type == tok!"identifier");
+    assert(tokens[i++].type == tok!".");
+    assert(tokens[i++].type == tok!"identifier");
+    assert(tokens[i++].type == tok!";");
+    assert(tokens[i++].type == tok!"void");
+    assert(tokens[i++].type == tok!"identifier");
+    assert(tokens[i++].type == tok!"(");
+    assert(tokens[i++].type == tok!")");
+    assert(tokens[i++].type == tok!"{");
+    assert(tokens[i++].type == tok!"identifier");
+    assert(tokens[i++].type == tok!"=");
+    assert(tokens[i].type == tok!""); // the empty `''` literal must come out as an error token
+    assert(tokens[i].line == tokens[i - 1].line); // ...reported on the same line as the preceding `=`
+    assert(tokens[i].column == tokens[i - 1].column + 2); // `=` plus one space, so `''` starts two columns later
+    i++; // step past the error token; lexing must have continued normally after it
+    assert(tokens[i++].type == tok!";");
+    assert(tokens[i++].type == tok!"}");
+
+    void checkInvalidTrailingString(const Token[] tokens) // expects `x`, `=`, then one error token for the unterminated literal
+    {
+        assert(tokens.length == 3);
+        assert(tokens[2].index != 0); // the error token must not be located at the start of the input
+        assert(tokens[2].column >= 4); // the literal follows `x = `, so it cannot sit at the start of the line
+        assert(tokens[2].type == tok!"");
+    }
+
+    checkInvalidTrailingString(getTokensForParser(`x = "foo`, cf, &ca)); // unterminated double-quoted string
+    checkInvalidTrailingString(getTokensForParser(`x = r"foo`, cf, &ca)); // unterminated r"..." string
+    checkInvalidTrailingString(getTokensForParser("x = `foo", cf, &ca)); // unterminated backtick string
+    checkInvalidTrailingString(getTokensForParser("x = q{foo", cf, &ca)); // unterminated q{...} token string
+}