Skip to content

fix(ruby) don't highlight interpolation in single quoted strings #4178

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ Core Grammars:
- fix(swift) - Fixed syntax highlighting for class func/var declarations [guuido]
- fix(yaml) - Fixed wrong escaping behavior in single quoted strings [guuido]
- enh(nim) - Add `concept` and `defer` to list of Nim keywords [Jake Leahy]

- fix(ruby) - Fix non-interpolable Ruby strings [Boris Verkhovskiy][]

New Grammars:

- added 3rd party TTCN-3 grammar to SUPPORTED_LANGUAGES [Osmocom][]
Expand Down Expand Up @@ -85,7 +86,7 @@ CONTRIBUTORS
[guuido]: https://github.com/guuido
[clsource]: https://github.com/clsource
[Jake Leahy]: https://github.com/ire4ever1190

[Boris Verkhovskiy]: https://github.com/verhovsky

## Version 11.10.0

Expand Down
1 change: 1 addition & 0 deletions src/highlight.js
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,7 @@ const HLJS = function(hljs) {
hljs.regex = {
concat: regex.concat,
lookahead: regex.lookahead,
escape: regex.escape,
either: regex.either,
optional: regex.optional,
anyNumberOfTimes: regex.anyNumberOfTimes
Expand Down
200 changes: 148 additions & 52 deletions src/languages/ruby.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ export default function(hljs) {
/\b([A-Z]+[a-z0-9]+)+/,
// ends in caps
/\b([A-Z]+[a-z0-9]+)+[A-Z]+/,
)
;
);
const CLASS_NAME_WITH_NAMESPACE_RE = regex.concat(CLASS_NAME_RE, /(::\w+)*/)
// very popular ruby built-ins that one might even assume
// are actual keywords (despite that not being the case)
Expand Down Expand Up @@ -122,56 +121,128 @@ export default function(hljs) {
end: /\}/,
keywords: RUBY_KEYWORDS
};
const STRING = {

/**
 * Builds one `{ begin, end }` variant per delimiter pair for a percent-literal
 * string prefix (e.g. "%q" or "%Q").
 *
 * @param {string} prefix - literal text placed in front of the opening delimiter
 * @param {string[]} delimiters - each entry is read as two raw characters:
 *   index 0 is the opening delimiter, index 1 is the closing delimiter.
 *   Entries must NOT be pre-escaped for regex use, because escaping is applied
 *   here; a pre-escaped entry such as "\\/\\/" would yield a backslash as its
 *   opening character.
 * @returns {Array<{begin: *, end: *}>} variants in the same order as `delimiters`
 */
function string_variants(prefix, delimiters) {
  const variants = [];
  for (const pair of delimiters) {
    // Only the first two characters of the pair are consulted.
    const begin = regex.concat(prefix, regex.escape(pair.charAt(0)));
    const end = regex.escape(pair.charAt(1));
    variants.push({ begin, end });
  }
  return variants;
}

const STRING_DELIMITERS = [
"()",
"[]",
"{}",
"<>",
"\\/\\/",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean

Suggested change
"\\/\\/",
"//",

d.charAt(1) won't work because it's 4 characters long.

> d = "\\/\\/"
'\\/\\/'
> d.charAt(0)
'\\'
> d.charAt(1)
'/'

https://docs.ruby-lang.org/en/3.1/syntax/literals_rdoc.html#label-Percent+Literals says

Each of the literals in described in this section may use these paired delimiters

  • [...]
  • Any other character, as both beginning and ending delimiters.

Which seems like something regex can't handle.

"%%",
"--"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would we want to flesh this out a bit further while we're here?

];

// Mode for Ruby strings that do not interpolate: plain single-quoted strings
// and `%q` percent literals built from STRING_DELIMITERS.
const SINGLE_QUOTED_STRING = {
  className: 'string',
  // Only backslash escapes are recognised inside these strings. SUBST is
  // deliberately left out so `#{...}` is rendered as ordinary string text.
  // (A second, earlier `contains` key that listed SUBST was removed: duplicate
  // keys in an object literal are resolved in favour of the last one, so it
  // was dead and only obscured the intent.)
  contains: [ hljs.BACKSLASH_ESCAPE ],
  variants: [
    {
      begin: /'/,
      end: /'/
    },
    // One variant per supported `%q` delimiter pair.
    ...string_variants("%q", STRING_DELIMITERS)
  ]
};

// Mode for Ruby strings written with double quotes or as `%Q` percent
// literals. Unlike the single-quoted mode, SUBST is listed in `contains`.
const DOUBLE_QUOTED_STRING = {
className: 'string',
contains: [
hljs.BACKSLASH_ESCAPE,
// SUBST is defined outside this view; presumably the `#{...}` interpolation
// mode (the expected test markup shows `hljs-subst` inside these strings).
SUBST
],
variants: [
{
begin: /"/,
end: /"/
},
// One variant per delimiter pair in STRING_DELIMITERS, prefixed with `%Q`.
...string_variants("%Q", STRING_DELIMITERS)
]
}

// TODO: continue to break these out into smaller more discrete modes
const OLD_STRINGS_TOO_MANY_VARIANTS = {
className: 'string',
contains: [ hljs.BACKSLASH_ESCAPE ],
variants: [
{
begin: /`/,
end: /`/
end: /`/,
contains: [
SUBST
]
},
{
begin: /%[qQwWx]?\(/,
end: /\)/
begin: /%[wWx]?\(/,
end: /\)/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
{
begin: /%[qQwWx]?\[/,
end: /\]/
begin: /%[wWx]?\[/,
end: /\]/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
{
begin: /%[qQwWx]?\{/,
end: /\}/
begin: /%[wWx]?\{/,
end: /\}/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
{
begin: /%[qQwWx]?</,
end: />/
begin: /%[wWx]?</,
end: />/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
{
begin: /%[qQwWx]?\//,
end: /\//
begin: /%[wWx]?\//,
end: /\//,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
{
begin: /%[qQwWx]?%/,
end: /%/
begin: /%[wWx]?%/,
end: /%/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
{
begin: /%[qQwWx]?-/,
end: /-/
begin: /%[wWx]?-/,
end: /-/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
{
begin: /%[qQwWx]?\|/,
end: /\|/
begin: /%[wWx]?\|/,
end: /\|/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
// in the following expressions, \B in the beginning suppresses recognition of ?-sequences
// where ? is the last character of a preceding identifier, as in: `func?4`
Expand All @@ -181,27 +252,35 @@ export default function(hljs) {
{ begin: /\B\?(\\M-\\C-|\\M-\\c|\\c\\M-|\\M-|\\C-\\M-)[\x20-\x7e]/ },
{ begin: /\B\?\\(c|C-)[\x20-\x7e]/ },
{ begin: /\B\?\\?\S/ },
// heredocs
{
// this guard makes sure that we have an entire heredoc and not a false
// positive (auto-detect, etc.)
begin: regex.concat(
/<<[-~]?'?/,
regex.lookahead(/(\w+)(?=\W)[^\n]*\n(?:[^\n]*\n)*?\s*\1\b/)
),
]
};

const HEREDOC = {
scope: "string",
// this guard makes sure that we have an entire heredoc and not a false
// positive (auto-detect, etc.)
begin: regex.concat(
/<<[-~]?'?/,
regex.lookahead(/(\w+)(?=\W)[^\n]*\n(?:[^\n]*\n)*?\s*\1\b/)
),
contains: [
hljs.END_SAME_AS_BEGIN({
begin: /(\w+)/,
end: /(\w+)/,
contains: [
hljs.END_SAME_AS_BEGIN({
begin: /(\w+)/,
end: /(\w+)/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
})
hljs.BACKSLASH_ESCAPE,
SUBST
]
}
})
]
};
}

// All string-like modes gathered into one list so they can be spread into a
// `contains` array together.
// NOTE(review): element order may determine which mode wins when several
// could match at the same position — confirm before reordering.
const STRINGS = [
SINGLE_QUOTED_STRING,
DOUBLE_QUOTED_STRING,
HEREDOC,
OLD_STRINGS_TOO_MANY_VARIANTS
]

// Ruby syntax is underdocumented, but this grammar seems to be accurate
// as of version 2.7.2 (confirmed with irb and `Ripper.sexp(...)`)
Expand Down Expand Up @@ -316,8 +395,34 @@ export default function(hljs) {
scope: "title.class"
};

// Mode for Ruby symbol literals, tagged with the `symbol` class.
// Three spellings are covered: a bare `:name`, a double-quoted `:"..."`
// and a single-quoted `:'...'`.
const SYMBOL = {
className: 'symbol',
variants: [
{
// Bare symbol: a colon immediately followed by RUBY_METHOD_RE
// (defined outside this view).
begin: regex.concat(/:/, RUBY_METHOD_RE)
},
{
// Double-quoted symbol: backslash escapes and SUBST are recognised inside.
begin: /:"/,
end: /"/,
contains: [
hljs.BACKSLASH_ESCAPE,
SUBST
]
},
{
// Single-quoted symbol: backslash escapes only, no SUBST.
begin: /:'/,
end: /'/,
contains: [
hljs.BACKSLASH_ESCAPE
]
}
],
// Symbols contribute nothing to the relevance score.
relevance: 0
};

const RUBY_DEFAULT_CONTAINS = [
STRING,
SYMBOL,
...STRINGS,
CLASS_DEFINITION,
INCLUDE_EXTEND,
OBJECT_CREATION,
Expand All @@ -332,15 +437,6 @@ export default function(hljs) {
begin: hljs.UNDERSCORE_IDENT_RE + '(!|\\?)?:',
relevance: 0
},
{
className: 'symbol',
begin: ':(?!\\s)',
contains: [
STRING,
{ begin: RUBY_METHOD_RE }
],
relevance: 0
},
NUMBER,
{
// negative-look forward attempts to prevent false matches like:
Expand Down
2 changes: 1 addition & 1 deletion test/markup/erb/default.expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@

&lt;%-</span><span class="language-ruby"> available_things = things.select(&amp;<span class="hljs-symbol">:available?</span>) </span><span class="language-xml">-%&gt;
&lt;%%</span><span class="language-ruby">- x = <span class="hljs-number">1</span> + <span class="hljs-number">2</span> -</span><span class="language-xml">%%&gt;
&lt;%%</span><span class="language-ruby"> value = <span class="hljs-string">&#x27;real string <span class="hljs-subst">#{<span class="hljs-variable">@value</span>}</span>&#x27;</span> </span><span class="language-xml">%%&gt;
&lt;%%</span><span class="language-ruby"> value = <span class="hljs-string">&quot;real string <span class="hljs-subst">#{<span class="hljs-variable">@value</span>}</span>&quot;</span> </span><span class="language-xml">%%&gt;
&lt;%%</span><span class="language-ruby">= available_things.inspect </span><span class="language-xml">%%&gt;
</span>
2 changes: 1 addition & 1 deletion test/markup/erb/default.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@

<%- available_things = things.select(&:available?) -%>
<%%- x = 1 + 2 -%%>
<%% value = 'real string #{@value}' %%>
<%% value = "real string #{@value}" %%>
<%%= available_things.inspect %%>
14 changes: 13 additions & 1 deletion test/markup/ruby/strings.expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,21 @@ c = <span class="hljs-string">?\c\M-x</span> <span class="hljs-comment"># me
c = <span class="hljs-string">?\c?</span> <span class="hljs-comment"># delete, ASCII 7Fh (DEL)</span>
c = <span class="hljs-string">?\C-?</span> <span class="hljs-comment"># delete, ASCII 7Fh (DEL)</span>

<span class="hljs-comment"># symbols</span>
c = <span class="hljs-symbol">:booger</span> <span class="hljs-comment">#=&gt; :booger</span>
c = <span class="hljs-symbol">:&quot;booger&quot;</span> <span class="hljs-comment">#=&gt; :booger</span>
c = <span class="hljs-symbol">:&#x27;booger&#x27;</span> <span class="hljs-comment">#=&gt; :booger</span>
c = <span class="hljs-symbol">:&quot;b<span class="hljs-subst">#{yum}</span>ger&quot;</span> <span class="hljs-comment">#=&gt; :burger</span>

<span class="hljs-comment"># Unicode character(s) of type \u{nnnn ....}, where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])</span>
c = <span class="hljs-string">?\u{00AF09}</span>
c = <span class="hljs-string">?\u{0AF09}</span>
c = <span class="hljs-string">?\u{AF9}</span>
c = <span class="hljs-string">?\u{F9}</span>
c = <span class="hljs-string">?\u{F}</span>
c = <span class="hljs-string">?\u{F}</span>

<span class="hljs-comment"># Interpolation</span>
c = <span class="hljs-string">&#x27;a#{1}b&#x27;</span> <span class="hljs-comment">#=&gt; &quot;a\#{1}b&quot;</span>
c = <span class="hljs-string">&quot;a<span class="hljs-subst">#{<span class="hljs-number">1</span>}</span>b&quot;</span> <span class="hljs-comment">#=&gt; &quot;a1b&quot;</span>
c = <span class="hljs-string">%q(a#{1}b)</span> <span class="hljs-comment">#=&gt; &quot;a\#{1}b&quot;</span>
c = <span class="hljs-string">%Q{a<span class="hljs-subst">#{<span class="hljs-number">1</span>}</span>b}</span> <span class="hljs-comment">#=&gt; &quot;a1b&quot;</span>
14 changes: 13 additions & 1 deletion test/markup/ruby/strings.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,21 @@ c = ?\c\M-x # meta control character, where x is an ASCII printable characte
c = ?\c? # delete, ASCII 7Fh (DEL)
c = ?\C-? # delete, ASCII 7Fh (DEL)

# symbols
c = :booger #=> :booger
c = :"booger" #=> :booger
c = :'booger' #=> :booger
c = :"b#{yum}ger" #=> :burger

# Unicode character(s) of type \u{nnnn ....}, where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
c = ?\u{00AF09}
c = ?\u{0AF09}
c = ?\u{AF9}
c = ?\u{F9}
c = ?\u{F}
c = ?\u{F}

# Interpolation
c = 'a#{1}b' #=> "a\#{1}b"
c = "a#{1}b" #=> "a1b"
c = %q(a#{1}b) #=> "a\#{1}b"
c = %Q{a#{1}b} #=> "a1b"