Skip to content

Commit 4422648

Browse files
authored
Add support for syntax highlighting for code_llvm and code_native (#36984)
1 parent 98d1300 commit 4422648

File tree

3 files changed

+724
-11
lines changed

3 files changed

+724
-11
lines changed

stdlib/InteractiveUtils/src/codeview.jl

Lines changed: 253 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,46 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3+
# highlighting settings
# Each entry switches colorized output on or off for one printer: `:warntype` is read by
# `warntype_type_printer`, `:llvm` by `code_llvm` and `:native` by `code_native`.
# The binding is `const` so that the functions reading it do not go through an untyped
# global; the entries themselves remain mutable, e.g. `highlighting[:llvm] = false`.
const highlighting = Dict{Symbol, Bool}(
    :warntype => true,
    :llvm => true,
    :native => true,
)
9+
10+
# Style table used by `printstyled_ll`: token category => (bold, color).
# The color is either a color name (`Symbol`) or an `Int` color code, as accepted by the
# `color` keyword of `printstyled`.
# The binding is `const` because it is looked up for every printed token; individual
# entries can still be replaced, e.g. `llstyle[:comment] = (false, :light_black)`.
const llstyle = Dict{Symbol, Tuple{Bool, Union{Symbol, Int}}}(
    :default     => (false, :light_black), # e.g. comma, equal sign, unknown token
    :comment     => (false, :green),
    :label       => (false, :light_red),
    :instruction => (true, :light_cyan),
    :type        => (false, :cyan),
    :number      => (false, :yellow),
    :bracket     => (false, :yellow),
    :variable    => (false, :normal), # e.g. variable, register
    :keyword     => (false, :light_magenta),
    :funcname    => (false, :light_yellow),
)
22+
23+
# Print `x` to `io` using the (bold, color) pair registered in `llstyle` under the
# category `s`, then print `trailing_spaces` without any styling.
# Throws a `KeyError` if `s` is not a key of `llstyle`.
function printstyled_ll(io::IO, x, s::Symbol, trailing_spaces="")
    isbold, shade = llstyle[s]
    printstyled(io, x; bold=isbold, color=shade)
    # whitespace is emitted after the styled text so that it is never colored
    print(io, trailing_spaces)
end
27+
328
# displaying type warnings
429

530
# Print the `::T` annotation for `ty` to `io`.
#   * Nothing is printed when `used` is false.
#   * A `ty` that is a `Type` and is either not a dispatch element or equal to `Core.Box`
#     is emphasized; when `highlighting[:warntype]` is set and `ty` is an expected
#     `Union`, the milder warning color is used instead.
#   * Any other `ty` is printed in cyan, or unstyled when `highlighting[:warntype]` is off.
# Always returns `nothing`.
function warntype_type_printer(io::IO, @nospecialize(ty), used::Bool)
    if used
        flagged = ty isa Type && (!Base.isdispatchelem(ty) || ty == Core.Box)
        colorize = highlighting[:warntype]
        if !flagged
            if colorize
                Base.printstyled(io, "::$ty", color=:cyan) # show the "good" type
            else
                Base.print(io, "::$ty")
            end
        elseif colorize && ty isa Union && Base.is_expected_union(ty)
            Base.emphasize(io, "::$ty", Base.warn_color()) # more mild user notification
        else
            Base.emphasize(io, "::$ty")
        end
    end
    return nothing
end
@@ -135,8 +164,15 @@ All metadata and dbg.* calls are removed from the printed bitcode. For the full
135164
To dump the entire module that encapsulates the function (with declarations), set the `dump_module` keyword to true.
136165
Keyword argument `debuginfo` may be one of `:source` (default) or `:none`, to specify the verbosity of code comments.
137166
"""
138-
code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) =
139-
print(io, _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo))
167+
function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool,
                   dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default)
    # Obtain the textual dump from `_dump_function` first, then decide how to print it:
    # the colorizing printer is used only when `highlighting[:llvm]` is enabled and `io`
    # reports the `:color` property; otherwise the text is printed unchanged.
    ir = _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo)
    colorize = highlighting[:llvm] && get(io, :color, false)
    return colorize ? print_llvm(io, ir) : print(io, ir)
end
140176
function code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Tuple);
                   raw::Bool=false, dump_module::Bool=false, optimize::Bool=true,
                   debuginfo::Symbol=:default)
    # Keyword-argument front end: forwards every option positionally to the
    # `code_llvm(io, f, types, raw, dump_module, optimize, debuginfo)` method.
    return code_llvm(io, f, types, raw, dump_module, optimize, debuginfo)
end
142178
code_llvm(@nospecialize(f), @nospecialize(types=Tuple); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) =
@@ -151,8 +187,219 @@ generic function and type signature to `io`.
151187
Switch assembly syntax using `syntax` symbol parameter set to `:att` for AT&T syntax or `:intel` for Intel syntax.
152188
Keyword argument `debuginfo` may be one of `:source` (default) or `:none`, to specify the verbosity of code comments.
153189
"""
154-
code_native(io::IO, @nospecialize(f), @nospecialize(types=Tuple); syntax::Symbol=:att, debuginfo::Symbol=:default) =
155-
print(io, _dump_function(f, types, true, false, false, false, syntax, true, debuginfo))
190+
function code_native(io::IO, @nospecialize(f), @nospecialize(types=Tuple);
                     syntax::Symbol=:att, debuginfo::Symbol=:default)
    # Obtain the textual dump from `_dump_function` first, then decide how to print it:
    # the colorizing printer is used only when `highlighting[:native]` is enabled and
    # `io` reports the `:color` property; otherwise the text is printed unchanged.
    asm = _dump_function(f, types, true, false, false, false, syntax, true, debuginfo)
    colorize = highlighting[:native] && get(io, :color, false)
    return colorize ? print_native(io, asm) : print(io, asm)
end
156199
function code_native(@nospecialize(f), @nospecialize(types=Tuple);
                     syntax::Symbol=:att, debuginfo::Symbol=:default)
    # Convenience method: same as the `io` method, writing to `stdout`.
    return code_native(stdout, f, types; syntax=syntax, debuginfo=debuginfo)
end

# resolve ambiguous call: three positional arguments ending in a `Symbol` are rejected
function code_native(::IO, ::Any, ::Symbol)
    error("illegal code_native call")
end
202+
203+
## colorized IR and assembly printing
204+
205+
# Recognizes a whole token that is a numeric literal. Three alternatives are accepted:
#   * decimal digits with an optional leading `$` and an optional minus sign,
#   * a hexadecimal literal introduced by `0x`,
#   * a decimal number with an optional minus sign, fraction and exponent.
const num_regex = r"^(?:\$?-?\d+|0x[0-9A-Fa-f]+|-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)$"
206+
207+
# Print `code` to `io` line by line with syntax highlighting.
# Each line is split into leading whitespace, a token part and a trailing comment; the
# comment starts at the first `;` that is not immediately followed by `"`.
# The whitespace is printed as is, the tokens go through `print_llvm_tokens`, and the
# comment is printed with the `:comment` style. Returns `nothing`.
function print_llvm(io::IO, code::String)
    for line in eachline(IOBuffer(code))
        parts = match(r"^(\s*)((?:[^;]|;\")*)(.*)$", line)
        if parts !== nothing
            lead, body, trailer = parts.captures
            print(io, lead)
            print_llvm_tokens(io, body)
            printstyled_ll(io, trailer, :comment)
            println(io)
        end
    end
end
219+
220+
# Whole-token match for the type names that are printed with the `:type` style:
# fixed names, `x86_*` / `ppc_*` prefixed names and `iN` integer types.
const llvm_types =
    r"^(?:void|half|float|double|x86_\w+|ppc_\w+|label|metadata|type|opaque|token|i\d+)$"

# Whole-token match for comparison condition codes (`eq`, `ult`, `oge`, `ord`, ...).
const llvm_cond = r"^(?:[ou]?eq|[ou]?ne|[uso][gl][te]|ord|uno)$" # true|false
223+
224+
# Print the token part of one line (comment already removed) to `io`.
# Three optional prefixes are peeled off in order and styled, then whatever is left is
# handed to `print_llvm_operands`:
#   1. a label, i.e. a run of non-space characters ending in `:`,
#   2. an assignment target `%name =`,
#   3. a leading lowercase word, styled as a keyword when it is `define`, `declare` or
#      `type`, or when the remaining text contains `=`, and as an instruction otherwise.
function print_llvm_tokens(io, tokens)
    rest = tokens

    lead = match(r"^((?:[^\s:]+:)?)(\s*)(.*)", rest)
    if lead !== nothing
        name, gap, rest = lead.captures
        printstyled_ll(io, name, :label, gap)
    end

    assign = match(r"^(%[^\s=]+)(\s*)=(\s*)(.*)", rest)
    if assign !== nothing
        target, before, after, rest = assign.captures
        printstyled_ll(io, target, :variable, before)
        printstyled_ll(io, '=', :default, after)
    end

    head = match(r"^([a-z]\w*)(\s*)(.*)", rest)
    if head !== nothing
        word, gap, rest = head.captures
        style = :instruction
        if occursin(r"^(?:define|declare|type)$", word) || occursin("=", rest)
            style = :keyword
        end
        printstyled_ll(io, word, style, gap)
    end

    print_llvm_operands(io, rest)
end
245+
246+
# Print operands from `tokens` one after another via `print_llvm_operand` until no text
# is left, and return the (then empty) remainder.
function print_llvm_operands(io, tokens)
    remaining = tokens
    while true
        isempty(remaining) && return remaining
        remaining = print_llvm_operand(io, remaining)
    end
end
252+
253+
# Print a single operand taken from the front of `tokens` and return the unconsumed text.
# Scanning stops after a comma, after a closing bracket, when nothing at the front of the
# text can be recognized, or when the text is exhausted. An opening bracket hands the
# text that follows it to `print_llvm_operands`.
# Every other token is classified (type, condition code, number, `@name`, `%name`,
# lowercase word) and printed with the matching style.
function print_llvm_operand(io, tokens)
    rest = tokens
    expectlabel = false # set by the type `label`; applies to the next `%name`
    while !isempty(rest)
        sep = match(r"^,(\s*)(.*)", rest)
        if sep !== nothing
            gap, rest = sep.captures
            printstyled_ll(io, ',', :default, gap)
            break
        end

        punct = match(r"^(\*+|=)(\s*)(.*)", rest)
        if punct !== nothing
            mark, gap, rest = punct.captures
            printstyled_ll(io, mark, :default, gap)
            continue
        end

        quoted = match(r"^(\"[^\"]*\")(\s*)(.*)", rest)
        if quoted !== nothing
            text, gap, rest = quoted.captures
            printstyled_ll(io, text, :variable, gap)
            continue
        end

        opening = match(r"^([({\[<])(\s*)(.*)", rest)
        if opening !== nothing
            brace, gap, rest = opening.captures
            printstyled_ll(io, brace, :bracket, gap)
            rest = print_llvm_operands(io, rest) # enter
            continue
        end

        closing = match(r"^([)}\]>])(\s*)(.*)", rest)
        if closing !== nothing
            brace, gap, rest = closing.captures
            printstyled_ll(io, brace, :bracket, gap)
            break # leave
        end

        word = match(r"^([^\s,*=(){}\[\]<>]+)(\s*)(.*)", rest)
        word === nothing && break
        token, gap, rest = word.captures
        style = if occursin(llvm_types, token)
            expectlabel = token == "label"
            :type
        elseif occursin(llvm_cond, token) # condition code is instruction-level
            :instruction
        elseif occursin(num_regex, token)
            :number
        elseif occursin(r"^@.+$", token)
            :funcname
        elseif occursin(r"^%.+$", token)
            # a `%name` not starting with a digit and directly followed by `]` is
            # styled as a label as well
            expectlabel |= occursin(r"^%[^\d].*$", token) & occursin(r"^\]", rest)
            picked = expectlabel ? :label : :variable
            expectlabel = false
            picked
        elseif occursin(r"^[a-z]\w+$", token)
            :keyword
        else
            :default
        end
        printstyled_ll(io, token, style)
        print(io, gap)
    end
    return rest
end
313+
314+
# Print `code` to `io` line by line with syntax highlighting.
# `arch` selects the `print_native_tokens` method through `Val(arch)` and defaults to
# `sys_arch_category()`.
# Each line is split into leading whitespace, a token part and a trailing comment; the
# comment starts at `;` (unless followed by `"`), at `#` followed by whitespace or the
# end of the line, or at `//`. Returns `nothing`.
function print_native(io::IO, code::String, arch::Symbol=sys_arch_category())
    archtag = Val(arch)
    for line in eachline(IOBuffer(code))
        parts = match(r"^(\s*)((?:[^;#/]|#\S|;\"|/[^/])*)(.*)$", line)
        parts === nothing && continue
        lead, body, trailer = parts.captures
        print(io, lead)
        print_native_tokens(io, body, archtag)
        printstyled_ll(io, trailer, :comment)
        println(io)
    end
end
327+
328+
# Map `Sys.ARCH` to the architecture family used to select a native-code tokenizer:
# `:x86` for `:x86_64` and `:i686`, `:arm` for `:aarch64` and any name starting with
# "arm", and `:unsupported` for everything else.
function sys_arch_category()
    arch = Sys.ARCH
    (arch === :x86_64 || arch === :i686) && return :x86
    (arch === :aarch64 || startswith(string(arch), "arm")) && return :arm
    return :unsupported
end
337+
338+
# Fallback for architectures without a dedicated tokenizer: the text is printed as is.
function print_native_tokens(io, line, ::Val)
    return print(io, line)
end
339+
340+
# Whole-token patterns used when highlighting native code.

# x86 operand-size / addressing words (`byte`, `dword`, `xmmword`, `ptr`, `offset`, ...).
const x86_ptr = r"^(?:(?:[xyz]mm|[dq])?word|byte|ptr|offset)$"
# AVX-512 operand decorations such as `z`, `rn-sae`, `sae` and `1to8`.
const avx512flags = r"^(?:z|r[nduz]-sae|sae|1to1?\d)$"
# ARM condition codes. `hs` is the alias of `cs`, just as `lo` is the alias of `cc`;
# the list used to contain the non-existent code `ho` in its place.
const arm_cond = r"^(?:eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|[lg][te]|al|nv)$"
# ARM shift/rotate operators, the `!` marker and the `/z`, `/m` suffixes.
const arm_keywords = r"^(?:lsl|lsr|asr|ror|rrx|!|/[zm])$"
344+
345+
# Print the token part of one line of x86 or ARM assembly (comment already removed).
#
# An optional label (`name:` or `"quoted name":`) and an optional instruction mnemonic
# are peeled off first; the operands are then consumed token by token:
#   * `,`, `:` and `*` use the `:default` style, brackets the `:bracket` style;
#   * on ARM, `#<number>` immediates use the `:number` style;
#   * numbers, x86 size/addressing words and AVX-512 decorations, ARM shift operators
#     and condition codes, `L...` labels, `$...` names and register/symbol names each
#     get their own style; everything else falls back to `:default`.
# Scanning stops when the text is exhausted or nothing at its front can be recognized.
# Returns `nothing`.
function print_native_tokens(io, tokens, arch::Union{Val{:x86}, Val{:arm}})
    x86 = arch isa Val{:x86}
    m = match(r"^((?:[^\s:]+:|\"[^\"]+\":)?)(\s*)(.*)", tokens)
    if m !== nothing
        label, spaces, tokens = m.captures
        printstyled_ll(io, label, :label, spaces)
    end
    haslabel = false
    m = match(r"^([a-z][\w.]*)(\s*)(.*)", tokens)
    if m !== nothing
        instruction, spaces, tokens = m.captures
        printstyled_ll(io, instruction, :instruction, spaces)
        # `b`, `bl`, `b.<cond>`, `cbz`/`cbnz` and `tbz`/`tbnz` are followed by a label
        haslabel = occursin(r"^(?:bl?|bl?\.\w{2,5}|[ct]bn?z)?$", instruction)
    end

    isfuncname = false # set by the x86 word `offset`; applies to the next name
    while !isempty(tokens)
        m = match(r"^([,:*])(\s*)(.*)", tokens)
        if m !== nothing
            sym, spaces, tokens = m.captures
            printstyled_ll(io, sym, :default, spaces)
            isfuncname = false
            continue
        end
        m = match(r"^([(){}\[\]])(\s*)(.*)", tokens)
        if m !== nothing
            bracket, spaces, tokens = m.captures
            printstyled_ll(io, bracket, :bracket, spaces)
            continue
        end
        m = match(r"^#([0-9a-fx.-]+)(\s*)(.*)", tokens)
        if !x86 && m !== nothing && occursin(num_regex, m.captures[1])
            num, spaces, tokens = m.captures
            printstyled_ll(io, "#" * num, :number, spaces)
            continue
        end

        m = match(r"^([^\s,:*(){}\[\]][^\s,:*/(){}\[\]]*)(\s*)(.*)", tokens)
        m === nothing && break
        token, spaces, tokens = m.captures
        if occursin(num_regex, token)
            printstyled_ll(io, token, :number)
        # The parentheses matter: without them `&&` binds tighter than `||`, so the
        # AVX-512 decorations would also be matched when highlighting ARM code.
        elseif x86 && (occursin(x86_ptr, token) || occursin(avx512flags, token))
            printstyled_ll(io, token, :keyword)
            isfuncname = token == "offset"
        elseif !x86 && (occursin(arm_keywords, token) || occursin(arm_cond, token))
            printstyled_ll(io, token, :keyword)
        elseif occursin(r"^L.+$", token)
            printstyled_ll(io, token, :label)
        elseif occursin(r"^\$.+$", token)
            printstyled_ll(io, token, :funcname)
        elseif occursin(r"^%?(?:[a-z][\w.]+|\"[^\"]+\")$", token)
            # after a branch-like mnemonic, a name is a label only if it is the last
            # operand, i.e. no comma follows
            islabel = haslabel & !occursin(',', tokens)
            printstyled_ll(io, token, islabel ? :label : isfuncname ? :funcname : :variable)
            isfuncname = false
        else
            printstyled_ll(io, token, :default)
        end
        print(io, spaces)
    end
end

0 commit comments

Comments
 (0)