Skip to content

Make more types jl_static_show readably #58512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 105 additions & 18 deletions src/rtutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
*/
#include "platform.h"

#include <float.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
Expand Down Expand Up @@ -691,12 +693,12 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname
return 0;
}

static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap) JL_NOTSAFEPOINT
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap, int raw) JL_NOTSAFEPOINT
{
size_t n = 0;
if (wrap)
n += jl_printf(out, "\"");
if (!u8_isvalid(str, len)) {
if (!raw && !u8_isvalid(str, len)) {
// alternate print algorithm that preserves data if it's not UTF-8
static const char hexdig[] = "0123456789abcdef";
for (size_t i = 0; i < len; i++) {
Expand All @@ -713,7 +715,11 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
int special = 0;
for (size_t i = 0; i < len; i++) {
uint8_t c = str[i];
if (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$') {
if (raw && ((c == '\\' && i == len-1) || c == '"')) {
special = 1;
break;
}
else if (!raw && (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$')) {
special = 1;
break;
}
Expand All @@ -722,6 +728,25 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
jl_uv_puts(out, str, len);
n += len;
}
else if (raw) {
// REF: Base.escape_raw_string
int escapes = 0;
for (size_t i = 0; i < len; i++) {
uint8_t c = str[i];
if (c == '\\') {
escapes++;
}
else {
if (c == '"')
for (escapes++; escapes > 0; escapes--)
n += jl_printf(out, "\\");
escapes = 0;
}
n += jl_printf(out, "%c", str[i]);
}
for (; escapes > 0; escapes--)
n += jl_printf(out, "\\");
}
else {
char buf[512];
size_t i = 0;
Expand All @@ -737,18 +762,28 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
return n;
}

static int jl_is_quoted_sym(const char *sn)
{
static const char *const quoted_syms[] = {":", "::", ":=", "=", "==", "===", "=>", "`"};
for (int i = 0; i < sizeof quoted_syms / sizeof *quoted_syms; i++)
if (!strcmp(sn, quoted_syms[i]))
return 1;
return 0;
}

// TODO: in theory, we need a separate function for showing symbols in an
// expression context (where `Symbol("foo\x01bar")` is ok) and a syntactic
// context (where var"" must be used).
static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
{
size_t n = 0;
const char *sn = jl_symbol_name(name);
int quoted = !jl_is_identifier(sn) && !jl_is_operator(sn);
if (quoted) {
n += jl_printf(out, "var");
// TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules
n += jl_static_show_string(out, sn, strlen(sn), 1);
if (jl_is_identifier(sn) || (jl_is_operator(sn) && !jl_is_quoted_sym(sn))) {
n += jl_printf(out, "%s", sn);
}
else {
n += jl_printf(out, "%s", sn);
n += jl_printf(out, "var");
n += jl_static_show_string(out, sn, strlen(sn), 1, 1);
}
return n;
}
Expand Down Expand Up @@ -777,6 +812,51 @@ static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT {
return 0;
}

static size_t jl_static_show_float(JL_STREAM *out, double v,
jl_datatype_t *vt) JL_NOTSAFEPOINT
{
size_t n = 0;
// TODO: non-canonical NaNs do not round-trip
// TOOD: BFloat16
const char *size_suffix = vt == jl_float16_type ? "16" :
vt == jl_float32_type ? "32" :
"";
// Requires minimum 1 (sign) + 17 (sig) + 1 (dot) + 5 ("e-123") + 1 (null)
char buf[32];
// Base B significand digits required to print n base-b significand bits
// (including leading 1): N = 2 + floor(n/log(b, B))
// Float16 5
// Float32 9
// Float64 17
// REF: https://dl.acm.org/doi/pdf/10.1145/93542.93559
if (isnan(v)) {
n += jl_printf(out, "NaN%s", size_suffix);
}
else if (isinf(v)) {
n += jl_printf(out, "%sInf%s", v < 0 ? "-" : "", size_suffix);
}
else if (vt == jl_float64_type) {
n += jl_printf(out, "%#.17g", v);
}
else if (vt == jl_float32_type) {
size_t m = snprintf(buf, sizeof buf, "%.9g", v);
// If the exponent was printed, replace it with 'f'
char *p = (char *)memchr(buf, 'e', m);
if (p)
*p = 'f';
jl_uv_puts(out, buf, m);
n += m;
// If no exponent was printed, we must add one
if (!p)
n += jl_printf(out, "f0");
}
else {
assert(vt == jl_float16_type);
n += jl_printf(out, "Float16(%#.5g)", v);
}
return n;
}

// `v` might be pointing to a field inlined in a structure therefore
// `jl_typeof(v)` may not be the same with `vt` and only `vt` should be
// used to determine the type of the value.
Expand Down Expand Up @@ -954,17 +1034,21 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
int f = *(uint32_t*)jl_data_ptr(v);
n += jl_printf(out, "#<intrinsic #%d %s>", f, jl_intrinsic_name(f));
}
else if (vt == jl_long_type) {
// Avoid unnecessary Int64(x)/Int32(x)
n += jl_printf(out, "%" PRIdPTR, *(intptr_t*)v);
}
else if (vt == jl_int64_type) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like negative Int128 values are still problematic:

julia> ccall(:jl_, Cvoid, (Any,), typemin(Int128))
Int128(0x80000000000000000000000000000000)
julia> ccall(:jl_, Cvoid, (Any,), Int128(-1))
Int128(0xffffffffffffffffffffffffffffffff)

And UInt128 prints with an explicit conversion it doesn't need:

julia> ccall(:jl_, Cvoid, (Any,), UInt128(0))
UInt128(0x00000000000000000000000000000000)
julia> UInt128(0x00000000000000000000000000000000) === 0x00000000000000000000000000000000
true

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It isn't problematic, since that Int128() printing means a bitcast, not a function call

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure - we can fix this as part of the bit-casting fix-ups, but regardless it doesn't round-trip:

julia> Int128(0x80000000000000000000000000000000)
ERROR: InexactError: convert(Int128, 0x80000000000000000000000000000000)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It works just fine: you're just making the mistake of calling eval, when the semantics of this printer have never been to be usable with eval, but rather is a representation for reinterpret.

julia> reinterpret(Int128, 0x80000000000000000000000000000000)
-170141183460469231731687303715884105728

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we interested in an "almost" eval-able human-readable serialization format?

The only trade-off that I can understand is that it's a bit more compact than a proper repr, but more importantly it needs a custom de-serializer which afaict isn't implemented

Since we need a programmatic round-trip, it's not enough IMO to appeal to a de-serializer that "should" be used if it doesn't actually exist

Copy link
Member

@vtjnash vtjnash May 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it is human readable, and thus a useful debugging tool. Someone should implement a de-serializer if they want to use this for generating precompile statements. Just blinding executing code will never give the correct answers anyways, so these little gotcha are just reminders that using eval is a dead-end approach and not fixable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a disingenuous argument @vtjnash -- nobody reads this output for debugging (and we've tried); on the contrary, it is actively used for generating precompile statements. It would be better to remove all the "little gotchas".

Copy link
Member

@topolarity topolarity May 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just blinding executing code will never give the correct answers anyways, so these little gotcha are just reminders that using eval is a dead-end approach and not fixable.

I feel like this is lacking justification - all of the recent serialization problems we've enumerated so far (floating point, integer types, bfloat, restricted inner constructors, symbol escaping, etc.) are basic problems around making sure that we print the correct literal (or the correct literal + a direct, loss-less conversion to a built-in datatype). Of course, the other major issue (still unsolved) is around custom constructors making a reinterpret required to round-trip, but the point stands that most of those are direct fixes to the de-parse (print "1.0" not "1") and not problems inherent to eval.

I'm very open to more fundamental arguments about why eval is broken for this, but the only caveat that I'm aware of is that eval may not map type names (Vector, String, Foo) correctly depending on the evaluation context, or that these may have a different meaning (esp. anonymized typenames like #foo##2) in different executions.

It is true that Serialization does this better by using the PkgId / UUID, but is that the only fundamental thing that we gain by moving away from eval? Is there a more fundamental reason why eval cannot be made to work here? Do we trust Serialization to do all of this correctly, or does that have outstanding correct-ness issues too?

Copy link
Member

@KristofferC KristofferC May 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whenever there is an alternative serialization format to describe a set of methods to compile that is better I'm sure we will all happily switch to that. Until that exists it seems reasonable to do incremental improvements to the existing one, even if it is doomed to never be 100% correct in the end.

n += jl_printf(out, "%" PRId64, *(int64_t*)v);
n += jl_printf(out, "Int64(%" PRId64 ")", *(int64_t*)v);
}
else if (vt == jl_int32_type) {
n += jl_printf(out, "%" PRId32, *(int32_t*)v);
n += jl_printf(out, "Int32(%" PRId32 ")", *(int32_t*)v);
}
else if (vt == jl_int16_type) {
n += jl_printf(out, "%" PRId16, *(int16_t*)v);
n += jl_printf(out, "Int16(%" PRId16 ")", *(int16_t*)v);
}
else if (vt == jl_int8_type) {
n += jl_printf(out, "%" PRId8, *(int8_t*)v);
n += jl_printf(out, "Int8(%" PRId8 ")", *(int8_t*)v);
}
else if (vt == jl_uint64_type) {
n += jl_printf(out, "0x%016" PRIx64, *(uint64_t*)v);
Expand All @@ -985,11 +1069,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
n += jl_printf(out, "0x%08" PRIx32, *(uint32_t*)v);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you fix-up Pointer types as well?

julia> ccall(:jl_, Cvoid, (Any,), Val(Ptr{Cvoid}(1)))
Base.Val{0x0000000000000001}()

It's a little awkward, because their repr doesn't round-trip either:

julia> Val(Ptr{Cvoid}(1))
Val{Ptr{Nothing} @0x0000000000000001}()

julia> Val{Ptr{Nothing} @0x0000000000000001}()
ERROR: ParseError:
# Error @ REPL[10]:1:18
Val{Ptr{Nothing} @0x0000000000000001}()
#                └─────────────────┘ ── Expected `}`

but regardless of whether it parses properly, I don't think we should be throwing out the type information

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also consider fixing the jl_tvar_type case also. Right now it only prints the name after checking that the parent UnionAll is present, but we should also check that that name is unique, and if not, to find some way to make it unique in printing.

#endif
}
else if (vt == jl_float16_type) {
n += jl_static_show_float(out, julia_half_to_float(*(uint16_t *)v), vt);
}
else if (vt == jl_float32_type) {
n += jl_printf(out, "%gf", *(float*)v);
n += jl_static_show_float(out, *(float *)v, vt);
}
else if (vt == jl_float64_type) {
n += jl_printf(out, "%g", *(double*)v);
n += jl_static_show_float(out, *(double *)v, vt);
}
else if (vt == jl_bool_type) {
n += jl_printf(out, "%s", *(uint8_t*)v ? "true" : "false");
Expand All @@ -998,7 +1085,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
n += jl_printf(out, "nothing");
}
else if (vt == jl_string_type) {
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1);
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1, 0);
}
else if (v == jl_bottom_type) {
n += jl_printf(out, "Union{}");
Expand Down Expand Up @@ -1528,10 +1615,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
}
jl_printf(str, "\n@ ");
if (jl_is_string(file)) {
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0);
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0, 0);
}
else if (jl_is_symbol(file)) {
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0);
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0, 0);
}
jl_printf(str, ":");
jl_static_show(str, line);
Expand Down
54 changes: 52 additions & 2 deletions test/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ let oldout = stdout, olderr = stderr
redirect_stderr(olderr)
close(wrout)
close(wrerr)
@test fetch(out) == "primitive type Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123\"C\"\n"
@test fetch(out) == "primitive type Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123.0000000000000000\"C\"\n"
@test fetch(err) == "TESTA\nTESTB\nΑ1Β2\"A\"\n"
finally
redirect_stdout(oldout)
Expand Down Expand Up @@ -1570,8 +1570,58 @@ struct var"%X%" end # Invalid name without '#'
typeof(+),
var"#f#",
typeof(var"#f#"),

# Integers should round-trip (#52677)
1, UInt(1),
Int8(1), Int16(1), Int32(1), Int64(1),
UInt8(1), UInt16(1), UInt32(1), UInt64(1),

# Float round-trip
Float16(1), Float32(1), Float64(1),
Float16(1.5), Float32(1.5), Float64(1.5),
Float16(0.4893243538921085), Float32(0.4893243538921085), Float64(0.4893243538921085),
# Examples that require the full 5, 9, and 17 digits of precision
Float16(0.00010014), Float32(1.00000075f-36), Float64(-1.561051336605761e-182),
floatmax(Float16), floatmax(Float32), floatmax(Float64),
floatmin(Float16), floatmin(Float32), floatmin(Float64),
Float16(0.0), 0.0f0, 0.0,
Float16(-0.0), -0.0f0, -0.0,
Inf16, Inf32, Inf,
-Inf16, -Inf32, -Inf,
nextfloat(Float16(0)), nextfloat(Float32(0)), nextfloat(Float64(0)),
NaN16, NaN32, NaN,
Float16(1e3), 1f7, 1e16,
Float16(-1e3), -1f7, -1e16,
Float16(1e4), 1f8, 1e17,
Float16(-1e4), -1f8, -1e17,

# :var"" escaping rules differ from strings (#58484)
:foo,
:var"bar baz",
:var"a $b", # No escaping for $ in raw string
:var"a\b", # No escaping for backslashes in middle
:var"a\\", # Backslashes must be escaped at the end
:var"a\\\\",
:var"a\"b",
:var"a\"",
:var"\\\"",
:+, :var"+-",
:(=), :(:), :(::), # Requires quoting
Symbol("a\nb"),

Val(Float16(1.0)), Val(1f0), Val(1.0),
Val(:abc), Val(:(=)), Val(:var"a\b"),

Val(1), Val(Int8(1)), Val(Int16(1)), Val(Int32(1)), Val(Int64(1)), Val(Int128(1)),
Val(UInt(1)), Val(UInt8(1)), Val(UInt16(1)), Val(UInt32(1)), Val(UInt64(1)), Val(UInt128(1)),

# BROKEN
# Symbol("a\xffb"),
# User-defined primitive types
# Non-canonical NaNs
# BFloat16
)
@test v == eval(Meta.parse(static_shown(v)))
@test v === eval(Meta.parse(static_shown(v)))
end
end

Expand Down