-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Make more types jl_static_show readably #58512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b6996d2
e73b687
02f54e4
8a9e425
c4d2117
208a56c
054ce44
77c913f
77fcca3
0d2a833
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,8 @@ | |
*/ | ||
#include "platform.h" | ||
|
||
#include <float.h> | ||
#include <math.h> | ||
#include <stdlib.h> | ||
#include <stdio.h> | ||
#include <string.h> | ||
|
@@ -691,12 +693,12 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname | |
return 0; | ||
} | ||
|
||
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap) JL_NOTSAFEPOINT | ||
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap, int raw) JL_NOTSAFEPOINT | ||
{ | ||
size_t n = 0; | ||
if (wrap) | ||
n += jl_printf(out, "\""); | ||
if (!u8_isvalid(str, len)) { | ||
if (!raw && !u8_isvalid(str, len)) { | ||
// alternate print algorithm that preserves data if it's not UTF-8 | ||
static const char hexdig[] = "0123456789abcdef"; | ||
for (size_t i = 0; i < len; i++) { | ||
|
@@ -713,7 +715,11 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, | |
int special = 0; | ||
for (size_t i = 0; i < len; i++) { | ||
uint8_t c = str[i]; | ||
if (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$') { | ||
if (raw && ((c == '\\' && i == len-1) || c == '"')) { | ||
special = 1; | ||
break; | ||
} | ||
else if (!raw && (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$')) { | ||
special = 1; | ||
break; | ||
} | ||
|
@@ -722,6 +728,25 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, | |
jl_uv_puts(out, str, len); | ||
n += len; | ||
} | ||
else if (raw) { | ||
// REF: Base.escape_raw_string | ||
int escapes = 0; | ||
for (size_t i = 0; i < len; i++) { | ||
uint8_t c = str[i]; | ||
if (c == '\\') { | ||
escapes++; | ||
} | ||
else { | ||
if (c == '"') | ||
for (escapes++; escapes > 0; escapes--) | ||
n += jl_printf(out, "\\"); | ||
escapes = 0; | ||
} | ||
n += jl_printf(out, "%c", str[i]); | ||
} | ||
for (; escapes > 0; escapes--) | ||
n += jl_printf(out, "\\"); | ||
} | ||
else { | ||
char buf[512]; | ||
size_t i = 0; | ||
|
@@ -737,18 +762,28 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, | |
return n; | ||
} | ||
|
||
static int jl_is_quoted_sym(const char *sn) | ||
{ | ||
static const char *const quoted_syms[] = {":", "::", ":=", "=", "==", "===", "=>", "`"}; | ||
for (int i = 0; i < sizeof quoted_syms / sizeof *quoted_syms; i++) | ||
if (!strcmp(sn, quoted_syms[i])) | ||
return 1; | ||
return 0; | ||
} | ||
|
||
// TODO: in theory, we need a separate function for showing symbols in an | ||
// expression context (where `Symbol("foo\x01bar")` is ok) and a syntactic | ||
// context (where var"" must be used). | ||
static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT | ||
{ | ||
size_t n = 0; | ||
const char *sn = jl_symbol_name(name); | ||
int quoted = !jl_is_identifier(sn) && !jl_is_operator(sn); | ||
if (quoted) { | ||
n += jl_printf(out, "var"); | ||
// TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules | ||
n += jl_static_show_string(out, sn, strlen(sn), 1); | ||
if (jl_is_identifier(sn) || (jl_is_operator(sn) && !jl_is_quoted_sym(sn))) { | ||
n += jl_printf(out, "%s", sn); | ||
} | ||
else { | ||
n += jl_printf(out, "%s", sn); | ||
n += jl_printf(out, "var"); | ||
n += jl_static_show_string(out, sn, strlen(sn), 1, 1); | ||
} | ||
return n; | ||
} | ||
|
@@ -777,6 +812,51 @@ static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT { | |
return 0; | ||
} | ||
|
||
static size_t jl_static_show_float(JL_STREAM *out, double v, | ||
jl_datatype_t *vt) JL_NOTSAFEPOINT | ||
{ | ||
size_t n = 0; | ||
// TODO: non-canonical NaNs do not round-trip | ||
// TOOD: BFloat16 | ||
const char *size_suffix = vt == jl_float16_type ? "16" : | ||
vt == jl_float32_type ? "32" : | ||
""; | ||
// Requires minimum 1 (sign) + 17 (sig) + 1 (dot) + 5 ("e-123") + 1 (null) | ||
char buf[32]; | ||
// Base B significand digits required to print n base-b significand bits | ||
// (including leading 1): N = 2 + floor(n/log(b, B)) | ||
// Float16 5 | ||
// Float32 9 | ||
// Float64 17 | ||
// REF: https://dl.acm.org/doi/pdf/10.1145/93542.93559 | ||
if (isnan(v)) { | ||
n += jl_printf(out, "NaN%s", size_suffix); | ||
} | ||
else if (isinf(v)) { | ||
n += jl_printf(out, "%sInf%s", v < 0 ? "-" : "", size_suffix); | ||
} | ||
else if (vt == jl_float64_type) { | ||
n += jl_printf(out, "%#.17g", v); | ||
} | ||
else if (vt == jl_float32_type) { | ||
size_t m = snprintf(buf, sizeof buf, "%.9g", v); | ||
// If the exponent was printed, replace it with 'f' | ||
char *p = (char *)memchr(buf, 'e', m); | ||
if (p) | ||
*p = 'f'; | ||
jl_uv_puts(out, buf, m); | ||
n += m; | ||
// If no exponent was printed, we must add one | ||
if (!p) | ||
n += jl_printf(out, "f0"); | ||
} | ||
else { | ||
assert(vt == jl_float16_type); | ||
n += jl_printf(out, "Float16(%#.5g)", v); | ||
} | ||
return n; | ||
} | ||
|
||
// `v` might be pointing to a field inlined in a structure therefore | ||
// `jl_typeof(v)` may not be the same with `vt` and only `vt` should be | ||
// used to determine the type of the value. | ||
|
@@ -954,17 +1034,21 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt | |
int f = *(uint32_t*)jl_data_ptr(v); | ||
n += jl_printf(out, "#<intrinsic #%d %s>", f, jl_intrinsic_name(f)); | ||
} | ||
else if (vt == jl_long_type) { | ||
// Avoid unnecessary Int64(x)/Int32(x) | ||
n += jl_printf(out, "%" PRIdPTR, *(intptr_t*)v); | ||
} | ||
else if (vt == jl_int64_type) { | ||
n += jl_printf(out, "%" PRId64, *(int64_t*)v); | ||
n += jl_printf(out, "Int64(%" PRId64 ")", *(int64_t*)v); | ||
} | ||
else if (vt == jl_int32_type) { | ||
n += jl_printf(out, "%" PRId32, *(int32_t*)v); | ||
n += jl_printf(out, "Int32(%" PRId32 ")", *(int32_t*)v); | ||
} | ||
else if (vt == jl_int16_type) { | ||
n += jl_printf(out, "%" PRId16, *(int16_t*)v); | ||
n += jl_printf(out, "Int16(%" PRId16 ")", *(int16_t*)v); | ||
} | ||
else if (vt == jl_int8_type) { | ||
n += jl_printf(out, "%" PRId8, *(int8_t*)v); | ||
n += jl_printf(out, "Int8(%" PRId8 ")", *(int8_t*)v); | ||
} | ||
else if (vt == jl_uint64_type) { | ||
n += jl_printf(out, "0x%016" PRIx64, *(uint64_t*)v); | ||
|
@@ -985,11 +1069,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt | |
n += jl_printf(out, "0x%08" PRIx32, *(uint32_t*)v); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you fix-up Pointer types as well? julia> ccall(:jl_, Cvoid, (Any,), Val(Ptr{Cvoid}(1)))
Base.Val{0x0000000000000001}() It's a little awkward, because their julia> Val(Ptr{Cvoid}(1))
Val{Ptr{Nothing} @0x0000000000000001}()
julia> Val{Ptr{Nothing} @0x0000000000000001}()
ERROR: ParseError:
# Error @ REPL[10]:1:18
Val{Ptr{Nothing} @0x0000000000000001}()
# └─────────────────┘ ── Expected `}` but regardless of whether it parses properly, I don't think we should be throwing out the type information There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should also consider fixing the |
||
#endif | ||
} | ||
else if (vt == jl_float16_type) { | ||
n += jl_static_show_float(out, julia_half_to_float(*(uint16_t *)v), vt); | ||
} | ||
else if (vt == jl_float32_type) { | ||
n += jl_printf(out, "%gf", *(float*)v); | ||
n += jl_static_show_float(out, *(float *)v, vt); | ||
} | ||
else if (vt == jl_float64_type) { | ||
n += jl_printf(out, "%g", *(double*)v); | ||
n += jl_static_show_float(out, *(double *)v, vt); | ||
} | ||
else if (vt == jl_bool_type) { | ||
n += jl_printf(out, "%s", *(uint8_t*)v ? "true" : "false"); | ||
|
@@ -998,7 +1085,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt | |
n += jl_printf(out, "nothing"); | ||
} | ||
else if (vt == jl_string_type) { | ||
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1); | ||
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1, 0); | ||
} | ||
else if (v == jl_bottom_type) { | ||
n += jl_printf(out, "Union{}"); | ||
|
@@ -1528,10 +1615,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id, | |
} | ||
jl_printf(str, "\n@ "); | ||
if (jl_is_string(file)) { | ||
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0); | ||
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0, 0); | ||
} | ||
else if (jl_is_symbol(file)) { | ||
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0); | ||
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0, 0); | ||
} | ||
jl_printf(str, ":"); | ||
jl_static_show(str, line); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like negative
Int128
values are still problematic:And
UInt128
prints with an explicit conversion it doesn't need:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It isn't problematic, since that
Int128()
printing means a bitcast, not a function callThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure - we can fix this as part of the bit-casting fix-ups, but regardless it doesn't round-trip:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It works just fine: you're just making the mistake of calling
eval
, when the semantics of this printer have never been to be usable with eval, but rather is a representation for reinterpret.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are we interested in an "almost" eval-able human-readable serialization format?
The only trade-off that I can understand is that it's a bit more compact than a proper
repr
, but more importantly it needs a custom de-serializer which afaict isn't implementedSince we need a programmatic round-trip, it's not enough IMO to appeal to a de-serializer that "should" be used if it doesn't actually exist
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because it is human readable, and thus a useful debugging tool. Someone should implement a de-serializer if they want to use this for generating precompile statements. Just blinding executing code will never give the correct answers anyways, so these little gotcha are just reminders that using
eval
is a dead-end approach and not fixable.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a disingenuous argument @vtjnash -- nobody reads this output for debugging (and we've tried); on the contrary, it is actively used for generating precompile statements. It would be better to remove all the "little gotchas".
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel like this is lacking justification - all of the recent serialization problems we've enumerated so far (floating point, integer types, bfloat, restricted inner constructors, symbol escaping, etc.) are basic problems around making sure that we print the correct literal (or the correct literal + a direct, loss-less conversion to a built-in datatype). Of course, the other major issue (still unsolved) is around custom constructors making a
reinterpret
required to round-trip, but the point stands that most of those are direct fixes to the de-parse (print "1.0" not "1") and not problems inherent toeval
.I'm very open to more fundamental arguments about why
eval
is broken for this, but the only caveat that I'm aware of is thateval
may not map type names (Vector
,String
,Foo
) correctly depending on the evaluation context, or that these may have a different meaning (esp. anonymized typenames like#foo##2
) in different executions.It is true that Serialization does this better by using the
PkgId / UUID
, but is that the only fundamental thing that we gain by moving away fromeval
? Is there a more fundamental reason whyeval
cannot be made to work here? Do we trust Serialization to do all of this correctly, or does that have outstanding correct-ness issues too?Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whenever there is an alternative serialization format to describe a set of methods to compile that is better I'm sure we will all happily switch to that. Until that exists it seems reasonable to do incremental improvements to the existing one, even if it is doomed to never be 100% correct in the end.