-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Make more types jl_static_show readably #58512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
b6996d2
e73b687
02f54e4
8a9e425
c4d2117
208a56c
054ce44
77c913f
77fcca3
0d2a833
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,8 @@ | |
*/ | ||
#include "platform.h" | ||
|
||
#include <float.h> | ||
#include <math.h> | ||
#include <stdlib.h> | ||
#include <stdio.h> | ||
#include <string.h> | ||
|
@@ -691,12 +693,12 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname | |
return 0; | ||
} | ||
|
||
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap) JL_NOTSAFEPOINT | ||
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap, int raw) JL_NOTSAFEPOINT | ||
{ | ||
size_t n = 0; | ||
if (wrap) | ||
n += jl_printf(out, "\""); | ||
if (!u8_isvalid(str, len)) { | ||
if (!raw && !u8_isvalid(str, len)) { | ||
// alternate print algorithm that preserves data if it's not UTF-8 | ||
static const char hexdig[] = "0123456789abcdef"; | ||
for (size_t i = 0; i < len; i++) { | ||
|
@@ -713,7 +715,11 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, | |
int special = 0; | ||
for (size_t i = 0; i < len; i++) { | ||
uint8_t c = str[i]; | ||
if (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$') { | ||
if (raw && ((c == '\\' && i == len-1) || c == '"')) { | ||
special = 1; | ||
break; | ||
} | ||
else if (!raw && (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$')) { | ||
special = 1; | ||
break; | ||
} | ||
|
@@ -722,6 +728,17 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, | |
jl_uv_puts(out, str, len); | ||
n += len; | ||
} | ||
else if (raw) { | ||
for (size_t i = 0; i < len; i++) { | ||
uint8_t c = str[i]; | ||
if (c == '"') | ||
jl_uv_puts(out, "\\\"", 2); | ||
else if (c == '\\' && i == len - 1) | ||
jl_uv_puts(out, "\\\\", 2); | ||
else | ||
jl_uv_puts(out, str + i, 1); | ||
} | ||
} | ||
else { | ||
char buf[512]; | ||
size_t i = 0; | ||
|
@@ -737,18 +754,28 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, | |
return n; | ||
} | ||
|
||
static int jl_is_quoted_sym(const char *sn) | ||
{ | ||
static const char *const quoted_syms[] = {":", "::", ":=", "=", "==", "===", "=>", "`"}; | ||
for (int i = 0; i < sizeof quoted_syms / sizeof *quoted_syms; i++) | ||
if (!strcmp(sn, quoted_syms[i])) | ||
return 1; | ||
return 0; | ||
} | ||
|
||
// TODO: in theory, we need a separate function for showing symbols in an | ||
// expression context (where `Symbol("foo\x01bar")` is ok) and a syntactic | ||
// context (where var"" must be used). | ||
static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT | ||
{ | ||
size_t n = 0; | ||
const char *sn = jl_symbol_name(name); | ||
int quoted = !jl_is_identifier(sn) && !jl_is_operator(sn); | ||
if (quoted) { | ||
n += jl_printf(out, "var"); | ||
// TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules | ||
n += jl_static_show_string(out, sn, strlen(sn), 1); | ||
if (jl_is_identifier(sn) || (jl_is_operator(sn) && !jl_is_quoted_sym(sn))) { | ||
n += jl_printf(out, "%s", sn); | ||
} | ||
else { | ||
n += jl_printf(out, "%s", sn); | ||
n += jl_printf(out, "var"); | ||
n += jl_static_show_string(out, sn, strlen(sn), 1, 1); | ||
} | ||
return n; | ||
} | ||
|
@@ -777,6 +804,42 @@ static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT { | |
return 0; | ||
} | ||
|
||
static size_t jl_static_show_float(JL_STREAM *out, double v, | ||
jl_datatype_t *vt) JL_NOTSAFEPOINT | ||
{ | ||
size_t n = 0; | ||
// TODO: non-canonical NaNs do not round-trip | ||
// TOOD: BFloat16 | ||
const char *size_suffix = vt == jl_float16_type ? "16" : | ||
vt == jl_float32_type ? "32" : | ||
""; | ||
// Digits required to print p significand bits: ceil(p * log(10, 2)) | ||
// Float16 4 | ||
// Float32 8 | ||
vtjnash marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Float64 17 | ||
if (isnan(v)) { | ||
n += jl_printf(out, "NaN%s", size_suffix); | ||
} | ||
else if (isinf(v)) { | ||
n += jl_printf(out, "%sInf%s", v < 0 ? "-" : "", size_suffix); | ||
} | ||
else if (vt == jl_float64_type) { | ||
// If no decimal point or exponent will be printed, we must add ".0" | ||
double ipart, fpart; | ||
fpart = modf(v, &ipart); | ||
int dotzero = v == 0 || (fpart == 0.0 && fabs(ipart) < 1e17); | ||
vtjnash marked this conversation as resolved.
Show resolved
Hide resolved
|
||
n += jl_printf(out, "%.17g%s", v, dotzero ? ".0" : ""); | ||
xal-0 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
else if (vt == jl_float32_type) { | ||
n += jl_printf(out, "Float32(%.8g)", v); | ||
vtjnash marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
else { | ||
assert(vt == jl_float16_type); | ||
n += jl_printf(out, "Float16(%.4g)", v); | ||
} | ||
return n; | ||
} | ||
|
||
// `v` might be pointing to a field inlined in a structure therefore | ||
// `jl_typeof(v)` may not be the same with `vt` and only `vt` should be | ||
// used to determine the type of the value. | ||
|
@@ -954,17 +1017,21 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt | |
int f = *(uint32_t*)jl_data_ptr(v); | ||
n += jl_printf(out, "#<intrinsic #%d %s>", f, jl_intrinsic_name(f)); | ||
} | ||
else if (vt == jl_long_type) { | ||
// Avoid unnecessary Int64(x)/Int32(x) | ||
n += jl_printf(out, "%" PRIdPTR, *(intptr_t*)v); | ||
} | ||
else if (vt == jl_int64_type) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like negative julia> ccall(:jl_, Cvoid, (Any,), typemin(Int128))
Int128(0x80000000000000000000000000000000)
julia> ccall(:jl_, Cvoid, (Any,), Int128(-1))
Int128(0xffffffffffffffffffffffffffffffff) And julia> ccall(:jl_, Cvoid, (Any,), UInt128(0))
UInt128(0x00000000000000000000000000000000)
julia> UInt128(0x00000000000000000000000000000000) === 0x00000000000000000000000000000000
true There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It isn't problematic, since that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure - we can fix this as part of the bit-casting fix-ups, but regardless it doesn't round-trip: julia> Int128(0x80000000000000000000000000000000)
ERROR: InexactError: convert(Int128, 0x80000000000000000000000000000000) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It works just fine: you're just making the mistake of calling
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we interested in an "almost" eval-able human-readable serialization format? The only trade-off that I can understand is that it's a bit more compact than a proper Since we need a programmatic round-trip, it's not enough IMO to appeal to a de-serializer that "should" be used if it doesn't actually exist There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because it is human readable, and thus a useful debugging tool. Someone should implement a de-serializer if they want to use this for generating precompile statements. Just blinding executing code will never give the correct answers anyways, so these little gotcha are just reminders that using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a disingenuous argument @vtjnash -- nobody reads this output for debugging (and we've tried); on the contrary, it is actively used for generating precompile statements. It would be better to remove all the "little gotchas". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I feel like this is lacking justification - all of the recent serialization problems we've enumerated so far (floating point, integer types, bfloat, restricted inner constructors, symbol escaping, etc.) are basic problems around making sure that we print the correct literal (or the correct literal + a direct, loss-less conversion to a built-in datatype). Of course, the other major issue (still unsolved) is around custom constructors making a I'm very open to more fundamental arguments about why It is true that Serialization does this better by using the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whenever there is an alternative serialization format to describe a set of methods to compile that is better I'm sure we will all happily switch to that. Until that exists it seems reasonable to do incremental improvements to the existing one, even if it is doomed to never be 100% correct in the end. |
||
n += jl_printf(out, "%" PRId64, *(int64_t*)v); | ||
n += jl_printf(out, "Int64(%" PRId64 ")", *(int64_t*)v); | ||
} | ||
else if (vt == jl_int32_type) { | ||
n += jl_printf(out, "%" PRId32, *(int32_t*)v); | ||
n += jl_printf(out, "Int32(%" PRId32 ")", *(int32_t*)v); | ||
} | ||
else if (vt == jl_int16_type) { | ||
n += jl_printf(out, "%" PRId16, *(int16_t*)v); | ||
n += jl_printf(out, "Int16(%" PRId16 ")", *(int16_t*)v); | ||
} | ||
else if (vt == jl_int8_type) { | ||
n += jl_printf(out, "%" PRId8, *(int8_t*)v); | ||
n += jl_printf(out, "Int8(%" PRId8 ")", *(int8_t*)v); | ||
} | ||
else if (vt == jl_uint64_type) { | ||
n += jl_printf(out, "0x%016" PRIx64, *(uint64_t*)v); | ||
|
@@ -985,11 +1052,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt | |
n += jl_printf(out, "0x%08" PRIx32, *(uint32_t*)v); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you fix-up Pointer types as well? julia> ccall(:jl_, Cvoid, (Any,), Val(Ptr{Cvoid}(1)))
Base.Val{0x0000000000000001}() It's a little awkward, because their julia> Val(Ptr{Cvoid}(1))
Val{Ptr{Nothing} @0x0000000000000001}()
julia> Val{Ptr{Nothing} @0x0000000000000001}()
ERROR: ParseError:
# Error @ REPL[10]:1:18
Val{Ptr{Nothing} @0x0000000000000001}()
# └─────────────────┘ ── Expected `}` but regardless of whether it parses properly, I don't think we should be throwing out the type information There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should also consider fixing the |
||
#endif | ||
} | ||
else if (vt == jl_float16_type) { | ||
n += jl_static_show_float(out, julia_half_to_float(*(uint16_t *)v), vt); | ||
} | ||
else if (vt == jl_float32_type) { | ||
n += jl_printf(out, "%gf", *(float*)v); | ||
n += jl_static_show_float(out, *(float *)v, vt); | ||
} | ||
else if (vt == jl_float64_type) { | ||
n += jl_printf(out, "%g", *(double*)v); | ||
n += jl_static_show_float(out, *(double *)v, vt); | ||
} | ||
else if (vt == jl_bool_type) { | ||
n += jl_printf(out, "%s", *(uint8_t*)v ? "true" : "false"); | ||
|
@@ -998,7 +1068,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt | |
n += jl_printf(out, "nothing"); | ||
} | ||
else if (vt == jl_string_type) { | ||
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1); | ||
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1, 0); | ||
} | ||
else if (v == jl_bottom_type) { | ||
n += jl_printf(out, "Union{}"); | ||
|
@@ -1528,10 +1598,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id, | |
} | ||
jl_printf(str, "\n@ "); | ||
if (jl_is_string(file)) { | ||
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0); | ||
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0, 0); | ||
} | ||
else if (jl_is_symbol(file)) { | ||
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0); | ||
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0, 0); | ||
} | ||
jl_printf(str, ":"); | ||
jl_static_show(str, line); | ||
|
Uh oh!
There was an error while loading. Please reload this page.