Skip to content

Commit b03ef6b

Browse files
authored
Make more types jl_static_show unambiguously (#58512)
Makes more types survive `jl_static_show` unambiguously: - Symbols - Symbols printed in the `:var"foo"` form use raw string escaping, fixing `:var"a\b"`, `:var"a\\"`, `:var"$a"`, etc. - Symbols that require parens use parens (`:(=)`, ...) - Signed integers: Except for `Int`, signed integers print with their type, like `Int8(1)`. - Floats: floats are printed in a naive but reversible (TODO: double check) way. `Inf(16|32|)` and `NaN(16|32|)` are printed; `Float16` prints the type (`Float16(1.5000)`), and `Float32` prints as a literal with an `f` exponent (`1.5f0`). `Float64`s are printed with 17 significant digits and always include a decimal point (e.g. `1.0000000000000000`), which disambiguates them from `Int`. Fixes #52677, #58484 (comment), #58484 (comment), and the specific case mentioned in #58484. Improves the situation for #38902 but does not close it, because a few cases still do not round-trip (non-exhaustive list): - Non-canonical NaNs - BFloat16 - User-defined primitive types. This one is tricky, because they can have a size different from any type we have literals for.
1 parent 3a5e1f1 commit b03ef6b

File tree

2 files changed

+157
-20
lines changed

2 files changed

+157
-20
lines changed

src/rtutils.c

Lines changed: 105 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
*/
66
#include "platform.h"
77

8+
#include <float.h>
9+
#include <math.h>
810
#include <stdlib.h>
911
#include <stdio.h>
1012
#include <string.h>
@@ -691,12 +693,12 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname
691693
return 0;
692694
}
693695

694-
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap) JL_NOTSAFEPOINT
696+
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap, int raw) JL_NOTSAFEPOINT
695697
{
696698
size_t n = 0;
697699
if (wrap)
698700
n += jl_printf(out, "\"");
699-
if (!u8_isvalid(str, len)) {
701+
if (!raw && !u8_isvalid(str, len)) {
700702
// alternate print algorithm that preserves data if it's not UTF-8
701703
static const char hexdig[] = "0123456789abcdef";
702704
for (size_t i = 0; i < len; i++) {
@@ -713,7 +715,11 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
713715
int special = 0;
714716
for (size_t i = 0; i < len; i++) {
715717
uint8_t c = str[i];
716-
if (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$') {
718+
if (raw && ((c == '\\' && i == len-1) || c == '"')) {
719+
special = 1;
720+
break;
721+
}
722+
else if (!raw && (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$')) {
717723
special = 1;
718724
break;
719725
}
@@ -722,6 +728,25 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
722728
jl_uv_puts(out, str, len);
723729
n += len;
724730
}
731+
else if (raw) {
732+
// REF: Base.escape_raw_string
733+
int escapes = 0;
734+
for (size_t i = 0; i < len; i++) {
735+
uint8_t c = str[i];
736+
if (c == '\\') {
737+
escapes++;
738+
}
739+
else {
740+
if (c == '"')
741+
for (escapes++; escapes > 0; escapes--)
742+
n += jl_printf(out, "\\");
743+
escapes = 0;
744+
}
745+
n += jl_printf(out, "%c", str[i]);
746+
}
747+
for (; escapes > 0; escapes--)
748+
n += jl_printf(out, "\\");
749+
}
725750
else {
726751
char buf[512];
727752
size_t i = 0;
@@ -737,18 +762,28 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
737762
return n;
738763
}
739764

765+
// Return 1 if `sn` is exactly one of the operator spellings listed below,
// 0 otherwise. jl_static_show_symbol uses this to avoid printing these
// operators bare.
static int jl_is_quoted_sym(const char *sn)
{
    static const char *const quoted_syms[] = {":", "::", ":=", "=", "==", "===", "=>", "`"};
    // Element count is a size_t; keep the index the same type so the loop
    // condition does not mix signed and unsigned operands.
    const size_t nsyms = sizeof quoted_syms / sizeof quoted_syms[0];
    for (size_t i = 0; i < nsyms; i++) {
        if (strcmp(sn, quoted_syms[i]) == 0)
            return 1;
    }
    return 0;
}
773+
774+
// TODO: in theory, we need a separate function for showing symbols in an
775+
// expression context (where `Symbol("foo\x01bar")` is ok) and a syntactic
776+
// context (where var"" must be used).
740777
static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
741778
{
742779
size_t n = 0;
743780
const char *sn = jl_symbol_name(name);
744-
int quoted = !jl_is_identifier(sn) && !jl_is_operator(sn);
745-
if (quoted) {
746-
n += jl_printf(out, "var");
747-
// TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules
748-
n += jl_static_show_string(out, sn, strlen(sn), 1);
781+
if (jl_is_identifier(sn) || (jl_is_operator(sn) && !jl_is_quoted_sym(sn))) {
782+
n += jl_printf(out, "%s", sn);
749783
}
750784
else {
751-
n += jl_printf(out, "%s", sn);
785+
n += jl_printf(out, "var");
786+
n += jl_static_show_string(out, sn, strlen(sn), 1, 1);
752787
}
753788
return n;
754789
}
@@ -777,6 +812,51 @@ static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT {
777812
return 0;
778813
}
779814

815+
static size_t jl_static_show_float(JL_STREAM *out, double v,
816+
jl_datatype_t *vt) JL_NOTSAFEPOINT
817+
{
818+
size_t n = 0;
819+
// TODO: non-canonical NaNs do not round-trip
820+
// TOOD: BFloat16
821+
const char *size_suffix = vt == jl_float16_type ? "16" :
822+
vt == jl_float32_type ? "32" :
823+
"";
824+
// Requires minimum 1 (sign) + 17 (sig) + 1 (dot) + 5 ("e-123") + 1 (null)
825+
char buf[32];
826+
// Base B significand digits required to print n base-b significand bits
827+
// (including leading 1): N = 2 + floor(n/log(b, B))
828+
// Float16 5
829+
// Float32 9
830+
// Float64 17
831+
// REF: https://dl.acm.org/doi/pdf/10.1145/93542.93559
832+
if (isnan(v)) {
833+
n += jl_printf(out, "NaN%s", size_suffix);
834+
}
835+
else if (isinf(v)) {
836+
n += jl_printf(out, "%sInf%s", v < 0 ? "-" : "", size_suffix);
837+
}
838+
else if (vt == jl_float64_type) {
839+
n += jl_printf(out, "%#.17g", v);
840+
}
841+
else if (vt == jl_float32_type) {
842+
size_t m = snprintf(buf, sizeof buf, "%.9g", v);
843+
// If the exponent was printed, replace it with 'f'
844+
char *p = (char *)memchr(buf, 'e', m);
845+
if (p)
846+
*p = 'f';
847+
jl_uv_puts(out, buf, m);
848+
n += m;
849+
// If no exponent was printed, we must add one
850+
if (!p)
851+
n += jl_printf(out, "f0");
852+
}
853+
else {
854+
assert(vt == jl_float16_type);
855+
n += jl_printf(out, "Float16(%#.5g)", v);
856+
}
857+
return n;
858+
}
859+
780860
// `v` might be pointing to a field inlined in a structure therefore
781861
// `jl_typeof(v)` may not be the same with `vt` and only `vt` should be
782862
// used to determine the type of the value.
@@ -957,17 +1037,21 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
9571037
int f = *(uint32_t*)jl_data_ptr(v);
9581038
n += jl_printf(out, "#<intrinsic #%d %s>", f, jl_intrinsic_name(f));
9591039
}
1040+
else if (vt == jl_long_type) {
1041+
// Avoid unnecessary Int64(x)/Int32(x)
1042+
n += jl_printf(out, "%" PRIdPTR, *(intptr_t*)v);
1043+
}
9601044
else if (vt == jl_int64_type) {
961-
n += jl_printf(out, "%" PRId64, *(int64_t*)v);
1045+
n += jl_printf(out, "Int64(%" PRId64 ")", *(int64_t*)v);
9621046
}
9631047
else if (vt == jl_int32_type) {
964-
n += jl_printf(out, "%" PRId32, *(int32_t*)v);
1048+
n += jl_printf(out, "Int32(%" PRId32 ")", *(int32_t*)v);
9651049
}
9661050
else if (vt == jl_int16_type) {
967-
n += jl_printf(out, "%" PRId16, *(int16_t*)v);
1051+
n += jl_printf(out, "Int16(%" PRId16 ")", *(int16_t*)v);
9681052
}
9691053
else if (vt == jl_int8_type) {
970-
n += jl_printf(out, "%" PRId8, *(int8_t*)v);
1054+
n += jl_printf(out, "Int8(%" PRId8 ")", *(int8_t*)v);
9711055
}
9721056
else if (vt == jl_uint64_type) {
9731057
n += jl_printf(out, "0x%016" PRIx64, *(uint64_t*)v);
@@ -988,11 +1072,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
9881072
n += jl_printf(out, "0x%08" PRIx32, *(uint32_t*)v);
9891073
#endif
9901074
}
1075+
else if (vt == jl_float16_type) {
1076+
n += jl_static_show_float(out, julia_half_to_float(*(uint16_t *)v), vt);
1077+
}
9911078
else if (vt == jl_float32_type) {
992-
n += jl_printf(out, "%gf", *(float*)v);
1079+
n += jl_static_show_float(out, *(float *)v, vt);
9931080
}
9941081
else if (vt == jl_float64_type) {
995-
n += jl_printf(out, "%g", *(double*)v);
1082+
n += jl_static_show_float(out, *(double *)v, vt);
9961083
}
9971084
else if (vt == jl_bool_type) {
9981085
n += jl_printf(out, "%s", *(uint8_t*)v ? "true" : "false");
@@ -1004,7 +1091,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
10041091
n += jl_printf(out, "Core.GlobalMethods");
10051092
}
10061093
else if (vt == jl_string_type) {
1007-
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1);
1094+
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1, 0);
10081095
}
10091096
else if (v == jl_bottom_type) {
10101097
n += jl_printf(out, "Union{}");
@@ -1532,10 +1619,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
15321619
}
15331620
jl_printf(str, "\n@ ");
15341621
if (jl_is_string(file)) {
1535-
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0);
1622+
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0, 0);
15361623
}
15371624
else if (jl_is_symbol(file)) {
1538-
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0);
1625+
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0, 0);
15391626
}
15401627
jl_printf(str, ":");
15411628
jl_static_show(str, line);

test/show.jl

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ let oldout = stdout, olderr = stderr
703703
redirect_stderr(olderr)
704704
close(wrout)
705705
close(wrerr)
706-
@test fetch(out) == "primitive type Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123\"C\"\n"
706+
@test fetch(out) == "primitive type Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123.0000000000000000\"C\"\n"
707707
@test fetch(err) == "TESTA\nTESTB\nΑ1Β2\"A\"\n"
708708
finally
709709
redirect_stdout(oldout)
@@ -1570,8 +1570,58 @@ struct var"%X%" end # Invalid name without '#'
15701570
typeof(+),
15711571
var"#f#",
15721572
typeof(var"#f#"),
1573+
1574+
# Integers should round-trip (#52677)
1575+
1, UInt(1),
1576+
Int8(1), Int16(1), Int32(1), Int64(1),
1577+
UInt8(1), UInt16(1), UInt32(1), UInt64(1),
1578+
1579+
# Float round-trip
1580+
Float16(1), Float32(1), Float64(1),
1581+
Float16(1.5), Float32(1.5), Float64(1.5),
1582+
Float16(0.4893243538921085), Float32(0.4893243538921085), Float64(0.4893243538921085),
1583+
# Examples that require the full 5, 9, and 17 digits of precision
1584+
Float16(0.00010014), Float32(1.00000075f-36), Float64(-1.561051336605761e-182),
1585+
floatmax(Float16), floatmax(Float32), floatmax(Float64),
1586+
floatmin(Float16), floatmin(Float32), floatmin(Float64),
1587+
Float16(0.0), 0.0f0, 0.0,
1588+
Float16(-0.0), -0.0f0, -0.0,
1589+
Inf16, Inf32, Inf,
1590+
-Inf16, -Inf32, -Inf,
1591+
nextfloat(Float16(0)), nextfloat(Float32(0)), nextfloat(Float64(0)),
1592+
NaN16, NaN32, NaN,
1593+
Float16(1e3), 1f7, 1e16,
1594+
Float16(-1e3), -1f7, -1e16,
1595+
Float16(1e4), 1f8, 1e17,
1596+
Float16(-1e4), -1f8, -1e17,
1597+
1598+
# :var"" escaping rules differ from strings (#58484)
1599+
:foo,
1600+
:var"bar baz",
1601+
:var"a $b", # No escaping for $ in raw string
1602+
:var"a\b", # No escaping for backslashes in middle
1603+
:var"a\\", # Backslashes must be escaped at the end
1604+
:var"a\\\\",
1605+
:var"a\"b",
1606+
:var"a\"",
1607+
:var"\\\"",
1608+
:+, :var"+-",
1609+
:(=), :(:), :(::), # Requires quoting
1610+
Symbol("a\nb"),
1611+
1612+
Val(Float16(1.0)), Val(1f0), Val(1.0),
1613+
Val(:abc), Val(:(=)), Val(:var"a\b"),
1614+
1615+
Val(1), Val(Int8(1)), Val(Int16(1)), Val(Int32(1)), Val(Int64(1)), Val(Int128(1)),
1616+
Val(UInt(1)), Val(UInt8(1)), Val(UInt16(1)), Val(UInt32(1)), Val(UInt64(1)), Val(UInt128(1)),
1617+
1618+
# BROKEN
1619+
# Symbol("a\xffb"),
1620+
# User-defined primitive types
1621+
# Non-canonical NaNs
1622+
# BFloat16
15731623
)
1574-
@test v == eval(Meta.parse(static_shown(v)))
1624+
@test v === eval(Meta.parse(static_shown(v)))
15751625
end
15761626
end
15771627

0 commit comments

Comments
 (0)