Skip to content

Commit 4055e60

Browse files
author
zooster
authored
AstGen: disallow leading zeroes in int literals and int types
This makes `0123`, `u0123`, etc. illegal. I'm now confident that this is a good change because it actually caught two C header translation mistakes in `haiku.zig`. Clearly, `0123` being octal in C (TIL) can cause confusion; Zig already requires the `0o` prefix for octal literals, and this change additionally disallows leading zeroes in decimal integer literals so that such a literal can no longer be misread. For consistency, and because it looks weird, leading zeroes are disallowed in integer type names too (e.g. `u0123`). Fixes #11963. Fixes #12417.
1 parent 2523b44 commit 4055e60

File tree

6 files changed

+74
-27
lines changed

6 files changed

+74
-27
lines changed

lib/std/c/haiku.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,7 @@ pub const T = struct {
702702
pub const CSETAF = 0x8002;
703703
pub const CSETAW = 0x8003;
704704
pub const CWAITEVENT = 0x8004;
705-
pub const CSBRK = 08005;
705+
pub const CSBRK = 0x8005;
706706
pub const CFLSH = 0x8006;
707707
pub const CXONC = 0x8007;
708708
pub const CQUERYCONNECTED = 0x8008;
@@ -874,7 +874,7 @@ pub const S = struct {
874874
pub const IFDIR = 0o040000;
875875
pub const IFCHR = 0o020000;
876876
pub const IFIFO = 0o010000;
877-
pub const INDEX_DIR = 04000000000;
877+
pub const INDEX_DIR = 0o4000000000;
878878

879879
pub const IUMSK = 0o7777;
880880
pub const ISUID = 0o4000;

lib/std/zig/parser_test.zig

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4254,10 +4254,10 @@ test "zig fmt: integer literals with underscore separators" {
42544254
\\const
42554255
\\ x =
42564256
\\ 1_234_567
4257-
\\ + (0b0_1-0o7_0+0xff_FF ) + 0_0;
4257+
\\ + (0b0_1-0o7_0+0xff_FF ) + 1_0;
42584258
,
42594259
\\const x =
4260-
\\ 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 0_0;
4260+
\\ 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 1_0;
42614261
\\
42624262
);
42634263
}
@@ -4266,7 +4266,7 @@ test "zig fmt: hex literals with underscore separators" {
42664266
try testTransform(
42674267
\\pub fn orMask(a: [ 1_000 ]u64, b: [ 1_000] u64) [1_000]u64 {
42684268
\\ var c: [1_000]u64 = [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000;
4269-
\\ for (c [ 0_0 .. ]) |_, i| {
4269+
\\ for (c [ 1_0 .. ]) |_, i| {
42704270
\\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
42714271
\\ }
42724272
\\ return c;
@@ -4276,7 +4276,7 @@ test "zig fmt: hex literals with underscore separators" {
42764276
,
42774277
\\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 {
42784278
\\ var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000;
4279-
\\ for (c[0_0..]) |_, i| {
4279+
\\ for (c[1_0..]) |_, i| {
42804280
\\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
42814281
\\ }
42824282
\\ return c;
@@ -4288,14 +4288,14 @@ test "zig fmt: hex literals with underscore separators" {
42884288
test "zig fmt: decimal float literals with underscore separators" {
42894289
try testTransform(
42904290
\\pub fn main() void {
4291-
\\ const a:f64=(10.0e-0+(10.0e+0))+10_00.00_00e-2+00_00.00_10e+4;
4292-
\\ const b:f64=010.0--0_10.0+0_1_0.0_0+1e2;
4291+
\\ const a:f64=(10.0e-0+(10.0e+0))+10_00.00_00e-2+20_00.00_10e+4;
4292+
\\ const b:f64=1_0.0--10_10.0+1_0_0.0_0+1e2;
42934293
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
42944294
\\}
42954295
,
42964296
\\pub fn main() void {
4297-
\\ const a: f64 = (10.0e-0 + (10.0e+0)) + 10_00.00_00e-2 + 00_00.00_10e+4;
4298-
\\ const b: f64 = 010.0 - -0_10.0 + 0_1_0.0_0 + 1e2;
4297+
\\ const a: f64 = (10.0e-0 + (10.0e+0)) + 10_00.00_00e-2 + 20_00.00_10e+4;
4298+
\\ const b: f64 = 1_0.0 - -10_10.0 + 1_0_0.0_0 + 1e2;
42994299
\\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
43004300
\\}
43014301
\\

src/AstGen.zig

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4088,6 +4088,13 @@ fn testDecl(
40884088
true => .signed,
40894089
false => .unsigned,
40904090
};
4091+
if (ident_name_raw.len >= 3 and ident_name_raw[1] == '0') {
4092+
return astgen.failTok(
4093+
test_name_token,
4094+
"primitive integer type '{s}' has leading zero",
4095+
.{ident_name_raw},
4096+
);
4097+
}
40914098
_ = parseBitCount(ident_name_raw[1..]) catch |err| switch (err) {
40924099
error.Overflow => return astgen.failTok(
40934100
test_name_token,
@@ -6791,6 +6798,13 @@ fn identifier(
67916798
true => .signed,
67926799
false => .unsigned,
67936800
};
6801+
if (ident_name_raw.len >= 3 and ident_name_raw[1] == '0') {
6802+
return astgen.failNode(
6803+
ident,
6804+
"primitive integer type '{s}' has leading zero",
6805+
.{ident_name_raw},
6806+
);
6807+
}
67946808
const bit_count = parseBitCount(ident_name_raw[1..]) catch |err| switch (err) {
67956809
error.Overflow => return astgen.failNode(
67966810
ident,
@@ -7021,17 +7035,6 @@ fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: Ast.Node.Index) InnerError!Z
70217035
const main_tokens = tree.nodes.items(.main_token);
70227036
const int_token = main_tokens[node];
70237037
const prefixed_bytes = tree.tokenSlice(int_token);
7024-
if (std.fmt.parseInt(u64, prefixed_bytes, 0)) |small_int| {
7025-
const result: Zir.Inst.Ref = switch (small_int) {
7026-
0 => .zero,
7027-
1 => .one,
7028-
else => try gz.addInt(small_int),
7029-
};
7030-
return rvalue(gz, rl, result, node);
7031-
} else |err| switch (err) {
7032-
error.InvalidCharacter => unreachable, // Caught by the parser.
7033-
error.Overflow => {},
7034-
}
70357038

70367039
var base: u8 = 10;
70377040
var non_prefixed: []const u8 = prefixed_bytes;
@@ -7046,6 +7049,24 @@ fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: Ast.Node.Index) InnerError!Z
70467049
non_prefixed = prefixed_bytes[2..];
70477050
}
70487051

7052+
if (base == 10 and prefixed_bytes.len >= 2 and prefixed_bytes[0] == '0') {
7053+
return astgen.failNodeNotes(node, "integer literal '{s}' has leading zero", .{prefixed_bytes}, &.{
7054+
try astgen.errNoteNode(node, "use '0o' prefix for octal literals", .{}),
7055+
});
7056+
}
7057+
7058+
if (std.fmt.parseUnsigned(u64, non_prefixed, base)) |small_int| {
7059+
const result: Zir.Inst.Ref = switch (small_int) {
7060+
0 => .zero,
7061+
1 => .one,
7062+
else => try gz.addInt(small_int),
7063+
};
7064+
return rvalue(gz, rl, result, node);
7065+
} else |err| switch (err) {
7066+
error.InvalidCharacter => unreachable, // Caught by the parser.
7067+
error.Overflow => {},
7068+
}
7069+
70497070
const gpa = astgen.gpa;
70507071
var big_int = try std.math.big.int.Managed.init(gpa);
70517072
defer big_int.deinit();

test/behavior/math.zig

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,10 +239,9 @@ test "quad hex float literal parsing in range" {
239239
}
240240

241241
test "underscore separator parsing" {
242-
try expect(0_0_0_0 == 0);
243242
try expect(1_234_567 == 1234567);
244-
try expect(001_234_567 == 1234567);
245-
try expect(0_0_1_2_3_4_5_6_7 == 1234567);
243+
try expect(1_234_567 == 1234567);
244+
try expect(1_2_3_4_5_6_7 == 1234567);
246245

247246
try expect(0b0_0_0_0 == 0);
248247
try expect(0b1010_1010 == 0b10101010);
@@ -260,7 +259,7 @@ test "underscore separator parsing" {
260259
try expect(0x1_0_1_0_1_0_1_0 == 0x10101010);
261260

262261
try expect(123_456.789_000e1_0 == 123456.789000e10);
263-
try expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
262+
try expect(1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
264263

265264
try expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10);
266265
try expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10);
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
fn main() void {
2-
var bad: u128 = 0010_;
2+
var bad: u128 = 10_;
33
_ = bad;
44
}
55

@@ -8,4 +8,4 @@ fn main() void {
88
// target=native
99
//
1010
// :2:21: error: expected expression, found 'invalid bytes'
11-
// :2:26: note: invalid byte: ';'
11+
// :2:24: note: invalid byte: ';'
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
export fn entry1() void {
2+
const T = u000123;
3+
_ = T;
4+
}
5+
export fn entry2() void {
6+
_ = i0;
7+
_ = u0;
8+
var x: i01 = 1;
9+
_ = x;
10+
}
11+
export fn entry3() void {
12+
_ = 000123;
13+
}
14+
export fn entry4() void {
15+
_ = 01;
16+
}
17+
18+
// error
19+
// backend=llvm
20+
// target=native
21+
//
22+
// :2:15: error: primitive integer type 'u000123' has leading zero
23+
// :8:12: error: primitive integer type 'i01' has leading zero
24+
// :12:9: error: integer literal '000123' has leading zero
25+
// :12:9: note: use '0o' prefix for octal literals
26+
// :15:9: error: integer literal '01' has leading zero
27+
// :15:9: note: use '0o' prefix for octal literals

0 commit comments

Comments
 (0)