Skip to content

Commit db82c22

Browse files
authored
Refactor gen_struct_info to emit JSON directly. NFC (#24489)
I wanted to add something to the gen_struct_info functionality, but found it difficult to comprehend the custom intermediate format of the C program's output. I changed it to emit the desired result directly as JSON instead, which produces the same output but makes the code easier to reason about and debug, and possibly a tiny bit faster as well.
1 parent 5d8d606 commit db82c22

File tree

2 files changed

+91
-136
lines changed

2 files changed

+91
-136
lines changed

src/lib/libbootstrap.js

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -33,28 +33,19 @@ addToLibrary({
3333
quit_(1, e);
3434
},
3535

36-
// printf/puts implementations for when musl is not pulled in - very
37-
// partial, but enough for bootstrapping structInfo
38-
printf__deps: ['$formatString', '$intArrayToString'],
39-
printf__sig: 'ipp',
40-
printf: (format, varargs) => {
41-
// int printf(const char *restrict format, ...);
42-
// http://pubs.opengroup.org/onlinepubs/000095399/functions/printf.html
43-
// extra effort to support printf, even without a filesystem. very partial, very hackish
44-
var result = formatString(format, varargs);
45-
var string = intArrayToString(result);
46-
if (string.endsWith('\n')) string = string.slice(0, -1); // remove a final \n, as Module.print will do that
47-
out(string);
48-
return result.length;
49-
},
50-
51-
puts__sig: 'ip',
52-
puts: (s) => {
53-
// extra effort to support puts, even without a filesystem. very partial, very hackish
54-
var result = UTF8ToString(s);
55-
var string = result;
56-
if (string.endsWith('\n')) string = string.slice(0, -1); // remove a final \n, as Module.print will do that
57-
out(string);
58-
return result.length;
36+
fd_write__sig: 'iippp',
37+
fd_write: (fd, iov, iovcnt, pnum) => {
38+
// implementation almost copied from libwasi.js one for SYSCALLS_REQUIRE_FILESYSTEM=0
39+
// (the only difference is that we can't use C_STRUCTS here)
40+
var num = 0;
41+
for (var i = 0; i < iovcnt; i++) {
42+
var ptr = {{{ makeGetValue('iov', 0, '*') }}};
43+
var len = {{{ makeGetValue('iov', POINTER_SIZE, '*') }}};
44+
iov += {{{ POINTER_SIZE }}} * 2;
45+
process.stdout.write(HEAPU8.subarray(ptr, ptr + len));
46+
num += len;
47+
}
48+
{{{ makeSetValue('pnum', 0, 'num', '*') }}};
49+
return 0;
5950
},
6051
});

tools/gen_struct_info.py

Lines changed: 77 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
import tempfile
6363
import shlex
6464
import subprocess
65+
import typing
6566

6667
__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
6768
__rootdir__ = os.path.dirname(__scriptdir__)
@@ -107,100 +108,69 @@ def show(msg):
107108
sys.stderr.write('gen_struct_info: %s\n' % msg)
108109

109110

110-
# The following three functions generate C code. The output of the compiled code will be
111-
# parsed later on and then put back together into a dict structure by parse_c_output().
111+
# The Scope class generates C code which, in turn, outputs JSON.
112112
#
113113
# Example:
114-
# c_descent('test1', code)
115-
# c_set('item', 'i%i', '111', code)
116-
# c_set('item2', 'i%i', '9', code)
117-
# c_set('item3', 's%s', '"Hello"', code)
118-
# c_ascent(code)
119-
# c_set('outer', 'f%f', '0.999', code)
120-
#
121-
# Will result in:
122-
# {
123-
# 'test1': {
124-
# 'item': 111,
125-
# 'item2': 9,
126-
# 'item3': 'Hello',
127-
# },
128-
# 'outer': 0.999
129-
# }
130-
def c_set(name, type_, value, code):
131-
code.append('printf("K' + name + '\\n");')
132-
code.append('printf("V' + type_ + '\\n", ' + value + ');')
133-
134-
135-
def c_descent(name, code):
136-
code.append('printf("D' + name + '\\n");')
137-
138-
139-
def c_ascent(code):
140-
code.append('printf("A\\n");')
141-
142-
143-
def parse_c_output(lines):
144-
result = {}
145-
cur_level = result
146-
parent = []
147-
key = None
148-
149-
for line in lines:
150-
arg = line[1:].strip()
151-
if '::' in arg:
152-
arg = arg.split('::', 1)[1]
153-
if line[0] == 'K':
154-
# This is a key
155-
key = arg
156-
elif line[0] == 'V':
157-
# A value
158-
if arg[0] == 'i':
159-
arg = int(arg[1:])
160-
elif arg[0] == 'f':
161-
arg = float(arg[1:])
162-
elif arg[0] == 's':
163-
arg = arg[1:]
164-
165-
cur_level[key] = arg
166-
elif line[0] == 'D':
167-
# Remember the current level as the last parent.
168-
parent.append(cur_level)
169-
170-
# We descend one level.
171-
cur_level[arg] = {}
172-
cur_level = cur_level[arg]
173-
elif line[0] == 'A':
174-
# We return to the parent dict. (One level up.)
175-
cur_level = parent.pop()
176-
177-
return result
178-
179-
180-
def gen_inspect_code(path, struct, code):
181-
if path[0][-1] == '#':
182-
path[0] = path[0].rstrip('#')
183-
prefix = ''
184-
else:
185-
prefix = 'struct '
114+
# with Scope(code) as scope: # generates code that outputs beginning of a JSON object '{\n'
115+
# scope.set('item', '%i', '111') # generates code that outputs '"item": 111'
116+
# scope.set('item2', '%f', '4.2') # generates code that outputs ',\n"item2": 4.2'
117+
# # once the scope is exited, it generates code that outputs the end of the JSON object '\n}'
118+
class Scope:
119+
def __init__(self, code: typing.List[str]):
120+
self.code = code
121+
self.has_data = False
122+
123+
def __enter__(self):
124+
self.code.append('puts("{");')
125+
return self
126+
127+
def __exit__(self, exc_type, exc_val, exc_tb):
128+
if self.has_data:
129+
self.code.append('puts("");')
130+
self.code.append('printf("}");')
131+
132+
def _start_child(self, name: str):
133+
if self.has_data:
134+
self.code.append('puts(",");')
135+
else:
136+
self.has_data = True
137+
if '::' in name:
138+
name = name.split('::', 1)[1]
139+
self.code.append(fr'printf("\"{name}\": ");')
186140

187-
c_descent(path[-1], code)
141+
def child(self, name: str):
142+
self._start_child(name)
143+
return Scope(self.code)
188144

189-
if len(path) == 1:
190-
c_set('__size__', 'i%zu', 'sizeof (' + prefix + path[0] + ')', code)
191-
else:
192-
c_set('__size__', 'i%zu', 'sizeof ((' + prefix + path[0] + ' *)0)->' + '.'.join(path[1:]), code)
193-
# c_set('__offset__', 'i%zu', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:]) + ')', code)
194-
195-
for field in struct:
196-
if isinstance(field, dict):
197-
# We have to recurse to inspect the nested dict.
198-
fname = list(field.keys())[0]
199-
gen_inspect_code(path + [fname], field[fname], code)
145+
def set(self, name: str, type_: str, value: str):
146+
self._start_child(name)
147+
148+
assert type_.startswith('%')
149+
# We only support numeric defines as they are directly compatible with JSON.
150+
# Extend to string escaping if we ever need that in the future.
151+
assert type_[-1] in {'d', 'i', 'u', 'f', 'F', 'e', 'E'}
152+
153+
self.code.append(f'printf("{type_}", {value});')
154+
155+
def gen_inspect_code(self, path: typing.List[str], struct: typing.List[typing.Union[str, dict]]):
156+
if path[0][-1] == '#':
157+
path[0] = path[0].rstrip('#')
158+
prefix = ''
200159
else:
201-
c_set(field, 'i%zu', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code)
160+
prefix = 'struct '
161+
prefix += path[0]
162+
163+
with self.child(path[-1]) as scope:
164+
path_for_sizeof = [f'({prefix}){{}}'] + path[1:]
165+
scope.set('__size__', '%zu', f'sizeof ({".".join(path_for_sizeof)})')
202166

203-
c_ascent(code)
167+
for field in struct:
168+
if isinstance(field, dict):
169+
# We have to recurse to inspect the nested dict.
170+
fname = list(field.keys())[0]
171+
self.gen_inspect_code(path + [fname], field[fname])
172+
else:
173+
scope.set(field, '%zu', f'offsetof({prefix}, {".".join(path[1:] + [field])})')
204174

205175

206176
def generate_c_code(headers):
@@ -209,28 +179,23 @@ def generate_c_code(headers):
209179
code.extend(f'''#include "{header['name']}"''' for header in headers)
210180

211181
code.append('int main() {')
212-
c_descent('structs', code)
213-
for header in headers:
214-
for name, struct in header['structs'].items():
215-
gen_inspect_code([name], struct, code)
216-
217-
c_ascent(code)
218-
c_descent('defines', code)
219-
for header in headers:
220-
for name, type_ in header['defines'].items():
221-
# Add the necessary python type, if missing.
222-
if '%' not in type_:
223-
if type_[-1] in {'d', 'i', 'u'}:
224-
# integer
225-
type_ = 'i%' + type_
226-
elif type_[-1] in {'f', 'F', 'e', 'E', 'g', 'G'}:
227-
# float
228-
type_ = 'f%' + type_
229-
elif type_[-1] in {'x', 'X', 'a', 'A', 'c', 's'}:
230-
# hexadecimal or string
231-
type_ = 's%' + type_
232-
233-
c_set(name, type_, name, code)
182+
183+
with Scope(code) as root:
184+
with root.child('structs') as structs:
185+
for header in headers:
186+
for name, struct in header['structs'].items():
187+
structs.gen_inspect_code([name], struct)
188+
189+
with root.child('defines') as defines:
190+
for header in headers:
191+
for name, type_ in header['defines'].items():
192+
# Add the necessary python type, if missing.
193+
if '%' not in type_:
194+
type_ = f'%{type_}'
195+
196+
defines.set(name, type_, name)
197+
198+
code.append('puts("");') # Add a newline after the JSON output to flush it.
234199

235200
code.append('return 0;')
236201
code.append('}')
@@ -254,7 +219,6 @@ def generate_cmd(js_file_path, src_file_path, cflags):
254219
'-O0',
255220
'-Werror',
256221
'-Wno-format',
257-
'-nolibc',
258222
'-sBOOTSTRAPPING_STRUCT_INFO',
259223
'-sINCOMING_MODULE_JS_API=',
260224
'-sSTRICT',
@@ -295,7 +259,7 @@ def inspect_headers(headers, cflags):
295259
# Run the compiled program.
296260
show('Calling generated program... ' + js_file_path)
297261
node_args = shared.node_bigint_flags(config.NODE_JS)
298-
info = shared.run_js_tool(js_file_path, node_args=node_args, stdout=shared.PIPE).splitlines()
262+
info = shared.run_js_tool(js_file_path, node_args=node_args, stdout=shared.PIPE)
299263

300264
if not DEBUG:
301265
# Remove all temporary files.
@@ -307,7 +271,7 @@ def inspect_headers(headers, cflags):
307271
os.unlink(wasm_file_path)
308272

309273
# Parse the output of the program into a dict.
310-
return parse_c_output(info)
274+
return json.loads(info)
311275

312276

313277
def merge_info(target, src):

0 commit comments

Comments
 (0)