Skip to content

Commit 7cb18e7

Browse files
committed
build: replace gdb/readelf with extra/gen_provides.py
Add a new Python script `extra/gen_provides.py` to generate the provides.ld file for linked builds, replacing the previous approach that used 'readelf' and 'gdb'. Signed-off-by: Luca Burelli <l.burelli@arduino.cc>
1 parent e698099 commit 7cb18e7

File tree

2 files changed

+252
-18
lines changed

2 files changed

+252
-18
lines changed

extra/build.sh

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -90,23 +90,15 @@ for ext in elf bin hex; do
9090
done
9191

9292
# Generate the provides.ld file for linked builds
93-
echo "Exporting provides.ld"
94-
READELF=${ZEPHYR_SDK_INSTALL_DIR}/arm-zephyr-eabi/bin/arm-zephyr-eabi-readelf
95-
GDB=${ZEPHYR_SDK_INSTALL_DIR}/arm-zephyr-eabi/bin/arm-zephyr-eabi-gdb
96-
$READELF --wide -s ${BUILD_DIR}/zephyr/zephyr.elf | grep FUNC | awk -F' ' '{print "PROVIDE("$8" = 0x"$2");"}' > ${VARIANT_DIR}/provides.ld
97-
$READELF --wide -s ${BUILD_DIR}/zephyr/zephyr.elf | grep kheap_llext_heap | awk -F' ' '{print "PROVIDE("$8" = 0x"$2");"}' >> ${VARIANT_DIR}/provides.ld
98-
$READELF --wide -s ${BUILD_DIR}/zephyr/zephyr.elf | grep kheap_llext_heap | awk -F' ' '{print "PROVIDE(kheap_llext_heap_size = "$3");"}' >> ${VARIANT_DIR}/provides.ld
99-
$READELF --wide -s ${BUILD_DIR}/zephyr/zephyr.elf | grep kheap__system_heap | awk -F' ' '{print "PROVIDE("$8" = 0x"$2");"}' >> ${VARIANT_DIR}/provides.ld
100-
$READELF --wide -s ${BUILD_DIR}/zephyr/zephyr.elf | grep kheap__system_heap | awk -F' ' '{print "PROVIDE(kheap__system_heap_size = "$3");"}' >> ${VARIANT_DIR}/provides.ld
101-
cat ${BUILD_DIR}/zephyr/zephyr.map | grep __device_dts_ord | grep -v rodata | grep -v llext_const_symbol | awk -F' ' '{print "PROVIDE("$2" = "$1");"}' >> ${VARIANT_DIR}/provides.ld
102-
#TEXT_START=`cat variants/$variant/$variant.overlay | grep user_sketch: | cut -f2 -d"@" | cut -f1 -d"{"`
103-
TEXT_START=`$GDB --quiet -ex "p/x sketch_base_addr" ${BUILD_DIR}/zephyr/zephyr.elf -ex "exit" | grep "= 0x" | cut -f 2 -d"="`
104-
echo "PROVIDE(_sketch_start = $TEXT_START);" >> ${VARIANT_DIR}/provides.ld
105-
106-
sed -i 's/PROVIDE(malloc =/PROVIDE(__wrap_malloc =/g' ${VARIANT_DIR}/provides.ld
107-
sed -i 's/PROVIDE(free =/PROVIDE(__wrap_free =/g' ${VARIANT_DIR}/provides.ld
108-
sed -i 's/PROVIDE(realloc =/PROVIDE(__wrap_realloc =/g' ${VARIANT_DIR}/provides.ld
109-
sed -i 's/PROVIDE(calloc =/PROVIDE(__wrap_calloc =/g' ${VARIANT_DIR}/provides.ld
110-
sed -i 's/PROVIDE(random =/PROVIDE(__wrap_random =/g' ${VARIANT_DIR}/provides.ld
93+
echo "Generating exported symbol scripts"
94+
extra/gen_provides.py "${BUILD_DIR}/zephyr/zephyr.elf" -LF \
95+
"+kheap_llext_heap" \
96+
"+kheap__system_heap" \
97+
"*sketch_base_addr=_sketch_start" \
98+
"malloc=__wrap_malloc" \
99+
"free=__wrap_free" \
100+
"realloc=__wrap_realloc" \
101+
"calloc=__wrap_calloc" \
102+
"random=__wrap_random" > ${VARIANT_DIR}/provides.ld
111103

112104
cmake -P extra/gen_arduino_files.cmake $variant

extra/gen_provides.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
#!/usr/bin/env python
2+
import argparse
3+
import hashlib
4+
import itertools
5+
import os
6+
import re
7+
import struct
8+
import sys
9+
import textwrap
10+
import traceback
11+
12+
from elftools.construct.macros import UNInt32, UNInt64
13+
from elftools.common.exceptions import ELFError
14+
from elftools.common.utils import parse_cstring_from_stream, struct_parse
15+
from elftools.elf.elffile import ELFFile
16+
from elftools.elf.sections import SymbolTableSection
17+
18+
19+
# Parser for one pointer-sized integer of the ELF file being processed.
# Left as None at import time; main() assigns it once the ELF class of the
# input is known. get_ptr_at() and the __llext_sym_* lookups depend on it.
NativePtr = None
20+
21+
def get_str_at(elf, addr):
    """Return the NUL-terminated string stored at virtual address *addr*.

    The address is translated to a file offset through the first allocated
    section that has file contents and covers *addr*.

    Args:
        elf: the parsed ELF file; must provide ``iter_sections()`` and
            ``stream``.
        addr: virtual address of the first character, or ``None``.

    Returns:
        The string decoded as UTF-8 (invalid bytes are replaced), or ``None``
        when *addr* is ``None``, is not backed by file data in any allocated
        section, or no terminator is found before the end of the stream.
    """
    if addr is None:
        # Callers chain get_ptr_at() into this function: propagate its
        # "not found" result instead of failing on the comparisons below.
        return None

    shf_alloc = 0x2  # SHF_ALLOC: the section occupies memory at run time

    for section in elf.iter_sections():
        # Sections without file contents cannot be read, and non-allocated
        # sections (debug info, symbol tables, ...) have no meaningful
        # sh_addr, so they must not be matched against a run-time address.
        if section['sh_type'] == 'SHT_NOBITS' or not section['sh_flags'] & shf_alloc:
            continue

        start = section['sh_addr']
        if not start <= addr < start + section['sh_size']:
            continue

        file_offset = section['sh_offset'] + addr - start
        raw = parse_cstring_from_stream(elf.stream, file_offset)
        # parse_cstring_from_stream() returns None for an unterminated string
        return None if raw is None else raw.decode('utf-8', errors='replace')

    return None
30+
31+
def get_ptr_at(elf, addr):
    """Read the pointer-sized integer stored at virtual address *addr*.

    The address is translated to a file offset through the first allocated
    section that has file contents and covers *addr*; the value is then
    parsed with the module-level ``NativePtr`` field, which must have been
    set beforehand.

    Args:
        elf: the parsed ELF file; must provide ``iter_sections()`` and
            ``stream``.
        addr: virtual address of the pointer, or ``None``.

    Returns:
        The integer read from the file, or ``None`` when *addr* is ``None``
        or is not backed by file data in any allocated section.
    """
    if addr is None:
        # Allows chained lookups (pointer to pointer) to fail softly.
        return None

    shf_alloc = 0x2  # SHF_ALLOC: the section occupies memory at run time

    for section in elf.iter_sections():
        # Sections without file contents cannot be read, and non-allocated
        # sections (debug info, symbol tables, ...) have no meaningful
        # sh_addr, so they must not be matched against a run-time address.
        if section['sh_type'] == 'SHT_NOBITS' or not section['sh_flags'] & shf_alloc:
            continue

        start = section['sh_addr']
        if not start <= addr < start + section['sh_size']:
            continue

        file_offset = section['sh_offset'] + addr - start
        return struct_parse(NativePtr, elf.stream, file_offset)

    return None
40+
41+
def get_all_syms(elf):
    """Return a dict mapping every symbol name in *elf* to its symbol entry.

    All symbol table sections are scanned in file order. When the same name
    occurs more than once, the entry seen last is the one that is kept.
    """
    symbol_tables = (
        section
        for section in elf.iter_sections()
        if isinstance(section, SymbolTableSection)
    )
    return {
        entry.name: entry
        for table in symbol_tables
        for entry in table.iter_symbols()
    }
50+
51+
def get_llext_syms(elf):
    """Collect the symbols exported through ``__llext_sym_*`` entries.

    Each ELF symbol whose name starts with ``__llext_sym_`` is read as two
    consecutive pointer-sized fields: the address of the exported name
    string, followed by the exported value.

    Args:
        elf: the parsed ELF file. ``NativePtr`` must already be set.

    Returns:
        A dict mapping each exported name to its value. Entries whose name
        cannot be resolved, or whose value is zero or unreadable, are
        skipped.
    """
    syms = {}
    for section in elf.iter_sections():
        if not isinstance(section, SymbolTableSection):
            continue
        for symbol in section.iter_symbols():
            if not symbol.name.startswith("__llext_sym_"):
                continue

            entry_addr = symbol['st_value']
            name_addr = get_ptr_at(elf, entry_addr)
            if name_addr is None:
                # The entry is not backed by file data: nothing to resolve.
                continue

            sym_name = get_str_at(elf, name_addr)
            sym_value = get_ptr_at(elf, entry_addr + NativePtr.length)
            # Never store an unresolved (None) name as a key.
            if sym_name is not None and sym_value:
                syms[sym_name] = sym_value

    return syms
65+
66+
def _parse_symbol_specs(argparser, specs):
    """Split the <sym> command-line definitions into lookup tables.

    A leading '*' (dereference) or '+' (also export the size) modifier is
    removed first, so that it can be combined with every kind of definition,
    regexps included. Malformed definitions are reported via
    ``argparser.error()``, which terminates the program with status 2.

    Returns:
        A tuple ``(exact_syms, regex_syms, deref_syms, deref_regexes,
        sized_syms, sized_regexes, rename_map)``. The ``*_syms`` items are
        sets of exact names, except ``regex_syms`` which, like the
        ``*_regexes`` items, is a list of compiled patterns. ``rename_map``
        maps an exact name to its output name.
    """
    exact_syms = set()
    regex_syms = []
    deref_syms = set()
    deref_regexes = []
    sized_syms = set()
    sized_regexes = []
    rename_map = {}

    for spec in specs:
        modifier = spec[:1] if spec[:1] in ('*', '+') else ''
        sym = spec[len(modifier):]
        if not sym:
            argparser.error(f"invalid symbol definition {spec!r}: missing symbol name")

        if sym.startswith('/'):
            # Regexp, anchored so that it must match the whole symbol name.
            # Compiled once here so that a bad pattern is reported up front.
            try:
                pattern = re.compile(f"^{sym[1:]}$")
            except re.error as ex:
                argparser.error(f"invalid symbol definition {spec!r}: bad regexp ({ex})")
            regex_syms.append(pattern)
            if modifier == '*':
                deref_regexes.append(pattern)
            elif modifier == '+':
                sized_regexes.append(pattern)
            continue

        if '=' in sym:
            # Rename expression: <name in ELF>=<name in output>
            sym, _, new_sym = sym.partition('=')
            if not sym or not new_sym or '=' in new_sym:
                argparser.error(f"invalid symbol definition {spec!r}: expected <sym>=<new_name>")
            rename_map[sym] = new_sym

        exact_syms.add(sym)
        if modifier == '*':
            deref_syms.add(sym)
        elif modifier == '+':
            sized_syms.add(sym)

    return (exact_syms, regex_syms, deref_syms, deref_regexes,
            sized_syms, sized_regexes, rename_map)


def _matches(name, exact_names, patterns):
    """Tell whether *name* is in *exact_names* or matches one of *patterns*."""
    return name in exact_names or any(p.match(name) for p in patterns)


def main():
    """Command-line entry point: print PROVIDE() statements for an ELF file.

    Symbols are selected by the <sym> definitions given on the command line,
    by ``--funcs`` (all global functions) and by ``--llext`` (symbols listed
    in ``__llext_sym_*`` entries). The resulting linker script fragment is
    written to standard output, sorted by symbol name.

    The process exits with status 1 when the file is not a valid ELF, when
    the same output name is produced with two different values, or when no
    symbol is selected; malformed <sym> definitions exit with status 2.
    """
    global NativePtr

    # parse the command-line arguments
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Extract symbols from ELF files",
        epilog=textwrap.dedent('''\
            SYMBOL DEFINITION
            -----------------
            Each <sym> describes symbol names to extract. This can be an exact symbol
            name, a regular expression, or a rename expression. Also, the symbol can be
            dereferenced, so that the pointer value stored at the symbol address is
            exported, or the size of the symbol can be exported as well.
            The exact rules are as follows:

            - if <sym> starts with a slash ('/'), the rest is treated as a regexp. All
              symbols matching the regexp will be extracted. Can be combined with
              dereferencing, but not renames.
            - if <sym> does not start with a slash, but contains an equals sign ('='),
              it is treated as a rename expression, where the part before the equals
              is the symbol name to extract, and the part after the equals is
              the new name to use in the output.
            - if the first char of <sym> is an asterisk ('*'), the symbol is
              dereferenced, i.e. the pointer value stored at the symbol address is
              exported instead of the symbol address itself.
            - if the first char of <sym> is a plus ('+'), in addition to itself, a
              second symbol called '<sym>_size' is defined with the size of the
              current symbol.

            For example, the symbol definition:

                *sketch_base_addr=__sketch_start

            will export the value stored at the address of the 'sketch_base_addr' symbol
            as '__sketch_start', while

                /__device_dts_ord_.*

            will export all symbols starting with '__device_dts_ord_' as-is. Also,

                +kheap_llext_heap

            will export the value of the 'kheap_llext_heap' symbol and its size in a
            separate 'kheap_llext_heap_size' symbol.
            '''))
    argparser.add_argument('-v', '--verbose',
                           action='store_true',
                           help='Write the source of the symbol definition as a comment')
    argparser.add_argument('-L', '--llext',
                           action='store_true',
                           help='Extract symbols from the __llext_sym_* symbols')
    argparser.add_argument('-F', '--funcs',
                           action='store_true',
                           help='Extract all public functions')
    argparser.add_argument('file',
                           help='ELF file to parse')
    argparser.add_argument('syms', nargs='*',
                           help='Symbols to export')

    args = argparser.parse_intermixed_args()

    (exact_syms, regex_syms, deref_syms, deref_regexes,
     sized_syms, sized_regexes, rename_map) = _parse_symbol_specs(argparser, args.syms)

    with open(args.file, 'rb') as file:
        try:
            elf_sha1 = hashlib.sha1(file.read()).hexdigest()
            file.seek(0)
            elf = ELFFile(file)
        except ELFError as ex:
            sys.stdout.flush()
            sys.stderr.write('ELF error: %s\n' % ex)
            traceback.print_exc()
            sys.exit(1)

        # Pointer field with the word size *and* byte order of the ELF file,
        # rather than the byte order of the machine running this script.
        NativePtr = elf.structs.Elf_addr("ptr")

        all_syms = get_all_syms(elf)
        out_syms = {}
        fail = False

        for name, sym in all_syms.items():
            # ELF symbol describing the exported object (used for its size)
            size_sym = sym
            if _matches(name, exact_syms, regex_syms):
                comment = "cmd_line"
                value = sym['st_value']
            elif args.funcs and (sym['st_info']['type'] == 'STT_FUNC'
                                 and sym['st_info']['bind'] == 'STB_GLOBAL'):
                comment = "public_fn"
                value = sym['st_value']
            elif args.llext and name.startswith("__llext_sym_"):
                comment = "llext_sym"
                # The entry holds two pointers: exported name, exported value
                llext_sym_addr = sym['st_value']
                name_addr = get_ptr_at(elf, llext_sym_addr)
                name = get_str_at(elf, name_addr) if name_addr is not None else None
                if name is None:
                    continue
                value = get_ptr_at(elf, llext_sym_addr + NativePtr.length)
                # 'sym' is the table entry, not the exported object: take
                # the size from the real symbol, when the ELF has one.
                size_sym = all_syms.get(name)
            else:
                continue

            # Modifiers and renames are looked up with the name found in the
            # ELF file, i.e. before any rename is applied.
            if value is not None and _matches(name, deref_syms, deref_regexes):
                value = get_ptr_at(elf, value)
            sized = _matches(name, sized_syms, sized_regexes)
            out_name = rename_map.get(name, name)

            # Skip symbols without a usable value (zero or unreadable)
            if not value:
                continue

            if out_name in out_syms:
                if out_syms[out_name][0] != value:
                    sys.stderr.write(
                        f"Warning: duplicate symbol {out_name} with different values: "
                        f"{out_syms[out_name][0]:#010x} vs {value:#010x}\n")
                    fail = True
                out_syms[out_name][1].append(comment)
            else:
                out_syms[out_name] = (value, [comment])

            if sized and size_sym is not None:
                # The size symbol follows the output name, so a renamed
                # symbol gets a matching '<new_name>_size' companion.
                out_syms[out_name + "_size"] = (size_sym['st_size'], [f"size of {out_name}"])

    if not out_syms:
        sys.stderr.write("No symbols found matching the criteria.\n")
        fail = True

    if fail:
        sys.exit(1)

    print(f"""
/*
 * Automatically generated by {os.path.basename(sys.argv[0])}, do not edit!
 *
 * Source: {args.file} (SHA1: {elf_sha1})
 */

""")
    comment = ""
    for name, (value, comments) in sorted(out_syms.items(), key=lambda x: x[0]):
        if args.verbose:
            comment = f"\t/* {', '.join(sorted(comments))} */"
        print(f"PROVIDE({name} = {value:#010x});{comment}")
239+
240+
#-------------------------------------------------------------------------------
241+
if __name__ == '__main__':
    # Run the command-line tool only when executed as a script, not on import.
    main()

0 commit comments

Comments
 (0)