Skip to content

Commit e951155

Browse files
committed
trace: fix parent handling (and test it!).
Testing parenting handling revealed several issues: 1. By calling "trace_span_start" when CLN_TRACEPARENT is set produces a bogus entry, for which the span_id is overwritten so we never end it. 2. We don't need to close the remote parent when we close the first child: in fact, this causes the remaining traces to be detached from the parent! 3. Suspension should return current to the parent, not to NULL. Now the traces balance as we expect. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
1 parent 924f28a commit e951155

File tree

2 files changed

+62
-32
lines changed

2 files changed

+62
-32
lines changed

common/trace.c

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,36 @@ struct span {
8282
static struct span *active_spans = NULL;
8383
static struct span *current;
8484

85+
static void init_span(struct span *s,
86+
size_t key,
87+
const char *name,
88+
struct span *parent)
89+
{
90+
struct timeabs now = time_now();
91+
92+
s->key = key;
93+
randombytes_buf(s->id, SPAN_ID_SIZE);
94+
s->start_time = (now.ts.tv_sec * 1000000) + now.ts.tv_nsec / 1000;
95+
s->parent = parent;
96+
s->tags = notleak(tal_arr(NULL, struct span_tag, 0));
97+
s->name = notleak(tal_strdup(NULL, name));
98+
s->suspended = false;
99+
100+
/* If this is a new root span we also need to associate a new
101+
* trace_id with it. */
102+
if (!s->parent) {
103+
randombytes_buf(s->trace_id, TRACE_ID_SIZE);
104+
} else {
105+
memcpy(s->parent_id, parent->id, SPAN_ID_SIZE);
106+
memcpy(s->trace_id, parent->trace_id, TRACE_ID_SIZE);
107+
}
108+
}
109+
110+
/* FIXME: forward decls for minimal patch size */
111+
static struct span *trace_span_slot(void);
112+
static size_t trace_key(const void *key);
113+
static void trace_span_clear(struct span *s);
114+
85115
/* If the `CLN_TRACEPARENT` envvar is set, we inject that as the
86116
* parent for the startup. This allows us to integrate the startup
87117
* tracing with whatever tooling we build around it. This only has an
@@ -95,7 +125,10 @@ static void trace_inject_traceparent(void)
95125
return;
96126

97127
assert(strlen(traceparent) == TRACEPARENT_LEN);
98-
trace_span_start("", active_spans);
128+
current = trace_span_slot();
129+
assert(current);
130+
131+
init_span(current, trace_key(active_spans), "", NULL);
99132
current->remote = true;
100133
assert(current && !current->parent);
101134
if (!hex_decode(traceparent + 3, 2*TRACE_ID_SIZE, current->trace_id,
@@ -104,7 +137,8 @@ static void trace_inject_traceparent(void)
104137
SPAN_ID_SIZE)) {
105138
/* We failed to parse the traceparent, abandon. */
106139
fprintf(stderr, "Failed!");
107-
trace_span_end(active_spans);
140+
trace_span_clear(current);
141+
current = NULL;
108142
}
109143
}
110144

@@ -191,9 +225,6 @@ static struct span *trace_span_find(size_t key)
191225
return NULL;
192226
}
193227

194-
/* FIXME: Forward declaration for minimal patch size */
195-
static void trace_span_clear(struct span *s);
196-
197228
/**
198229
* Find an empty slot for a new span.
199230
*/
@@ -286,7 +317,6 @@ static void trace_span_clear(struct span *s)
286317
void trace_span_start(const char *name, const void *key)
287318
{
288319
size_t numkey = trace_key(key);
289-
struct timeabs now = time_now();
290320

291321
if (disable_trace)
292322
return;
@@ -297,23 +327,7 @@ void trace_span_start(const char *name, const void *key)
297327
struct span *s = trace_span_slot();
298328
if (!s)
299329
return;
300-
s->key = numkey;
301-
randombytes_buf(s->id, SPAN_ID_SIZE);
302-
s->start_time = (now.ts.tv_sec * 1000000) + now.ts.tv_nsec / 1000;
303-
s->parent = current;
304-
s->tags = notleak(tal_arr(NULL, struct span_tag, 0));
305-
s->name = notleak(tal_strdup(NULL, name));
306-
s->suspended = false;
307-
308-
/* If this is a new root span we also need to associate a new
309-
* trace_id with it. */
310-
if (!current) {
311-
randombytes_buf(s->trace_id, TRACE_ID_SIZE);
312-
} else {
313-
memcpy(s->parent_id, current->id, SPAN_ID_SIZE);
314-
memcpy(s->trace_id, current->trace_id, TRACE_ID_SIZE);
315-
}
316-
330+
init_span(s, numkey, name, current);
317331
current = s;
318332
trace_check_tree();
319333
DTRACE_PROBE1(lightningd, span_start, s->id);
@@ -358,14 +372,6 @@ void trace_span_end(const void *key)
358372

359373
/* Now reset the span */
360374
trace_span_clear(s);
361-
362-
/* One last special case: if the parent is remote, it must be
363-
* the root. And we should terminate that trace along with
364-
* this one. */
365-
if (current && current->remote) {
366-
assert(current->parent == NULL);
367-
current = NULL;
368-
}
369375
trace_check_tree();
370376
}
371377

@@ -401,7 +407,7 @@ void trace_span_suspend_(const void *key, const char *lbl)
401407
assert(current == span);
402408
assert(!span->suspended);
403409
span->suspended = true;
404-
current = NULL;
410+
current = current->parent;
405411
DTRACE_PROBE1(lightningd, span_suspend, span->id);
406412
if (trace_to_file) {
407413
char span_id[HEX_SPAN_ID_SIZE];

tests/test_misc.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4751,3 +4751,27 @@ def test_tracing(node_factory):
47514751

47524752
assert suspended == set()
47534753
assert traces == set()
4754+
4755+
# Test parent trace
4756+
trace_fnamebase = os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, "l1.parent.trace")
4757+
l1.daemon.env["CLN_DEV_TRACE_FILE"] = trace_fnamebase
4758+
l1.daemon.env["CLN_TRACEPARENT"] = "00-00112233445566778899aabbccddeeff-0123456789abcdef-00"
4759+
l1.start()
4760+
l1.stop()
4761+
4762+
# The parent should set all the trace ids and span ids
4763+
for fname in glob.glob(f"{trace_fnamebase}.*"):
4764+
for linenum, l in enumerate(open(fname, "rt").readlines(), 1):
4765+
# In case an assertion fails
4766+
print(f"Parsing {fname}:{linenum}")
4767+
parts = l.split(maxsplit=2)
4768+
cmd = parts[0]
4769+
spanid = parts[1]
4770+
# This span doesn't actually appear anywhere
4771+
assert spanid != '0123456789abcdef'
4772+
if cmd == 'span_emit':
4773+
# Should be valid JSON
4774+
res = json.loads(parts[2])
4775+
assert res[0]['traceId'] == '00112233445566778899aabbccddeeff'
4776+
# Everyone has a parent!
4777+
assert 'parentId' in res[0]

0 commit comments

Comments
 (0)