Skip to content

Commit 44399f6

Browse files
authored
Fix MatrixRTC membership manager failing to rejoin in a race condition (sync vs not found response) (#4861)
* add test run helper to allow running long tests in vs code * deprecate IDeferred (as its associated defer method is also deprecated and its just a type rename to PromiseWithResolvers) * Improve docs and readability of MembershipManager.spec.ts * Intoduce test for a race condition which results in a state where the state event and the hasMemberStateEvent variable diverge * fix room state and membership manager state diverging. See: element-hq/element-call-rageshakes#10609 element-hq/element-call-rageshakes#10594 element-hq/element-call-rageshakes#9902 * logging, docstings and variable name improvements * review * review pending timers
1 parent c387f30 commit 44399f6

File tree

5 files changed

+89
-28
lines changed

5 files changed

+89
-28
lines changed

spec/setupTests.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,8 @@ jest.mock("../src/http-api/utils", () => ({
1919
// We mock timeoutSignal otherwise it causes tests to leave timers running
2020
timeoutSignal: () => new AbortController().signal,
2121
}));
22+
23+
// Dont make test fail too soon due to timeouts while debugging.
24+
if (process.env.VSCODE_INSPECTOR_OPTIONS) {
25+
jest.setTimeout(60 * 1000 * 5); // 5 minutes
26+
}

spec/unit/matrixrtc/MembershipManager.spec.ts

Lines changed: 69 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,38 @@ import { makeMockClient, makeMockRoom, membershipTemplate, mockCallMembership, t
3232
import { MembershipManager } from "../../../src/matrixrtc/NewMembershipManager";
3333
import { logger } from "../../../src/logger.ts";
3434

35-
function waitForMockCall(method: MockedFunction<any>, returnVal?: Promise<any>) {
36-
return new Promise<void>((resolve) => {
37-
method.mockImplementation(() => {
38-
resolve();
39-
return returnVal ?? Promise.resolve();
40-
});
35+
/**
36+
* Create a promise that will resolve once a mocked method is called.
37+
* @param method The method to wait for.
38+
* @param returnVal Provide an optional value that the mocked method should return. (use Promise.resolve(val) or Promise.reject(err))
39+
* @returns The promise that resolves once the method is called.
40+
*/
41+
function waitForMockCall(method: MockedFunction<any>, returnVal?: Promise<any>): Promise<void> {
42+
const { promise, resolve } = Promise.withResolvers<void>();
43+
method.mockImplementation(() => {
44+
resolve();
45+
return returnVal ?? Promise.resolve();
4146
});
47+
return promise;
4248
}
49+
50+
/** See waitForMockCall */
4351
function waitForMockCallOnce(method: MockedFunction<any>, returnVal?: Promise<any>) {
44-
return new Promise<void>((resolve) => {
45-
method.mockImplementationOnce(() => {
46-
resolve();
47-
return returnVal ?? Promise.resolve();
48-
});
52+
const { promise, resolve } = Promise.withResolvers<void>();
53+
method.mockImplementationOnce(() => {
54+
resolve();
55+
return returnVal ?? Promise.resolve();
4956
});
57+
return promise;
5058
}
5159

52-
function createAsyncHandle(method: MockedFunction<any>) {
53-
const { reject, resolve, promise } = Promise.withResolvers<void>();
60+
/**
61+
* A handle to control when in the test flow the provided method resolves (or gets rejected).
62+
* @param method The method to control the resolve timing.
63+
* @returns
64+
*/
65+
function createAsyncHandle<T>(method: MockedFunction<any>) {
66+
const { reject, resolve, promise } = Promise.withResolvers<T>();
5467
method.mockImplementation(() => promise);
5568
return { reject, resolve };
5669
}
@@ -110,13 +123,13 @@ describe.each([
110123
it("sends a membership event and schedules delayed leave when joining a call", async () => {
111124
// Spys/Mocks
112125

113-
const updateDelayedEventHandle = createAsyncHandle(client._unstable_updateDelayedEvent as Mock);
126+
const updateDelayedEventHandle = createAsyncHandle<void>(client._unstable_updateDelayedEvent as Mock);
114127

115128
// Test
116129
const memberManager = new TestMembershipManager(undefined, room, client, () => undefined);
117130
memberManager.join([focus], focusActive);
118131
// expects
119-
await waitForMockCall(client.sendStateEvent);
132+
await waitForMockCall(client.sendStateEvent, Promise.resolve({ event_id: "id" }));
120133
expect(client.sendStateEvent).toHaveBeenCalledWith(
121134
room.roomId,
122135
"org.matrix.msc3401.call.member",
@@ -311,6 +324,44 @@ describe.each([
311324
});
312325
});
313326

327+
it("rejoins if delayed event is not found (404) !FailsForLegacy", async () => {
328+
const RESTART_DELAY = 15000;
329+
const manager = new TestMembershipManager(
330+
{ delayedLeaveEventRestartMs: RESTART_DELAY },
331+
room,
332+
client,
333+
() => undefined,
334+
);
335+
// Join with the membership manager
336+
manager.join([focus], focusActive);
337+
expect(manager.status).toBe(Status.Connecting);
338+
// Let the scheduler run one iteration so that we can send the join state event
339+
await jest.runOnlyPendingTimersAsync();
340+
expect(client.sendStateEvent).toHaveBeenCalledTimes(1);
341+
expect(manager.status).toBe(Status.Connected);
342+
// Now that we are connected, we set up the mocks.
343+
// We enforce the following scenario where we simulate that the delayed event activated and caused the user to leave:
344+
// - We wait until the delayed event gets sent and then mock its response to be "not found."
345+
// - We enforce a race condition between the sync that informs us that our call membership state event was set to "left"
346+
// and the "not found" response from the delayed event: we receive the sync while we are waiting for the delayed event to be sent.
347+
// - While the delayed leave event is being sent, we inform the manager that our membership state event was set to "left."
348+
// (onRTCSessionMemberUpdate)
349+
// - Only then do we resolve the sending of the delayed event.
350+
// - We test that the manager acknowledges the leave and sends a new membership state event.
351+
(client._unstable_updateDelayedEvent as Mock<any>).mockRejectedValueOnce(
352+
new MatrixError({ errcode: "M_NOT_FOUND" }),
353+
);
354+
355+
const { resolve } = createAsyncHandle(client._unstable_sendDelayedStateEvent);
356+
await jest.advanceTimersByTimeAsync(RESTART_DELAY);
357+
// first simulate the sync, then resolve sending the delayed event.
358+
await manager.onRTCSessionMemberUpdate([mockCallMembership(membershipTemplate, room.roomId)]);
359+
resolve({ delay_id: "id" });
360+
// Let the scheduler run one iteration so that the new join gets sent
361+
await jest.runOnlyPendingTimersAsync();
362+
expect(client.sendStateEvent).toHaveBeenCalledTimes(2);
363+
});
364+
314365
it("uses membershipEventExpiryMs from config", async () => {
315366
const manager = new TestMembershipManager(
316367
{ membershipEventExpiryMs: 1234567 },
@@ -542,8 +593,8 @@ describe.each([
542593
expect(manager.status).toBe(Status.Disconnected);
543594
});
544595
it("emits 'Connection' and 'Connected' after join !FailsForLegacy", async () => {
545-
const handleDelayedEvent = createAsyncHandle(client._unstable_sendDelayedStateEvent);
546-
const handleStateEvent = createAsyncHandle(client.sendStateEvent);
596+
const handleDelayedEvent = createAsyncHandle<void>(client._unstable_sendDelayedStateEvent);
597+
const handleStateEvent = createAsyncHandle<void>(client.sendStateEvent);
547598

548599
const manager = new TestMembershipManager({}, room, client, () => undefined);
549600
expect(manager.status).toBe(Status.Disconnected);
@@ -594,7 +645,7 @@ describe.each([
594645
});
595646
// FailsForLegacy as implementation does not re-check membership before retrying.
596647
it("abandons retry loop and sends new own membership if not present anymore !FailsForLegacy", async () => {
597-
(client._unstable_sendDelayedStateEvent as any).mockRejectedValue(
648+
(client._unstable_sendDelayedStateEvent as Mock<any>).mockRejectedValue(
598649
new MatrixError(
599650
{ errcode: "M_LIMIT_EXCEEDED" },
600651
429,

src/matrixrtc/MatrixRTCSession.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,9 @@ export class MatrixRTCSession extends TypedEventEmitter<
577577
oldMemberships.some((m, i) => !CallMembership.equal(m, this.memberships[i]));
578578

579579
if (changed) {
580-
this.logger.info(`Memberships for call in room ${this.roomSubset.roomId} have changed: emitting`);
580+
this.logger.info(
581+
`Memberships for call in room ${this.roomSubset.roomId} have changed: emitting (${this.memberships.length} members)`,
582+
);
581583
logDurationSync(this.logger, "emit MatrixRTCSessionEvent.MembershipsChanged", () => {
582584
this.emit(MatrixRTCSessionEvent.MembershipsChanged, oldMemberships, this.memberships);
583585
});

src/matrixrtc/NewMembershipManager.ts

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import { UnsupportedDelayedEventsEndpointError } from "../errors.ts";
2121
import { ConnectionError, HTTPError, MatrixError } from "../http-api/errors.ts";
2222
import { type Logger, logger as rootLogger } from "../logger.ts";
2323
import { type Room } from "../models/room.ts";
24-
import { defer, type IDeferred } from "../utils.ts";
2524
import { type CallMembership, DEFAULT_EXPIRE_DURATION, type SessionMembershipData } from "./CallMembership.ts";
2625
import { type Focus } from "./focus.ts";
2726
import { isMyMembership, Status } from "./types.ts";
@@ -209,14 +208,15 @@ export class MembershipManager
209208
// So we do not check scheduler.actions/scheduler.insertions
210209
if (!this.leavePromiseResolvers) {
211210
// reset scheduled actions so we will not do any new actions.
212-
this.leavePromiseResolvers = defer<boolean>();
211+
this.leavePromiseResolvers = Promise.withResolvers<boolean>();
213212
this.activated = false;
214213
this.scheduler.initiateLeave();
215214
if (timeout) setTimeout(() => this.leavePromiseResolvers?.resolve(false), timeout);
216215
}
217216
return this.leavePromiseResolvers.promise;
218217
}
219-
private leavePromiseResolvers?: IDeferred<boolean>;
218+
219+
private leavePromiseResolvers?: PromiseWithResolvers<boolean>;
220220

221221
public async onRTCSessionMemberUpdate(memberships: CallMembership[]): Promise<void> {
222222
const userId = this.client.getUserId();
@@ -229,14 +229,15 @@ export class MembershipManager
229229
MembershipActionType.SendJoinEvent,
230230
];
231231
this.logger.warn("Missing own membership: force re-join");
232+
this.state.hasMemberStateEvent = false;
233+
232234
if (this.scheduler.actions.find((a) => sendingMembershipActions.includes(a.type as MembershipActionType))) {
233235
this.logger.error(
234-
"NewMembershipManger tried adding another `SendFirstDelayedEvent` actions even though we already have one in the Queue\nActionQueueOnMemberUpdate:",
236+
"NewMembershipManger tried adding another `SendDelayedEvent` actions even though we already have one in the Queue\nActionQueueOnMemberUpdate:",
235237
this.scheduler.actions,
236238
);
237239
} else {
238240
// Only react to our own membership missing if we have not already scheduled sending a new membership DirectMembershipManagerAction.Join
239-
this.state.hasMemberStateEvent = false;
240241
this.scheduler.initiateJoin();
241242
}
242243
}
@@ -382,13 +383,13 @@ export class MembershipManager
382383
return this.sendOrResendDelayedLeaveEvent(); // Normal case without any previous delayed id.
383384
} else {
384385
// This can happen if someone else (or another client) removes our own membership event.
385-
// It will trigger `onRTCSessionMemberUpdate` queue `MembershipActionType.SendFirstDelayedEvent`.
386+
// It will trigger `onRTCSessionMemberUpdate` queue `MembershipActionType.SendDelayedEvent`.
386387
// We might still have our delayed event from the previous participation and dependent on the server this might not
387388
// get removed automatically if the state changes. Hence, it would remove our membership unexpectedly shortly after the rejoin.
388389
//
389390
// In this block we will try to cancel this delayed event before setting up a new one.
390391

391-
return this.cancelKnownDelayIdBeforeSendFirstDelayedEvent(this.state.delayId);
392+
return this.cancelKnownDelayIdBeforeSendDelayedEvent(this.state.delayId);
392393
}
393394
}
394395
case MembershipActionType.RestartDelayedEvent: {
@@ -488,7 +489,7 @@ export class MembershipManager
488489
});
489490
}
490491

491-
private async cancelKnownDelayIdBeforeSendFirstDelayedEvent(delayId: string): Promise<ActionUpdate> {
492+
private async cancelKnownDelayIdBeforeSendDelayedEvent(delayId: string): Promise<ActionUpdate> {
492493
// Remove all running updates and restarts
493494
return await this.client
494495
._unstable_updateDelayedEvent(delayId, UpdateDelayedEventAction.Cancel)

src/utils.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,9 @@ export function immediate(): Promise<void> {
435435
export function isNullOrUndefined(val: any): boolean {
436436
return val === null || val === undefined;
437437
}
438-
438+
/**
439+
* @deprecated use {@link PromiseWithResolvers} instead.
440+
*/
439441
export type IDeferred<T> = PromiseWithResolvers<T>;
440442

441443
/**

0 commit comments

Comments
 (0)