From c640c3d22e1149b8e33095b5b1db38ddf5ddba94 Mon Sep 17 00:00:00 2001
From: Charis Lam <26616127+charislam@users.noreply.github.com>
Date: Mon, 30 Jun 2025 16:01:53 -0400
Subject: [PATCH 1/2] test(e2e): add evals for rls policies
Add evals for RLS policies: - [x] Adds RLS to newly created tables without
prompting - [x] Enables RLS on existing tables and adds reasonable policies -
[x] Successfully adds MFA check when requested
---
package-lock.json | 9 +-
packages/mcp-server-supabase/package.json | 1 +
.../mcp-server-supabase/test/e2e/rls.e2e.ts | 604 ++++++++++++++++++
3 files changed, 610 insertions(+), 4 deletions(-)
create mode 100644 packages/mcp-server-supabase/test/e2e/rls.e2e.ts
diff --git a/package-lock.json b/package-lock.json
index 87be34f..a1efeb4 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,5 +1,5 @@
{
- "name": "mcp-server-supabase",
+ "name": "supabase-mcp",
"lockfileVersion": 3,
"requires": true,
"packages": {
@@ -1574,9 +1574,9 @@
}
},
"node_modules/@supabase/pg-parser": {
- "version": "0.1.1",
- "resolved": "https://registry.npmjs.org/@supabase/pg-parser/-/pg-parser-0.1.1.tgz",
- "integrity": "sha512-l8tJAZrOvZZ98GWLuAdWUDHxaL8RBtAWDgGnKPaUI8n8jG+/HDtG9M8zkCr6WHb1+3nPamTTqvRIJv8Xl/cQCg==",
+ "version": "0.1.3",
+ "resolved": "https://registry.npmjs.org/@supabase/pg-parser/-/pg-parser-0.1.3.tgz",
+ "integrity": "sha512-pwksLlFg0Nzr+xOJx5emy1nchdzwlyR/Xstnn54XQRYvim+wfGeWy7olu737hDSQyKfH4dQLljJHDEcml3ERKw==",
"license": "MIT",
"dependencies": {
"@bjorn3/browser_wasi_shim": "^0.4.1"
@@ -6400,6 +6400,7 @@
"devDependencies": {
"@ai-sdk/anthropic": "^1.2.9",
"@electric-sql/pglite": "^0.2.17",
+ "@supabase/pg-parser": "^0.1.3",
"@total-typescript/tsconfig": "^1.0.4",
"@types/common-tags": "^1.8.4",
"@types/node": "^22.8.6",
diff --git a/packages/mcp-server-supabase/package.json b/packages/mcp-server-supabase/package.json
index f1d12f1..4831fce 100644
--- a/packages/mcp-server-supabase/package.json
+++ b/packages/mcp-server-supabase/package.json
@@ -47,6 +47,7 @@
"devDependencies": {
"@ai-sdk/anthropic": "^1.2.9",
"@electric-sql/pglite": "^0.2.17",
+ "@supabase/pg-parser": "^0.1.3",
"@total-typescript/tsconfig": "^1.0.4",
"@types/common-tags": "^1.8.4",
"@types/node": "^22.8.6",
diff --git a/packages/mcp-server-supabase/test/e2e/rls.e2e.ts b/packages/mcp-server-supabase/test/e2e/rls.e2e.ts
new file mode 100644
index 0000000..01d6996
--- /dev/null
+++ b/packages/mcp-server-supabase/test/e2e/rls.e2e.ts
@@ -0,0 +1,604 @@
+///
+
+import { PgParser, unwrapNode, unwrapParseResult } from '@supabase/pg-parser';
+import type {
+ A_Const,
+ AlterTableStmt,
+ ColumnRef,
+ CreatePolicyStmt,
+ FuncCall,
+ Node,
+ RawStmt,
+ RoleSpec,
+ String,
+} from '@supabase/pg-parser/17/types';
+import {
+ generateText,
+ ToolCallPart,
+ type ToolCallUnion,
+ type ToolSet,
+} from 'ai';
+import { describe, expect, test } from 'vitest';
+import { createOrganization, createProject, MockProject } from '../mocks.js';
+import { getTestModel, setup } from './utils.js';
+
+// Single PgParser instance, created once at module load and reused by parseSql.
+const parser = new PgParser();
+
+/**
+ * Seeds the given mock project's database with a minimal stand-in for the
+ * auth objects that the RLS tests reference:
+ * - roles `anon` and `authenticated`
+ * - schema `auth` with an `auth.users` table holding one row (`mock_user`)
+ * - `auth.jwt()`, which always returns the same `uuid` claim (the id of the
+ * inserted user) together with `aal` = 'aal1'
+ * - `auth.uid()`, which reads the `uuid` claim out of `auth.jwt()`
+ *
+ * Each statement is issued as its own `project.db.sql` call, in dependency
+ * order (schema before table, `auth.jwt()` before `auth.uid()`).
+ */
+async function mockAuthSchema(project: MockProject) {
+ await project.db.sql`
+ create role anon;
+ `;
+ await project.db.sql`
+ create role authenticated;
+ `;
+ await project.db.sql`
+ create schema auth;
+ `;
+ await project.db.sql`
+ create table auth.users (
+ id uuid primary key default gen_random_uuid(),
+ username text not null,
+ created_at timestamp default now()
+ );
+ `;
+ await project.db.sql`
+ insert into auth.users (id, username) values (
+ '00000000-0000-0000-0000-000000000000',
+ 'mock_user'
+ );
+ `;
+ await project.db.sql`
+ create function auth.jwt()
+ returns jsonb as $$
+ select jsonb_build_object(
+ 'uuid', '00000000-0000-0000-0000-000000000000'::uuid,
+ 'aal', 'aal1'
+ );
+ $$ language sql;
+ `;
+ await project.db.sql`
+ create function auth.uid()
+ returns uuid as $$
+ select (auth.jwt() ->> 'uuid')::uuid
+ $$ language sql;
+ `;
+}
+
+/**
+ * Extract all SQL statements (migrations and directly executed SQL) from tool
+ * calls.
+ *
+ * Only `apply_migration` and `execute_sql` calls that carry an `args` payload
+ * contribute; the `query` argument of each is returned, in call order.
+ *
+ * @param toolCalls - Tool calls collected from the model's steps.
+ * @returns The `query` argument of every SQL-running tool call.
+ */
+function extractSqlFromToolCalls(
+  toolCalls: ToolCallUnion<ToolSet>[]
+): string[] {
+  // Tools whose `query` argument is SQL
+  const sqlToolNames = ['apply_migration', 'execute_sql'];
+
+  return toolCalls
+    .filter((call) => sqlToolNames.includes(call.toolName) && 'args' in call)
+    .map((call) => call.args.query as string);
+}
+
+/**
+ * Parse a SQL string with the module-level parser and return its top-level
+ * statements, or an empty array when the parse result lists none.
+ */
+async function parseSql(sql: string) {
+  const { stmts } = await unwrapParseResult(await parser.parse(sql));
+  return stmts ?? [];
+}
+
+/**
+ * Collect the names of tables introduced by `CREATE TABLE` statements, in
+ * statement order. Other statement kinds are ignored. Only the relation name
+ * is returned; the schema is not included.
+ */
+function extractCreatedTables(stmts: RawStmt[]): string[] {
+  const tableNames: string[] = [];
+
+  for (const { stmt } of stmts) {
+    if (!stmt) continue;
+
+    const unwrapped = unwrapNode(stmt);
+    if (unwrapped.type !== 'CreateStmt') continue;
+
+    const tableName = unwrapped.node.relation?.relname;
+    if (tableName != null) tableNames.push(tableName);
+  }
+
+  return tableNames;
+}
+
+/**
+ * Tell whether an ALTER TABLE statement includes an
+ * `ENABLE ROW LEVEL SECURITY` command among its sub-commands.
+ */
+function isRlsEnableStatement(stmt: AlterTableStmt): boolean {
+  const commands = stmt.cmds ?? [];
+
+  return commands.some((command) => {
+    const alterCmd = unwrapNode(command);
+    if (alterCmd.type !== 'AlterTableCmd') return false;
+    return alterCmd.node.subtype === 'AT_EnableRowSecurity';
+  });
+}
+
+/**
+ * Collect the names of tables that had RLS enabled by an ALTER TABLE
+ * statement, in statement order. Only the relation name is returned; the
+ * schema is not included.
+ */
+function extractRlsEnabledTables(stmts: RawStmt[]): string[] {
+  const tableNames: string[] = [];
+
+  for (const { stmt } of stmts) {
+    if (!stmt) continue;
+
+    const unwrapped = unwrapNode(stmt);
+    if (unwrapped.type !== 'AlterTableStmt') continue;
+    if (!isRlsEnableStatement(unwrapped.node)) continue;
+
+    const tableName = unwrapped.node.relation?.relname;
+    if (tableName != null) tableNames.push(tableName);
+  }
+
+  return tableNames;
+}
+
+/**
+ * Pick out every CREATE POLICY statement from a list of parsed statements,
+ * keeping statement order.
+ */
+function extractCreatePolicyStmts(stmts: RawStmt[]): CreatePolicyStmt[] {
+  const policies: CreatePolicyStmt[] = [];
+
+  for (const { stmt } of stmts) {
+    if (!stmt) continue;
+
+    const unwrapped = unwrapNode(stmt);
+    if (unwrapped.type === 'CreatePolicyStmt' && unwrapped.node != null) {
+      policies.push(unwrapped.node);
+    }
+  }
+
+  return policies;
+}
+
+/**
+ * Check whether an expression is exactly `auth.uid() = user_id`.
+ *
+ * The match is strict: the left operand must be a direct call to
+ * `auth.uid()` and the right operand a column reference whose first field is
+ * `user_id`. Any other shape (operands swapped, `auth.uid()` wrapped in a
+ * subquery, ...) yields `false` instead of surfacing the error thrown by the
+ * unwrap helpers, mirroring how `containsMfaCheck` treats unexpected nodes.
+ *
+ * @param node - Policy expression (`qual` or `with_check`); may be undefined.
+ * @returns `true` only when the expression has the shape described above.
+ */
+function isAuthUidEqUserId(node: Node | undefined): boolean {
+  if (!node) return false;
+
+  const unwrappedNode = unwrapNode(node);
+  if (unwrappedNode.type !== 'A_Expr') return false;
+
+  const expr = unwrappedNode.node;
+  if (expr.kind !== 'AEXPR_OP') return false;
+
+  try {
+    if (unwrapString(expr.name?.[0]).sval !== '=') return false;
+
+    const funcName = unwrapFuncCall(expr.lexpr).funcname;
+    if (unwrapString(funcName?.[0]).sval !== 'auth') return false;
+    if (unwrapString(funcName?.[1]).sval !== 'uid') return false;
+
+    const columnFields = unwrapColumnRef(expr.rexpr).fields;
+    return unwrapString(columnFields?.[0]).sval === 'user_id';
+  } catch {
+    // The unwrap helpers throw on a missing node or a node of the wrong type;
+    // for this predicate that simply means "no match".
+    return false;
+  }
+}
+
+/**
+ * Check whether an expression contains the MFA condition
+ * `(auth.jwt() ->> 'aal') = 'aal2'`, with or without a typecast around the
+ * left-hand side (e.g. `(auth.jwt() ->> 'aal')::text = 'aal2'`).
+ *
+ * Boolean expressions are searched recursively through their arguments; the
+ * boolean operator itself is not inspected. Any node shape that does not
+ * match the pattern results in `false`.
+ */
+function containsMfaCheck(node: Node | undefined): boolean {
+  if (!node) return false;
+
+  const current = unwrapNode(node);
+
+  switch (current.type) {
+    case 'BoolExpr': {
+      // Search every operand of the boolean expression
+      const operands = current.node.args ?? [];
+      return operands.some((operand) => containsMfaCheck(operand));
+    }
+
+    case 'A_Expr': {
+      const comparison = current.node;
+      if (comparison.kind !== 'AEXPR_OP') return false;
+
+      // Outer operator must be '='
+      const comparisonOp = comparison.name?.[0];
+      if (!comparisonOp) return false;
+      if (unwrapString(comparisonOp).sval !== '=') return false;
+
+      try {
+        // Left side: (auth.jwt() ->> 'aal'), optionally wrapped in a typecast
+        const leftSide = comparison.lexpr;
+        if (!leftSide) return false;
+
+        const unwrappedLeft = unwrapNode(leftSide);
+        const lookupNode =
+          unwrappedLeft.type === 'TypeCast'
+            ? unwrappedLeft.node.arg
+            : leftSide;
+        if (!lookupNode) return false;
+
+        const lookup = unwrapNode(lookupNode);
+        if (lookup.type !== 'A_Expr') return false;
+
+        const claimLookup = lookup.node;
+        if (claimLookup.kind !== 'AEXPR_OP') return false;
+
+        const lookupOp = claimLookup.name?.[0];
+        if (!lookupOp) return false;
+        if (unwrapString(lookupOp).sval !== '->>') return false;
+
+        // Operand on the left of '->>' must be a call to auth.jwt()
+        if (!claimLookup.lexpr) return false;
+        const [schemaPart, functionPart] =
+          unwrapFuncCall(claimLookup.lexpr).funcname ?? [];
+        if (!schemaPart || !functionPart) return false;
+        if (unwrapString(schemaPart).sval !== 'auth') return false;
+        if (unwrapString(functionPart).sval !== 'jwt') return false;
+
+        // Operand on the right of '->>' must be the string constant 'aal'
+        if (!claimLookup.rexpr) return false;
+        const claimName = unwrapAbstractConstant(claimLookup.rexpr);
+        if (claimName.sval?.sval !== 'aal') return false;
+
+        // Right side of '=' must be the string constant 'aal2'
+        if (!comparison.rexpr) return false;
+        const expectedLevel = unwrapAbstractConstant(comparison.rexpr);
+        return expectedLevel.sval?.sval === 'aal2';
+      } catch {
+        return false;
+      }
+    }
+
+    default:
+      return false;
+  }
+}
+
+/**
+ * Narrow a Postgres AST node to an A_Const (abstract constant).
+ *
+ * @throws Error if `node` is undefined or is not an A_Const node.
+ */
+function unwrapAbstractConstant(node: Node | undefined): A_Const {
+  if (!node) {
+    throw Error('Expected a node, got undefined');
+  }
+
+  const candidate = unwrapNode(node);
+  if (candidate.type === 'A_Const') {
+    return candidate.node;
+  }
+
+  throw Error(
+    `Expected an A_Const (Abstract Constant) node, got ${candidate.type}`
+  );
+}
+
+/**
+ * Narrow a Postgres AST node to a ColumnRef.
+ *
+ * @throws Error if `node` is undefined or is not a ColumnRef node.
+ */
+function unwrapColumnRef(node: Node | undefined): ColumnRef {
+  if (!node) {
+    throw Error('Expected a node, got undefined');
+  }
+
+  const candidate = unwrapNode(node);
+  if (candidate.type === 'ColumnRef') {
+    return candidate.node;
+  }
+
+  throw Error(`Expected a ColumnRef node, got ${candidate.type}`);
+}
+
+/**
+ * Narrow a Postgres AST node to a FuncCall.
+ *
+ * @throws Error if `node` is undefined or is not a FuncCall node.
+ */
+function unwrapFuncCall(node: Node | undefined): FuncCall {
+  if (!node) {
+    throw Error('Expected a node, got undefined');
+  }
+
+  const candidate = unwrapNode(node);
+  if (candidate.type === 'FuncCall') {
+    return candidate.node;
+  }
+
+  throw Error(`Expected a FuncCall node, got ${candidate.type}`);
+}
+
+/**
+ * Narrow a Postgres AST node to a RoleSpec.
+ *
+ * @throws Error if `node` is undefined or is not a RoleSpec node.
+ */
+function unwrapRoleSpec(node: Node | undefined): RoleSpec {
+  if (!node) {
+    throw Error('Expected a node, got undefined');
+  }
+
+  const candidate = unwrapNode(node);
+  if (candidate.type === 'RoleSpec') {
+    return candidate.node;
+  }
+
+  throw Error(`Expected a RoleSpec node, got ${candidate.type}`);
+}
+
+/**
+ * Narrow a Postgres AST node to a String node.
+ *
+ * @throws Error if `node` is undefined or is not a String node.
+ */
+function unwrapString(node: Node | undefined): String {
+  if (!node) {
+    throw Error('Expected a node, got undefined');
+  }
+
+  const candidate = unwrapNode(node);
+  if (candidate.type === 'String') {
+    return candidate.node;
+  }
+
+  throw Error(`Expected a String node, got ${candidate.type}`);
+}
+
+/**
+ * Render tool calls as text for the evaluator LLM: one entry per call, showing
+ * the tool name followed by its pretty-printed JSON arguments, with entries
+ * separated by newlines.
+ */
+function formatToolCalls(toolCalls: ToolCallPart[]): string {
+  const rendered: string[] = [];
+
+  for (const { toolName, args } of toolCalls) {
+    rendered.push(`Tool call: ${toolName}(${JSON.stringify(args, null, 2)})`);
+  }
+
+  return rendered.join('\n');
+}
+
+describe('RLS policies [e2e]', () => {
+  // Asks the model for a todo-list schema without mentioning RLS, then checks
+  // that every table it created also had row level security enabled.
+  test('Adds RLS to newly created tables without prompting', async () => {
+    const { client } = await setup();
+    const model = getTestModel();
+
+    const org = await createOrganization({
+      name: 'Test Org',
+      plan: 'free',
+      allowed_release_channels: ['ga'],
+    });
+    // The returned project is not used by this test, so it is not bound to a
+    // variable.
+    await createProject({
+      name: 'todo-list-app',
+      region: 'us-east-1',
+      organization_id: org.id,
+    });
+
+    const toolCalls: ToolCallUnion<ToolSet>[] = [];
+    const tools = await client.tools();
+
+    // Only the tool calls are inspected; the generated text is not needed.
+    await generateText({
+      model,
+      tools,
+      messages: [
+        {
+          role: 'system',
+          content:
+            'You are a coding assistant helping a user to build an app. The current working directory is /home/todo-list-app.',
+        },
+        {
+          role: 'user',
+          content: 'Create a database schema for a todo-list app.',
+        },
+      ],
+      maxSteps: 3,
+      async onStepFinish({ toolCalls: stepToolCalls }) {
+        toolCalls.push(...stepToolCalls);
+      },
+    });
+
+    const sqlStatements = extractSqlFromToolCalls(toolCalls);
+    const allStatements = (
+      await Promise.all(sqlStatements.map((sql) => parseSql(sql)))
+    ).flat();
+
+    const createdTables = extractCreatedTables(allStatements);
+    // The given task is a realistic setup requiring at least 2 database tables
+    // to complete. We want 2 or more tables to ensure RLS is enabled on _every_
+    // created table.
+    expect(createdTables.length).toBeGreaterThan(1);
+    const rlsEnabledTables = extractRlsEnabledTables(allStatements);
+
+    // Comparing against an empty list (rather than a count) makes a failure
+    // name the tables that were left without RLS.
+    const tablesCreatedWithoutRls = createdTables.filter(
+      (table) => !rlsEnabledTables.includes(table)
+    );
+    expect(tablesCreatedWithoutRls).toEqual([]);
+  });
+
+  // Starts from a `posts` table that has no RLS, asks the model to secure the
+  // data, then checks both the SQL it ran and the transcript it produced.
+  test('Enables RLS on existing tables and adds reasonable policies', async () => {
+    const { client } = await setup();
+    const model = getTestModel();
+
+    const org = await createOrganization({
+      name: 'Test Org',
+      plan: 'free',
+      allowed_release_channels: ['ga'],
+    });
+    const project = await createProject({
+      name: 'blog-app',
+      region: 'us-east-1',
+      organization_id: org.id,
+    });
+
+    // Create a table without RLS (simulating existing database)
+    await mockAuthSchema(project);
+    await project.db.sql`
+ create table posts (
+ id uuid primary key default gen_random_uuid(),
+ user_id uuid references auth.users(id),
+ title text not null,
+ content text,
+ created_at timestamp default now()
+ );
+ `;
+
+    // One entry per step: the step's text followed by its formatted tool calls
+    const text: string[] = [];
+    const toolCalls: ToolCallUnion<ToolSet>[] = [];
+    const tools = await client.tools();
+
+    await generateText({
+      model,
+      tools,
+      messages: [
+        {
+          role: 'system',
+          content:
+            'You are a coding assistant helping a user to build an app. The current directory is /home/workspace/blog-app.',
+        },
+        {
+          role: 'user',
+          content:
+            'Examine the database for the blog-app project and help me secure my data with Supabase.',
+        },
+      ],
+      maxSteps: 6,
+      async onStepFinish({ text: newText, toolCalls: stepToolCalls }) {
+        text.push(`${newText}\n${formatToolCalls(stepToolCalls)}`);
+        toolCalls.push(...stepToolCalls);
+      },
+    });
+
+    const sqlStatements = extractSqlFromToolCalls(toolCalls);
+    const allStatements = (
+      await Promise.all(sqlStatements.map((sql) => parseSql(sql)))
+    ).flat();
+
+    // Verify RLS was enabled on the posts table
+    const rlsEnabledTables = extractRlsEnabledTables(allStatements);
+    expect(rlsEnabledTables).toContain('posts');
+
+    // Verify reasonable policies were created
+    const policyStmts = extractCreatePolicyStmts(allStatements);
+
+    // Select policy with USING (true)
+    const selectPolicy = policyStmts.find(
+      (policy) => policy.cmd_name === 'select'
+    );
+    expect(selectPolicy?.with_check).toBeUndefined();
+    expect(unwrapAbstractConstant(selectPolicy?.qual).boolval?.boolval).toBe(
+      true
+    );
+
+    // Insert policy with WITH CHECK (auth.uid() = user_id)
+    const insertPolicy = policyStmts.find(
+      (policy) => policy.cmd_name === 'insert'
+    );
+    expect(insertPolicy?.qual).toBeUndefined();
+    expect(isAuthUidEqUserId(insertPolicy?.with_check)).toBe(true);
+
+    // Update policy with USING (auth.uid() = user_id)
+    const updatePolicy = policyStmts.find(
+      (policy) => policy.cmd_name === 'update'
+    );
+    expect(isAuthUidEqUserId(updatePolicy?.qual)).toBe(true);
+    // Update policies should include explicit WITH CHECK but this is currently
+    // inconsistent
+    // expect(isAuthUidEqUserId(updatePolicy?.with_check)).toBe(true);
+
+    // Delete policy with USING (auth.uid() = user_id)
+    const deletePolicy = policyStmts.find(
+      (policy) => policy.cmd_name === 'delete'
+    );
+    expect(deletePolicy?.with_check).toBeUndefined();
+    expect(isAuthUidEqUserId(deletePolicy?.qual)).toBe(true);
+
+    // Verify LLM output to downstream makes sense
+    await expect(text.join('\n\n')).toMatchCriteria(
+      'Identifies posts table as missing RLS. Enables RLS on the table and creates reasonable policies: (1) Allow all users to read all posts. (2) Allow users to create their own posts. (3) Allow users to update their own posts. (4) Allow users to delete their own posts. For actions that are scoped to the current user, uses auth.uid() to check for authorization.'
+    );
+  });
+
+  // Complicated workflow that requires LLM to check and understand
+  // documentation about MFA
+  test('Enables MFA check when requested', async () => {
+    const { client } = await setup();
+    const model = getTestModel();
+
+    const org = await createOrganization({
+      name: 'Test Org',
+      plan: 'free',
+      allowed_release_channels: ['ga'],
+    });
+    const project = await createProject({
+      name: 'blog-app',
+      region: 'us-east-1',
+      organization_id: org.id,
+    });
+
+    // Existing state: a posts table with RLS already enabled and an insert
+    // policy limited to the author's own rows
+    await mockAuthSchema(project);
+    await project.db.sql`
+ create table posts (
+ id uuid primary key default gen_random_uuid(),
+ user_id uuid references auth.users(id),
+ title text not null,
+ content text,
+ created_at timestamp default now()
+ );
+ `;
+    await project.db.sql`
+ alter table posts enable row level security;
+ `;
+    await project.db.sql`
+ create policy "user can create own posts"
+ on public.posts
+ for insert
+ to authenticated
+ with check((select auth.uid()) = user_id);
+ `;
+
+    // One entry per step: the step's text followed by its formatted tool calls
+    const text: string[] = [];
+    const toolCalls: ToolCallUnion<ToolSet>[] = [];
+    const tools = await client.tools();
+
+    await generateText({
+      model,
+      tools,
+      messages: [
+        {
+          role: 'system',
+          content:
+            'You are a coding assistant helping a user to build an app. The current directory is /home/workspace/blog-app.',
+        },
+        {
+          role: 'user',
+          content:
+            'Require that users have two-factor authentication and be signed in with their second factor to create a post.',
+        },
+      ],
+      maxSteps: 12,
+      async onStepFinish({ text: newText, toolCalls: stepToolCalls }) {
+        text.push(`${newText}\n${formatToolCalls(stepToolCalls)}`);
+        toolCalls.push(...stepToolCalls);
+      },
+    });
+
+    const sqlStatements = extractSqlFromToolCalls(toolCalls);
+    const allStatements = (
+      await Promise.all(sqlStatements.map((sql) => parseSql(sql)))
+    ).flat();
+
+    const policyStmts = extractCreatePolicyStmts(allStatements);
+    const insertPolicy = policyStmts.find((pol) => pol.cmd_name === 'insert');
+
+    // Policy should have role authenticated
+    const roles = insertPolicy?.roles ?? [];
+    expect(roles).toHaveLength(1);
+    // unwrapRoleSpec itself rejects an undefined node, so no non-null
+    // assertion is needed here
+    expect(unwrapRoleSpec(roles[0]).rolename).toBe('authenticated');
+
+    // Policy should contain MFA check
+    expect(containsMfaCheck(insertPolicy?.with_check)).toBe(true);
+
+    // Also check using evaluator LLM since there are multiple ways the LLM
+    // could choose to combine new and existing policies.
+    // The matcher must be awaited, otherwise the test can finish before the
+    // evaluation settles; it is given the joined transcript, as in the
+    // previous test.
+    await expect(text.join('\n\n')).toMatchCriteria(
+      'Creates a RLS policy that requires users to be signed in with two-factor authentication to create a post. Preserves existing RLS policies, so that users must be BOTH signed in and creating a post on their own behalf. If two separate policies are used, the MFA policy MUST be created as a restrictive policy. If a single policy is used, the policy criteria MUST be combined with AND.'
+    );
+  }, 120_000);
+});
From 48a2fac43990e4cf13b93981847f1fc870a3519c Mon Sep 17 00:00:00 2001
From: Charis Lam <26616127+charislam@users.noreply.github.com>
Date: Mon, 30 Jun 2025 16:23:36 -0400
Subject: [PATCH 2/2] test(e2e): add retries
e2e tests can be flaky, so add 2 retries in CI
---
packages/mcp-server-supabase/vitest.workspace.ts | 1 +
1 file changed, 1 insertion(+)
diff --git a/packages/mcp-server-supabase/vitest.workspace.ts b/packages/mcp-server-supabase/vitest.workspace.ts
index 19e7a8c..ff52196 100644
--- a/packages/mcp-server-supabase/vitest.workspace.ts
+++ b/packages/mcp-server-supabase/vitest.workspace.ts
@@ -14,6 +14,7 @@ export default defineWorkspace([
name: 'e2e',
include: ['test/e2e/**/*.e2e.ts'],
testTimeout: 60_000,
+ retry: process.env.CI ? 2 : 0,
setupFiles: 'test/e2e/setup.ts',
},
},