Skip to content

Commit f50615a

Browse files
committed
Metabase: Add test harness for validating Metabase against CrateDB
A basic test case that reads CrateDB's `sys.summit` table through Metabase, after connecting CrateDB as a PostgreSQL database.
1 parent cd26ea2 commit f50615a

File tree

10 files changed

+431
-0
lines changed

10 files changed

+431
-0
lines changed

.github/dependabot.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,16 @@ updates:
9090
schedule:
9191
interval: "daily"
9292

93+
- directory: "/application/metabase"
94+
package-ecosystem: "pip"
95+
schedule:
96+
interval: "daily"
97+
98+
- directory: "/application/metabase"
99+
package-ecosystem: "docker"
100+
schedule:
101+
interval: "daily"
102+
93103
# Frameworks.
94104

95105
- directory: "/framework/dbt/basic"
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
name: Metabase
2+
3+
on:
4+
pull_request:
5+
branches: ~
6+
paths:
7+
- '.github/workflows/application-metabase.yml'
8+
- 'application/metabase/**'
9+
- '/requirements.txt'
10+
push:
11+
branches: [ main ]
12+
paths:
13+
- '.github/workflows/application-metabase.yml'
14+
- 'application/metabase/**'
15+
- '/requirements.txt'
16+
17+
# Allow job to be triggered manually.
18+
workflow_dispatch:
19+
20+
# Run job each night after CrateDB nightly has been published.
21+
schedule:
22+
- cron: '0 3 * * *'
23+
24+
# Cancel in-progress jobs when pushing to the same branch.
25+
concurrency:
26+
cancel-in-progress: true
27+
group: ${{ github.workflow }}-${{ github.ref }}
28+
29+
jobs:
30+
31+
test:
32+
name: "
33+
CrateDB: ${{ matrix.cratedb-version }}
34+
on ${{ matrix.os }}"
35+
runs-on: ${{ matrix.os }}
36+
37+
strategy:
38+
fail-fast: false
39+
matrix:
40+
os: [ "ubuntu-22.04" ]
41+
python-version: [ "3.12" ]
42+
cratedb-version: [ "nightly" ]
43+
44+
steps:
45+
46+
- name: Acquire sources
47+
uses: actions/checkout@v4
48+
49+
- name: Setup Python
50+
uses: actions/setup-python@v5
51+
with:
52+
python-version: ${{ matrix.python-version }}
53+
architecture: x64
54+
cache: "pip"
55+
cache-dependency-path: |
56+
pyproject.toml
57+
requirements.txt
58+
requirements-test.txt
59+
60+
- name: Install uv
61+
uses: astral-sh/setup-uv@v3
62+
with:
63+
version: "latest"
64+
65+
- name: Install utilities
66+
run: |
67+
uv pip install --system -r requirements.txt
68+
69+
- name: Validate application/metabase
70+
run: |
71+
ngr test --accept-no-venv application/metabase

application/metabase/README.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Verify Metabase with CrateDB
2+
3+
## About
4+
5+
This folder includes software integration tests for verifying
6+
that Metabase works well together with CrateDB.
7+
The test harness is based on Docker Compose.
8+
9+
## What's Inside
10+
11+
A basic test case that reads CrateDB's `sys.summit` table through
12+
Metabase, after connecting CrateDB as a PostgreSQL database.
13+
14+
## Setup
15+
16+
Set up a sandbox and install the packages.
17+
```bash
18+
pip install uv
19+
uv venv .venv
20+
source .venv/bin/activate
21+
uv pip install -r requirements.txt -r requirements-test.txt
22+
```
23+
24+
## Usage
25+
26+
Run integration tests.
27+
```bash
28+
pytest
29+
```
30+
31+
Watch service logs.
32+
```shell
33+
docker compose logs -f
34+
```
35+
36+
Note that the setup is configured to keep the containers alive after starting
37+
them. If you want to actively recycle them, invoke `docker compose down` before
38+
running `pytest`.

application/metabase/backlog.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# CrateDB <-> Metabase backlog
2+
3+
4+
## metabase/metabase:v0.45.4.3
5+
6+
```
7+
2024-11-22 23:22:07,139 ERROR driver.util :: Failed to connect to Database
8+
org.postgresql.util.PSQLException: The server does not support SSL.
9+
```
10+
11+
```
12+
2024-11-22 23:22:07,290 WARN metabase.email :: Failed to send email
13+
clojure.lang.ExceptionInfo: SMTP host is not set. {:cause :smtp-host-not-set}
14+
```
15+
16+
```
17+
2024-11-22 23:22:08,189 WARN sync.util :: Error running step 'sync-timezone' for postgres Database 2 'cratedb-testdrive'
18+
java.lang.Exception: Unable to parse date string '2024-11-22 23:22:08.175 ' for database engine 'postgres'
19+
```
20+
21+
```
22+
2024-11-22 23:22:08,724 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*.
23+
2024-11-22 23:22:08,724 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*.
24+
2024-11-22 23:22:08,725 WARN sync.describe-table :: Don't know how to map column type 'regclass' to a Field base_type, falling back to :type/*.
25+
2024-11-22 23:22:08,725 WARN sync.describe-table :: Don't know how to map column type '_int4' to a Field base_type, falling back to :type/*.
26+
2024-11-22 23:22:08,726 WARN sync.describe-table :: Don't know how to map column type '_int2' to a Field base_type, falling back to :type/*.
27+
...
28+
```
29+
30+
```
31+
2024-11-22 23:22:13,900 WARN sync.util :: Error fingerprinting Table 12 'sys.jobs'
32+
clojure.lang.ExceptionInfo: Error executing query: ERROR: line 2:359: no viable alternative at input 'SELECT "source"."substring531" AS "substring531", "source"."substring532" AS "substring532", "source"."substring533" AS "substring533", "source"."started" AS "started", "source"."substring534" AS "substring534", "source"."substring535" AS "substring535", "source"."substring536" AS "substring536" FROM (SELECT "sys"."jobs"."id" AS "id", ("sys"."jobs"."node"#>'
33+
```
34+
35+
```
36+
2024-11-22 23:22:14,390 WARN sync.util :: Error fingerprinting Table 13 'sys.nodes'
37+
clojure.lang.ExceptionInfo: Error executing query: ERROR: line 2:97: no viable alternative at input 'SELECT "source"."load['probe_timestamp']" AS "load['probe_timestamp']", ("source"."fs['total']"#>'
38+
```
39+
40+
```
41+
2024-11-22 23:22:23,588 ERROR models.field-values :: Error fetching field values
42+
clojure.lang.ExceptionInfo: Error executing query: ERROR: Cannot ORDER BY 'conffeqop': invalid data type 'integer_array'.
43+
44+
2024-11-22 23:22:23,599 ERROR models.field-values :: Error fetching field values
45+
clojure.lang.ExceptionInfo: Error executing query: ERROR: Cannot ORDER BY 'conkey': invalid data type 'smallint_array'.
46+
```
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
networks:
2+
metanet-demo:
3+
driver: bridge
4+
5+
services:
6+
7+
# Metabase
8+
# https://www.metabase.com/docs/latest/installation-and-operation/running-metabase-on-docker#example-docker-compose-yaml-file
9+
metabase:
10+
image: metabase/metabase:v0.45.4.3
11+
container_name: metabase
12+
hostname: metabase
13+
volumes:
14+
- /dev/urandom:/dev/random:ro
15+
ports:
16+
- 3000:3000
17+
networks:
18+
- metanet-demo
19+
healthcheck:
20+
test: curl --fail -I http://localhost:3000/api/health || exit 1
21+
interval: 15s
22+
timeout: 5s
23+
retries: 5
24+
25+
# CrateDB
26+
# https://github.com/crate/crate
27+
cratedb:
28+
image: crate/crate:nightly
29+
container_name: cratedb
30+
hostname: cratedb
31+
ports:
32+
- 4200:4200
33+
- 5432:5432
34+
networks:
35+
- metanet-demo
36+
healthcheck:
37+
# https://github.com/crate/docker-crate/pull/151/files
38+
test: curl --max-time 25 http://localhost:4200 || exit 1
39+
interval: 30s
40+
timeout: 30s
41+
42+
# Wait for all defined services to be fully available by probing their health
43+
# status, even when using `docker compose up --detach`.
44+
# https://marcopeg.com/2019/docker-compose-healthcheck/
45+
wait:
46+
image: dadarek/wait-for-dependencies
47+
depends_on:
48+
metabase:
49+
condition: service_healthy
50+
cratedb:
51+
condition: service_healthy

application/metabase/metabase_rig.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
import time
2+
from functools import lru_cache
3+
4+
import requests
5+
from metabase_api import Metabase_API
6+
7+
8+
class MetabaseRig:
    """
    Support end-to-end testing of CrateDB and Metabase.

    All HTTP calls go through one `requests.Session` held on the instance.
    Every call checks the HTTP status, so a rejected request fails with the
    server's status instead of an unrelated error further down the line.

    https://www.metabase.com/docs/latest/api-documentation

    Authenticate your requests with a session token
    https://www.metabase.com/learn/metabase-basics/administration/administration-and-operation/metabase-api#authenticate-your-requests-with-a-session-token
    """

    def __init__(self, url: str):
        """
        :param url: Base URL of the Metabase instance. A trailing slash is
                    tolerated when deriving the API URL.
        """
        # Credentials of the admin user that `setup()` creates and `login()` uses.
        self.username = "foobar@example.org"
        self.password = "123456metabase"
        # `metabase_api` client; stays `None` until `login()` has been called.
        self.mb = None

        self.url = url
        self.api_url = f"{url.rstrip('/')}/api"
        self.session = requests.Session()
        # The `id` field of the setup response; stays `None` until `setup()` ran.
        self.session_token = None

    def get_setup_token(self) -> str:
        """
        Return the `setup-token` value from the `/session/properties` endpoint.

        :raises requests.HTTPError: When the endpoint answers with an error status.
        :raises KeyError: When the response carries no `setup-token` field.
        """
        response = self.session.get(f"{self.api_url}/session/properties")
        response.raise_for_status()
        return response.json()["setup-token"]

    def setup(self):
        """
        Run Metabase setup, create admin user, and return a session ID.

        The session ID is also stored on `self.session_token`.

        https://www.metabase.com/docs/latest/api/setup#post-apisetup
        https://discourse.metabase.com/t/rest-api-for-initial-setup-process/3419

        :raises requests.HTTPError: When Metabase rejects the setup request.
        """
        response = self.session.post(f"{self.api_url}/setup", json={
            "prefs": {
                "allow_tracking": "false",
                "site_locale": "en",
                "site_name": "Hotzenplotz",
            },
            "user": {
                "password": self.password,
                "password_confirm": self.password,
                "email": self.username,
            },
            "token": self.get_setup_token(),
        })
        # Surface a rejected setup as an HTTP error rather than as a missing `id` key.
        response.raise_for_status()
        self.session_token = response.json()["id"]
        return self.session_token

    def login(self):
        """
        Log in on the shared HTTP session and create the `metabase_api` client.

        The client is stored on `self.mb`; `MetabaseDatabase` uses it to
        resolve database and table names to ids.

        :raises requests.HTTPError: When Metabase rejects the credentials.
        """
        response = self.session.post(f"{self.api_url}/session", json={
            "username": self.username,
            "password": self.password,
        })
        # Previously the response was discarded, so a failed login went unnoticed here.
        response.raise_for_status()
        self.mb = Metabase_API(self.url, self.username, self.password)

    def get_databases(self):
        """
        Return the decoded JSON body of the `/database` endpoint.

        :raises requests.HTTPError: When the endpoint answers with an error status.
        """
        response = self.session.get(f"{self.api_url}/database")
        response.raise_for_status()
        return response.json()

    def database(self, name: str) -> "MetabaseDatabase":
        """
        Return a `MetabaseDatabase` handle bound to this rig for the given name.

        Only the handle object is constructed; no HTTP request is made.
        """
        return MetabaseDatabase(rig=self, name=name)
65+
66+
67+
class MetabaseDatabase:
    """
    Handle for one Metabase database entry, addressed by its name.

    HTTP requests go through the owning rig's session (`rig.session`).
    Name-to-id lookups go through the rig's `metabase_api` client (`rig.mb`),
    which must have been created before the `id` property is read.
    """

    def __init__(self, rig: "MetabaseRig", name: str):
        """
        :param rig: The rig providing `session`, `api_url` and `mb`.
        :param name: Name of the database entry within Metabase.
        """
        self.rig = rig
        self.name = name
        # Number of one-second retry rounds `_wait` grants before giving up.
        self.timeout = 15

    @property
    def id(self):
        """
        Identifier of the database, looked up by name via `rig.mb.get_item_id`.

        The value is cached on the instance after the first successful lookup.
        A failed lookup is not cached, so pollers like `exists()` retry it.
        The cache lives on the instance rather than in a module-level
        `lru_cache`, which would keep every instance alive indefinitely.
        """
        try:
            return self._id
        except AttributeError:
            self._id = self.rig.mb.get_item_id("database", self.name)
            return self._id

    def create(self):
        """
        Register CrateDB in Metabase as a PostgreSQL database.

        https://www.metabase.com/docs/latest/api/database#post-apidatabase

        :raises requests.HTTPError: When Metabase rejects the request. Without
            this check, a failed registration only showed up later as a
            "Database not found" timeout.
        """
        response = self.rig.session.post(
            f"{self.rig.api_url}/database",
            json={
                "engine": "postgres",
                "name": self.name,
                "details": {
                    "host": "cratedb",
                    "port": 5432,
                    "user": "crate",
                },
            },
        )
        response.raise_for_status()

    def exists(self):
        """
        Return whether Metabase answers HTTP 200 for this database's id.

        A `ValueError` whose message contains "There is no DB with the name"
        counts as "does not exist" (presumably raised by the `metabase_api`
        name lookup behind `id`). Any other `ValueError` is re-raised.
        """
        try:
            response = self.rig.session.get(f"{self.rig.api_url}/database/{self.id}")
        except ValueError as ex:
            if "There is no DB with the name" not in str(ex):
                raise
            return False
        return response.status_code == 200

    def schema(self, name: str):
        """
        Return the decoded JSON body of `/database/<id>/schema/<name>`.

        Callers in this class iterate the result as a sequence of table
        records carrying `schema`, `name` and `initial_sync_status` keys.

        :raises requests.HTTPError: When the endpoint answers with an error status.
        """
        response = self.rig.session.get(f"{self.rig.api_url}/database/{self.id}/schema/{name}")
        response.raise_for_status()
        return response.json()

    def table_names(self, schema_name: str):
        """
        Return the `<schema>.<table>` names of all tables listed for the schema.
        """
        return [f"{item['schema']}.{item['name']}" for item in self.schema(name=schema_name)]

    def table_id_by_name(self, name: str):
        """
        Return the identifier of a table, looked up by name via `rig.mb.get_item_id`.
        """
        return self.rig.mb.get_item_id("table", name)

    def query(self, table: str):
        """
        Post a query with the given table as `source-table` to `/dataset`.

        :return: The decoded JSON response body. The HTTP status is not
            checked here, so the body is returned whatever the outcome.
        """
        response = self.rig.session.post(
            f"{self.rig.api_url}/dataset",
            json={
                "database": self.id,
                "query": {
                    "source-table": self.table_id_by_name(table),
                },
                "type": "query",
                "parameters": [],
            }
        )
        return response.json()

    def wait_database(self):
        """
        Block until `exists()` is truthy.

        :raises TimeoutError: When the database did not show up in time.
        """
        return self._wait(self.exists, f"Database not found: {self.name}")

    def wait_schema(self, name: str):
        """
        Block until the schema endpoint returns a non-empty result, and return it.

        Request errors, including error statuses from `schema()`, count as
        "not ready yet" and are retried.

        :raises TimeoutError: When the schema did not show up in time.
        """
        def condition():
            try:
                if schema := self.schema(name):
                    return schema
            except requests.RequestException:
                pass
            return False
        return self._wait(condition, f"Database schema '{name}' not found in database '{self.name}'")

    def wait_table(self, schema: str, name: str):
        """
        Block until the table is listed with `initial_sync_status == "complete"`.

        :raises TimeoutError: When the table, or its schema, did not become
            ready in time.
        """
        def condition():
            # Each round waits for the schema first. Its own `TimeoutError`
            # propagates unchanged when the schema never appears.
            schema_info = self.wait_schema(schema)
            return any(
                item["name"] == name and item["initial_sync_status"] == "complete"
                for item in schema_info
            )
        return self._wait(condition, f"Table not found: {schema}.{name}")

    def _wait(self, condition, timeout_message):
        """
        Call `condition` about once per second until it returns a truthy value.

        :param condition: Zero-argument callable; its truthy result is returned.
        :param timeout_message: Message of the `TimeoutError` raised on expiry.
        :raises TimeoutError: After `self.timeout` retries without success.
        """
        remaining = self.timeout
        while True:
            if result := condition():
                return result
            # `<=` rather than `==`: a negative or fractional `timeout` never
            # lands exactly on zero and would otherwise loop forever.
            if remaining <= 0:
                raise TimeoutError(timeout_message)
            remaining -= 1
            time.sleep(1)

0 commit comments

Comments
 (0)