Skip to content

Commit 6045ac9

Browse files
committed
Singer/Meltano: Add example singerfile-to-cratedb
1 parent ce46d48 commit 6045ac9

File tree

6 files changed

+256
-0
lines changed

6 files changed

+256
-0
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
catalog.json
2+
tap_countries.singer
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Meltano Singer File -> CrateDB example
2+
3+
## About
4+
5+
Import data from a file in Singer format (JSONL) into CrateDB, using
6+
[tap-singer-jsonl] and [meltano-target-cratedb].
7+
8+
## Configuration
9+
10+
### tap-singer-jsonl
11+
12+
Within the `extractors` section of `meltano.yml`, have a look at `tap-singer-jsonl`'s
13+
`config.local.paths` section to see how to configure JSONL files in Singer
14+
format as pipeline source(s).
15+
16+
### target-cratedb
17+
18+
Within the `loaders` section, at `target-cratedb`, adjust
19+
`config.sqlalchemy_url` to match your database connectivity settings
20+
as pipeline target.
21+
22+
## Usage
23+
24+
Install dependencies.
25+
```shell
26+
meltano install
27+
```
28+
29+
Discover data schema.
30+
```shell
31+
meltano invoke tap-singer-jsonl --discover > catalog.json
32+
```
33+
34+
Run the plugin standalone, as a test drive.
35+
```shell
36+
meltano invoke tap-singer-jsonl --catalog catalog.json
37+
```
38+
39+
Invoke data transfer to CrateDB database.
40+
```shell
41+
meltano run tap-singer-jsonl target-cratedb
42+
```
43+
44+
## Screenshot
45+
46+
Enjoy the list of countries.
47+
```shell
48+
crash --command 'SELECT "code", "name", "capital", "emoji", "languages[1]" FROM "melty"."countries" ORDER BY "name" LIMIT 42;'
49+
```
50+
51+
![image](https://github.com/crate-workbench/meltano-target-cratedb/assets/453543/fa7076cc-267e-446c-a4f3-aa1283778ace)
52+
53+
54+
## Development
55+
In order to link the sandbox to a development installation of [meltano-target-cratedb],
56+
configure the `pip_url` of the component like this:
57+
```yaml
58+
pip_url: --editable=/path/to/sources/meltano-target-cratedb
59+
```
60+
61+
62+
[meltano-target-cratedb]: https://github.com/crate-workbench/meltano-target-cratedb
63+
[tap-singer-jsonl]: https://github.com/kgpayne/tap-singer-jsonl
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# A Meltano project is just a directory on your filesystem containing text-based files.
2+
# At a minimum, a Meltano project must contain a project file named `meltano.yml`,
3+
# which contains your project configuration, and tells Meltano that a particular
4+
# directory is a Meltano project.
5+
---
6+
version: 1
7+
default_environment: dev
8+
send_anonymous_usage_stats: false
9+
project_id: f14797b9-9d1c-414c-851c-c91e08ddbc2e
10+
11+
environments:
12+
- name: dev
13+
- name: staging
14+
- name: prod
15+
16+
plugins:
17+
18+
# Configure data source.
19+
# In Meltano jargon, it is an "extractor"; in Singer jargon, a "tap".
20+
extractors:
21+
22+
- name: tap-singer-jsonl
23+
variant: kgpayne
24+
pip_url: git+https://github.com/crate-workbench/tap-singer-jsonl@fix-paths
25+
config:
26+
source: local
27+
add_record_metadata: false
28+
local:
29+
# Note: Configure Singer file(s) here.
30+
paths:
31+
- "tap_countries.singer"
32+
33+
# Configure data sinks.
34+
# In Meltano jargon, it is a "loader"; in Singer jargon, a "target".
35+
loaders:
36+
37+
- name: target-jsonl
38+
variant: andyh1203
39+
pip_url: target-jsonl
40+
41+
- name: target-cratedb
42+
namespace: cratedb
43+
variant: cratedb
44+
# Acquire from PyPI.
45+
pip_url: meltano-target-cratedb
46+
# Acquire from GitHub.
47+
# pip_url: git+https://github.com/crate-workbench/meltano-target-cratedb.git
48+
49+
# Note: Configure your database server and credentials here.
50+
config:
51+
sqlalchemy_url: crate://crate@localhost/
52+
add_record_metadata: true
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
{
2+
"plugin_type": "extractors",
3+
"name": "tap-singer-jsonl",
4+
"namespace": "tap_singer_jsonl",
5+
"variant": "kgpayne",
6+
"label": "Singer JSONL",
7+
"docs": "https://hub.meltano.com/extractors/tap-singer-jsonl--kgpayne",
8+
"repo": "https://github.com/kgpayne/tap-singer-jsonl",
9+
"pip_url": "tap-singer-jsonl",
10+
"executable": "tap-singer-jsonl",
11+
"description": "Read Singer-formatted JSONL Files",
12+
"logo_url": "https://hub.meltano.com/assets/logos/extractors/singer.png",
13+
"capabilities": [
14+
"discover"
15+
],
16+
"settings_group_validation": [
17+
[
18+
"local.folders"
19+
],
20+
[
21+
"local.paths"
22+
],
23+
[
24+
"source",
25+
"s3.bucket"
26+
],
27+
[
28+
"source",
29+
"s3.paths"
30+
]
31+
],
32+
"settings": [
33+
{
34+
"name": "source",
35+
"kind": "string",
36+
"value": "local",
37+
"label": "Source",
38+
"description": "The source configuration to use when reading `.singer.gz` files. Currently `local` and `s3` are supported."
39+
},
40+
{
41+
"name": "add_record_metadata",
42+
"kind": "boolean",
43+
"value": true,
44+
"label": "Add Record Metadata",
45+
"description": "Whether to inject `_sdc_*` metadata columns."
46+
},
47+
{
48+
"name": "local.folders",
49+
"kind": "array",
50+
"label": "Folders",
51+
"description": "Array of directory paths to scan for `.singer.gz` files."
52+
},
53+
{
54+
"name": "local.recursive",
55+
"kind": "boolean",
56+
"value": false,
57+
"label": "Recursive",
58+
"description": "Whether to scan directories recursively when discovering `.singer.gz` files."
59+
},
60+
{
61+
"name": "local.paths",
62+
"kind": "array",
63+
"label": "Paths",
64+
"description": "Array of file paths to singer-formatted files. **Note:** extension is ignored, and compression is inferred automatically by `smart_open`. Both `local.folders` and `local.paths` can be specified together."
65+
},
66+
{
67+
"name": "s3.bucket",
68+
"kind": "string",
69+
"label": "Bucket",
70+
"description": "S3 bucket name."
71+
},
72+
{
73+
"name": "s3.prefix",
74+
"kind": "string",
75+
"label": "Prefix",
76+
"description": "S3 key prefix. **Note:** key prefixes will be scanned recursively."
77+
},
78+
{
79+
"name": "s3.paths",
80+
"kind": "array",
81+
"label": "Paths",
82+
"description": "S3 file paths to singer-formatted files. **Note:** extension is ignored, and compression is inferred automatically by `smart_open`. Both `s3.prefix` and `s3.paths` can be specified together."
83+
}
84+
]
85+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"plugin_type": "loaders",
3+
"name": "target-jsonl",
4+
"namespace": "target_jsonl",
5+
"variant": "andyh1203",
6+
"label": "JSON Lines (JSONL)",
7+
"docs": "https://hub.meltano.com/loaders/target-jsonl--andyh1203",
8+
"repo": "https://github.com/andyh1203/target-jsonl",
9+
"pip_url": "target-jsonl",
10+
"description": "JSONL loader",
11+
"logo_url": "https://hub.meltano.com/assets/logos/loaders/jsonl.png",
12+
"settings": [
13+
{
14+
"name": "destination_path",
15+
"kind": "string",
16+
"value": "output",
17+
"label": "Destination Path",
18+
"description": "Sets the destination path the JSONL files are written to, relative\nto the project root.\n\nThe directory needs to exist already, it will not be created\nautomatically.\n\nTo write JSONL files to the project root, set an empty string (`\"\"`).\n"
19+
},
20+
{
21+
"name": "do_timestamp_file",
22+
"kind": "boolean",
23+
"value": false,
24+
"label": "Include Timestamp in File Names",
25+
"description": "Specifies if the files should get timestamped.\n\nBy default, the resulting file will not have a timestamp in the file name (i.e. `exchange_rate.jsonl`).\n\nIf this option gets set to `true`, the resulting file will have a timestamp associated with it (i.e. `exchange_rate-{timestamp}.jsonl`).\n"
26+
},
27+
{
28+
"name": "custom_name",
29+
"kind": "string",
30+
"label": "Custom File Name Override",
31+
"description": "Specifies a custom name for the filename, instead of the stream name.\n\nThe file name will be `{custom_name}-{timestamp}.jsonl`, if `do_timestamp_file` is `true`.\nOtherwise the file name will be `{custom_name}.jsonl`.\n\nIf custom name is not provided, the stream name will be used.\n"
32+
}
33+
]
34+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[tool.poe.tasks]
2+
3+
test = [
4+
5+
# Acquire Singer file in JSONL format.
6+
{ cmd = "wget --no-clobber https://github.com/MeltanoLabs/target-postgres/raw/v0.0.9/target_postgres/tests/data_files/tap_countries.singer" },
7+
8+
# Install recipe.
9+
{ cmd = "meltano install" },
10+
11+
# Discover data schema.
12+
{ shell = "meltano invoke tap-singer-jsonl --discover > catalog.json" },
13+
14+
# Run the plugin standalone, as a test drive.
15+
{ cmd = "meltano invoke tap-singer-jsonl --catalog catalog.json" },
16+
17+
# Invoke pipeline, loading data into database, for real.
18+
{ cmd = "meltano run tap-singer-jsonl target-cratedb" },
19+
20+
]

0 commit comments

Comments
 (0)