1
+ // SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
2
+ //
3
+ // SPDX-License-Identifier: AGPL-3.0-only
4
+
5
+ // Example 1: Airports
6
+ // Learning goals:
7
+ // - Understand the core concepts pipeline, block, and pipe
8
+ // - Understand the general structure of a pipeline
9
+
10
+ // 1. This Jayvee model describes a pipeline
11
+ // from a CSV file on the web
12
+ // to a SQLite file sink.
13
+ pipeline AirportsPipeline {
14
+
15
+ // 2. We describe the structure of the pipeline,
16
+ // usually at the top of the pipeline,
17
+ // by connecting blocks via pipes.
18
+
19
+ // 3. Syntax of a pipe (the "->" arrow),
20
+ // connecting the block AirportsExtractor
21
+ // with the block AirportsTextFileInterpreter.
22
+ AirportsExtractor -> AirportsTextFileInterpreter;
23
+
24
+ // 4. The output of the preceding block is hereby used
25
+ // as input for the succeeding block.
26
+
27
+ // 5. Pipes can be further chained,
28
+ // leading to an overview of the pipeline.
29
+ AirportsTextFileInterpreter
30
+ -> AirportsCSVInterpreter // the NameHeaderWriter step on the next line is commented out, so it is not part of this chain
31
+ //-> NameHeaderWriter
32
+ -> AirportsTableInterpreter
33
+ -> AirportsLoader;
34
+
35
+
36
+ // 6. Below the pipes, we usually define the blocks
37
+ // that are connected by the pipes.
38
+
39
+ // 7. Blocks instantiate a blocktype by using the oftype keyword.
40
+ // The blocktype defines the available properties that the block
41
+ // can use to specify the intended behavior of the block.
42
+ block AirportsExtractor oftype HttpExtractor {
43
+
44
+ // 8. Properties are assigned to concrete values.
45
+ // Here, we specify the URL where the file shall be downloaded from; its query string requests ";" as CSV delimiter (delimiter=%3B).
46
+ url: "https://opendata.rhein-kreis-neuss.de/api/explore/v2.1/catalog/datasets/rhein-kreis-neuss-flughafen-weltweit/exports/csv?lang=en&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B";
47
+ }
48
+
49
+ // 9. The HttpExtractor requires no input and produces a binary file as output.
50
+ // This file has to be interpreted, e.g., as a text file.
51
+ block AirportsTextFileInterpreter oftype TextFileInterpreter { } // no properties are set for this block
52
+
53
+ // 10. Next, we interpret the text file as a sheet.
54
+ // A sheet only contains text cells and is useful for manipulating the shape of data before assigning more strict value types to cells.
55
+ block AirportsCSVInterpreter oftype CSVInterpreter {
56
+ // Disabled property, kept for reference: enclosing: '"';
57
+ delimiter: ";"; // same delimiter that the extractor URL requests via delimiter=%3B
58
+ }
59
+
60
+ // 11. We can write into cells of a sheet using the CellWriter blocktype. Note: this block is defined but currently not connected, because its step in the pipe chain above is commented out.
61
+ block NameHeaderWriter oftype CellWriter {
62
+ // 12. We utilize a syntax similar to spreadsheet programs.
63
+ // Cell ranges can be described using the keywords "cell", "row", "column", or "range" that indicate which
64
+ // cells are selected for the write action.
65
+ at: cell A1;
66
+
67
+ // 13. For each cell we selected with the "at" property above,
68
+ // we can specify what value shall be written into the cell.
69
+ write: ["name"];
70
+ }
71
+
72
+ // 14. As a next step, we interpret the sheet as a table by adding structure.
73
+ // We define a valuetype per column that specifies the data type of the column.
74
+ // Rows that include values that are not valid according to their valuetypes are dropped automatically.
75
+ block AirportsTableInterpreter oftype TableInterpreter {
76
+ header: true; // the sheet has a header row; the column names below refer to these header names (see TableInterpreter docs)
77
+ columns: [
78
+ "Lfd. Nummer" oftype integer,
79
+ "Name des Flughafens" oftype text,
80
+ "Ort" oftype text,
81
+ "Land" oftype text,
82
+ "IATA" oftype text,
83
+ "ICAO" oftype text,
84
+ "Latitude" oftype decimal,
85
+ "Longitude" oftype decimal,
86
+ "Altitude" oftype decimal,
87
+ "Zeitzone" oftype decimal,
88
+ "DST" oftype text,
89
+ "Zeitzonen-Datenbank" oftype text,
90
+ "geo_punkt" oftype text
91
+ ];
92
+ }
93
+
94
+ // 15. As a last step, we load the table into a sink,
95
+ // here into a SQLite file.
96
+ // The structural information of the table is used
97
+ // to generate the correct table.
98
+ block AirportsLoader oftype SQLiteLoader {
99
+ table: "Airports"; // name of the table inside the SQLite file
100
+ file: "./Airports.sqlite"; // relative path of the SQLite file that is written
101
+ }
102
+
103
+ // 16. Congratulations!
104
+ // You can now use the sink for your data analysis, app,
105
+ // or whatever you want to do with the cleaned data.
106
+ }
0 commit comments