diff --git a/.github/workflows/branch_replication.yml b/.github/workflows/branch_replication.yml index 2bc6f6d0..a6afb716 100644 --- a/.github/workflows/branch_replication.yml +++ b/.github/workflows/branch_replication.yml @@ -31,6 +31,9 @@ jobs: # BigQuery git branch -f master-bigquery master git push -u origin master-bigquery --force + # Dremio + git branch -f master-dremio master + git push -u origin master-dremio --force - name: Verify move diff --git a/.github/workflows/pr_job_dremio.yml b/.github/workflows/pr_job_dremio.yml new file mode 100644 index 00000000..40f96819 --- /dev/null +++ b/.github/workflows/pr_job_dremio.yml @@ -0,0 +1,64 @@ +name: dbt PR job (Dremio) + +on: + pull_request: + types: + - opened + - reopened + - synchronize + - ready_for_review + branches: + - master-dremio + # push: + # branches-ignore: + # - master + # - master-dremio + +jobs: + run: + runs-on: ubuntu-20.04 + steps: + - name: checkout + uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: '3.8' + + - name: install requirements + run: pip install -q -r requirements.txt + + - name: install datafold-sdk + run: pip install -q datafold-sdk + + - name: dbt deps + run: dbt deps + + - name: Find Current Pull Request + uses: jwalton/gh-find-current-pr@v1.3.0 + id: findPR + + - name: upload seed files outside dbt + working-directory: datagen + run: | + python dremio__upload_seeds.py + env: + DREMIO_TOKEN: ${{ secrets.DREMIO_TOKEN }} + DREMIO_FOLDER: "${{ format('{0}_{1}', 'PR_NUM', steps.findPr.outputs.pr) }}" + DREMIO_USER: ${{ secrets.DREMIO_USER }} + + - name: dbt build + run: dbt build --full-refresh --profiles-dir ./ --profile demo_dremio --exclude "seeds/" + env: + DREMIO_TOKEN: ${{ secrets.DREMIO_TOKEN }} + DREMIO_FOLDER: "${{ format('{0}_{1}', 'PR_NUM', steps.findPr.outputs.pr) }}" + DREMIO_USER: ${{ secrets.DREMIO_USER }} + + - name: submit artifacts to datafold + run: | + set -ex + datafold dbt upload --ci-config-id 434 --run-type ${DATAFOLD_RUN_TYPE} --commit-sha ${GIT_SHA} + env: + DATAFOLD_API_KEY: ${{ secrets.DATAFOLD_APIKEY_BIGQUERY }} + DATAFOLD_RUN_TYPE: "${{ 'pull_request' }}" + GIT_SHA: "${{ github.event.pull_request.head.sha }}" diff --git a/.github/workflows/prod_job_dremio.yml b/.github/workflows/prod_job_dremio.yml new file mode 100644 index 00000000..03872c9a --- /dev/null +++ b/.github/workflows/prod_job_dremio.yml @@ -0,0 +1,77 @@ +name: dbt prod (Dremio) + +on: + workflow_dispatch: + push: + branches: + - master-dremio + schedule: + - cron: '20 0 * * *' + +jobs: + run: + runs-on: ubuntu-20.04 + + steps: + - name: checkout master-dremio + uses: actions/checkout@v2 + with: + ref: master-dremio + + - name: Verify and set branch and SHA + run: | + # echo "Checking if the current branch is master-dremio" + # CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) + # if [ "$CURRENT_BRANCH" != "master-dremio" ]; then + # echo "This workflow runs only on master-dremio." + # exit 1 + # fi + echo "GH_REF=$(git symbolic-ref HEAD)" >> $GITHUB_ENV + echo "GH_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV + + - name: Verify environment variables + run: | + echo "GH_REF is $GH_REF" + echo "GH_SHA is $GH_SHA" + env: + GH_REF: "${{ env.GH_REF }}" + GH_SHA: "${{ env.GH_SHA }}" + + + - uses: actions/setup-python@v2 + with: + python-version: '3.8' + + - name: install requirements + run: pip install -q -r requirements.txt + + - name: install datafold-sdk + run: pip install -q datafold-sdk + + - name: dbt deps + run: dbt deps + + - name: upload seed files outside dbt + working-directory: datagen + run: | + python dremio__upload_seeds.py + env: + DREMIO_TOKEN: ${{ secrets.DREMIO_TOKEN }} + DREMIO_FOLDER: ${{ 'prod' }} + DREMIO_USER: ${{ secrets.DREMIO_USER }} + + - name: dbt build + run: dbt build --full-refresh --profiles-dir ./ --profile demo_dremio --exclude "seeds/" + env: + DREMIO_TOKEN: ${{ secrets.DREMIO_TOKEN }} + DREMIO_FOLDER: ${{ 'prod' }} + DREMIO_USER: ${{ secrets.DREMIO_USER }} + + - name: submit artifacts to datafold + run: | + set -ex + datafold dbt upload --ci-config-id 434 --run-type ${DATAFOLD_RUN_TYPE} --commit-sha ${GIT_SHA} + env: + DATAFOLD_API_KEY: ${{ secrets.DATAFOLD_APIKEY_BIGQUERY }} + DATAFOLD_RUN_TYPE: "${{ env.GH_REF == 'refs/heads/master-dremio' && 'production' || 'pull_request' }}" + GIT_SHA: "${{ env.GH_REF == 'refs/heads/master-dremio' && env.GH_SHA || github.event.pull_request.head.sha }}" diff --git a/datagen/dremio__upload_seeds.py b/datagen/dremio__upload_seeds.py new file mode 100755 index 00000000..427e8073 --- /dev/null +++ b/datagen/dremio__upload_seeds.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python3 + +import os +from http.cookies import SimpleCookie + +from dremio.flight.connection import DremioFlightEndpointConnection +from dremio.flight.query import DremioFlightEndpointQuery +from dremio.middleware.cookie import CookieMiddlewareFactory +from pyarrow import flight +import csv + + +class DFDremioFlightEndpointConnection(DremioFlightEndpointConnection): + project_id: str + + def __init__(self, params): + super().__init__(params) + self.project_id = params.get('project_id') + + def connect(self) -> flight.FlightClient: + """Connects to Dremio Flight server endpoint with the + provided credentials.""" + try: + # Default to use an unencrypted TCP connection. + scheme = "grpc+tcp" + client_cookie_middleware = CookieMiddlewareFactory() + tls_args = {} + + if self.tls: + tls_args = self._set_tls_connection_args() + scheme = "grpc+tls" + + if self.project_id: + cookie = SimpleCookie() + ''' + Load "project_id=" into the Cookie container. + Note we're no longer using it as a black box, and the client + is making up its own cookie which is less than conformant + to RFC 6265. This should ideally not be used in production + systems. + ''' + cookie['project_id'] = self.project_id + ''' + Update the middleware's cookie jar dict, normally intended to be + internal-only. + ''' + client_cookie_middleware.cookies.update(cookie.items()) + + if self.username and (self.password or self.token): + print('software') + return self._connect_to_software(tls_args, client_cookie_middleware, scheme) + + elif self.token: + print('cloud') + return self._connect_to_cloud(tls_args, client_cookie_middleware, scheme) + + raise ConnectionError("Username+token or token must be supplied.") + + except Exception: + print("There was an error trying to connect to the Dremio Flight Endpoint") + raise + + +class DFDremioFlightEndpoint: + def __init__(self, connection_args: dict) -> None: + self.connection_args = connection_args + self.dremio_flight_conn = DFDremioFlightEndpointConnection( + self.connection_args) + + def connect(self) -> flight.FlightClient: + return self.dremio_flight_conn.connect() + + def get_reader(self, client: flight.FlightClient) -> flight.FlightStreamReader: + dremio_flight_query = DremioFlightEndpointQuery( + self.connection_args.get("query"), client, self.dremio_flight_conn + ) + return dremio_flight_query.get_reader() + + +def run_query(query: str): + config = { + 'hostname': 'data.dremio.cloud', + 'port': 443, + 'token': os.environ.get('DREMIO_TOKEN'), + 'username': None, + 'password': None, + 'tls': True, + # 'disable_certificate_verification': True, + 'path_to_certs': os.path.join(os.path.dirname(__file__), + "./dremio_bundle.pem"), + 'query': query, + 'project_id': 'e94ab14a-43d8-44dd-8f59-cffbb1f0f12f', + } + endpoint = DFDremioFlightEndpoint(config) + client = endpoint.connect() + cursor = endpoint.get_reader(client) + data = cursor.read_all() + + print(data.to_pandas()) + return data + + +def column_type(col): + if col in ['org_id', 'user_id', 'price']: + return 'int' + elif col in ['created_at', 'event_timestamp']: + return 'timestamp' + elif col in ['is_first_user']: + return 'boolean' + else: + return 'varchar' + + +def column_quote(col): + if col in ['org_id', 'user_id', 'price']: + return False + elif col in ['created_at', 'event_timestamp']: + return True + elif col in ['is_first_user']: + return False + else: + return True + + +def create_query(file): + table = file.split('.')[0] + full_table_name = schema + table + + with open(file_path + file, 'r') as file: + first_line = file.readline().strip() + columns = first_line.split(',') + + columns_types_string = ', '.join([f'{col} {column_type(col)}' for col in columns]) + create_query = f"""create table {full_table_name} ({columns_types_string});""" + return create_query + + +def drop_query(file): + table = file.split('.')[0] + full_table_name = schema + table + drop_query = f"""DROP TABLE IF EXISTS {full_table_name};""" + return drop_query + + +def create_insert_statements(file, batch_size): + + csv_file_path = file_path + file + table = file.split('.')[0] + full_table_name = schema + table + + with open(file_path + file, 'r') as file: + columns_string = file.readline().strip() + columns_list = columns_string.split(',') + + # List to hold the generated SQL statements + sql_statements = [] + + # Read the CSV file + with open(csv_file_path, 'r') as file: + reader = csv.DictReader(file) + rows = list(reader) + + # Process the rows in batches + for i in range(0, len(rows), batch_size): + batch = rows[i:i + batch_size] + values1 = [] + + for row in batch: + elements = [] + for col in columns_list: + # elements.append(f"'{row[col]}'" if column_quote(col) else f"{row[col]}") + element_to_append = f"{row[col]}" + element_to_append = element_to_append.replace('T', ' ').replace('Z', '') if column_type(col) == 'timestamp' else element_to_append + element_to_append = element_to_append.replace("'", "''").replace('’', "''") if column_type(col) == 'varchar' else element_to_append + element_to_append = f"'{element_to_append}'" if column_quote(col) else f"{element_to_append}" + elements.append(element_to_append) + values1.append(f"({', '.join(elements)})") + + # Create the SQL statement for the current batch + sql_statement = f"insert into {full_table_name} ({columns_string}) values {', '.join(values1)};" + sql_statements.append(sql_statement) + + return sql_statements + + +file_path = '../seeds/' +target_schema = os.environ.get('DREMIO_FOLDER') +schema = '"Alexey S3".alexeydremiobucket.' + target_schema + '.' + + +seed_files = [] +for path in os.listdir(file_path): + if os.path.isfile(os.path.join(file_path, path)): + if path.endswith(".csv"): + seed_files.append(path) +print(seed_files) + + +for filename in seed_files: + print(f'\n{filename} ==========================================') + print(drop_query(filename)) + run_query(drop_query(filename)) + + print(create_query(filename)) + run_query(create_query(filename)) + + insert_statements = create_insert_statements(filename, 500) + for statement in insert_statements: + print(statement) + run_query(statement) diff --git a/datagen/dremio_bundle.pem b/datagen/dremio_bundle.pem new file mode 100644 index 00000000..297a3d6c --- /dev/null +++ b/datagen/dremio_bundle.pem @@ -0,0 +1,94 @@ +-----BEGIN CERTIFICATE----- +MIIFZjCCBE6gAwIBAgIQMH+CdwNjtfQSpGZga7E1uTANBgkqhkiG9w0BAQsFADBG +MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM +QzETMBEGA1UEAxMKR1RTIENBIDFENDAeFw0yNDAyMTMxMTAwMTVaFw0yNDA1MTMx +MTU0MDlaMBwxGjAYBgNVBAMTEWRhdGEuZHJlbWlvLmNsb3VkMIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEAtL3bCtz3PVSK2WEk6LrAAyl6ml2UCV0y6png +dLXp8N06EDlD7F4CKAFT08zuW9uNcszedbp0DCoaPC6/XdQw6zVE7xs/UAu7lVru +PsfqJJs30QPUeO7F1hWIXcJq8jEuiqQBPPONwBIkEHBDoQ3lxR77ocmR2Vkme+XL +XfEV4F1T2ohadJsgdHQ9MiMLudmAdo+sFVnLsEB/RJ7gjc7jjOH2KG02R7kvmYlb +bKjoyAYA1Z/Kma0PhuzkWHkpBumDXuZV1akTeuWXMCJPlpuqfbmaOn6tfJM1hgJB +K6AzU4OitKYZVLr0pGUBQYpVrz1nFv23P973P3T/2emuur0/kwIDAQABo4ICeDCC +AnQwDgYDVR0PAQH/BAQDAgWgMBMGA1UdJQQMMAoGCCsGAQUFBwMBMAwGA1UdEwEB +/wQCMAAwHQYDVR0OBBYEFNwuFvpFsEEZXFY2FtcuYerKx+YoMB8GA1UdIwQYMBaA +FCXiGA6yV5GUKuXUXYaQg95Ts7iSMHgGCCsGAQUFBwEBBGwwajA1BggrBgEFBQcw +AYYpaHR0cDovL29jc3AucGtpLmdvb2cvcy9ndHMxZDQvejZzODcxMW5adkUwMQYI +KwYBBQUHMAKGJWh0dHA6Ly9wa2kuZ29vZy9yZXBvL2NlcnRzL2d0czFkNC5kZXIw +HAYDVR0RBBUwE4IRZGF0YS5kcmVtaW8uY2xvdWQwIQYDVR0gBBowGDAIBgZngQwB +AgEwDAYKKwYBBAHWeQIFAzA8BgNVHR8ENTAzMDGgL6AthitodHRwOi8vY3Jscy5w +a2kuZ29vZy9ndHMxZDQvbzZ1MEt1eVlFbzAuY3JsMIIBBAYKKwYBBAHWeQIEAgSB +9QSB8gDwAHYASLDja9qmRzQP5WoC+p0w6xxSActW3SyB2bu/qznYhHMAAAGNolbU +bgAABAMARzBFAiEAy1ZMs7ua31CPBZ5nX21gHW2RVlVhL5wYRPDprdz/wx0CIETM +kiBPJwdwL6f/DyVcCafK6aGqL7dbiaWwFth6SVxFAHYAdv+IPwq2+5VRwmHM9Ye6 +NLSkzbsp3GhCCp/mZ0xaOnQAAAGNolbUeQAABAMARzBFAiEAlDmaj0oVJMsiw5Fm +6IIFzIW65ZQMsLWMtw8s0E0l6oQCIC1vn9nkSTmblQdI+hzBbQCZYUOP+2esYFGX +3ISY8QgjMA0GCSqGSIb3DQEBCwUAA4IBAQAffqZ4n1mRWBsWa5g6NJpm6WrRN/fo +ekh7jZSfGtjOmsBfpGICquEenQtPamuFo94l4B3ruNMdGIeUfKcXrkncH8aXKTaE +KbiaW/uuK5xsXIN7Z1n3PpsJWO/kVn8j8ox+CZUCr5g6ZDbOKChwErnqG8f0gldC +MSuXs7vYI/nVLIre7GPa9Nm6OacpUbexj1uTNF0lweaskqCEHoCT0QxsMc+6QnUm +eMfAZ3N5cWn4yb1+1PUY1K8s5jriZPKcP14zbT7hhokxZDv3+yJjH3dQrySw2s0i +A9FPfaxbxAzx4cOh/GjiHIvbpjJSoK5IU6ttAcHT15CUsonvmh/FW/vN +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIFjDCCA3SgAwIBAgINAgCOsgIzNmWLZM3bmzANBgkqhkiG9w0BAQsFADBHMQsw +CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU +MBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAw +MDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp +Y2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFENDCCASIwDQYJKoZIhvcNAQEBBQAD +ggEPADCCAQoCggEBAKvAqqPCE27l0w9zC8dTPIE89bA+xTmDaG7y7VfQ4c+mOWhl +UebUQpK0yv2r678RJExK0HWDjeq+nLIHN1Em5j6rARZixmyRSjhIR0KOQPGBMUld +saztIIJ7O0g/82qj/vGDl//3t4tTqxiRhLQnTLXJdeB+2DhkdU6IIgx6wN7E5NcU +H3Rcsejcqj8p5Sj19vBm6i1FhqLGymhMFroWVUGO3xtIH91dsgy4eFKcfKVLWK3o +2190Q0Lm/SiKmLbRJ5Au4y1euFJm2JM9eB84Fkqa3ivrXWUeVtye0CQdKvsY2Fka +zvxtxvusLJzLWYHk55zcRAacDA2SeEtBbQfD1qsCAwEAAaOCAXYwggFyMA4GA1Ud +DwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0T +AQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUJeIYDrJXkZQq5dRdhpCD3lOzuJIwHwYD +VR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYG +CCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcw +AoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQt +MCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsME0G +A1UdIARGMEQwCAYGZ4EMAQIBMDgGCisGAQQB1nkCBQMwKjAoBggrBgEFBQcCARYc +aHR0cHM6Ly9wa2kuZ29vZy9yZXBvc2l0b3J5LzANBgkqhkiG9w0BAQsFAAOCAgEA +IVToy24jwXUr0rAPc924vuSVbKQuYw3nLflLfLh5AYWEeVl/Du18QAWUMdcJ6o/q +FZbhXkBH0PNcw97thaf2BeoDYY9Ck/b+UGluhx06zd4EBf7H9P84nnrwpR+4GBDZ +K+Xh3I0tqJy2rgOqNDflr5IMQ8ZTWA3yltakzSBKZ6XpF0PpqyCRvp/NCGv2KX2T +uPCJvscp1/m2pVTtyBjYPRQ+QuCQGAJKjtN7R5DFrfTqMWvYgVlpCJBkwlu7+7KY +3cTIfzE7cmALskMKNLuDz+RzCcsYTsVaU7Vp3xL60OYhqFkuAOOxDZ6pHOj9+OJm +YgPmOT4X3+7L51fXJyRH9KfLRP6nT31D5nmsGAOgZ26/8T9hsBW1uo9ju5fZLZXV +VS5H0HyIBMEKyGMIPhFWrlt/hFS28N1zaKI0ZBGD3gYgDLbiDT9fGXstpk+Fmc4o +lVlWPzXe81vdoEnFbr5M272HdgJWo+WhT9BYM0Ji+wdVmnRffXgloEoluTNcWzc4 +1dFpgJu8fF3LG0gl2ibSYiCi9a6hvU0TppjJyIWXhkJTcMJlPrWx1VytEUGrX2l0 +JDwRjW/656r0KVB02xHRKvm2ZKI03TglLIpmVCK3kBKkKNpBNkFt8rhafcCKOb9J +x/9tpNFlQTl7B39rJlJWkR17QnZqVptFePFORoZmFzM= +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIFVzCCAz+gAwIBAgINAgPlk28xsBNJiGuiFzANBgkqhkiG9w0BAQwFADBHMQsw +CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU +MBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw +MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp +Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwggIiMA0GCSqGSIb3DQEBAQUA +A4ICDwAwggIKAoICAQC2EQKLHuOhd5s73L+UPreVp0A8of2C+X0yBoJx9vaMf/vo +27xqLpeXo4xL+Sv2sfnOhB2x+cWX3u+58qPpvBKJXqeqUqv4IyfLpLGcY9vXmX7w +Cl7raKb0xlpHDU0QM+NOsROjyBhsS+z8CZDfnWQpJSMHobTSPS5g4M/SCYe7zUjw +TcLCeoiKu7rPWRnWr4+wB7CeMfGCwcDfLqZtbBkOtdh+JhpFAz2weaSUKK0Pfybl +qAj+lug8aJRT7oM6iCsVlgmy4HqMLnXWnOunVmSPlk9orj2XwoSPwLxAwAtcvfaH +szVsrBhQf4TgTM2S0yDpM7xSma8ytSmzJSq0SPly4cpk9+aCEI3oncKKiPo4Zor8 +Y/kB+Xj9e1x3+naH+uzfsQ55lVe0vSbv1gHR6xYKu44LtcXFilWr06zqkUspzBmk +MiVOKvFlRNACzqrOSbTqn3yDsEB750Orp2yjj32JgfpMpf/VjsPOS+C12LOORc92 +wO1AK/1TD7Cn1TsNsYqiA94xrcx36m97PtbfkSIS5r762DL8EGMUUXLeXdYWk70p +aDPvOmbsB4om3xPXV2V4J95eSRQAogB/mqghtqmxlbCluQ0WEdrHbEg8QOB+DVrN +VjzRlwW5y0vtOUucxD/SVRNuJLDWcfr0wbrM7Rv1/oFB2ACYPTrIrnqYNxgFlQID +AQABo0IwQDAOBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4E +FgQU5K8rJnEaK0gnhS9SZizv8IkTcT4wDQYJKoZIhvcNAQEMBQADggIBAJ+qQibb +C5u+/x6Wki4+omVKapi6Ist9wTrYggoGxval3sBOh2Z5ofmmWJyq+bXmYOfg6LEe +QkEzCzc9zolwFcq1JKjPa7XSQCGYzyI0zzvFIoTgxQ6KfF2I5DUkzps+GlQebtuy +h6f88/qBVRRiClmpIgUxPoLW7ttXNLwzldMXG+gnoot7TiYaelpkttGsN/H9oPM4 +7HLwEXWdyzRSjeZ2axfG34arJ45JK3VmgRAhpuo+9K4l/3wV3s6MJT/KYnAK9y8J +ZgfIPxz88NtFMN9iiMG1D53Dn0reWVlHxYciNuaCp+0KueIHoI17eko8cdLiA6Ef +MgfdG+RCzgwARWGAtQsgWSl4vflVy2PFPEz0tv/bal8xa5meLMFrUKTX5hgUvYU/ +Z6tGn6D/Qqc6f1zLXbBwHSs09dR2CQzreExZBfMzQsNhFRAbd03OIozUhfJFfbdT +6u9AWpQKXCBfTkBdYiJ23//OYb2MI3jSNwLgjt7RETeJ9r/tSQdirpLsQBqvFAnZ +0E6yove+7u7Y/9waLd64NnHi/Hm3lCXRSHNboTXns5lndcEZOitHTtNCjv0xyBZm +2tIMPNuzjsmhDYAPexZ3FL//2wmUspO8IFgV6dtxQ/PeEMMA3KgqlbbC1j+Qa3bb +bP6MvPJwNQzcmRk13NfIRmPVNnGuV/u3gm3c +-----END CERTIFICATE----- diff --git a/models/core/dim_orgs.sql b/models/core/dim_orgs.sql index a2fc2f65..35b9f52b 100644 --- a/models/core/dim_orgs.sql +++ b/models/core/dim_orgs.sql @@ -34,12 +34,12 @@ WITH orgs AS ( SELECT - org_id + orgs.org_id , created_at , num_users , sub_created_at , sub_plan , sub_price FROM orgs -LEFT JOIN user_count USING (org_id) -LEFT JOIN subscriptions USING (org_id) +LEFT JOIN user_count on orgs.org_id = user_count.org_id +LEFT JOIN subscriptions on orgs.org_id = subscriptions.org_id diff --git a/models/core_/dim__orgs.sql b/models/core_/dim__orgs.sql index e5634fb9..7107e58f 100644 --- a/models/core_/dim__orgs.sql +++ b/models/core_/dim__orgs.sql @@ -34,12 +34,12 @@ WITH orgs AS ( SELECT - org_id + orgs.org_id , created_at , num_users , sub_created_at , sub_plan , sub_price FROM orgs -LEFT JOIN user_count USING (org_id) -LEFT JOIN subscriptions USING (org_id) +LEFT JOIN user_count on orgs.org_id = user_count.org_id +LEFT JOIN subscriptions on orgs.org_id = subscriptions.org_id diff --git a/models/syncs/sales_sync.sql b/models/syncs/sales_sync.sql index f84f9d43..b0042b2e 100644 --- a/models/syncs/sales_sync.sql +++ b/models/syncs/sales_sync.sql @@ -1,8 +1,15 @@ WITH org_events AS ( SELECT - * - FROM {{ ref('dim_orgs') }} - LEFT JOIN {{ ref('feature_used') }} USING (org_id) + a.org_id + , a.created_at + , a.num_users + , a.sub_created_at + , a.sub_plan + , a.sub_price + , b.event_timestamp + , b.activity + FROM {{ ref('dim_orgs') }} a + LEFT JOIN {{ ref('feature_used') }} b on a.org_id = b.org_id WHERE sub_plan IS NULL ) @@ -29,6 +36,9 @@ WITH org_events AS ( AND CAST(created_at AS DATE) > ( CAST('2022-11-01' AS DATE) - 60 ) + {% elif target.name == 'dr' %} + CAST(event_timestamp AS DATE) > DATE_SUB(DATE '2022-11-01', 30) + AND CAST(created_at AS DATE) > DATE_SUB(DATE '2022-11-01', 60) {% else %} event_timestamp::date > ('2022-11-01'::date - 30) AND created_at::date > ('2022-11-01'::date - 60) diff --git a/models/syncs_/sales__sync.sql b/models/syncs_/sales__sync.sql index b8ee7f8f..88003284 100644 --- a/models/syncs_/sales__sync.sql +++ b/models/syncs_/sales__sync.sql @@ -1,8 +1,15 @@ WITH org_events AS ( SELECT - * - FROM {{ ref('dim__orgs') }} - LEFT JOIN {{ ref('feature__used') }} USING (org_id) + a.org_id + , a.created_at + , a.num_users + , a.sub_created_at + , a.sub_plan + , a.sub_price + , b.event_timestamp + , b.activity + FROM {{ ref('dim__orgs') }} a + LEFT JOIN {{ ref('feature__used') }} b on a.org_id = b.org_id WHERE sub_plan IS NULL ) @@ -29,6 +36,9 @@ WITH org_events AS ( AND CAST(created_at AS DATE) > ( CURRENT_DATE - 60 ) + {% elif target.name == 'dr' %} + CAST(event_timestamp AS DATE) > DATE_SUB(current_date, 30) + AND CAST(created_at AS DATE) > DATE_SUB(current_date, 60) {% else %} event_timestamp::date > (current_date - 30) AND created_at::date > (current_date - 60) diff --git a/profiles.yml b/profiles.yml index 08c9dac0..05227750 100644 --- a/profiles.yml +++ b/profiles.yml @@ -49,3 +49,21 @@ demo_bigquery: auth_provider_x509_cert_url: "https://www.googleapis.com/oauth2/v1/certs" client_x509_cert_url: "{{ env_var('BIGQUERY_DEMO_CLIENT_X509_CERT_URL') }}" universe_domain: "googleapis.com" + +demo_dremio: + target: dr + outputs: + dr: + cloud_host: api.dremio.cloud + cloud_project_id: e94ab14a-43d8-44dd-8f59-cffbb1f0f12f + pat: "{{ env_var('DREMIO_TOKEN') }}" + object_storage_source: "Alexey S3" + # object_storage_path: "alexeydremiobucket.folder2" + object_storage_path: "{{ 'alexeydremiobucket.' + env_var('DREMIO_FOLDER') }}" + dremio_space: demo + dremio_space_folder: no_schema + threads: 5 + type: dremio + use_ssl: true + user: "{{ env_var('DREMIO_USER') }}" + diff --git a/requirements.txt b/requirements.txt index 6b103705..b56fa111 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,7 @@ -dbt-core==1.8.2 -dbt-snowflake==1.8.2 -dbt-databricks==1.8.2 -dbt-bigquery==1.8.2 +dbt-core==1.7.13 +dbt-snowflake +dbt-databricks +dbt-bigquery +dbt-dremio==1.7.0 +# "https://github.com/dremio-hub/arrow-flight-client-examples/releases/download/dremio-flight-python-v1.1.0/dremio_flight-1.1.0-py3-none-any.whl +dremio-flight @ https://github.com/dremio-hub/arrow-flight-client-examples/releases/download/dremio-flight-python-v1.1.0/dremio_flight-1.1.0-py3-none-any.whl