From 92c1e46bd11e1bfc30181f25d6a54a13675dd164 Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Mon, 23 Jun 2025 16:56:11 -0700 Subject: [PATCH 1/3] Workflow Endpoint: how to encrypt secrets --- api-reference/workflow/secrets.mdx | 327 +++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 api-reference/workflow/secrets.mdx diff --git a/api-reference/workflow/secrets.mdx b/api-reference/workflow/secrets.mdx new file mode 100644 index 00000000..ba7ac67d --- /dev/null +++ b/api-reference/workflow/secrets.mdx @@ -0,0 +1,327 @@ +--- +title: Secrets +--- + +When you make calls to the [Unstructured Workflow Endpoint](/api-reference/workflow/overview), you might need to include a +third-party secret as part of the request. These third-party secrets typically include things like API keys, passwords, +contents of key files, and other sensitive information that are required when creating +[source connectors](/api-reference/workflow/source-connectors) and [destination connectors](/api-reference/workflow/destination-connectors). + +Instead of sending these secrets to Unstructured programmatically in plain text, which presents a security risk, you must instead follow Unstructured's +process for more securely handling each secret as follows: + +1. Get the private key for your Unstructured user account. +2. Use this private key to encrypt the target secret locally. +3. Register the encrypted secret with your Unstructured account. Unstructured returns a registration code that represents the encrypted secret + that was registered. +4. Whenever you need to include the target secret in a call to the Unstructured Workflow Endpoint, specify the registration code instead + of the target secret. + +If the Unstructured Workflow Endpoint expects a registration code instead of a plain-text secret, and you do not provide a registration code +or provide the wrong one, the call will fail. 
+ +## Requirements + +While you can use a REST API client such as `curl` or Postman to complete some of the following steps, you can only use a programming language such as +Python to complete other steps. Both approaches are shown where applicable. + +To complete the following steps, you must have the following: + +* A programming language, such as Python, installed on your local development machine. +* Optionally, a REST API client such as `curl` or Postman installed on your local development machine. +* An Unstructured user account, including the Unstructure API URL and API key for the account. + + + +* The following Python code examples assume that you have the following two environment variables set locally: + + - `UNSTRUCTURED_API_URL`, set to the Workflow Endpoint API URL for your Unstructured user account. + - `UNSTRUCTURED_API_KEY`, set to the API key for your Unstructured user account. + +## Step 1: Get the private key + + + + The following Python code examples assumes that you have installed the following packages into your + Python virtual environment: + + - `requests` + + The following function returns the PEM version of the private key for your Unstructured user account: + + ```python + import requests + import os + + def get_public_key_pem() -> str: + """ + Gets the PEM version of the public key for the calling Unstructured user. + + Args: + None. + + Returns: + str: A string representation of the user's public key. 
+ """ + + headers = { + "unstructured-api-key": os.getenv("UNSTRUCTURED_API_KEY"), + "Accept": "application/json", + "Content-Type": "application/json" + } + + encryption_key_response = requests.request( + method="POST", + url=f"{os.getenv("UNSTRUCTURED_API_URL")}/api/v1/users/retrieve", + headers=headers + ) + + encryption_public_key = encryption_key_response.json().get("pem_key") + + return encryption_public_key + ``` + + You could call this function as follows: + + ```python + public_key = get_public_key_pem() + + print(public_key) + ``` + + The function returns the public key in PEM format. + + + ```bash + curl --request 'POST' --location \ + "$UNSTRUCTURED_API_URL/users/retrieve" \ + --header 'accept: application/json' \ + --header "unstructured-api-key: $UNSTRUCTURED_API_KEY" \ + | jq .pem_key + ``` + + + - ... + + + + +## Step 2: Encrypt the secret + + + + The following Python code examples assumes that you have installed the following packages into your + Python virtual environment: + + - `cryptography` + + The following function encrypts a string by using envelope encryption: + + ```python + from cryptography.hazmat.primitives.asymmetric import padding, rsa + from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + from cryptography.hazmat.backends import default_backend + import os + import base64 + + def envelope_encrypt(public_key_pem: str, plaintext: str) -> dict: + """ + Encrypts a string by using envelope encryption. + + Args: + public_key_pem (str): The public key in PEM format. + plaintext (str): The string to encrypt. + + Returns: + dict: A dictionary with the encrypted AES key, iv, and ciphertext (all Base64-encoded). + """ + + # Load the public RSA key. + public_key = serialization.load_pem_public_key( + public_key_pem.encode('utf-8'), + backend=default_backend() + ) + + # Generate a random AES key. + aes_key = os.urandom(32) # 256-bit AES key. + + # Generate a random IV. 
+ iv = os.urandom(16) + + # Encrypt by using AES-CFB. + cipher = Cipher( + algorithms.AES(aes_key), + modes.CFB(iv), + ) + encryptor = cipher.encryptor() + ciphertext = encryptor.update(plaintext.encode('utf-8')) + encryptor.finalize() + + # Encrypt the AES key by using the RSA public key. + encrypted_key = public_key.encrypt( + aes_key, + padding.OAEP( + mgf=padding.MGF1(algorithm=hashes.SHA256()), + algorithm=hashes.SHA256(), + label=None + ) + ) + + # Return all encrypted components, Base64-encoded. + return { + 'encrypted_aes_key': base64.b64encode(encrypted_key).decode('utf-8'), + 'aes_iv': base64.b64encode(iv).decode('utf-8'), + 'encrypted_value': base64.b64encode(ciphertext).decode('utf-8'), + 'type': 'rsa_aes', + } + ``` + + You could call this function as follows: + + ```python + import json + + google_drive_creds_json_file = "/Users//Downloads/.json" + + with open(google_drive_creds_json_file, "r") as f: + google_json = json.load(f) + secret_account_key = json.dumps(google_json) + + # Get the PEM version of the private key for your Unstructured user account. + public_key_pem = get_public_key_pem() + + encrypted_secret = envelope_encrypt( + public_key_pem=public_key_pem, + plaintext=secret_account_key + ) + + print(encrypted_secret) + ``` + + + +## Step 3: Register the encypted secret + + + + The following Python code examples assumes that you have installed the following packages into your + Python virtual environment: + + - `requests` + + The following function returns the secret's registration code payload: + + ```python + import requests + import os + + def register_secret(encrypted_secret: dict) -> dict: + """ + Registers an encrypted secret in the caller's Unstructured account and then + gets the secret's registration code payload. + + Args: + encrypted_secret (dict): The encrypted secret to register. + + Returns: + dict: The secret's registration code payload. 
+ """ + + headers = { + "unstructured-api-key": os.getenv("UNSTRUCTURED_API_KEY"), + "Accept": "application/json", + "Content-Type": "application/json" + } + + create_secret_response = requests.request( + method="POST", + url=f"{os.getenv("UNSTRUCTURED_API_URL")}/api/v1/users/secrets", + headers=headers, + json=encrypted_secret + ).json() + + return create_secret_response + ``` + + You could call this function as follows: + + ```python + import json + + google_drive_creds_json_file = "/Users//Downloads/.json" + + with open(google_drive_creds_json_file, "r") as f: + google_json = json.load(f) + secret_account_key = json.dumps(google_json) + + # Get the PEM version of the private key for your Unstructured user account. + public_key_pem = get_public_key_pem() + + encrypted_secret = envelope_encrypt( + public_key_pem=public_key_pem, + plaintext=secret_account_key + ) + + secret_payload = register_secret( + encrypted_secret=encrypted_secret + ) + + print(secret_payload) + ``` + + + ```bash + curl --request 'POST' --location \ + "$UNSTRUCTURED_API_URL/users/secrets" \ + --header 'accept: application/json' \ + --header 'Content-Type: application/json' \ + --header "unstructured-api-key: $UNSTRUCTURED_API_KEY" \ + --data \ + '{ + "encrypted_value": "", + "type": "rsa", + "encrypted_aes_key": "", + "aes_iv": "" + }' \ + | jq . + ``` + + + - ... + + + + +## Step 4: Use the registration code + + + + ```python + + ``` + + + ```bash + curl --request 'POST' --location \ + "$UNSTRUCTURED_API_URL/sources" \ + --header 'accept: application/json' \ + --header "unstructured-api-key: $UNSTRUCTURED_API_KEY" \ + --header 'content-type: application/json' \ + --data \ + '{ + "name": "", + "type": "google_drive", + "config": { + "drive_id": "", + "service_account_key": { + "id": "", + "type": "rsa_aes" + } + } + }' + ``` + + + - ... 
+ + + \ No newline at end of file From 1d74e749be1b45a10b039d49a71ad14052a98094 Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Tue, 24 Jun 2025 17:14:31 -0700 Subject: [PATCH 2/3] Begin replacing REST calls with strongly-typed ones --- api-reference/workflow/secrets.mdx | 72 +++++++++++++++++------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/api-reference/workflow/secrets.mdx b/api-reference/workflow/secrets.mdx index ba7ac67d..90dedd8a 100644 --- a/api-reference/workflow/secrets.mdx +++ b/api-reference/workflow/secrets.mdx @@ -3,42 +3,39 @@ title: Secrets --- When you make calls to the [Unstructured Workflow Endpoint](/api-reference/workflow/overview), you might need to include a -third-party secret as part of the request. These third-party secrets typically include things like API keys, passwords, -contents of key files, and other sensitive information that are required when creating -[source connectors](/api-reference/workflow/source-connectors) and [destination connectors](/api-reference/workflow/destination-connectors). +secret as part of the request. This secret is typically something such as the contents of a +private key file that a third-party service requires for programmatic authentication. These secrets are +typically required when creating +[source connectors](/api-reference/workflow/source-connectors) or +[destination connectors](/api-reference/workflow/destination-connectors) that work with specific third parties. -Instead of sending these secrets to Unstructured programmatically in plain text, which presents a security risk, you must instead follow Unstructured's -process for more securely handling each secret as follows: +Instead of sending a secret to Unstructured programmatically in plain text, which presents a security risk, +you must instead follow Unstructured's process for more securely sending the secret as follows: -1. Get the private key for your Unstructured user account. -2. 
Use this private key to encrypt the target secret locally. -3. Register the encrypted secret with your Unstructured account. Unstructured returns a registration code that represents the encrypted secret - that was registered. -4. Whenever you need to include the target secret in a call to the Unstructured Workflow Endpoint, specify the registration code instead - of the target secret. +1. Get the PEM version of the public key for your Unstructured user account. +2. Use this PEM to encrypt the secret locally. +3. Register the encrypted version of the secret with your Unstructured account. Unstructured returns a unique ID for the registered secret, + along with the type of encryption that was used. +4. Specify the registered secret's ID and encryption type in the call to the Unstructured Workflow Endpoint as needed. -If the Unstructured Workflow Endpoint expects a registration code instead of a plain-text secret, and you do not provide a registration code -or provide the wrong one, the call will fail. +The following sections describe how to complete the preceding process. ## Requirements -While you can use a REST API client such as `curl` or Postman to complete some of the following steps, you can only use a programming language such as -Python to complete other steps. Both approaches are shown where applicable. +While you can use a REST API client such as `curl` or Postman to complete most of the following steps, you can only use Python to +complete the step of encrypting the secret locally. Otherwise, both approaches are shown for the other steps. To complete the following steps, you must have the following: -* A programming language, such as Python, installed on your local development machine. +* Python installed on your local development machine. * Optionally, a REST API client such as `curl` or Postman installed on your local development machine. -* An Unstructured user account, including the Unstructure API URL and API key for the account. 
- - - -* The following Python code examples assume that you have the following two environment variables set locally: +* An Unstructured account, including a valid Unstructured API key for that account. +* The following examples assume that you have the following two environment variables set locally: - `UNSTRUCTURED_API_URL`, set to the Workflow Endpoint API URL for your Unstructured user account. - `UNSTRUCTURED_API_KEY`, set to the API key for your Unstructured user account. -## Step 1: Get the private key +## Step 1: Get the PEM version of the public key @@ -110,12 +107,12 @@ To complete the following steps, you must have the following: - The following Python code examples assumes that you have installed the following packages into your - Python virtual environment: - - - `cryptography` + The following Python code examples assumes that you have installed the `cryptography` package into your + Python virtual environment. - The following function encrypts a string by using envelope encryption: + The following function encrypts a string by using envelope encryption. You must supply the function with the + PEM version of the public key for your Unstructured user account that you got from the previous step, and the plain-text version + of the secret that you need to encrypt. ```python from cryptography.hazmat.primitives.asymmetric import padding, rsa @@ -153,6 +150,7 @@ To complete the following steps, you must have the following: algorithms.AES(aes_key), modes.CFB(iv), ) + encryptor = cipher.encryptor() ciphertext = encryptor.update(plaintext.encode('utf-8')) + encryptor.finalize() @@ -175,7 +173,9 @@ To complete the following steps, you must have the following: } ``` - You could call this function as follows: + You could call the preceding `envelope_encrypt`function as follows. 
The following example extracts the contents of the specified + private key file as plain text and then passes it to the preceding `envelope_encrypt` function along + with the PEM version of the public key for your Unstructured user account. ```python import json @@ -196,10 +196,22 @@ To complete the following steps, you must have the following: print(encrypted_secret) ``` + + The result of printing the encrypted secret that is returned looks similar to the following: + + ```bash + { + "encrypted_aes_key": "...", + "aes_iv": "...", + "encrypted_value": "...", + "type": "rsa_aes" + } + ``` + -## Step 3: Register the encypted secret +## Step 3: Register the encrypted secret @@ -291,7 +303,7 @@ To complete the following steps, you must have the following: -## Step 4: Use the registration code +## Step 4: Use the registered secret's ID and encryption type From 537147a946e2399acec777c9e0bec32915195398 Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Wed, 25 Jun 2025 13:30:57 -0700 Subject: [PATCH 3/3] Add updated code and commands; update Google Drive how-to docs. --- api-reference/workflow/secrets.mdx | 483 ++++++++++++------ .../workflow/sources/google-drive.mdx | 12 + docs.json | 1 + .../google-drive-api-placeholders.mdx | 2 +- .../google_drive_rest_create.mdx | 5 +- .../source_connectors/google_drive_sdk.mdx | 5 +- 6 files changed, 359 insertions(+), 149 deletions(-) diff --git a/api-reference/workflow/secrets.mdx b/api-reference/workflow/secrets.mdx index 90dedd8a..9d36cb73 100644 --- a/api-reference/workflow/secrets.mdx +++ b/api-reference/workflow/secrets.mdx @@ -5,116 +5,168 @@ title: Secrets When you make calls to the [Unstructured Workflow Endpoint](/api-reference/workflow/overview), you might need to include a secret as part of the request. This secret is typically something such as the contents of a private key file that a third-party service requires for programmatic authentication. 
These secrets are -typically required when creating -[source connectors](/api-reference/workflow/source-connectors) or -[destination connectors](/api-reference/workflow/destination-connectors) that work with specific third parties. - -Instead of sending a secret to Unstructured programmatically in plain text, which presents a security risk, +typically required when creating [source connectors](/api-reference/workflow/sources/overview) or +[destination connectors](/api-reference/workflow/destinations/overview) that work with specific third-party services. + +Instead of programmatically sending a secret to Unstructured in plain text, which presents a security risk, you must instead follow Unstructured's process for more securely sending the secret as follows: -1. Get the PEM version of the public key for your Unstructured user account. -2. Use this PEM to encrypt the secret locally. +1. Call Unstructured to get the Privacy Enhanced Mail (PEM) version of the public key for your Unstructured user account. +2. Use this PEM to encrypt your plain-text secret locally. 3. Register the encrypted version of the secret with your Unstructured account. Unstructured returns a unique ID for the registered secret, along with the type of encryption that was used. 4. Specify the registered secret's ID and encryption type in the call to the Unstructured Workflow Endpoint as needed. +The source and destination connectors that require you to follow this process currently include the following: + +- [Google Drive source connector](/api-reference/workflow/sources/google-drive) + +Unstructured plans to add this requirement to other source and destination connectors in the future. + The following sections describe how to complete the preceding process. 
## Requirements +import GetStartedSimpleAPIOnly from '/snippets/general-shared-text/get-started-simple-api-only.mdx'; + While you can use a REST API client such as `curl` or Postman to complete most of the following steps, you can only use Python to -complete the step of encrypting the secret locally. Otherwise, both approaches are shown for the other steps. +complete the step of encrypting the plain-text secret locally. Otherwise, both approaches are shown for the other steps. To complete the following steps, you must have the following: -* Python installed on your local development machine. -* Optionally, a REST API client such as `curl` or Postman installed on your local development machine. -* An Unstructured account, including a valid Unstructured API key for that account. -* The following examples assume that you have the following two environment variables set locally: +- Python installed on your local development machine. +- The `unstructured-client` package installed into your Python virtual environment. +- Optionally, a REST API client such as `curl` or Postman installed on your local development machine. +- An Unstructured account, including a valid Unstructured API key for that account. To get your API key, do the following: + + + +- Some of the following steps also require you to specify the Unstructured Workflow Endpoint API URL for your Unstructured user account. + This URL was provided to you when your Unstructured account was created. + If you do not have this URL, contact Unstructured Sales at [sales@unstructured.io](mailto:sales@unstructured.io). + + + The default URL for the Unstructured Workflow Endpoint is `https://platform.unstructuredapp.io/api/v1`. + However, you should always use the URL that was provided to you when your Unstructured account was created. 
+ + +- The following steps assume that you have the following two environment variables set locally: - `UNSTRUCTURED_API_URL`, set to the Workflow Endpoint API URL for your Unstructured user account. - `UNSTRUCTURED_API_KEY`, set to the API key for your Unstructured user account. ## Step 1: Get the PEM version of the public key +In this step, you call the Unstructured Workflow Endpoint to get the PEM version of the public key for your +Unstructured user account. The result is a string that begins with `-----BEGIN PUBLIC KEY-----` and ends with +`-----END PUBLIC KEY-----`. + - The following Python code examples assumes that you have installed the following packages into your - Python virtual environment: - - - `requests` - - The following function returns the PEM version of the private key for your Unstructured user account: - ```python - import requests import os - def get_public_key_pem() -> str: - """ - Gets the PEM version of the public key for the calling Unstructured user. - - Args: - None. - - Returns: - str: A string representation of the user's public key. - """ - - headers = { - "unstructured-api-key": os.getenv("UNSTRUCTURED_API_KEY"), - "Accept": "application/json", - "Content-Type": "application/json" - } + from unstructured_client import UnstructuredClient + from unstructured_client.models.operations import RetrieveRequest - encryption_key_response = requests.request( - method="POST", - url=f"{os.getenv("UNSTRUCTURED_API_URL")}/api/v1/users/retrieve", - headers=headers + # This code assumes you want to use the default API URL for the + # Unstructured Workflow Endpoint: https://platform.unstructuredapp.io/api/v1 + # To use a different URL, set the UnstructuredClient constructor's + # server_url parameter to the target URL. 
+ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as client: + response = client.users.retrieve( + request=RetrieveRequest() ) - encryption_public_key = encryption_key_response.json().get("pem_key") - - return encryption_public_key + print(response.pem_auth_response.pem_key) ``` - You could call this function as follows: - - ```python - public_key = get_public_key_pem() + The output looks similar to the following: - print(public_key) + ```bash + -----BEGIN PUBLIC KEY----- + MII...redacted...YTv/ + 5VI...redacted...wrX + 2Yy...redacted...YPG + TTt...redacted...Vwj + EU0...redacted...SXI + jAV...redacted...3Wu + ytz...redacted...kvi + yL+...redacted...ZDf + r+t...redacted...AE= + -----END PUBLIC KEY----- ``` - - The function returns the public key in PEM format. ```bash curl --request 'POST' --location \ "$UNSTRUCTURED_API_URL/users/retrieve" \ --header 'accept: application/json' \ - --header "unstructured-api-key: $UNSTRUCTURED_API_KEY" \ - | jq .pem_key + --header "unstructured-api-key: $UNSTRUCTURED_API_KEY" ``` + + The output looks similar to the following. Line breaks and whitespace have been added to the output for readability: + + ```json + { + "pem_key": "-----BEGIN PUBLIC KEY-----\nMII...redacted...AE=\n-----END PUBLIC KEY-----\n", + "tenant_id": "324...redacted...183", + "user_id": "eef...redacted...9d0" + } + ``` + + Copy only the contents of the `pem_key` field from the output. Ignore the `tenant_id` and `user_id` fields. - - ... + 1. In the method drop-down list, select **POST**. + 2. In the address box, enter the following URL: + + ```text + {{UNSTRUCTURED_API_URL}}/users/retrieve + ``` + + 3. On the **Headers** tab, enter the following headers: + + - **Key**: `unstructured-api-key`, **Value**: `{{UNSTRUCTURED_API_KEY}}` + - **Key**: `accept`, **Value**: `application/json` + + 4. Click **Send**. 
+ + The response body looks similar to the following: + + ```json + { + "pem_key": "-----BEGIN PUBLIC KEY-----\nMII...redacted...AE=\n-----END PUBLIC KEY-----\n", + "tenant_id": "324...redacted...183", + "user_id": "eef...redacted...9d0" + } + ``` + + 5. Copy only the contents of the `pem_key` field from the response body. Ignore the `tenant_id` and `user_id` fields. ## Step 2: Encrypt the secret +In this step, you use the PEM version of the public key for your Unstructured user account that you got from +the previous step to encrypt the target plain-text secret. The result is a JSON-formatted object that contains +keys named `encrypted_aes_key`, `aes_iv`, `encrypted_value`, and `type`. All of the keys' values except the one for `type` are +Base64-encoded. + +This step can be completed only by using Python on your local development machine. + - The following Python code examples assumes that you have installed the `cryptography` package into your + The following code requires you to install the `cryptography` package into your Python virtual environment. - The following function encrypts a string by using envelope encryption. You must supply the function with the + The following `envelope_encrypt` function encrypts the target plain-text string by using envelope encryption. You must supply the function with the PEM version of the public key for your Unstructured user account that you got from the previous step, and the plain-text version - of the secret that you need to encrypt. + of the secret that you want to encrypt. ```python + from cryptography.hazmat.primitives import serialization, hashes from cryptography.hazmat.primitives.asymmetric import padding, rsa from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cryptography.hazmat.backends import default_backend @@ -135,7 +187,7 @@ To complete the following steps, you must have the following: # Load the public RSA key. 
public_key = serialization.load_pem_public_key( - public_key_pem.encode('utf-8'), + public_key_pem.encode("utf-8"), backend=default_backend() ) @@ -150,9 +202,8 @@ To complete the following steps, you must have the following: algorithms.AES(aes_key), modes.CFB(iv), ) - encryptor = cipher.encryptor() - ciphertext = encryptor.update(plaintext.encode('utf-8')) + encryptor.finalize() + ciphertext = encryptor.update(plaintext.encode("utf-8")) + encryptor.finalize() # Encrypt the AES key by using the RSA public key. encrypted_key = public_key.encrypt( @@ -166,118 +217,104 @@ To complete the following steps, you must have the following: # Return all encrypted components, Base64-encoded. return { - 'encrypted_aes_key': base64.b64encode(encrypted_key).decode('utf-8'), - 'aes_iv': base64.b64encode(iv).decode('utf-8'), - 'encrypted_value': base64.b64encode(ciphertext).decode('utf-8'), - 'type': 'rsa_aes', + "encrypted_aes_key": base64.b64encode(encrypted_key).decode("utf-8"), + "aes_iv": base64.b64encode(iv).decode("utf-8"), + "encrypted_value": base64.b64encode(ciphertext).decode("utf-8"), + "type": "rsa_aes", } ``` - You could call the preceding `envelope_encrypt`function as follows. The following example extracts the contents of the specified - private key file as plain text and then passes it to the preceding `envelope_encrypt` function along - with the PEM version of the public key for your Unstructured user account. + You could call the preceding `envelope_encrypt` function with code similar to the following. This code gets + the plain-text contents of the specified service account key file for + a Google Cloud service account. The code then encrypts the plain-text contents + by using the PEM version of the public key file for the user in the Unstructured account. 
```python - import json + import json - google_drive_creds_json_file = "/Users//Downloads/.json" + # Get the plain-text contents of the specified service account key file for + # a Google Cloud service account. + # Alternatively, you could get the plain-text contents of the service account key file + # by some other means, and then pass those contents as a string + # directly to the envelope_encrypt function. + google_drive_creds_json_file = "/Users//Downloads/.json" with open(google_drive_creds_json_file, "r") as f: google_json = json.load(f) secret_account_key = json.dumps(google_json) - # Get the PEM version of the private key for your Unstructured user account. - public_key_pem = get_public_key_pem() - + # Encrypt the plain text by using the PEM version of the public key file for + # the user in the Unstructured account. encrypted_secret = envelope_encrypt( - public_key_pem=public_key_pem, + public_key_pem="""-----BEGIN PUBLIC KEY----- + MII...redacted...YTv/ + 5VI...redacted...wrX + 2Yy...redacted...YPG + TTt...redacted...Vwj + EU0...redacted...SXI + jAV...redacted...3Wu + ytz...redacted...kvi + yL+...redacted...ZDf + r+t...redacted...AE= + -----END PUBLIC KEY-----""", plaintext=secret_account_key ) - print(encrypted_secret) + print(json.dumps(encrypted_secret, indent=4)) ``` - The result of printing the encrypted secret that is returned looks similar to the following: + The output looks similar to the following: - ```bash + ```json { - "encrypted_aes_key": "...", - "aes_iv": "...", - "encrypted_value": "...", + "encrypted_aes_key": "x3+...redacted...9zD", + "aes_iv": "k2N...redacted...g==", + "encrypted_value": "gM1...redacted...A2m", "type": "rsa_aes" } ``` - ## Step 3: Register the encrypted secret +In this step, you call the Unstructured Workflow Endpoint again, this time to register the encrypted secret that you got from +the previous step. The result is a JSON-formatted object that contains keys named `id` and `type`. 
+ - The following Python code examples assumes that you have installed the following packages into your - Python virtual environment: - - - `requests` - - The following function returns the secret's registration code payload: - ```python - import requests import os - def register_secret(encrypted_secret: dict) -> dict: - """ - Registers an encrypted secret in the caller's Unstructured account and then - gets the secret's registration code payload. - - Args: - encrypted_secret (dict): The encrypted secret to register. - - Returns: - dict: The secret's registration code payload. - """ - - headers = { - "unstructured-api-key": os.getenv("UNSTRUCTURED_API_KEY"), - "Accept": "application/json", - "Content-Type": "application/json" - } + from unstructured_client import UnstructuredClient + from unstructured_client.models.operations import StoreSecretRequest + + # This code assumes you want to use the default API URL for the + # Unstructured Workflow Endpoint: https://platform.unstructuredapp.io/api/v1 + # To use a different URL, set the UnstructuredClient constructor's + # server_url parameter to the target URL. 
+ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as client: + response = client.users.store_secret( + request=StoreSecretRequest( + encrypted_secret={ + "encrypted_aes_key": "x3+...redacted...9zD", + "aes_iv": "k2N...redacted...g==", + "encrypted_value": "gM1...redacted...A2m", + "type": "rsa_aes" + } + ) + ) - create_secret_response = requests.request( - method="POST", - url=f"{os.getenv("UNSTRUCTURED_API_URL")}/api/v1/users/secrets", - headers=headers, - json=encrypted_secret - ).json() - - return create_secret_response + print(response.secret_reference.model_dump_json(indent=4)) ``` - You could call this function as follows: - - ```python - import json - - google_drive_creds_json_file = "/Users//Downloads/.json" - - with open(google_drive_creds_json_file, "r") as f: - google_json = json.load(f) - secret_account_key = json.dumps(google_json) - - # Get the PEM version of the private key for your Unstructured user account. - public_key_pem = get_public_key_pem() - - encrypted_secret = envelope_encrypt( - public_key_pem=public_key_pem, - plaintext=secret_account_key - ) - - secret_payload = register_secret( - encrypted_secret=encrypted_secret - ) + The output looks similar to the following: - print(secret_payload) + ```json + { + "id": "09e...redacted...260", + "type": "rsa_aes" + } ``` @@ -289,26 +326,118 @@ To complete the following steps, you must have the following: --header "unstructured-api-key: $UNSTRUCTURED_API_KEY" \ --data \ '{ - "encrypted_value": "", - "type": "rsa", - "encrypted_aes_key": "", - "aes_iv": "" - }' \ - | jq . + "encrypted_aes_key": "x3+...redacted...9zD", + "aes_iv": "k2N...redacted...g==", + "encrypted_value": "gM1...redacted...A2m", + "type": "rsa_aes" + }' + ``` + + The output looks similar to the following. Line breaks and whitespace have been added to the output for readability: + + ```json + { + "id": "09e...redacted...260", + "type": "rsa_aes" + } ``` - - ... + 1. 
In the method drop-down list, select **POST**. + 2. In the address box, enter the following URL: + + ```text + {{UNSTRUCTURED_API_URL}}/users/secrets + ``` + + 3. On the **Headers** tab, enter the following headers: + + - **Key**: `unstructured-api-key`, **Value**: `{{UNSTRUCTURED_API_KEY}}` + - **Key**: `accept`, **Value**: `application/json` + - **Key**: `Content-Type`, **Value**: `application/json` + 4. On the **Body** tab, select **raw** and **JSON**, and specify the encrypted secret, for example: + + ```json + { + "encrypted_aes_key": "x3+...redacted...9zD", + "aes_iv": "k2N...redacted...g==", + "encrypted_value": "gM1...redacted...A2m", + "type": "rsa_aes" + } + ``` + + 5. Click **Send**. + + The response body looks similar to the following: + + ```json + { + "id": "09e...redacted...260", + "type": "rsa_aes" + } + ``` ## Step 4: Use the registered secret's ID and encryption type +In this step, you use the registered secret's ID and encryption type to specify the secret when you call the +Unstructured Workflow Endpoint. This step shows how to specify the registered secret's ID and encryption type when +you create a new [Google Drive source connector](/api-reference/workflow/sources/google-drive). + ```python + import os + from unstructured_client import UnstructuredClient + from unstructured_client.models.operations import CreateSourceRequest + from unstructured_client.models.shared import ( + CreateSourceConnector, + SourceConnectorType, + GoogleDriveSourceConnectorConfigInput + ) + + # This code assumes you want to use the default API URL for the + # Unstructured Workflow Endpoint: https://platform.unstructuredapp.io/api/v1 + # To use a different URL, set the UnstructuredClient constructor's + # server_url parameter to the target URL. 
+ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as client: + response = client.sources.create_source( + request=CreateSourceRequest( + create_source_connector=CreateSourceConnector( + name="", + type=SourceConnectorType.GOOGLE_DRIVE, + config=GoogleDriveSourceConnectorConfigInput( + drive_id="1oK...redacted...bmf", + service_account_key={ + "id": "09e...redacted...260", + "type": "rsa_aes" + } + ) + ) + ) + ) + + print(response.source_connector_information.model_dump_json(indent=4)) + ``` + + The output looks similar to the following: + + ```json + { + "config": { + "drive_id": "1oK...redacted...bmf", + "recursive": true, + "service_account_key": "**********" + }, + "created_at": "", + "id": "3c2...redacted...17e", + "name": "", + "type": "google_drive", + "updated_at": "" + } ``` @@ -323,17 +452,79 @@ To complete the following steps, you must have the following: "name": "", "type": "google_drive", "config": { - "drive_id": "", + "drive_id": "1oK...redacted...bmf", "service_account_key": { - "id": "", + "id": "09e...redacted...260", "type": "rsa_aes" } } }' ``` + + The output looks similar to the following: + + ```json + { + "config": { + "drive_id": "1oK...redacted...bmf", + "recursive": true, + "service_account_key": "**********" + }, + "created_at": "", + "id": "3c2...redacted...17e", + "name": "", + "type": "google_drive", + "updated_at": "" + } + ``` - - ... - + 1. In the method drop-down list, select **POST**. + 2. In the address box, enter the following URL: + + ```text + {{UNSTRUCTURED_API_URL}}/sources + ``` + + 3. On the **Headers** tab, enter the following headers: + + - **Key**: `unstructured-api-key`, **Value**: `{{UNSTRUCTURED_API_KEY}}` + - **Key**: `accept`, **Value**: `application/json` + - **Key**: `Content-Type`, **Value**: `application/json` + + 4.
On the **Body** tab, select **raw** and **JSON**, and specify the connector settings, for example: + + ```json + { + "name": "", + "type": "google_drive", + "config": { + "drive_id": "1oK...redacted...bmf", + "service_account_key": { + "id": "09e...redacted...260", + "type": "rsa_aes" + } + } + } + ``` + + 5. Click **Send**. + + The response body looks similar to the following: + + ```json + { + "config": { + "drive_id": "1oK...redacted...bmf", + "recursive": true, + "service_account_key": "**********" + }, + "created_at": "", + "id": "3c2...redacted...17e", + "name": "", + "type": "google_drive", + "updated_at": "" + } + ``` \ No newline at end of file diff --git a/api-reference/workflow/sources/google-drive.mdx b/api-reference/workflow/sources/google-drive.mdx index 64175ba6..66000b19 100644 --- a/api-reference/workflow/sources/google-drive.mdx +++ b/api-reference/workflow/sources/google-drive.mdx @@ -16,6 +16,18 @@ import GoogleDrivePrerequisites from '/snippets/general-shared-text/google-drive To create a Google Drive source connector, see the following examples. + + In the following examples, you must specify the `service_account_key` value as a JSON-formatted object + that contains the ID of the related registered secret and its encryption type. This information represents the + encrypted version of the contents of the Google Cloud service account's `credentials.json` key file. You get this + information by following the instructions in [Secrets](/api-reference/workflow/secrets). + + If you specify the `service_account_key` value as a plain-text string instead, + Unstructured might still create the connector successfully. However, when you then try to test or use the new connector, + the connector will fail and the following error message is returned: + `Field is sensitive and must be wrapped in as a secret reference or new secret value`.
+ + import GoogleDriveSDK from '/snippets/source_connectors/google_drive_sdk.mdx'; import GoogleDriveAPIRESTCreate from '/snippets/source_connectors/google_drive_rest_create.mdx'; diff --git a/docs.json b/docs.json index a79aa4b5..34c5f7f3 100644 --- a/docs.json +++ b/docs.json @@ -202,6 +202,7 @@ }, "api-reference/workflow/workflows", "api-reference/workflow/jobs", + "api-reference/workflow/secrets", { "group": "Endpoint Playground", "openapi": "https://platform.unstructuredapp.io/openapi.json" diff --git a/snippets/general-shared-text/google-drive-api-placeholders.mdx b/snippets/general-shared-text/google-drive-api-placeholders.mdx index 548b478e..106eebd3 100644 --- a/snippets/general-shared-text/google-drive-api-placeholders.mdx +++ b/snippets/general-shared-text/google-drive-api-placeholders.mdx @@ -1,6 +1,6 @@ - `` (_required_) - A unique name for this connector. - `` - The ID for the target Google Drive folder or drive. -- `` - The contents of the `credentials.json` key file as a single-line string. +- For `service_account_key`, specify the ID of the registered secret and its encryption type, representing the encrypted contents of the `credentials.json` key file. For more information, see [Secrets](/api-reference/workflow/secrets). - For `extensions`, set one or more `` values (such as `pdf` or `docx`) to process files with only those extensions. The default is to include all extensions. 
diff --git a/snippets/source_connectors/google_drive_rest_create.mdx b/snippets/source_connectors/google_drive_rest_create.mdx index 23ea85bd..52adf98e 100644 --- a/snippets/source_connectors/google_drive_rest_create.mdx +++ b/snippets/source_connectors/google_drive_rest_create.mdx @@ -10,7 +10,10 @@ curl --request 'POST' --location \ "type": "google_drive", "config": { "drive_id": "", - "service_account_key": "", + "service_account_key": { + "id": "", + "type": "" + }, "extensions": [ "", "" diff --git a/snippets/source_connectors/google_drive_sdk.mdx b/snippets/source_connectors/google_drive_sdk.mdx index 136391d1..189e3dcc 100644 --- a/snippets/source_connectors/google_drive_sdk.mdx +++ b/snippets/source_connectors/google_drive_sdk.mdx @@ -17,7 +17,10 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien type=SourceConnectorType.GOOGLE_DRIVE, config=GoogleDriveSourceConnectorConfigInput( drive_id="", - service_account_key="", + service_account_key={ + "id": "", + "type": "" + }, extensions=[ "", ""