From a75e05c046c39413a2bc9adf68f3e38b420b3beb Mon Sep 17 00:00:00 2001
From: ryannikolaidis
Date: Thu, 18 Apr 2024 17:19:44 +0000
Subject: [PATCH] Update code snippets

---
 Reviewer notes (free-form text in the area after the three-dash line;
 not part of the commit message or of any hunk):

  * Both files are newly created and each contains a single fenced
    Python example.

  * destination_connectors/astra.py.mdx
      - get_writer() returns an AstraWriter whose access config reads
        the ASTRA_DB_TOKEN and ASTRA_DB_ENDPOINT environment variables.
        os.getenv() yields None when a variable is unset, so both must
        be exported before the example is run.
      - The writer targets collection "test_collection" with
        embedding_dimension=384 and a write batch_size of 80.
      - The __main__ section runs a LocalRunner over
        example-docs/book-war-and-peace-1p.txt with chunk_elements=True
        and the "langchain-huggingface" embedding provider, writing
        intermediate output to "local-output-to-astra".
      - NOTE(review): embedding_dimension=384 presumably has to match
        the output size of the embedding model in use -- confirm.

  * source_connectors/salesforce_api.py.mdx
      - The __main__ section runs a SalesforceRunner with
        partition_by_api=True, taking the API key from
        UNSTRUCTURED_API_KEY.
      - Salesforce credentials are read from SALESFORCE_CONSUMER_KEY,
        SALESFORCE_USERNAME and SALESFORCE_PRIVATE_KEY_PATH.
      - NOTE(review): the `private_key` argument is populated from a
        variable whose name ends in _PATH; confirm whether the
        connector expects a file path or the key contents here.

 snippets/destination_connectors/astra.py.mdx  | 58 +++++++++++++++++++
 .../source_connectors/salesforce_api.py.mdx   | 31 ++++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 snippets/destination_connectors/astra.py.mdx
 create mode 100644 snippets/source_connectors/salesforce_api.py.mdx

diff --git a/snippets/destination_connectors/astra.py.mdx b/snippets/destination_connectors/astra.py.mdx
new file mode 100644
index 00000000..27b60dd7
--- /dev/null
+++ b/snippets/destination_connectors/astra.py.mdx
@@ -0,0 +1,58 @@
+```python
+import os
+
+from unstructured.ingest.connector.astra import (
+    AstraAccessConfig,
+    AstraWriteConfig,
+    SimpleAstraConfig,
+)
+from unstructured.ingest.connector.local import SimpleLocalConfig
+from unstructured.ingest.interfaces import (
+    ChunkingConfig,
+    EmbeddingConfig,
+    PartitionConfig,
+    ProcessorConfig,
+    ReadConfig,
+)
+from unstructured.ingest.runner import LocalRunner
+from unstructured.ingest.runner.writers.astra import (
+    AstraWriter,
+)
+from unstructured.ingest.runner.writers.base_writer import Writer
+
+
+def get_writer() -> Writer:
+    return AstraWriter(
+        connector_config=SimpleAstraConfig(
+            access_config=AstraAccessConfig(
+                token=os.getenv("ASTRA_DB_TOKEN"), api_endpoint=os.getenv("ASTRA_DB_ENDPOINT")
+            ),
+            collection_name="test_collection",
+            embedding_dimension=384,
+        ),
+        write_config=AstraWriteConfig(batch_size=80),
+    )
+
+
+if __name__ == "__main__":
+    writer = get_writer()
+    runner = LocalRunner(
+        processor_config=ProcessorConfig(
+            verbose=True,
+            output_dir="local-output-to-astra",
+            num_processes=2,
+        ),
+        connector_config=SimpleLocalConfig(
+            input_path="example-docs/book-war-and-peace-1p.txt",
+        ),
+        read_config=ReadConfig(),
+        partition_config=PartitionConfig(),
+        chunking_config=ChunkingConfig(chunk_elements=True),
+        embedding_config=EmbeddingConfig(
+            provider="langchain-huggingface",
+        ),
+        writer=writer,
+        writer_kwargs={},
+    )
+    runner.run()
+```
diff --git a/snippets/source_connectors/salesforce_api.py.mdx b/snippets/source_connectors/salesforce_api.py.mdx
new file mode 100644
index 00000000..66c9500c
--- /dev/null
+++ b/snippets/source_connectors/salesforce_api.py.mdx
@@ -0,0 +1,31 @@
+```python
+import os
+
+from unstructured.ingest.connector.salesforce import SalesforceAccessConfig, SimpleSalesforceConfig
+from unstructured.ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
+from unstructured.ingest.runner import SalesforceRunner
+
+if __name__ == "__main__":
+    runner = SalesforceRunner(
+        processor_config=ProcessorConfig(
+            verbose=True,
+            output_dir="salesforce-output",
+            num_processes=2,
+        ),
+        read_config=ReadConfig(),
+        partition_config=PartitionConfig(
+            partition_by_api=True,
+            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
+        ),
+        connector_config=SimpleSalesforceConfig(
+            access_config=SalesforceAccessConfig(
+                consumer_key=os.getenv("SALESFORCE_CONSUMER_KEY"),
+            ),
+            username=os.getenv("SALESFORCE_USERNAME"),
+            private_key=os.getenv("SALESFORCE_PRIVATE_KEY_PATH"),
+            categories=["EmailMessage", "Account", "Lead", "Case", "Campaign"],
+            recursive=True,
+        ),
+    )
+    runner.run()
+```