Skip to content

Commit 05b5846

Browse files
added PDF Generator API
1 parent 797017b commit 05b5846

File tree

5 files changed

+78
-3
lines changed

5 files changed

+78
-3
lines changed

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ It includes:
99
- [SEC Filing Search and Full-Text Search API](#sec-edgar-filings-query-api)
1010
- [Real-Time Filing Stream API](#sec-edgar-filings-real-time-stream-api)
1111
- [Filing Download & PDF Render API](#filing-render--download-api)
12+
- [PDF Generator API](#pdf-generator-api)
1213

1314
**Converter & Extractor APIs:**
1415

@@ -175,6 +176,28 @@ with open("filename.pdf", "wb") as f:
175176

176177
> See the documentation for more details: https://sec-api.io/docs/sec-filings-render-api
177178
179+
## PDF Generator API
180+
181+
SEC filings, including Forms 10-K, 10-Q, 8-K, and others, are typically published in HTML, XML, or text formats. The PDF Generator API enables the conversion of any SEC filing or exhibit into a PDF file, preserving all original formatting, tables, images, and other elements from the filing.
182+
183+
```python
184+
from sec_api import PdfGeneratorApi
185+
186+
pdfGeneratorApi = PdfGeneratorApi("YOUR_API_KEY")
187+
188+
# Form 8-K exhibit URL
189+
edgar_file_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1320695/000132069520000148/ths12-31x201910krecast.htm"
190+
# Form 10-K filing URL
191+
# edgar_file_url = "https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm"
192+
193+
pdf_file = pdfGeneratorApi.get_pdf(edgar_file_url)
194+
195+
with open("filename.pdf", "wb") as f:
196+
    f.write(pdf_file)
197+
```
198+
199+
> See the documentation for more details: https://sec-api.io/docs/pdf-generator-api
200+
178201
## SEC EDGAR Filings Real-Time Stream API
179202

180203
The Stream API provides a live stream (aka feed) of newly published filings on SEC EDGAR via WebSockets. A new filing is sent to your connected client as soon as it is published.

examples.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from sec_api.index import (
22
RenderApi,
3+
PdfGeneratorApi,
34
XbrlApi,
45
ExtractorApi,
56
MappingApi,
@@ -43,6 +44,24 @@
4344
f.write(binary_data)
4445
# """
4546

47+
#
48+
# PDF Generator API
49+
#
50+
"""
51+
pdfGeneratorApi = PdfGeneratorApi("YOUR_API_KEY")
52+
53+
# Form 8-K exhibit URL
54+
edgar_file_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1320695/000132069520000148/ths12-31x201910krecast.htm"
55+
# Form 10-K filing URL
56+
# edgar_file_url = "https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm"
57+
58+
pdf_file = pdfGeneratorApi.get_pdf(edgar_file_url)
59+
60+
with open("filename.pdf", "wb") as f:
61+
f.write(pdf_file)
62+
# """
63+
64+
4665
#
4766
# XBRL-to-JSON API example
4867
#

sec_api/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from sec_api.index import QueryApi
33
from sec_api.index import FullTextSearchApi
44
from sec_api.index import RenderApi
5+
from sec_api.index import PdfGeneratorApi
56

67
# Extractor & Converter APIs
78
from sec_api.index import XbrlApi

sec_api/index.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
query_api_endpoint = "https://api.sec-api.io"
77
full_text_search_api_endpoint = "https://api.sec-api.io/full-text-search"
8-
render_api_endpoint = "https://archive.sec-api.io"
8+
filing_download_api_endpoint = "https://archive.sec-api.io"
9+
pdf_generator_api_endpoint = "https://api.sec-api.io/filing-reader"
910
xbrl_api_endpoint = "https://api.sec-api.io/xbrl-to-json"
1011
extractor_api_endpoint = "https://api.sec-api.io/extractor"
1112
#
@@ -102,7 +103,7 @@ class RenderApi:
102103

103104
def __init__(self, api_key, proxies=None):
104105
self.api_key = api_key
105-
self.api_endpoint = render_api_endpoint
106+
self.api_endpoint = filing_download_api_endpoint
106107
self.proxies = proxies if proxies else {}
107108

108109
def get_filing(self, url, return_binary=False):
@@ -146,6 +147,37 @@ def get_file(self, url, return_binary=False):
146147
handle_api_error(response)
147148

148149

150+
class PdfGeneratorApi:
151+
"""
152+
Base class for PDF Generator API
153+
"""
154+
155+
def __init__(self, api_key, proxies=None):
156+
self.api_key = api_key
157+
self.api_endpoint = pdf_generator_api_endpoint
158+
self.proxies = proxies if proxies else {}
159+
160+
def get_pdf(self, url):
161+
response = {}
162+
file_url = re.sub(r"ix\?doc=/", "", url)
163+
_url = (
164+
self.api_endpoint + "?type=pdf&url=" + file_url + "&token=" + self.api_key
165+
)
166+
167+
# use backoff strategy to handle "too many requests" error.
168+
for x in range(3):
169+
response = requests.get(_url, proxies=self.proxies)
170+
if response.status_code == 200:
171+
return response.content
172+
elif response.status_code == 429:
173+
# wait 500 * (x + 1) milliseconds and try again
174+
time.sleep(0.5 * (x + 1))
175+
else:
176+
handle_api_error(response)
177+
else:
178+
handle_api_error(response)
179+
180+
149181
class XbrlApi:
150182
"""
151183
Base class for XBRL-to-JSON API

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="sec-api",
8-
version="1.0.23",
8+
version="1.0.24",
99
author="SEC API",
1010
author_email="support@sec-api.io",
1111
description="SEC EDGAR Filings API",

0 commit comments

Comments
 (0)