|
10 | 10 | NOTE: the functionality is currently in BETA. The API calls or the provided outputs may change in the future.
|
11 | 11 | """
|
12 | 12 |
|
| 13 | +import json |
13 | 14 | from eventregistry.Base import *
|
14 | 15 | from eventregistry.ReturnInfo import *
|
15 | 16 |
|
@@ -75,17 +76,27 @@ def detectLanguage(self, text):
|
75 | 76 | return self._er.jsonRequestAnalytics("/api/v1/detectLanguage", { "text": text })
|
76 | 77 |
|
77 | 78 |
|
78 |
def extractArticleInfo(self, url, proxyUrl = None, headers = None, cookies = None):
    """
    extract all available information about an article available at url `url`. Returned information will include
    article title, body, authors, links in the articles, ...
    @param url: article url to extract article information from
    @param proxyUrl: proxy that should be used for downloading article information. format: {schema}://{username}:{pass}@{proxy url/ip}
    @param headers: dict with headers to set in the request (optional). A dict is serialized to a JSON string
        before sending; any other non-empty value is passed along unchanged
    @param cookies: dict with cookies to set in the request (optional). A dict is serialized to a JSON string
        before sending; any other non-empty value is passed along unchanged
    @returns: dict
    """
    params = { "url": url }
    if proxyUrl:
        params["proxyUrl"] = proxyUrl
    # headers and cookies are treated identically: skipped when empty/None, JSON-encoded when given
    # as a dict, and forwarded untouched otherwise (e.g. when the caller already passes a JSON string)
    for name, value in (("headers", headers), ("cookies", cookies)):
        if not value:
            continue
        params[name] = json.dumps(value) if isinstance(value, dict) else value
    return self._er.jsonRequestAnalytics("/api/v1/extractArticleInfo", params)
|
90 | 101 |
|
91 | 102 |
|
|
0 commit comments