Skip to content

Commit cf33531

Browse files
authored
Merge pull request #391 from jdepoix/feature/avoid-open-tcp-connections
Feature/avoid open tcp connections
2 parents ab5ebac + 6576822 commit cf33531

File tree

4 files changed: 29 additions and 5 deletions

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -283,7 +283,7 @@ therefore integrated it into this module, to make setting it up as easy as possi
283283
### Using [Webshare](https://www.webshare.io/?referral_code=w0xno53eb50g)
284284

285285
Once you have created a [Webshare account](https://www.webshare.io/?referral_code=w0xno53eb50g) and purchased a
286-
"Residential Proxy" package that suites your workload, open the
286+
"Residential Proxy" package that suits your workload, open the
287287
[Webshare Proxy Settings](https://dashboard.webshare.io/proxy/settings) to retrieve your "Proxy Username" and
288288
"Proxy Password". Using this information you can initialize the `YouTubeTranscriptApi` as follows:
289289

@@ -508,7 +508,7 @@ using residential proxies as explained in
508508
[Working around IP bans](#working-around-ip-bans-requestblocked-or-ipblocked-exception). To use
509509
[Webshare residential proxies](https://www.webshare.io/?referral_code=w0xno53eb50g) through the CLI, you will have to
510510
create a [Webshare account](https://www.webshare.io/?referral_code=w0xno53eb50g) and purchase a residential
511-
proxy package that suites your workload. Then you can use the "Proxy Username" and "Proxy Password" which you can find
511+
proxy package that suits your workload. Then you can use the "Proxy Username" and "Proxy Password" which you can find
512512
in your [Webshare Proxy Settings](https://dashboard.webshare.io/proxy/settings), to run the following command:
513513

514514
```

youtube_transcript_api/_api.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,8 @@ def __init__(
4848
http_client.cookies = _load_cookie_jar(cookie_path)
4949
if proxy_config is not None:
5050
http_client.proxies = proxy_config.to_requests_dict()
51+
if proxy_config.prevent_keeping_connections_alive():
52+
http_client.headers.update({"Connection": "close"})
5153
self._fetcher = TranscriptListFetcher(http_client)
5254

5355
def fetch(
@@ -59,7 +61,7 @@ def fetch(
5961
"""
6062
Retrieves the transcript for a single video. This is just a shortcut for
6163
calling:
62-
`YouTubeTranscriptApi.list_transcripts(video_id, proxies).find_transcript(languages).fetch()`
64+
`YouTubeTranscriptApi().list(video_id).find_transcript(languages).fetch(preserve_formatting=preserve_formatting)`
6365
6466
:param video_id: the ID of the video you want to retrieve the transcript for.
6567
Make sure that this is the actual ID, NOT the full URL to the video!

youtube_transcript_api/proxies.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,14 @@ def to_requests_dict(self) -> RequestsProxyConfigDict:
3232
"""
3333
pass
3434

35+
def prevent_keeping_connections_alive(self) -> bool:
36+
"""
37+
If you are using rotating proxies, it can be useful to prevent the HTTP
38+
client from keeping TCP connections alive, as your IP won't be rotated on
39+
every request, if your connection stays open.
40+
"""
41+
return False
42+
3543

3644
class GenericProxyConfig(ProxyConfig):
3745
"""
@@ -76,7 +84,7 @@ class WebshareProxyConfig(GenericProxyConfig):
7684
7785
If you don't have a Webshare account yet, you will have to create one
7886
at https://www.webshare.io/?referral_code=w0xno53eb50g and purchase a residential
79-
proxy package that suites your workload, to be able to use this proxy config.
87+
proxy package that suits your workload, to be able to use this proxy config.
8088
8189
Once you have created an account you only need the "Proxy Username" and
8290
"Proxy Password" that you can find in your Webshare settings
@@ -130,3 +138,6 @@ def http_url(self) -> str:
130138
@property
131139
def https_url(self) -> str:
132140
return self.url
141+
142+
def prevent_keeping_connections_alive(self) -> bool:
143+
return True

youtube_transcript_api/test/test_api.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
RequestBlocked,
2727
VideoUnplayable,
2828
)
29-
from youtube_transcript_api.proxies import GenericProxyConfig
29+
from youtube_transcript_api.proxies import GenericProxyConfig, WebshareProxyConfig
3030

3131

3232
def get_asset_path(filename: str) -> Path:
@@ -330,6 +330,17 @@ def test_fetch__with_proxy(self, to_requests_dict):
330330
)
331331
to_requests_dict.assert_any_call()
332332

333+
@patch("youtube_transcript_api.proxies.GenericProxyConfig.to_requests_dict")
334+
def test_fetch__with_proxy_prevent_alive_connections(self, to_requests_dict):
335+
proxy_config = WebshareProxyConfig(
336+
proxy_username="username", proxy_password="password"
337+
)
338+
339+
YouTubeTranscriptApi(proxy_config=proxy_config).fetch("GJLlxj_dtq8")
340+
341+
request = httpretty.last_request()
342+
self.assertEqual(request.headers.get("Connection"), "close")
343+
333344
def test_fetch__with_cookies(self):
334345
cookie_path = get_asset_path("example_cookies.txt")
335346
transcript = YouTubeTranscriptApi(cookie_path=cookie_path).fetch("GJLlxj_dtq8")

0 commit comments

Comments
 (0)