@@ -58,8 +58,6 @@ def get_channel(
5858 ``"streams"``: Streams
5959 """
6060
61- sort_by_map = {"newest" : "dd" , "oldest" : "da" , "popular" : "p" }
62-
6361 base_url = ""
6462 if channel_url :
6563 base_url = channel_url
@@ -68,13 +66,12 @@ def get_channel(
6866 elif channel_username :
6967 base_url = f"https://www.youtube.com/@{ channel_username } "
7068
71- url = "{base_url}/{content_type}?view=0&sort={sort_by}& flow=grid" .format (
69+ url = "{base_url}/{content_type}?view=0&flow=grid" .format (
7270 base_url = base_url ,
7371 content_type = content_type ,
74- sort_by = sort_by_map [sort_by ],
7572 )
7673 api_endpoint = "https://www.youtube.com/youtubei/v1/browse"
77- videos = get_videos (url , api_endpoint , type_property_map [content_type ], limit , sleep )
74+ videos = get_videos (url , api_endpoint , type_property_map [content_type ], limit , sleep , sort_by )
7875 for video in videos :
7976 yield video
8077
@@ -162,15 +159,39 @@ def get_search(
162159 yield video
163160
164161
162+
def get_video(
    id: str,
) -> dict:
    """Get a single video.

    Fetches the watch page for the given video id, extracts the embedded
    ``ytInitialData`` JSON and returns the first
    ``videoPrimaryInfoRenderer`` object found in it.

    Parameters:
        id (``str``):
            The video id from the video you want to get.

    Returns:
        ``dict``: The first ``videoPrimaryInfoRenderer`` entry found in the
        page data.

    Raises:
        StopIteration: Propagated from ``next()`` when the page data yields
            no ``videoPrimaryInfoRenderer`` entry.
    """
    session = get_session()
    try:
        url = f"https://www.youtube.com/watch?v={id}"
        html = get_initial_data(session, url)
        client = json.loads(
            get_json_from_html(html, "INNERTUBE_CONTEXT", 2, '"}},') + '"}}'
        )["client"]
        # NOTE(review): these headers are set after the only request made in
        # this function; kept for parity with the paging code path — confirm
        # whether they are still needed here.
        session.headers["X-YouTube-Client-Name"] = "1"
        session.headers["X-YouTube-Client-Version"] = client["clientVersion"]
        data = json.loads(
            get_json_from_html(html, "var ytInitialData = ", 0, "};") + "}"
        )
        return next(search_dict(data, "videoPrimaryInfoRenderer"))
    finally:
        # Always release the connection pool, even when parsing fails.
        session.close()
186+
187+
188+
165189def get_videos (
166- url : str , api_endpoint : str , selector : str , limit : int , sleep : int
190+ url : str , api_endpoint : str , selector : str , limit : int , sleep : int , sort_by : str = None
167191) -> Generator [dict , None , None ]:
168- session = requests .Session ()
169- session .headers [
170- "User-Agent"
171- ] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36"
192+ session = get_session ()
172193 is_first = True
173- quit = False
194+ quit_it = False
174195 count = 0
175196 while True :
176197 if is_first :
@@ -184,8 +205,10 @@ def get_videos(
184205 data = json .loads (
185206 get_json_from_html (html , "var ytInitialData = " , 0 , "};" ) + "}"
186207 )
187- next_data = get_next_data (data )
208+ next_data = get_next_data (data , sort_by )
188209 is_first = False
210+ if sort_by and sort_by != "newest" :
211+ continue
189212 else :
190213 data = get_ajax_data (session , api_endpoint , api_key , next_data , client )
191214 next_data = get_next_data (data )
@@ -194,20 +217,28 @@ def get_videos(
194217 count += 1
195218 yield result
196219 if count == limit :
197- quit = True
220+ quit_it = True
198221 break
199222 except GeneratorExit :
200- quit = True
223+ quit_it = True
201224 break
202225
203- if not next_data or quit :
226+ if not next_data or quit_it :
204227 break
205228
206229 time .sleep (sleep )
207230
208231 session .close ()
209232
210233
def get_session() -> requests.Session:
    """Create a ``requests`` session pre-configured with default headers.

    The session carries a desktop Chrome ``User-Agent`` string and an
    ``Accept-Language`` header of ``en``.

    Returns:
        ``requests.Session``: A new session; the caller is responsible for
        closing it.
    """
    default_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
        "Accept-Language": "en",
    }
    browser_session = requests.Session()
    browser_session.headers.update(default_headers)
    return browser_session
241+
211242def get_initial_data (session : requests .Session , url : str ) -> str :
212243 session .cookies .set ("CONSENT" , "YES+cb" , domain = ".youtube.com" )
213244 response = session .get (url )
@@ -237,13 +268,23 @@ def get_json_from_html(html: str, key: str, num_chars: int = 2, stop: str = '"')
237268 return html [pos_begin :pos_end ]
238269
239270
def get_next_data(data: dict, sort_by: str = None) -> dict:
    """Extract the continuation token needed to request the next page.

    Parameters:
        data (``dict``):
            Parsed page or AJAX response data to search.
        sort_by (``str``, *optional*):
            ``"newest"``, ``"popular"`` or ``"oldest"``. When set to anything
            other than ``"newest"``, the endpoint is taken from the matching
            chip of the ``feedFilterChipBarRenderer`` instead of the regular
            ``continuationEndpoint``.

    Returns:
        ``dict``: ``{"token": ..., "click_params": {...}}``, or ``None`` when
        no suitable endpoint is present in ``data``.

    Raises:
        KeyError: If ``sort_by`` is not one of the supported values.
    """
    # Youtube, please don't change the order of these
    sort_by_map = {
        "newest": 0,
        "popular": 1,
        "oldest": 2,
    }
    if sort_by and sort_by != "newest":
        chip_index = sort_by_map[sort_by]
        chip_bar = next(search_dict(data, "feedFilterChipBarRenderer"), None)
        if not chip_bar:
            # No sort chips on this page: report "no next data" instead of
            # failing with a TypeError on None.
            return None
        endpoint = chip_bar["contents"][chip_index]["chipCloudChipRenderer"][
            "navigationEndpoint"
        ]
    else:
        endpoint = next(search_dict(data, "continuationEndpoint"), None)
    if not endpoint:
        return None
    next_data = {
        "token": endpoint["continuationCommand"]["token"],
        "click_params": {"clickTrackingParams": endpoint["clickTrackingParams"]},
    }

    return next_data
0 commit comments