Skip to content

Commit 05f3826

Browse files
committed
Fix zoom chat downloader
1 parent 14b9521 commit 05f3826

File tree

1 file changed

+30
-11
lines changed

1 file changed

+30
-11
lines changed

chat_downloader/sites/zoom.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
from ..utils.core import (
1111
time_to_seconds,
1212
regex_search,
13-
ensure_seconds
13+
ensure_seconds,
14+
multi_get,
1415
)
1516
from ..errors import (
1617
SiteError,
@@ -34,6 +35,7 @@ class ZoomChatDownloader(BaseChatDownloader):
3435

3536
_ZOOM_HOMEPAGE = 'https://zoom.us/'
3637
_ZOOM_PATH_TEMPLATE = 'rec/play/{id}'
38+
_ZOOM_API_TEMPLATE = 'nws/recording/1.0/play/info/{file_id}'
3739

3840
_INITIAL_INFO_REGEX = r'(?s)window\.__data__\s*=\s*({.+?});'
3941
_CHAT_MESSAGES_REGEX = r'window\.__data__\.chatList\.push\((\{[\s\S]+?\})\)'
@@ -43,7 +45,7 @@ class ZoomChatDownloader(BaseChatDownloader):
4345
}
4446

4547
_REMAPPING = {
46-
'username': 'author_name',
48+
'userName': 'author_name',
4749
'time': 'time_text',
4850
'content': 'message',
4951
}
@@ -116,16 +118,34 @@ def get_chat_by_video_id(self, video_id, params, base_url=_ZOOM_HOMEPAGE):
116118
raise ParsingError('Error parsing video')
117119

118120
initial_info = self._parse_js_dict(json_string)
119-
120121
video_type = 'video' if initial_info.get('isVideo') else 'not_video'
121122

122-
return Chat(
123-
self._get_chat_messages(page_data, params),
123+
file_id = initial_info.get('fileId')
124+
if not file_id:
125+
raise ParsingError('Error parsing video. Unable to find file ID.')
126+
127+
api_url = base_url + self._ZOOM_API_TEMPLATE.format(file_id=file_id)
128+
129+
api_data = self._session_get_json(api_url)
124130

125-
title=initial_info.get('topic'),
131+
if api_data.get('errorCode') != 0:
132+
raise ZoomError(
133+
f'An error occured: {api_data.get("errorMessage")} ({api_data.get("errorCode")})')
134+
135+
result = api_data.get('result')
136+
if not result:
137+
raise ZoomError(
138+
f'Unable to find chat messages for video {video_id}')
139+
140+
chat_messages = result.get('meetingChatList') or []
141+
title = multi_get(result, 'meet', 'topic')
142+
return Chat(
143+
self._get_chat_messages(chat_messages, params),
144+
title=title,
126145
video_type=video_type,
127-
start_time=initial_info.get('fileStartTime'),
128-
id=initial_info.get('recordingId'),
146+
start_time=result.get('fileStartTime'),
147+
id=video_id,
148+
duration=result.get('duration'),
129149
)
130150

131151
def _parse_js_dict(self, json_string):
@@ -136,12 +156,11 @@ def _parse_js_dict(self, json_string):
136156
result = re.sub(r":\s+'(.*)'", ": \"\\g<1>\"", result, 0, re.MULTILINE)
137157
return json.loads(result)
138158

139-
def _get_chat_messages(self, page_data, params):
159+
def _get_chat_messages(self, messages, params):
140160
start_time = ensure_seconds(params.get('start_time'), 0)
141161
end_time = ensure_seconds(params.get('end_time'), float('inf'))
142162

143-
for item in re.findall(self._CHAT_MESSAGES_REGEX, page_data):
144-
data = self._parse_js_dict(item)
163+
for data in messages:
145164
data = r.remap_dict(data, self._REMAPPING)
146165

147166
# Process time information

0 commit comments

Comments
 (0)