@@ -432,7 +432,7 @@ def get_file_info_by_url(self, share_url, pwd='') -> FileDetail:
432
432
return FileDetail (LanZouCloud .FILE_CANCELLED , pwd = pwd , url = share_url )
433
433
434
434
# 这里获取下载直链 304 重定向前的链接
435
- if '输入密码 ' in first_page : # 文件设置了提取码时
435
+ if 'id="pwdload"' in first_page or 'id="passwddiv" ' in first_page : # 文件设置了提取码时
436
436
if len (pwd ) == 0 :
437
437
return FileDetail (LanZouCloud .LACK_PASSWORD , pwd = pwd , url = share_url ) # 没给提取码直接退出
438
438
# data : 'action=downprocess&sign=AGZRbwEwU2IEDQU6BDRUaFc8DzxfMlRjCjTPlVkWzFSYFY7ATpWYw_c_c&p='+pwd,
@@ -1012,9 +1012,10 @@ def get_folder_info_by_url(self, share_url, dir_pwd='') -> FolderDetail:
1012
1012
html = requests .get (share_url , headers = self ._headers ).text
1013
1013
except requests .RequestException :
1014
1014
return FolderDetail (LanZouCloud .NETWORK_ERROR )
1015
- if '文件不存在' in html :
1015
+ if '文件不存在' in html or '文件取消' in html :
1016
1016
return FolderDetail (LanZouCloud .FILE_CANCELLED )
1017
- if '请输入密码' in html and len (dir_pwd ) == 0 :
1017
+ # 要求输入密码, 用户描述中可能带有"输入密码",所以不用这个字符串判断
1018
+ if ('id="pwdload"' in html or 'id="passwddiv"' in html ) and len (dir_pwd ) == 0 :
1018
1019
return FolderDetail (LanZouCloud .LACK_PASSWORD )
1019
1020
try :
1020
1021
# 获取文件需要的参数
@@ -1024,17 +1025,36 @@ def get_folder_info_by_url(self, share_url, dir_pwd='') -> FolderDetail:
1024
1025
k = re .findall (r"var [0-9a-z]{6} = '([0-9a-z]{15,})';" , html )[0 ]
1025
1026
# 文件夹的信息
1026
1027
folder_id = re .findall (r"'fid':'?(\d+)'?," , html )[0 ]
1027
- folder_name = re .findall (r"var.+?='(.+?)';\n.+document.title" , html )[0 ]
1028
- folder_time = re .findall (r'class="rets">([\d\-]+?)<a' , html )[0 ] # 日期不全 %m-%d
1029
- folder_desc = re .findall (r'id="filename">(.+?)</span>' , html ) # 无描述时无法完成匹配
1030
- folder_desc = folder_desc [0 ] if len (folder_desc ) == 1 else ''
1028
+ folder_name = re .findall (r"var.+?='(.+?)';\n.+document.title" , html ) or \
1029
+ re .findall (r'<div class="user-title">(.+?)</div>' , html )
1030
+ folder_name = folder_name [0 ]
1031
+
1032
+ folder_time = re .findall (r'class="rets">([\d\-]+?)<a' , html ) # ['%m-%d'] 或者 None (vip自定义)
1033
+ folder_time = folder_time [0 ] if folder_time else datetime .today ().strftime ("%m-%d" ) # 没有就设为现在
1034
+ folder_desc = re .findall (r'id="filename">(.+?)</span>' , html ) or \
1035
+ re .findall (r'<div class="user-radio-\d"></div>(.+?)</div>' , html )
1036
+ folder_desc = folder_desc [0 ] if folder_desc else ""
1031
1037
except IndexError :
1032
1038
return FolderDetail (LanZouCloud .FAILED )
1033
1039
1040
+ # 提取子文件夹信息(vip用户分享的文件夹可以递归包含子文件夹)
1041
+ sub_folders = FolderList ()
1042
+ # 文件夹描述放在 filesize 一栏, 迷惑行为
1043
+ all_sub_folders = re .findall (
1044
+ r'mbxfolder"><a href="(.+?)".+class="filename">(.+?)<div class="filesize">(.+?)</div>' , html )
1045
+ for url , name , desc in all_sub_folders :
1046
+ url = self ._host_url + url
1047
+ time_str = datetime .today ().strftime ('%Y-%m-%d' ) # 网页没有时间信息, 设置为今天
1048
+ sub_folders .append (FolderInfo (name = name , desc = desc , url = url , time = time_str , pwd = dir_pwd ))
1049
+
1050
+ # 提取该文件夹下全部文件
1034
1051
page = 1
1035
1052
files = FileList ()
1036
1053
while True :
1054
+ if page >= 2 : # 连续的请求需要稍等一下
1055
+ sleep (0.6 )
1037
1056
try :
1057
+ logger .debug (f"Parse page { page } ..." )
1038
1058
post_data = {'lx' : lx , 'pg' : page , 'k' : k , 't' : t , 'fid' : folder_id , 'pwd' : dir_pwd }
1039
1059
resp = self ._post (self ._host_url + '/filemoreajax.php' , data = post_data , headers = self ._headers ).json ()
1040
1060
except (requests .RequestException , AttributeError ):
@@ -1058,14 +1078,15 @@ def get_folder_info_by_url(self, share_url, dir_pwd='') -> FolderDetail:
1058
1078
continue
1059
1079
else :
1060
1080
return FolderDetail (LanZouCloud .FAILED ) # 其它未知错误
1081
+
1061
1082
# 通过文件的时间信息补全文件夹的年份(如果有文件的话)
1062
1083
if files : # 最后一个文件上传时间最早,文件夹的创建年份与其相同
1063
1084
folder_time = files [- 1 ].time .split ('-' )[0 ] + '-' + folder_time
1064
1085
else : # 可恶,没有文件,日期就设置为今年吧
1065
1086
folder_time = datetime .today ().strftime ('%Y-%m-%d' )
1066
- return FolderDetail ( LanZouCloud . SUCCESS ,
1067
- FolderInfo (folder_name , folder_id , dir_pwd , folder_time , folder_desc , share_url ),
1068
- files )
1087
+
1088
+ this_folder = FolderInfo (folder_name , folder_id , dir_pwd , folder_time , folder_desc , share_url )
1089
+ return FolderDetail ( LanZouCloud . SUCCESS , this_folder , files , sub_folders )
1069
1090
1070
1091
def get_folder_info_by_id (self , folder_id ):
1071
1092
"""通过 id 获取文件夹及内部文件信息"""
@@ -1173,20 +1194,21 @@ def _down_big_file(self, name, total_size, file_list, save_path, *, callback=Non
1173
1194
return LanZouCloud .SUCCESS
1174
1195
1175
1196
def down_dir_by_url (self , share_url , dir_pwd = '' , save_path = './Download' , * , callback = None , mkdir = True ,
1176
- overwrite = False ,
1197
+ overwrite = False , recursive = False ,
1177
1198
failed_callback = None , downloaded_handler = None ) -> int :
1178
1199
"""通过分享链接下载文件夹
1179
1200
:param overwrite: 下载时是否覆盖原文件, 对大文件也生效
1180
1201
:param save_path 文件夹保存路径
1181
1202
:param mkdir 是否在 save_path 下创建与远程文件夹同名的文件夹
1182
- :param callback: 用于显示单个文件下载进度的回调函数
1203
+ :param callback 用于显示单个文件下载进度的回调函数
1204
+ :param recursive 是否递归下载子文件夹(vip用户)
1183
1205
:param failed_callback 用于处理下载失败文件的回调函数,
1184
1206
def failed_callback(code, file):
1185
1207
print(f"文件名: {file.name}, 时间: {file.time}, 大小: {file.size}, 类型: {file.type}") # 共有属性
1186
1208
if hasattr(file, 'url'): # 使用 URL 下载时
1187
- print(f"文件下载失败, 链接: {file.url}, 错误码: code")
1209
+ print(f"文件下载失败, 链接: {file.url}, 错误码: { code} ")
1188
1210
else: # 登录后使用 ID 下载时
1189
- print(f"文件下载失败, ID: {file.id}, 错误码: code")
1211
+ print(f"文件下载失败, ID: {file.id}, 错误码: { code} ")
1190
1212
:param downloaded_handler: 单个文件下载完成后进一步处理的回调函数 downloaded_handle(file_path)
1191
1213
"""
1192
1214
folder_detail = self .get_folder_info_by_url (share_url , dir_pwd )
@@ -1213,10 +1235,18 @@ def failed_callback(code, file):
1213
1235
if failed_callback is not None :
1214
1236
failed_callback (code , file )
1215
1237
1238
+ # 如果有子文件夹则递归下载子文件夹
1239
+ if recursive and folder_detail .sub_folders :
1240
+ for sub_folder in folder_detail .sub_folders :
1241
+ self .down_dir_by_url (sub_folder .url , dir_pwd , save_path , callback = callback ,
1242
+ overwrite = overwrite ,
1243
+ recursive = True , failed_callback = failed_callback ,
1244
+ downloaded_handler = downloaded_handler )
1245
+
1216
1246
return LanZouCloud .SUCCESS
1217
1247
1218
1248
def down_dir_by_id (self , folder_id , save_path = './Download' , * , callback = None , mkdir = True , overwrite = False ,
1219
- failed_callback = None , downloaded_handler = None ) -> int :
1249
+ failed_callback = None , downloaded_handler = None , recursive = False ) -> int :
1220
1250
"""登录用户通过id下载文件夹"""
1221
1251
file_list = self .get_file_list (folder_id )
1222
1252
if len (file_list ) == 0 :
@@ -1245,4 +1275,12 @@ def down_dir_by_id(self, folder_id, save_path='./Download', *, callback=None, mk
1245
1275
if failed_callback is not None :
1246
1276
failed_callback (code , file )
1247
1277
1278
+ if recursive :
1279
+ sub_folders = self .get_dir_list (folder_id )
1280
+ if len (sub_folders ) != 0 :
1281
+ for sub_folder in sub_folders :
1282
+ self .down_dir_by_id (sub_folder .id , save_path , callback = callback , overwrite = overwrite ,
1283
+ failed_callback = failed_callback , downloaded_handler = downloaded_handler ,
1284
+ recursive = True )
1285
+
1248
1286
return LanZouCloud .SUCCESS
0 commit comments