@@ -464,67 +464,72 @@ def get_file_info_by_url(self, share_url, pwd='') -> FileDetail:
464
464
return FileDetail (LanZouCloud .FILE_CANCELLED , pwd = pwd , url = share_url )
465
465
466
466
# 这里获取下载直链在 HTTP 重定向(3xx,带 Location 头)之前的链接
467
- if 'id="pwdload"' in first_page or 'id="passwddiv"' in first_page : # 文件设置了提取码时
468
- if len (pwd ) == 0 :
469
- return FileDetail (LanZouCloud .LACK_PASSWORD , pwd = pwd , url = share_url ) # 没给提取码直接退出
470
- # data : 'action=downprocess&sign=AGZRbwEwU2IEDQU6BDRUaFc8DzxfMlRjCjTPlVkWzFSYFY7ATpWYw_c_c&p='+pwd,
471
- sign = re .search (r"sign=(\w+?)&" , first_page ).group (1 )
472
- post_data = {'action' : 'downprocess' , 'sign' : sign , 'p' : pwd }
473
- link_info = self ._post (self ._host_url + '/ajaxm.php' , post_data ) # 保存了重定向前的链接信息和文件名
474
- second_page = self ._get (share_url ) # 再次请求文件分享页面,可以看见文件名,时间,大小等信息(第二页)
475
- if not link_info or not second_page .text :
476
- return FileDetail (LanZouCloud .NETWORK_ERROR , pwd = pwd , url = share_url )
477
- link_info = link_info .json ()
478
- second_page = remove_notes (second_page .text )
479
- # 提取文件信息
480
- f_name = link_info ['inf' ].replace ("*" , "_" )
481
- f_size = re .search (r'大小.+?(\d[\d.,]+\s?[BKM]?)<' , second_page )
482
- f_size = f_size .group (1 ).replace ("," , "" ) if f_size else '0 M'
483
- f_time = re .search (r'class="n_file_infos">(.+?)</span>' , second_page )
484
- f_time = time_format (f_time .group (1 )) if f_time else time_format ('0 小时前' )
485
- f_desc = re .search (r'class="n_box_des">(.*?)</div>' , second_page )
486
- f_desc = f_desc .group (1 ) if f_desc else ''
487
- else : # 文件没有设置提取码时,文件信息都暴露在分享页面上
488
- para = re .search (r'<iframe.*?src="(.+?)"' , first_page ).group (1 ) # 提取下载页面 URL 的参数
489
- # 文件名位置变化很多
490
- f_name = re .search (r"<title>(.+?) - 蓝奏云</title>" , first_page ) or \
491
- re .search (r'<div class="filethetext".+?>([^<>]+?)</div>' , first_page ) or \
492
- re .search (r'<div style="font-size.+?>([^<>].+?)</div>' , first_page ) or \
493
- re .search (r"var filename = '(.+?)';" , first_page ) or \
494
- re .search (r'id="filenajax">(.+?)</div>' , first_page ) or \
495
- re .search (r'<div class="b"><span>([^<>]+?)</span></div>' , first_page )
496
- f_name = f_name .group (1 ).replace ("*" , "_" ) if f_name else "未匹配到文件名"
497
- # 匹配文件时间,文件没有时间信息就视为今天,统一表示为 2020-01-01 格式
498
- f_time = re .search (r'>(\d+\s?[秒天分小][钟时]?前|[昨前]天\s?[\d:]+?|\d+\s?天前|\d{4}-\d\d-\d\d)<' , first_page )
499
- f_time = time_format (f_time .group (1 )) if f_time else time_format ('0 小时前' )
500
- # 匹配文件大小
501
- f_size = re .search (r'大小.+?(\d[\d.,]+\s?[BKM]?)<' , first_page )
502
- f_size = f_size .group (1 ).replace ("," , "" ) if f_size else '0 M'
503
- f_desc = re .search (r'文件描述.+?<br>\n?\s*(.*?)\s*</td>' , first_page )
504
- f_desc = f_desc .group (1 ) if f_desc else ''
505
- first_page = self ._get (self ._host_url + para )
506
- if not first_page :
507
- return FileDetail (LanZouCloud .NETWORK_ERROR , name = f_name , time = f_time , size = f_size , desc = f_desc ,
508
- pwd = pwd , url = share_url )
509
- first_page = remove_notes (first_page .text )
510
- # 一般情况 sign 的值就在 data 里,有时放在变量后面
511
- sign = re .search (r"'sign':(.+?)," , first_page ).group (1 )
512
- if len (sign ) < 20 : # 此时 sign 保存在变量里面, 变量名是 sign 匹配的字符
513
- sign = re .search (rf"var { sign } \s*=\s*'(.+?)';" , first_page ).group (1 )
514
- post_data = {'action' : 'downprocess' , 'sign' : sign , 'ves' : 1 }
515
- # 某些特殊情况 share_url 会出现 webpage 参数, post_data 需要更多参数
516
- # https://github.com/zaxtyson/LanZouCloud-API/issues/74
517
- if "?webpage=" in share_url :
518
- ajax_data = re .search (r"var ajaxdata\s*=\s*'(.+?)';" , first_page ).group (1 )
519
- web_sign = re .search (r"var websign\s*=\s*'(.+?)';" , first_page ).group (1 )
520
- web_sign_key = re .search (r"var websignkey\s*=\s*'(.+?)';" , first_page ).group (1 )
521
- post_data = {'action' : 'downprocess' , 'signs' : ajax_data , 'sign' : sign , 'ves' : 1 ,
522
- 'websign' : web_sign , 'websignkey' : web_sign_key }
523
- link_info = self ._post (self ._host_url + '/ajaxm.php' , post_data )
524
- if not link_info :
525
- return FileDetail (LanZouCloud .NETWORK_ERROR , name = f_name , time = f_time , size = f_size , desc = f_desc ,
526
- pwd = pwd , url = share_url )
527
- link_info = link_info .json ()
467
+ try :
468
+ if 'id="pwdload"' in first_page or 'id="passwddiv"' in first_page : # 文件设置了提取码时
469
+ if len (pwd ) == 0 :
470
+ return FileDetail (LanZouCloud .LACK_PASSWORD , pwd = pwd , url = share_url ) # 没给提取码直接退出
471
+ # data : 'action=downprocess&sign=AGZRbwEwU2IEDQU6BDRUaFc8DzxfMlRjCjTPlVkWzFSYFY7ATpWYw_c_c&p='+pwd,
472
+ sign = re .search (r"sign=(\w+?)&" , first_page ).group (1 )
473
+ post_data = {'action' : 'downprocess' , 'sign' : sign , 'p' : pwd }
474
+ link_info = self ._post (self ._host_url + '/ajaxm.php' , post_data ) # 保存了重定向前的链接信息和文件名
475
+ second_page = self ._get (share_url ) # 再次请求文件分享页面,可以看见文件名,时间,大小等信息(第二页)
476
+ if not link_info or not second_page .text :
477
+ return FileDetail (LanZouCloud .NETWORK_ERROR , pwd = pwd , url = share_url )
478
+ link_info = link_info .json ()
479
+ second_page = remove_notes (second_page .text )
480
+ # 提取文件信息
481
+ f_name = link_info ['inf' ].replace ("*" , "_" )
482
+ f_size = re .search (r'大小.+?(\d[\d.,]+\s?[BKM]?)<' , second_page )
483
+ f_size = f_size .group (1 ).replace ("," , "" ) if f_size else '0 M'
484
+ f_time = re .search (r'class="n_file_infos">(.+?)</span>' , second_page )
485
+ f_time = time_format (f_time .group (1 )) if f_time else time_format ('0 小时前' )
486
+ f_desc = re .search (r'class="n_box_des">(.*?)</div>' , second_page )
487
+ f_desc = f_desc .group (1 ) if f_desc else ''
488
+ else : # 文件没有设置提取码时,文件信息都暴露在分享页面上
489
+ para = re .search (r'<iframe.*?src="(.+?)"' , first_page ).group (1 ) # 提取下载页面 URL 的参数
490
+ # 文件名位置变化很多
491
+ f_name = re .search (r"<title>(.+?) - 蓝奏云</title>" , first_page ) or \
492
+ re .search (r'<div class="filethetext".+?>([^<>]+?)</div>' , first_page ) or \
493
+ re .search (r'<div style="font-size.+?>([^<>].+?)</div>' , first_page ) or \
494
+ re .search (r"var filename = '(.+?)';" , first_page ) or \
495
+ re .search (r'id="filenajax">(.+?)</div>' , first_page ) or \
496
+ re .search (r'<div class="b"><span>([^<>]+?)</span></div>' , first_page )
497
+ f_name = f_name .group (1 ).replace ("*" , "_" ) if f_name else "未匹配到文件名"
498
+ # 匹配文件时间,文件没有时间信息就视为今天,统一表示为 2020-01-01 格式
499
+ f_time = re .search (r'>(\d+\s?[秒天分小][钟时]?前|[昨前]天\s?[\d:]+?|\d+\s?天前|\d{4}-\d\d-\d\d)<' , first_page )
500
+ f_time = time_format (f_time .group (1 )) if f_time else time_format ('0 小时前' )
501
+ # 匹配文件大小
502
+ f_size = re .search (r'大小.+?(\d[\d.,]+\s?[BKM]?)<' , first_page )
503
+ f_size = f_size .group (1 ).replace ("," , "" ) if f_size else '0 M'
504
+ f_desc = re .search (r'文件描述.+?<br>\n?\s*(.*?)\s*</td>' , first_page )
505
+ f_desc = f_desc .group (1 ) if f_desc else ''
506
+ first_page = self ._get (self ._host_url + para )
507
+ if not first_page :
508
+ return FileDetail (LanZouCloud .NETWORK_ERROR , name = f_name , time = f_time , size = f_size , desc = f_desc ,
509
+ pwd = pwd , url = share_url )
510
+ first_page = remove_notes (first_page .text )
511
+ # 一般情况 sign 的值就在 data 里,有时放在变量后面
512
+ sign = re .search (r"'sign':(.+?)," , first_page ).group (1 )
513
+ if len (sign ) < 20 : # 此时 sign 保存在变量里面, 变量名是 sign 匹配的字符
514
+ sign = re .search (rf"var { sign } \s*=\s*'(.+?)';" , first_page ).group (1 )
515
+ post_data = {'action' : 'downprocess' , 'sign' : sign , 'ves' : 1 }
516
+ # 某些特殊情况 share_url 会出现 webpage 参数, post_data 需要更多参数
517
+ # https://github.com/zaxtyson/LanZouCloud-API/issues/74
518
+ # https://github.com/zaxtyson/LanZouCloud-API/issues/81
519
+ if "?webpage=" in share_url :
520
+ ajax_data = re .search (r"var ajaxdata\s*=\s*'(.+?)';" , first_page ).group (1 )
521
+ web_sign = re .search (r"var a?websigna?\s*=\s*'(.+?)';" , first_page ).group (1 )
522
+ web_sign_key = re .search (r"var c?websignkeyc?\s*=\s*'(.+?)';" , first_page ).group (1 )
523
+ post_data = {'action' : 'downprocess' , 'signs' : ajax_data , 'sign' : sign , 'ves' : 1 ,
524
+ 'websign' : web_sign , 'websignkey' : web_sign_key }
525
+ link_info = self ._post (self ._host_url + '/ajaxm.php' , post_data )
526
+ if not link_info :
527
+ return FileDetail (LanZouCloud .NETWORK_ERROR , name = f_name , time = f_time , size = f_size , desc = f_desc ,
528
+ pwd = pwd , url = share_url )
529
+ link_info = link_info .json ()
530
+ except AttributeError as e : # 正则匹配失败
531
+ logger .error (e )
532
+ return FileDetail (LanZouCloud .FAILED )
528
533
529
534
# 这里开始获取文件直链
530
535
if link_info ['zt' ] != 1 : # 返回信息异常,无法获取直链
@@ -541,16 +546,20 @@ def get_file_info_by_url(self, share_url, pwd='') -> FileDetail:
541
546
if '网络异常' not in download_page_html : # 没有遇到验证码
542
547
direct_url = download_page .headers ['Location' ] # 重定向后的真直链
543
548
else : # 遇到验证码,验证后才能获取下载直链
544
- file_token = re .findall ("'file':'(.+?)'" , download_page_html )[0 ]
545
- file_sign = re .findall ("'sign':'(.+?)'" , download_page_html )[0 ]
546
- check_api = 'https://vip.d0.baidupan.com/file/ajax.php'
547
- post_data = {'file' : file_token , 'el' : 2 , 'sign' : file_sign }
548
- sleep (2 ) # 这里必需等待2s, 否则直链返回 ?SignError
549
- resp = self ._post (check_api , post_data )
550
- direct_url = resp .json ()['url' ]
551
- if not direct_url :
552
- return FileDetail (LanZouCloud .CAPTCHA_ERROR , name = f_name , time = f_time , size = f_size , desc = f_desc ,
553
- pwd = pwd , url = share_url )
549
+ try :
550
+ file_token = re .findall ("'file':'(.+?)'" , download_page_html )[0 ]
551
+ file_sign = re .findall ("'sign':'(.+?)'" , download_page_html )[0 ]
552
+ check_api = 'https://vip.d0.baidupan.com/file/ajax.php'
553
+ post_data = {'file' : file_token , 'el' : 2 , 'sign' : file_sign }
554
+ sleep (2 ) # 这里必需等待2s, 否则直链返回 ?SignError
555
+ resp = self ._post (check_api , post_data )
556
+ direct_url = resp .json ()['url' ]
557
+ if not direct_url :
558
+ return FileDetail (LanZouCloud .CAPTCHA_ERROR , name = f_name , time = f_time , size = f_size , desc = f_desc ,
559
+ pwd = pwd , url = share_url )
560
+ except IndexError as e :
561
+ logger .error (e )
562
+ return FileDetail (LanZouCloud .FAILED )
554
563
555
564
f_type = f_name .split ('.' )[- 1 ]
556
565
return FileDetail (LanZouCloud .SUCCESS ,
@@ -808,20 +817,23 @@ def _call_back(read_monitor):
808
817
monitor = MultipartEncoderMonitor (post_data , _call_back )
809
818
result = self ._post ('https://pc.woozooo.com/fileup.php' , data = monitor , headers = tmp_header , timeout = 3600 )
810
819
if not result : # 网络异常
820
+ file .close ()
811
821
return LanZouCloud .NETWORK_ERROR
812
822
else :
813
823
result = result .json ()
814
824
if result ["zt" ] != 1 :
815
825
logger .debug (f'Upload failed: result={ result } ' )
826
+ file .close ()
816
827
return LanZouCloud .FAILED # 上传失败
817
828
818
829
if uploaded_handler is not None :
819
830
file_id = int (result ["text" ][0 ]["id" ])
820
831
uploaded_handler (file_id , is_file = True ) # 对已经上传的文件再进一步处理
821
832
822
833
if need_delete :
823
- file .close ()
824
834
os .remove (file_path )
835
+
836
+ file .close ()
825
837
return LanZouCloud .SUCCESS
826
838
827
839
def _upload_big_file (self , file_path , dir_id , * , callback = None , uploaded_handler = None ):
0 commit comments