Skip to content

Commit e24b9d2

Browse files
committed
fixed #81
1 parent ab2e9ec commit e24b9d2

File tree

3 files changed

+90
-73
lines changed

3 files changed

+90
-73
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434

3535
# 更新日志
3636

37+
## `v2.6.8`
38+
39+
- 修复分享链接带有 `webpage` 参数时无法下载的问题(兼容页面中 `websign`、`websignkey` 变量名的变体) [#81](https://github.com/zaxtyson/LanZouCloud-API/issues/81)
40+
41+
3742
## `v2.6.7`
3843

3944
- 修复分享链接带有 `webpage` 参数时无法下载的问题[#74](https://github.com/zaxtyson/LanZouCloud-API/issues/74)

lanzou/api/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from lanzou.api.core import LanZouCloud
22

3-
version = '2.6.7.1'
3+
version = '2.6.8'
44

55
__all__ = ['utils', 'types', 'models', 'LanZouCloud', 'version']

lanzou/api/core.py

Lines changed: 84 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -464,67 +464,72 @@ def get_file_info_by_url(self, share_url, pwd='') -> FileDetail:
464464
return FileDetail(LanZouCloud.FILE_CANCELLED, pwd=pwd, url=share_url)
465465

466466
# 这里获取下载直链重定向(30x, 通过 Location 响应头)前的链接
467-
if 'id="pwdload"' in first_page or 'id="passwddiv"' in first_page: # 文件设置了提取码时
468-
if len(pwd) == 0:
469-
return FileDetail(LanZouCloud.LACK_PASSWORD, pwd=pwd, url=share_url) # 没给提取码直接退出
470-
# data : 'action=downprocess&sign=AGZRbwEwU2IEDQU6BDRUaFc8DzxfMlRjCjTPlVkWzFSYFY7ATpWYw_c_c&p='+pwd,
471-
sign = re.search(r"sign=(\w+?)&", first_page).group(1)
472-
post_data = {'action': 'downprocess', 'sign': sign, 'p': pwd}
473-
link_info = self._post(self._host_url + '/ajaxm.php', post_data) # 保存了重定向前的链接信息和文件名
474-
second_page = self._get(share_url) # 再次请求文件分享页面,可以看见文件名,时间,大小等信息(第二页)
475-
if not link_info or not second_page.text:
476-
return FileDetail(LanZouCloud.NETWORK_ERROR, pwd=pwd, url=share_url)
477-
link_info = link_info.json()
478-
second_page = remove_notes(second_page.text)
479-
# 提取文件信息
480-
f_name = link_info['inf'].replace("*", "_")
481-
f_size = re.search(r'大小.+?(\d[\d.,]+\s?[BKM]?)<', second_page)
482-
f_size = f_size.group(1).replace(",", "") if f_size else '0 M'
483-
f_time = re.search(r'class="n_file_infos">(.+?)</span>', second_page)
484-
f_time = time_format(f_time.group(1)) if f_time else time_format('0 小时前')
485-
f_desc = re.search(r'class="n_box_des">(.*?)</div>', second_page)
486-
f_desc = f_desc.group(1) if f_desc else ''
487-
else: # 文件没有设置提取码时,文件信息都暴露在分享页面上
488-
para = re.search(r'<iframe.*?src="(.+?)"', first_page).group(1) # 提取下载页面 URL 的参数
489-
# 文件名位置变化很多
490-
f_name = re.search(r"<title>(.+?) - 蓝奏云</title>", first_page) or \
491-
re.search(r'<div class="filethetext".+?>([^<>]+?)</div>', first_page) or \
492-
re.search(r'<div style="font-size.+?>([^<>].+?)</div>', first_page) or \
493-
re.search(r"var filename = '(.+?)';", first_page) or \
494-
re.search(r'id="filenajax">(.+?)</div>', first_page) or \
495-
re.search(r'<div class="b"><span>([^<>]+?)</span></div>', first_page)
496-
f_name = f_name.group(1).replace("*", "_") if f_name else "未匹配到文件名"
497-
# 匹配文件时间,文件没有时间信息就视为今天,统一表示为 2020-01-01 格式
498-
f_time = re.search(r'>(\d+\s?[秒天分小][钟时]?前|[昨前]天\s?[\d:]+?|\d+\s?天前|\d{4}-\d\d-\d\d)<', first_page)
499-
f_time = time_format(f_time.group(1)) if f_time else time_format('0 小时前')
500-
# 匹配文件大小
501-
f_size = re.search(r'大小.+?(\d[\d.,]+\s?[BKM]?)<', first_page)
502-
f_size = f_size.group(1).replace(",", "") if f_size else '0 M'
503-
f_desc = re.search(r'文件描述.+?<br>\n?\s*(.*?)\s*</td>', first_page)
504-
f_desc = f_desc.group(1) if f_desc else ''
505-
first_page = self._get(self._host_url + para)
506-
if not first_page:
507-
return FileDetail(LanZouCloud.NETWORK_ERROR, name=f_name, time=f_time, size=f_size, desc=f_desc,
508-
pwd=pwd, url=share_url)
509-
first_page = remove_notes(first_page.text)
510-
# 一般情况 sign 的值就在 data 里,有时放在变量后面
511-
sign = re.search(r"'sign':(.+?),", first_page).group(1)
512-
if len(sign) < 20: # 此时 sign 保存在变量里面, 变量名是 sign 匹配的字符
513-
sign = re.search(rf"var {sign}\s*=\s*'(.+?)';", first_page).group(1)
514-
post_data = {'action': 'downprocess', 'sign': sign, 'ves': 1}
515-
# 某些特殊情况 share_url 会出现 webpage 参数, post_data 需要更多参数
516-
# https://github.com/zaxtyson/LanZouCloud-API/issues/74
517-
if "?webpage=" in share_url:
518-
ajax_data = re.search(r"var ajaxdata\s*=\s*'(.+?)';", first_page).group(1)
519-
web_sign = re.search(r"var websign\s*=\s*'(.+?)';", first_page).group(1)
520-
web_sign_key = re.search(r"var websignkey\s*=\s*'(.+?)';", first_page).group(1)
521-
post_data = {'action': 'downprocess', 'signs': ajax_data, 'sign': sign, 'ves': 1,
522-
'websign': web_sign, 'websignkey': web_sign_key}
523-
link_info = self._post(self._host_url + '/ajaxm.php', post_data)
524-
if not link_info:
525-
return FileDetail(LanZouCloud.NETWORK_ERROR, name=f_name, time=f_time, size=f_size, desc=f_desc,
526-
pwd=pwd, url=share_url)
527-
link_info = link_info.json()
467+
try:
468+
if 'id="pwdload"' in first_page or 'id="passwddiv"' in first_page: # 文件设置了提取码时
469+
if len(pwd) == 0:
470+
return FileDetail(LanZouCloud.LACK_PASSWORD, pwd=pwd, url=share_url) # 没给提取码直接退出
471+
# data : 'action=downprocess&sign=AGZRbwEwU2IEDQU6BDRUaFc8DzxfMlRjCjTPlVkWzFSYFY7ATpWYw_c_c&p='+pwd,
472+
sign = re.search(r"sign=(\w+?)&", first_page).group(1)
473+
post_data = {'action': 'downprocess', 'sign': sign, 'p': pwd}
474+
link_info = self._post(self._host_url + '/ajaxm.php', post_data) # 保存了重定向前的链接信息和文件名
475+
second_page = self._get(share_url) # 再次请求文件分享页面,可以看见文件名,时间,大小等信息(第二页)
476+
if not link_info or not second_page.text:
477+
return FileDetail(LanZouCloud.NETWORK_ERROR, pwd=pwd, url=share_url)
478+
link_info = link_info.json()
479+
second_page = remove_notes(second_page.text)
480+
# 提取文件信息
481+
f_name = link_info['inf'].replace("*", "_")
482+
f_size = re.search(r'大小.+?(\d[\d.,]+\s?[BKM]?)<', second_page)
483+
f_size = f_size.group(1).replace(",", "") if f_size else '0 M'
484+
f_time = re.search(r'class="n_file_infos">(.+?)</span>', second_page)
485+
f_time = time_format(f_time.group(1)) if f_time else time_format('0 小时前')
486+
f_desc = re.search(r'class="n_box_des">(.*?)</div>', second_page)
487+
f_desc = f_desc.group(1) if f_desc else ''
488+
else: # 文件没有设置提取码时,文件信息都暴露在分享页面上
489+
para = re.search(r'<iframe.*?src="(.+?)"', first_page).group(1) # 提取下载页面 URL 的参数
490+
# 文件名位置变化很多
491+
f_name = re.search(r"<title>(.+?) - 蓝奏云</title>", first_page) or \
492+
re.search(r'<div class="filethetext".+?>([^<>]+?)</div>', first_page) or \
493+
re.search(r'<div style="font-size.+?>([^<>].+?)</div>', first_page) or \
494+
re.search(r"var filename = '(.+?)';", first_page) or \
495+
re.search(r'id="filenajax">(.+?)</div>', first_page) or \
496+
re.search(r'<div class="b"><span>([^<>]+?)</span></div>', first_page)
497+
f_name = f_name.group(1).replace("*", "_") if f_name else "未匹配到文件名"
498+
# 匹配文件时间,文件没有时间信息就视为今天,统一表示为 2020-01-01 格式
499+
f_time = re.search(r'>(\d+\s?[秒天分小][钟时]?前|[昨前]天\s?[\d:]+?|\d+\s?天前|\d{4}-\d\d-\d\d)<', first_page)
500+
f_time = time_format(f_time.group(1)) if f_time else time_format('0 小时前')
501+
# 匹配文件大小
502+
f_size = re.search(r'大小.+?(\d[\d.,]+\s?[BKM]?)<', first_page)
503+
f_size = f_size.group(1).replace(",", "") if f_size else '0 M'
504+
f_desc = re.search(r'文件描述.+?<br>\n?\s*(.*?)\s*</td>', first_page)
505+
f_desc = f_desc.group(1) if f_desc else ''
506+
first_page = self._get(self._host_url + para)
507+
if not first_page:
508+
return FileDetail(LanZouCloud.NETWORK_ERROR, name=f_name, time=f_time, size=f_size, desc=f_desc,
509+
pwd=pwd, url=share_url)
510+
first_page = remove_notes(first_page.text)
511+
# 一般情况 sign 的值就在 data 里,有时放在变量后面
512+
sign = re.search(r"'sign':(.+?),", first_page).group(1)
513+
if len(sign) < 20: # 此时 sign 保存在变量里面, 变量名是 sign 匹配的字符
514+
sign = re.search(rf"var {sign}\s*=\s*'(.+?)';", first_page).group(1)
515+
post_data = {'action': 'downprocess', 'sign': sign, 'ves': 1}
516+
# 某些特殊情况 share_url 会出现 webpage 参数, post_data 需要更多参数
517+
# https://github.com/zaxtyson/LanZouCloud-API/issues/74
518+
# https://github.com/zaxtyson/LanZouCloud-API/issues/81
519+
if "?webpage=" in share_url:
520+
ajax_data = re.search(r"var ajaxdata\s*=\s*'(.+?)';", first_page).group(1)
521+
web_sign = re.search(r"var a?websigna?\s*=\s*'(.+?)';", first_page).group(1)
522+
web_sign_key = re.search(r"var c?websignkeyc?\s*=\s*'(.+?)';", first_page).group(1)
523+
post_data = {'action': 'downprocess', 'signs': ajax_data, 'sign': sign, 'ves': 1,
524+
'websign': web_sign, 'websignkey': web_sign_key}
525+
link_info = self._post(self._host_url + '/ajaxm.php', post_data)
526+
if not link_info:
527+
return FileDetail(LanZouCloud.NETWORK_ERROR, name=f_name, time=f_time, size=f_size, desc=f_desc,
528+
pwd=pwd, url=share_url)
529+
link_info = link_info.json()
530+
except AttributeError as e: # 正则匹配失败
531+
logger.error(e)
532+
return FileDetail(LanZouCloud.FAILED)
528533

529534
# 这里开始获取文件直链
530535
if link_info['zt'] != 1: # 返回信息异常,无法获取直链
@@ -541,16 +546,20 @@ def get_file_info_by_url(self, share_url, pwd='') -> FileDetail:
541546
if '网络异常' not in download_page_html: # 没有遇到验证码
542547
direct_url = download_page.headers['Location'] # 重定向后的真直链
543548
else: # 遇到验证码,验证后才能获取下载直链
544-
file_token = re.findall("'file':'(.+?)'", download_page_html)[0]
545-
file_sign = re.findall("'sign':'(.+?)'", download_page_html)[0]
546-
check_api = 'https://vip.d0.baidupan.com/file/ajax.php'
547-
post_data = {'file': file_token, 'el': 2, 'sign': file_sign}
548-
sleep(2) # 这里必须等待2s, 否则直链返回 ?SignError
549-
resp = self._post(check_api, post_data)
550-
direct_url = resp.json()['url']
551-
if not direct_url:
552-
return FileDetail(LanZouCloud.CAPTCHA_ERROR, name=f_name, time=f_time, size=f_size, desc=f_desc,
553-
pwd=pwd, url=share_url)
549+
try:
550+
file_token = re.findall("'file':'(.+?)'", download_page_html)[0]
551+
file_sign = re.findall("'sign':'(.+?)'", download_page_html)[0]
552+
check_api = 'https://vip.d0.baidupan.com/file/ajax.php'
553+
post_data = {'file': file_token, 'el': 2, 'sign': file_sign}
554+
sleep(2) # 这里必须等待2s, 否则直链返回 ?SignError
555+
resp = self._post(check_api, post_data)
556+
direct_url = resp.json()['url']
557+
if not direct_url:
558+
return FileDetail(LanZouCloud.CAPTCHA_ERROR, name=f_name, time=f_time, size=f_size, desc=f_desc,
559+
pwd=pwd, url=share_url)
560+
except IndexError as e:
561+
logger.error(e)
562+
return FileDetail(LanZouCloud.FAILED)
554563

555564
f_type = f_name.split('.')[-1]
556565
return FileDetail(LanZouCloud.SUCCESS,
@@ -808,20 +817,23 @@ def _call_back(read_monitor):
808817
monitor = MultipartEncoderMonitor(post_data, _call_back)
809818
result = self._post('https://pc.woozooo.com/fileup.php', data=monitor, headers=tmp_header, timeout=3600)
810819
if not result: # 网络异常
820+
file.close()
811821
return LanZouCloud.NETWORK_ERROR
812822
else:
813823
result = result.json()
814824
if result["zt"] != 1:
815825
logger.debug(f'Upload failed: result={result}')
826+
file.close()
816827
return LanZouCloud.FAILED # 上传失败
817828

818829
if uploaded_handler is not None:
819830
file_id = int(result["text"][0]["id"])
820831
uploaded_handler(file_id, is_file=True) # 对已经上传的文件再进一步处理
821832

822833
if need_delete:
823-
file.close()
824834
os.remove(file_path)
835+
836+
file.close()
825837
return LanZouCloud.SUCCESS
826838

827839
def _upload_big_file(self, file_path, dir_id, *, callback=None, uploaded_handler=None):

0 commit comments

Comments
 (0)