Skip to content

Commit 1b27df4

Browse files
authored
Allow the Last-Modified header to be absent from the response (#628)
Some time in the last year the server stopped including the "Last-Modified" header in some HTTP responses, which broke the download file handler. Fixes #622.
1 parent 3624640 commit 1b27df4

File tree

2 files changed

+53
-26
lines changed

2 files changed

+53
-26
lines changed

kaggle/api/kaggle_api_extended.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
1-
#!/usr/bin/python
2-
#
3-
# Copyright 2024 Kaggle Inc
4-
#
5-
# Licensed under the Apache License, Version 2.0 (the "License");
6-
# you may not use this file except in compliance with the License.
7-
# You may obtain a copy of the License at
8-
#
9-
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
11-
# Unless required by applicable law or agreed to in writing, software
12-
# distributed under the License is distributed on an "AS IS" BASIS,
13-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14-
# See the License for the specific language governing permissions and
15-
# limitations under the License.
16-
1+
#!/usr/bin/python
2+
#
3+
# Copyright 2024 Kaggle Inc
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
1717
#!/usr/bin/python
1818
#
1919
# Copyright 2019 Kaggle Inc
@@ -1956,8 +1956,12 @@ def download_file(self,
19561956
size = int(response.headers['Content-Length'])
19571957
size_read = 0
19581958
open_mode = 'wb'
1959-
remote_date = datetime.strptime(response.headers['Last-Modified'],
1960-
'%a, %d %b %Y %H:%M:%S %Z')
1959+
last_modified = response.headers.get('Last-Modified')
1960+
if last_modified is None:
1961+
remote_date = datetime.now()
1962+
else:
1963+
remote_date = datetime.strptime(response.headers['Last-Modified'],
1964+
'%a, %d %b %Y %H:%M:%S %Z')
19611965
remote_date_timestamp = time.mktime(remote_date.timetuple())
19621966

19631967
if not quiet:
@@ -3757,8 +3761,12 @@ def download_needed(self, response, outfile, quiet=True):
37573761
quiet: suppress verbose output (default is True)
37583762
"""
37593763
try:
3760-
remote_date = datetime.strptime(response.headers['Last-Modified'],
3761-
'%a, %d %b %Y %H:%M:%S %Z')
3764+
last_modified = response.headers.get('Last-Modified')
3765+
if last_modified is None:
3766+
remote_date = datetime.now()
3767+
else:
3768+
remote_date = datetime.strptime(response.headers['Last-Modified'],
3769+
'%a, %d %b %Y %H:%M:%S %Z')
37623770
file_exists = os.path.isfile(outfile)
37633771
if file_exists:
37643772
local_date = datetime.fromtimestamp(os.path.getmtime(outfile))

src/kaggle/api/kaggle_api_extended.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1520,8 +1520,19 @@ def dataset_download_files(self,
15201520
z.extractall(effective_path)
15211521
except zipfile.BadZipFile as e:
15221522
raise ValueError(
1523-
'Bad zip file, please report on '
1524-
'www.github.com/kaggle/kaggle-api', e)
1523+
f"The file {outfile} is corrupted or not a valid zip file. "
1524+
"Please report this issue at https://www.github.com/kaggle/kaggle-api"
1525+
)
1526+
except FileNotFoundError:
1527+
raise FileNotFoundError(
1528+
f"The file {outfile} was not found. "
1529+
"Please report this issue at https://www.github.com/kaggle/kaggle-api"
1530+
)
1531+
except Exception as e:
1532+
raise RuntimeError(
1533+
f"An unexpected error occurred: {e}. "
1534+
"Please report this issue at https://www.github.com/kaggle/kaggle-api"
1535+
)
15251536

15261537
try:
15271538
os.remove(outfile)
@@ -1929,8 +1940,12 @@ def download_file(self,
19291940
size = int(response.headers['Content-Length'])
19301941
size_read = 0
19311942
open_mode = 'wb'
1932-
remote_date = datetime.strptime(response.headers['Last-Modified'],
1933-
'%a, %d %b %Y %H:%M:%S %Z')
1943+
last_modified = response.headers.get('Last-Modified')
1944+
if last_modified is None:
1945+
remote_date = datetime.now()
1946+
else:
1947+
remote_date = datetime.strptime(response.headers['Last-Modified'],
1948+
'%a, %d %b %Y %H:%M:%S %Z')
19341949
remote_date_timestamp = time.mktime(remote_date.timetuple())
19351950

19361951
if not quiet:
@@ -3730,8 +3745,12 @@ def download_needed(self, response, outfile, quiet=True):
37303745
quiet: suppress verbose output (default is True)
37313746
"""
37323747
try:
3733-
remote_date = datetime.strptime(response.headers['Last-Modified'],
3734-
'%a, %d %b %Y %H:%M:%S %Z')
3748+
last_modified = response.headers.get('Last-Modified')
3749+
if last_modified is None:
3750+
remote_date = datetime.now()
3751+
else:
3752+
remote_date = datetime.strptime(response.headers['Last-Modified'],
3753+
'%a, %d %b %Y %H:%M:%S %Z')
37353754
file_exists = os.path.isfile(outfile)
37363755
if file_exists:
37373756
local_date = datetime.fromtimestamp(os.path.getmtime(outfile))

0 commit comments

Comments
 (0)