Skip to content

Commit 4294649

Browse files
authored
Implement Custom Content-Disposition Header Parser to get rid of the CGI dependency (#1088)
2 parents c98806b + 8a3066c commit 4294649

File tree

3 files changed

+97
-13
lines changed

3 files changed

+97
-13
lines changed

ads/common/utils.py

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8; -*-
32

43
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
54
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
65

7-
from __future__ import absolute_import, print_function
86

97
import collections
108
import contextlib
@@ -23,9 +21,8 @@
2321
from datetime import datetime
2422
from enum import Enum
2523
from io import DEFAULT_BUFFER_SIZE
26-
from pathlib import Path
2724
from textwrap import fill
28-
from typing import Dict, Optional, Union
25+
from typing import Dict, Optional, Tuple, Union
2926
from urllib import request
3027
from urllib.parse import urlparse
3128

@@ -501,13 +498,13 @@ def print_user_message(
501498
if is_documentation_mode() and is_notebook():
502499
if display_type.lower() == "tip":
503500
if "\n" in msg:
504-
t = "<b>{}:</b>".format(title.upper().strip()) if title else ""
501+
t = f"<b>{title.upper().strip()}:</b>" if title else ""
505502

506503
user_message = "{}{}".format(
507504
t,
508505
"".join(
509506
[
510-
"<br>&nbsp;&nbsp;+&nbsp;{}".format(x.strip())
507+
f"<br>&nbsp;&nbsp;+&nbsp;{x.strip()}"
511508
for x in msg.strip().split("\n")
512509
]
513510
),
@@ -646,7 +643,7 @@ def ellipsis_strings(raw, n=24):
646643
else:
647644
n2 = int(n) // 2 - 3
648645
n1 = n - n2 - 3
649-
result.append("{0}...{1}".format(s[:n1], s[-n2:]))
646+
result.append(f"{s[:n1]}...{s[-n2:]}")
650647

651648
return result
652649

@@ -942,9 +939,9 @@ def generate_requirement_file(
942939
with open(os.path.join(file_path, file_name), "w") as req_file:
943940
for lib in requirements:
944941
if requirements[lib]:
945-
req_file.write("{}=={}\n".format(lib, requirements[lib]))
942+
req_file.write(f"{lib}=={requirements[lib]}\n")
946943
else:
947-
req_file.write("{}\n".format(lib))
944+
req_file.write(f"{lib}\n")
948945

949946

950947
def _get_feature_type_and_dtype(column):
@@ -966,7 +963,7 @@ def to_dataframe(
966963
pd.Series,
967964
np.ndarray,
968965
pd.DataFrame,
969-
]
966+
],
970967
):
971968
"""
972969
Convert to pandas DataFrame.
@@ -1391,7 +1388,7 @@ def remove_file(file_path: str, auth: Optional[Dict] = None) -> None:
13911388
fs = fsspec.filesystem(scheme, **auth)
13921389
try:
13931390
fs.rm(file_path)
1394-
except FileNotFoundError as e:
1391+
except FileNotFoundError:
13951392
raise FileNotFoundError(f"`{file_path}` not found.")
13961393
except Exception as e:
13971394
raise e
@@ -1786,3 +1783,36 @@ def get_log_links(
17861783
console_link_url = f"https://cloud.oracle.com/logging/log-groups/{log_group_id}?region={region}"
17871784

17881785
return console_link_url
1786+
1787+
1788+
def _split_header_segments(header: str) -> list:
    """Split a header on semicolons that are not inside a double-quoted string."""
    segments = []
    current = []
    in_quotes = False
    escaped = False
    for char in header:
        if escaped:
            # Character following a backslash inside quotes is taken literally.
            current.append(char)
            escaped = False
        elif in_quotes and char == "\\":
            current.append(char)
            escaped = True
        elif char == '"':
            in_quotes = not in_quotes
            current.append(char)
        elif char == ";" and not in_quotes:
            segments.append("".join(current))
            current = []
        else:
            current.append(char)
    segments.append("".join(current))
    return segments


def _unquote_header_value(value: str) -> str:
    """Remove surrounding double quotes from a parameter value and unescape it."""
    if len(value) >= 2 and value[0] == value[-1] == '"':
        # Same unescaping rules as the legacy ``cgi.parse_header``: only
        # ``\\`` and ``\"`` are treated as escape sequences, so lone
        # backslashes (e.g. Windows-style paths) are preserved.
        return value[1:-1].replace("\\\\", "\\").replace('\\"', '"')
    # Not a well-formed quoted string: just drop any stray edge quotes.
    return value.strip('"')


def parse_content_disposition(header: str) -> Tuple[str, Dict[str, str]]:
    """
    Parses a Content-Disposition header into its main disposition and a dictionary of parameters.

    For example:
        'attachment; filename="example.txt"'
    will be parsed into:
        ('attachment', {'filename': 'example.txt'})

    Semicolons that appear inside a double-quoted value do not terminate the
    parameter, and backslash-escaped quotes/backslashes inside a quoted value
    are unescaped, e.g.:
        'attachment; filename="a;b.txt"'
    will be parsed into:
        ('attachment', {'filename': 'a;b.txt'})

    Parameters
    ----------
    header (str): The Content-Disposition header string. ``None`` or an empty
        string yields an empty result.

    Returns
    -------
    Tuple[str, Dict[str, str]]: A tuple containing the lowercased disposition
        and a dictionary of parameters. Parameter names are lowercased; values
        keep their original case. Segments without ``=`` are ignored.
    """
    if not header:
        return "", {}

    segments = _split_header_segments(header)
    # The first segment is the main disposition (e.g., "attachment").
    disposition = segments[0].strip().lower()
    params: Dict[str, str] = {}

    # Every following segment is expected to look like ``key=value``.
    for segment in segments[1:]:
        key, separator, value = segment.partition("=")
        if not separator:
            continue
        params[key.strip().lower()] = _unquote_header_value(value.strip())
    return disposition, params

ads/model/datascience_model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
# Copyright (c) 2022, 2025 Oracle and/or its affiliates.
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55

6-
import cgi
76
import json
87
import logging
98
import os
@@ -1776,7 +1775,9 @@ def _update_from_oci_dsc_model(
17761775
# Update artifact info
17771776
try:
17781777
artifact_info = self.dsc_model.get_artifact_info()
1779-
_, file_name_info = cgi.parse_header(artifact_info["Content-Disposition"])
1778+
_, file_name_info = utils.parse_content_disposition(
1779+
artifact_info["Content-Disposition"]
1780+
)
17801781

17811782
if self.dsc_model._is_model_by_reference():
17821783
_, file_extension = os.path.splitext(file_name_info["filename"])

tests/unitary/default_setup/common/test_common_utils.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
extract_region,
2929
folder_size,
3030
human_size,
31+
parse_content_disposition,
3132
remove_file,
3233
upload_to_os,
3334
)
@@ -579,3 +580,55 @@ def __init__(self, status_code):
579580
progress_callback=ANY,
580581
)
581582
assert response.status == 200
583+
584+
585+
class TestParseContentDisposition:
    """Unit tests for ``parse_content_disposition``."""

    def test_attachment_with_quotes(self):
        # A quoted filename is returned without its surrounding quotes.
        raw = 'attachment; filename="example.txt"'
        kind, options = parse_content_disposition(raw)
        assert (kind, options) == ("attachment", {"filename": "example.txt"})

    def test_attachment_without_quotes(self):
        # An unquoted filename is returned as-is.
        raw = "attachment; filename=example.txt"
        kind, options = parse_content_disposition(raw)
        assert (kind, options) == ("attachment", {"filename": "example.txt"})

    def test_inline_no_params(self):
        # A bare disposition yields an empty parameter dictionary.
        raw = "inline"
        kind, options = parse_content_disposition(raw)
        assert (kind, options) == ("inline", {})

    def test_multiple_params(self):
        # Every ``key=value`` segment ends up in the dictionary.
        raw = 'attachment; filename="example.txt"; size=12345'
        kind, options = parse_content_disposition(raw)
        assert (kind, options) == (
            "attachment",
            {"filename": "example.txt", "size": "12345"},
        )

    def test_extra_whitespace(self):
        # Whitespace around the disposition, keys and values is discarded.
        raw = ' attachment ; filename = "example.txt" ; param = value '
        kind, options = parse_content_disposition(raw)
        assert (kind, options) == (
            "attachment",
            {"filename": "example.txt", "param": "value"},
        )

    def test_form_data(self):
        raw = 'form-data; name="fieldName"; filename="filename.jpg"'
        kind, options = parse_content_disposition(raw)
        # Keys are lowercased by the parser, while values keep the case they
        # had in the header, so 'name' still maps to "fieldName".
        assert (kind, options) == (
            "form-data",
            {"name": "fieldName", "filename": "filename.jpg"},
        )

    def test_no_semicolon(self):
        # A header without any semicolon is just a disposition.
        raw = "attachment"
        kind, options = parse_content_disposition(raw)
        assert (kind, options) == ("attachment", {})

    def test_none(self):
        # A missing header produces an empty disposition and no parameters.
        raw = None
        kind, options = parse_content_disposition(raw)
        assert (kind, options) == ("", {})

0 commit comments

Comments
 (0)