Skip to content

Commit fb1ac9e

Browse files
Copilotscbedd
andauthored
Fix namespace parsing to correctly discover package namespaces instead of simple string replacement (#41942)
* Implement namespace discovery logic to replace simple string replacement during parsing of a pyproject.toml --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: scbedd <45376673+scbedd@users.noreply.github.com>
1 parent f75f82c commit fb1ac9e

File tree

2 files changed

+167
-22
lines changed

2 files changed

+167
-22
lines changed

tools/azure-sdk-tools/ci_tools/parsing/parse_functions.py

Lines changed: 100 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,102 @@
2727
VERSION_REGEX = r'^VERSION\s*=\s*[\'"]([^\'"]*)[\'"]'
2828
NEW_REQ_PACKAGES = ["azure-core", "azure-mgmt-core"]
2929

30+
INIT_PY_FILE = "__init__.py"
31+
INIT_EXTENSION_SUBSTRING = ".extend_path(__path__, __name__)"
32+
33+
# Directories to exclude from searches to avoid finding files in wrong places
34+
EXCLUDE = {
35+
"venv",
36+
"__pycache__",
37+
"tests",
38+
"test",
39+
"generated_samples",
40+
"generated_tests",
41+
"samples",
42+
"swagger",
43+
"stress",
44+
"docs",
45+
"doc",
46+
"local",
47+
"scripts",
48+
"images",
49+
".tox"
50+
}
51+
52+
53+
def discover_namespace(package_root_path: str) -> Optional[str]:
54+
"""
55+
Discover the true namespace of a package by walking through its directory structure
56+
and finding the first __init__.py that contains actual content (not just namespace extension).
57+
58+
:param str package_root_path: Root path of the package directory
59+
:rtype: str or None
60+
:return: The discovered namespace string, or None if no suitable namespace found
61+
"""
62+
if not os.path.exists(package_root_path):
63+
return None
64+
65+
namespace = None
66+
67+
for root, subdirs, files in os.walk(package_root_path):
68+
# Ignore any modules with name starts with "_"
69+
# For e.g. _generated, _shared etc
70+
# Ignore build, which is created when installing a package from source.
71+
# Ignore tests, which may have an __init__.py but is not part of the package.
72+
dirs_to_skip = [x for x in subdirs if x.startswith(("_", ".", "test", "build")) or x in EXCLUDE]
73+
for d in dirs_to_skip:
74+
logging.debug("Dirs to skip: {}".format(dirs_to_skip))
75+
subdirs.remove(d)
76+
77+
if INIT_PY_FILE in files:
78+
module_name = os.path.relpath(root, package_root_path).replace(
79+
os.path.sep, "."
80+
)
81+
82+
# If namespace has not been set yet, try to find the first __init__.py that's not purely for extension.
83+
if not namespace:
84+
namespace = _set_root_namespace(
85+
os.path.join(root, INIT_PY_FILE), module_name
86+
)
87+
88+
return namespace
89+
90+
91+
def _set_root_namespace(init_file_path: str, module_name: str) -> Optional[str]:
92+
"""
93+
Examine an __init__.py file to determine if it represents a substantial namespace
94+
or is just a namespace extension file.
95+
96+
:param str init_file_path: Path to the __init__.py file
97+
:param str module_name: The module name corresponding to this __init__.py
98+
:rtype: str or None
99+
:return: The namespace if this file contains substantial content, None otherwise
100+
"""
101+
try:
102+
with open(init_file_path, "r", encoding="utf-8") as f:
103+
in_docstring = False
104+
content = []
105+
for line in f:
106+
stripped_line = line.strip()
107+
# If in multi-line docstring, skip following lines until end of docstring.
108+
# If single-line docstring, skip the docstring line.
109+
if stripped_line.startswith(('"""', "'''")) and not stripped_line.endswith(('"""', "'''")):
110+
in_docstring = not in_docstring
111+
# If comment, skip line. Otherwise, add to content.
112+
if not in_docstring and not stripped_line.startswith("#"):
113+
content.append(line)
114+
115+
# If there's more than one line of content, or if there's one line that's not just namespace extension
116+
if len(content) > 1 or (
117+
len(content) == 1 and INIT_EXTENSION_SUBSTRING not in content[0]
118+
):
119+
return module_name
120+
121+
except Exception as e:
122+
logging.error(f"Error reading {init_file_path}: {e}")
123+
124+
return None
125+
30126

31127
class ParsedSetup:
32128
"""
@@ -394,7 +490,10 @@ def parse_pyproject(
394490
requires = project_config.get("dependencies")
395491
is_new_sdk = name in NEW_REQ_PACKAGES or any(map(lambda x: (parse_require(x).name in NEW_REQ_PACKAGES), requires))
396492

397-
name_space = name.replace("-", ".")
493+
# Discover the actual namespace by walking the package directory
494+
package_directory = os.path.dirname(pyproject_filename)
495+
discovered_namespace = discover_namespace(package_directory)
496+
name_space = discovered_namespace if discovered_namespace else name.replace("-", ".")
398497
package_data = get_value_from_dict(toml_dict, "tool.setuptools.package-data", None)
399498
include_package_data = get_value_from_dict(toml_dict, "tool.setuptools.include-package-data", True)
400499
classifiers = project_config.get("classifiers", [])
@@ -430,27 +529,6 @@ def get_version_py(setup_path: str) -> Optional[str]:
430529
"""
431530
Given the path to pyproject.toml or setup.py, attempts to find a (_)version.py file and return its location.
432531
"""
433-
# this list of directories will be excluded from the search for _version.py
434-
# this is to avoid finding _version.py in the wrong place, such as in tests
435-
# or in the venv directory or ANYWHERE ELSE that may mess with the parsing.
436-
EXCLUDE = {
437-
"venv",
438-
"__pycache__",
439-
"tests",
440-
"test",
441-
"generated_samples",
442-
"generated_tests",
443-
"samples",
444-
"swagger",
445-
"stress",
446-
"docs",
447-
"doc",
448-
"local",
449-
"scripts",
450-
"images",
451-
".tox"
452-
}
453-
454532
file_path, _ = os.path.split(setup_path)
455533

456534
# Find path to _version.py recursively

tools/azure-sdk-tools/tests/test_parse_functionality.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,70 @@ def test_parse_pyproject_extensions():
267267
assert parsed_project.is_metapackage == False
268268
assert len(parsed_project.ext_modules) == 1
269269
assert str(type(parsed_project.ext_modules[0])) == "<class 'setuptools.extension.Extension'>"
270+
271+
272+
def test_namespace_discovery_eventhub_checkpointstoreblob():
273+
"""Test that namespace discovery works for azure-eventhub-checkpointstoreblob"""
274+
eventhub_path = os.path.join(
275+
os.path.dirname(__file__), "..", "..", "..", "sdk", "eventhub", "azure-eventhub-checkpointstoreblob"
276+
)
277+
278+
# Check if the path exists (it should in the Azure SDK repo)
279+
if os.path.exists(eventhub_path):
280+
parsed_project = ParsedSetup.from_path(eventhub_path)
281+
282+
assert parsed_project.name == "azure-eventhub-checkpointstoreblob"
283+
assert parsed_project.namespace == "azure.eventhub.extensions.checkpointstoreblob"
284+
else:
285+
pytest.skip("azure-eventhub-checkpointstoreblob not found in repository")
286+
287+
288+
def test_namespace_discovery_fallback():
289+
"""Test that namespace discovery falls back to simple replacement when no packages found"""
290+
# This tests the fallback behavior when no actual package structure is found
291+
from ci_tools.parsing.parse_functions import discover_namespace
292+
293+
# Test with non-existent path
294+
result = discover_namespace("/non/existent/path")
295+
assert result is None
296+
297+
298+
def test_namespace_discovery_with_extension_only():
299+
"""Test namespace discovery logic with extension-only __init__.py files"""
300+
from ci_tools.parsing.parse_functions import _set_root_namespace
301+
import tempfile
302+
import os
303+
304+
# Create a temporary __init__.py file with only extension content
305+
with tempfile.NamedTemporaryFile(mode='w', suffix='__init__.py', delete=False) as f:
306+
f.write('# comment\n')
307+
f.write('__path__ = __import__("pkgutil").extend_path(__path__, __name__)\n')
308+
temp_file = f.name
309+
310+
try:
311+
result = _set_root_namespace(temp_file, "test.module")
312+
# Should return None because it only contains extension logic
313+
assert result is None
314+
finally:
315+
os.unlink(temp_file)
316+
317+
318+
def test_namespace_discovery_with_substantial_content():
319+
"""Test namespace discovery logic with substantial __init__.py content"""
320+
from ci_tools.parsing.parse_functions import _set_root_namespace
321+
import tempfile
322+
import os
323+
324+
# Create a temporary __init__.py file with substantial content
325+
with tempfile.NamedTemporaryFile(mode='w', suffix='__init__.py', delete=False) as f:
326+
f.write('# comment\n')
327+
f.write('from ._version import VERSION\n')
328+
f.write('__version__ = VERSION\n')
329+
temp_file = f.name
330+
331+
try:
332+
result = _set_root_namespace(temp_file, "test.module")
333+
# Should return the module name because it contains substantial content
334+
assert result == "test.module"
335+
finally:
336+
os.unlink(temp_file)

0 commit comments

Comments
 (0)