Skip to content

Fix namespace parsing to correctly discover package namespaces instead of simple string replacement #41942

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 100 additions & 22 deletions tools/azure-sdk-tools/ci_tools/parsing/parse_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,102 @@
VERSION_REGEX = r'^VERSION\s*=\s*[\'"]([^\'"]*)[\'"]'
NEW_REQ_PACKAGES = ["azure-core", "azure-mgmt-core"]

INIT_PY_FILE = "__init__.py"
INIT_EXTENSION_SUBSTRING = ".extend_path(__path__, __name__)"

# Directories to exclude from searches to avoid finding files in wrong places
EXCLUDE = {
"venv",
"__pycache__",
"tests",
"test",
"generated_samples",
"generated_tests",
"samples",
"swagger",
"stress",
"docs",
"doc",
"local",
"scripts",
"images",
".tox"
}


def discover_namespace(package_root_path: str) -> Optional[str]:
"""
Discover the true namespace of a package by walking through its directory structure
and finding the first __init__.py that contains actual content (not just namespace extension).

:param str package_root_path: Root path of the package directory
:rtype: str or None
:return: The discovered namespace string, or None if no suitable namespace found
"""
if not os.path.exists(package_root_path):
return None

namespace = None

for root, subdirs, files in os.walk(package_root_path):
# Ignore any modules with name starts with "_"
# For e.g. _generated, _shared etc
# Ignore build, which is created when installing a package from source.
# Ignore tests, which may have an __init__.py but is not part of the package.
dirs_to_skip = [x for x in subdirs if x.startswith(("_", ".", "test", "build")) or x in EXCLUDE]
for d in dirs_to_skip:
logging.debug("Dirs to skip: {}".format(dirs_to_skip))
subdirs.remove(d)

if INIT_PY_FILE in files:
module_name = os.path.relpath(root, package_root_path).replace(
os.path.sep, "."
)

# If namespace has not been set yet, try to find the first __init__.py that's not purely for extension.
if not namespace:
namespace = _set_root_namespace(
os.path.join(root, INIT_PY_FILE), module_name
)

return namespace


def _set_root_namespace(init_file_path: str, module_name: str) -> Optional[str]:
"""
Examine an __init__.py file to determine if it represents a substantial namespace
or is just a namespace extension file.

:param str init_file_path: Path to the __init__.py file
:param str module_name: The module name corresponding to this __init__.py
:rtype: str or None
:return: The namespace if this file contains substantial content, None otherwise
"""
try:
with open(init_file_path, "r", encoding="utf-8") as f:
in_docstring = False
content = []
for line in f:
stripped_line = line.strip()
# If in multi-line docstring, skip following lines until end of docstring.
# If single-line docstring, skip the docstring line.
if stripped_line.startswith(('"""', "'''")) and not stripped_line.endswith(('"""', "'''")):
in_docstring = not in_docstring
# If comment, skip line. Otherwise, add to content.
if not in_docstring and not stripped_line.startswith("#"):
content.append(line)

# If there's more than one line of content, or if there's one line that's not just namespace extension
if len(content) > 1 or (
len(content) == 1 and INIT_EXTENSION_SUBSTRING not in content[0]
):
return module_name

except Exception as e:
logging.error(f"Error reading {init_file_path}: {e}")

return None


class ParsedSetup:
"""
Expand Down Expand Up @@ -394,7 +490,10 @@ def parse_pyproject(
requires = project_config.get("dependencies")
is_new_sdk = name in NEW_REQ_PACKAGES or any(map(lambda x: (parse_require(x).name in NEW_REQ_PACKAGES), requires))

name_space = name.replace("-", ".")
# Discover the actual namespace by walking the package directory
package_directory = os.path.dirname(pyproject_filename)
discovered_namespace = discover_namespace(package_directory)
name_space = discovered_namespace if discovered_namespace else name.replace("-", ".")
package_data = get_value_from_dict(toml_dict, "tool.setuptools.package-data", None)
include_package_data = get_value_from_dict(toml_dict, "tool.setuptools.include-package-data", True)
classifiers = project_config.get("classifiers", [])
Expand Down Expand Up @@ -430,27 +529,6 @@ def get_version_py(setup_path: str) -> Optional[str]:
"""
Given the path to pyproject.toml or setup.py, attempts to find a (_)version.py file and return its location.
"""
# this list of directories will be excluded from the search for _version.py
# this is to avoid finding _version.py in the wrong place, such as in tests
# or in the venv directory or ANYWHERE ELSE that may mess with the parsing.
EXCLUDE = {
"venv",
"__pycache__",
"tests",
"test",
"generated_samples",
"generated_tests",
"samples",
"swagger",
"stress",
"docs",
"doc",
"local",
"scripts",
"images",
".tox"
}

file_path, _ = os.path.split(setup_path)

# Find path to _version.py recursively
Expand Down
67 changes: 67 additions & 0 deletions tools/azure-sdk-tools/tests/test_parse_functionality.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,70 @@ def test_parse_pyproject_extensions():
assert parsed_project.is_metapackage == False
assert len(parsed_project.ext_modules) == 1
assert str(type(parsed_project.ext_modules[0])) == "<class 'setuptools.extension.Extension'>"


def test_namespace_discovery_eventhub_checkpointstoreblob():
"""Test that namespace discovery works for azure-eventhub-checkpointstoreblob"""
eventhub_path = os.path.join(
os.path.dirname(__file__), "..", "..", "..", "sdk", "eventhub", "azure-eventhub-checkpointstoreblob"
)

# Check if the path exists (it should in the Azure SDK repo)
if os.path.exists(eventhub_path):
parsed_project = ParsedSetup.from_path(eventhub_path)

assert parsed_project.name == "azure-eventhub-checkpointstoreblob"
assert parsed_project.namespace == "azure.eventhub.extensions.checkpointstoreblob"
else:
pytest.skip("azure-eventhub-checkpointstoreblob not found in repository")


def test_namespace_discovery_fallback():
"""Test that namespace discovery falls back to simple replacement when no packages found"""
# This tests the fallback behavior when no actual package structure is found
from ci_tools.parsing.parse_functions import discover_namespace

# Test with non-existent path
result = discover_namespace("/non/existent/path")
assert result is None


def test_namespace_discovery_with_extension_only():
"""Test namespace discovery logic with extension-only __init__.py files"""
from ci_tools.parsing.parse_functions import _set_root_namespace
import tempfile
import os

# Create a temporary __init__.py file with only extension content
with tempfile.NamedTemporaryFile(mode='w', suffix='__init__.py', delete=False) as f:
f.write('# comment\n')
f.write('__path__ = __import__("pkgutil").extend_path(__path__, __name__)\n')
temp_file = f.name

try:
result = _set_root_namespace(temp_file, "test.module")
# Should return None because it only contains extension logic
assert result is None
finally:
os.unlink(temp_file)


def test_namespace_discovery_with_substantial_content():
"""Test namespace discovery logic with substantial __init__.py content"""
from ci_tools.parsing.parse_functions import _set_root_namespace
import tempfile
import os

# Create a temporary __init__.py file with substantial content
with tempfile.NamedTemporaryFile(mode='w', suffix='__init__.py', delete=False) as f:
f.write('# comment\n')
f.write('from ._version import VERSION\n')
f.write('__version__ = VERSION\n')
temp_file = f.name

try:
result = _set_root_namespace(temp_file, "test.module")
# Should return the module name because it contains substantial content
assert result == "test.module"
finally:
os.unlink(temp_file)
Loading