INIT_PY_FILE = "__init__.py"
INIT_EXTENSION_SUBSTRING = ".extend_path(__path__, __name__)"

# Directory names that are never part of the shipped package. They are excluded
# from searches so files are not picked up from the wrong place (tests, samples,
# virtual environments, docs, ...). Kept at module level so that every search
# helper in this module can share the same set.
EXCLUDE = {
    "venv",
    "__pycache__",
    "tests",
    "test",
    "generated_samples",
    "generated_tests",
    "samples",
    "swagger",
    "stress",
    "docs",
    "doc",
    "local",
    "scripts",
    "images",
    ".tox",
}

# Directory-name prefixes pruned while looking for the namespace:
#   "_"     private modules such as _generated or _shared
#   "."     hidden directories
#   "test"  test folders, which may hold an __init__.py but are not shipped
#   "build" created when a package is installed from source
_SKIPPED_DIR_PREFIXES = ("_", ".", "test", "build")

# Delimiters that open or close a triple-quoted docstring.
_DOCSTRING_DELIMITERS = ('"""', "'''")


def discover_namespace(package_root_path: str) -> Optional[str]:
    """
    Discover the true namespace of a package by walking its directory structure
    and returning the dotted path of the first __init__.py that contains actual
    content (not just a namespace extension).

    parse_pyproject calls this with the directory that holds pyproject.toml and
    falls back to ``name.replace("-", ".")`` when None is returned.

    :param str package_root_path: Root path of the package directory
    :rtype: str or None
    :return: The discovered namespace string, or None if the path is not a
        directory or no suitable namespace is found
    """
    if not os.path.isdir(package_root_path):
        return None

    for root, subdirs, files in os.walk(package_root_path):
        dirs_to_skip = [d for d in subdirs if d.startswith(_SKIPPED_DIR_PREFIXES) or d in EXCLUDE]
        if dirs_to_skip:
            logging.debug("Dirs to skip: %s", dirs_to_skip)

        # Prune in place so os.walk does not descend into skipped directories.
        # Sorting makes the traversal order, and therefore the "first" match,
        # independent of the order in which the filesystem lists entries.
        subdirs[:] = sorted(set(subdirs).difference(dirs_to_skip))

        if INIT_PY_FILE not in files:
            continue

        relative_root = os.path.relpath(root, package_root_path)
        if relative_root == os.curdir:
            # An __init__.py sitting directly in the package root has no dotted
            # module name; reporting "." as the namespace would be meaningless.
            continue

        namespace = _set_root_namespace(
            os.path.join(root, INIT_PY_FILE), relative_root.replace(os.path.sep, ".")
        )
        if namespace:
            # The first substantial __init__.py wins; no need to keep walking.
            return namespace

    return None


def _set_root_namespace(init_file_path: str, module_name: str) -> Optional[str]:
    """
    Examine an __init__.py file to determine if it represents a substantial namespace
    or is just a namespace extension file.

    Blank lines, ``#`` comment lines and triple-quoted docstrings (single-line or
    multi-line) are ignored. The file is substantial when more than one line
    remains, or when the single remaining line is not the pkgutil
    ``extend_path`` call.

    :param str init_file_path: Path to the __init__.py file
    :param str module_name: The module name corresponding to this __init__.py
    :rtype: str or None
    :return: The namespace if this file contains substantial content, None otherwise
        (including when the file cannot be read)
    """
    content = []
    # Delimiter of the docstring currently open, or None when outside a docstring.
    open_delimiter = None

    try:
        with open(init_file_path, "r", encoding="utf-8") as f:
            for line in f:
                stripped_line = line.strip()

                if open_delimiter is not None:
                    # Inside a multi-line docstring: skip lines up to and
                    # including the one that carries the closing delimiter.
                    if open_delimiter in stripped_line:
                        open_delimiter = None
                    continue

                if not stripped_line or stripped_line.startswith("#"):
                    continue

                delimiter = next(
                    (d for d in _DOCSTRING_DELIMITERS if stripped_line.startswith(d)), None
                )
                if delimiter is not None:
                    # The docstring stays open only when the closing delimiter
                    # is not on the same line as the opening one.
                    if delimiter not in stripped_line[len(delimiter):]:
                        open_delimiter = delimiter
                    continue

                content.append(line)
                if len(content) > 1:
                    # Two content lines already make the file substantial.
                    break
    except (OSError, UnicodeDecodeError) as e:
        logging.error("Error reading %s: %s", init_file_path, e)
        return None

    if len(content) > 1:
        return module_name
    if content and INIT_EXTENSION_SUBSTRING not in content[0]:
        return module_name
    return None
def test_namespace_discovery_eventhub_checkpointstoreblob():
    """Test that namespace discovery works for azure-eventhub-checkpointstoreblob"""
    eventhub_path = os.path.join(
        os.path.dirname(__file__), "..", "..", "..", "sdk", "eventhub", "azure-eventhub-checkpointstoreblob"
    )

    # The package is only present inside a full Azure SDK repository checkout.
    if not os.path.exists(eventhub_path):
        pytest.skip("azure-eventhub-checkpointstoreblob not found in repository")

    parsed_project = ParsedSetup.from_path(eventhub_path)

    assert parsed_project.name == "azure-eventhub-checkpointstoreblob"
    assert parsed_project.namespace == "azure.eventhub.extensions.checkpointstoreblob"


def test_namespace_discovery_fallback():
    """Test that discover_namespace returns None for a path that does not exist"""
    from ci_tools.parsing.parse_functions import discover_namespace

    # A None result is what lets the caller fall back to simple name replacement.
    result = discover_namespace("/non/existent/path")
    assert result is None


def test_namespace_discovery_with_extension_only():
    """Test namespace discovery logic with extension-only __init__.py files"""
    from ci_tools.parsing.parse_functions import _set_root_namespace
    import tempfile

    # Write a real __init__.py containing only a comment and the extension line.
    # The directory (and the file in it) is removed when the block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        init_file = os.path.join(temp_dir, "__init__.py")
        # The file is read back as UTF-8, so write it as UTF-8 too.
        with open(init_file, "w", encoding="utf-8") as f:
            f.write("# comment\n")
            f.write('__path__ = __import__("pkgutil").extend_path(__path__, __name__)\n')

        result = _set_root_namespace(init_file, "test.module")

    # Should return None because it only contains extension logic
    assert result is None


def test_namespace_discovery_with_substantial_content():
    """Test namespace discovery logic with substantial __init__.py content"""
    from ci_tools.parsing.parse_functions import _set_root_namespace
    import tempfile

    # Write a real __init__.py with actual module content.
    # The directory (and the file in it) is removed when the block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        init_file = os.path.join(temp_dir, "__init__.py")
        # The file is read back as UTF-8, so write it as UTF-8 too.
        with open(init_file, "w", encoding="utf-8") as f:
            f.write("# comment\n")
            f.write("from ._version import VERSION\n")
            f.write("__version__ = VERSION\n")

        result = _set_root_namespace(init_file, "test.module")

    # Should return the module name because it contains substantial content
    assert result == "test.module"