|
1 | 1 | # Copyright (C) 2019-2024 Intel Corporation
|
2 | 2 | # SPDX-License-Identifier: BSD-3-Clause
|
| 3 | +import os |
3 | 4 | import shlex
|
4 | 5 | import warnings
|
| 6 | +from pathlib import Path |
| 7 | + |
| 8 | +import pathspec |
5 | 9 |
|
6 | 10 | import codebasin.source
|
7 | 11 | import codebasin.walkers
|
@@ -123,3 +127,112 @@ def from_json(cls, instance: dict):
|
123 | 127 | command=command,
|
124 | 128 | output=output,
|
125 | 129 | )
|
| 130 | + |
| 131 | + |
| 132 | +class CodeBase: |
| 133 | + """ |
| 134 | + A representation of all source files in the code base. |
| 135 | +
|
| 136 | + Attributes |
| 137 | + ---------- |
| 138 | + directories: list[str | os.PathLike[str]] |
| 139 | + The set of source directories that make up the code base. |
| 140 | +
|
| 141 | + exclude_patterns: list[str] |
| 142 | + A set of patterns describing source files excluded from the code base. |
| 143 | + """ |
| 144 | + |
| 145 | + def __init__( |
| 146 | + self, |
| 147 | + *directories: str | os.PathLike[str], |
| 148 | + exclude_patterns: list[str] = [], |
| 149 | + ): |
| 150 | + """ |
| 151 | + Raises |
| 152 | + ------ |
| 153 | + TypeError |
| 154 | + If any directory in `directories` is not a path. |
| 155 | + If `exclude_patterns` is not a list of strings. |
| 156 | + """ |
| 157 | + if not isinstance(exclude_patterns, list): |
| 158 | + raise TypeError("'exclude_patterns' must be a list.") |
| 159 | + if not all([isinstance(d, (str, os.PathLike)) for d in directories]): |
| 160 | + raise TypeError( |
| 161 | + "Each directory in 'directories' must be PathLike.", |
| 162 | + ) |
| 163 | + if not all([isinstance(p, str) for p in exclude_patterns]): |
| 164 | + raise TypeError( |
| 165 | + "Each pattern in 'exclude_patterns' must be a string.", |
| 166 | + ) |
| 167 | + self._directories = [Path(d).resolve() for d in directories] |
| 168 | + self._excludes = exclude_patterns |
| 169 | + |
| 170 | + def __repr__(self): |
| 171 | + return ( |
| 172 | + f"CodeBase(directories={self.directories}, " |
| 173 | + + f"exclude_patterns={self.exclude_patterns})" |
| 174 | + ) |
| 175 | + |
| 176 | + @property |
| 177 | + def directories(self): |
| 178 | + return [str(d) for d in self._directories] |
| 179 | + |
| 180 | + @property |
| 181 | + def exclude_patterns(self): |
| 182 | + return self._excludes |
| 183 | + |
| 184 | + def __contains__(self, path: os.PathLike) -> bool: |
| 185 | + """ |
| 186 | + Returns |
| 187 | + _______ |
| 188 | + bool |
| 189 | + True if `path` is a recognized source file in one of the code |
| 190 | + base's listed directories and does not match any exclude |
| 191 | + pattern(s). |
| 192 | + """ |
| 193 | + path = Path(path).resolve() |
| 194 | + |
| 195 | + # Files that don't exist aren't part of the code base. |
| 196 | + if not path.exists(): |
| 197 | + return False |
| 198 | + |
| 199 | + # Directories cannot be source files. |
| 200 | + if path.is_dir(): |
| 201 | + return False |
| 202 | + |
| 203 | + # Files with unrecognized extensions are not source files. |
| 204 | + if not codebasin.source.is_source_file(path): |
| 205 | + return False |
| 206 | + |
| 207 | + # Files outside of any directory are not in the code base. |
| 208 | + # Store the root for evaluation of relative exclude paths later. |
| 209 | + root = None |
| 210 | + for directory in self.directories: |
| 211 | + if path.is_relative_to(directory): |
| 212 | + root = directory |
| 213 | + break |
| 214 | + if root is None: |
| 215 | + return False |
| 216 | + |
| 217 | + # Files matching an exclude pattern are not in the code base. |
| 218 | + # |
| 219 | + # Use GitIgnoreSpec to match git behavior in weird corner cases. |
| 220 | + # Convert relative paths to match .gitignore subdirectory behavior. |
| 221 | + spec = pathspec.GitIgnoreSpec.from_lines(self.exclude_patterns) |
| 222 | + try: |
| 223 | + relative_path = path.relative_to(root) |
| 224 | + if spec.match_file(relative_path): |
| 225 | + return False |
| 226 | + except ValueError: |
| 227 | + pass |
| 228 | + |
| 229 | + return True |
| 230 | + |
| 231 | + def __iter__(self): |
| 232 | + """ |
| 233 | + Iterate over all files in the code base by walking each directory. |
| 234 | + """ |
| 235 | + for directory in self.directories: |
| 236 | + for path in Path(directory).rglob("*"): |
| 237 | + if self.__contains__(path): |
| 238 | + yield str(path) |
0 commit comments