Skip to content

Commit 9e6297f

Browse files
committed
Add CodeBase object to store code base information
Represents a code base as a combination of: - A list of (source) directories; and - A list of exclude patterns. The intent of this object is to replace usages of untyped dictionaries to store code base information, to improve documentation and usability. Signed-off-by: John Pennycook <john.pennycook@intel.com>
1 parent e003bc3 commit 9e6297f

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed

codebasin/__init__.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
# Copyright (C) 2019-2024 Intel Corporation
22
# SPDX-License-Identifier: BSD-3-Clause
3+
import os
34
import shlex
45
import warnings
6+
from pathlib import Path
7+
8+
import pathspec
59

610
import codebasin.source
711
import codebasin.walkers
@@ -123,3 +127,112 @@ def from_json(cls, instance: dict):
123127
command=command,
124128
output=output,
125129
)
130+
131+
132+
class CodeBase:
133+
"""
134+
A representation of all source files in the code base.
135+
136+
Attributes
137+
----------
138+
directories: list[str | os.PathLike[str]]
139+
The set of source directories that make up the code base.
140+
141+
exclude_patterns: list[str]
142+
A set of patterns describing source files excluded from the code base.
143+
"""
144+
145+
def __init__(
146+
self,
147+
*directories: str | os.PathLike[str],
148+
exclude_patterns: list[str] = [],
149+
):
150+
"""
151+
Raises
152+
------
153+
TypeError
154+
If any directory in `directories` is not a path.
155+
If `exclude_patterns` is not a list of strings.
156+
"""
157+
if not isinstance(exclude_patterns, list):
158+
raise TypeError("'exclude_patterns' must be a list.")
159+
if not all([isinstance(d, (str, os.PathLike)) for d in directories]):
160+
raise TypeError(
161+
"Each directory in 'directories' must be PathLike.",
162+
)
163+
if not all([isinstance(p, str) for p in exclude_patterns]):
164+
raise TypeError(
165+
"Each pattern in 'exclude_patterns' must be a string.",
166+
)
167+
self._directories = [Path(d).resolve() for d in directories]
168+
self._excludes = exclude_patterns
169+
170+
def __repr__(self):
171+
return (
172+
f"CodeBase(directories={self.directories}, "
173+
+ f"exclude_patterns={self.exclude_patterns})"
174+
)
175+
176+
@property
177+
def directories(self):
178+
return [str(d) for d in self._directories]
179+
180+
@property
181+
def exclude_patterns(self):
182+
return self._excludes
183+
184+
def __contains__(self, path: os.PathLike) -> bool:
185+
"""
186+
Returns
187+
_______
188+
bool
189+
True if `path` is a recognized source file in one of the code
190+
base's listed directories and does not match any exclude
191+
pattern(s).
192+
"""
193+
path = Path(path).resolve()
194+
195+
# Files that don't exist aren't part of the code base.
196+
if not path.exists():
197+
return False
198+
199+
# Directories cannot be source files.
200+
if path.is_dir():
201+
return False
202+
203+
# Files with unrecognized extensions are not source files.
204+
if not codebasin.source.is_source_file(path):
205+
return False
206+
207+
# Files outside of any directory are not in the code base.
208+
# Store the root for evaluation of relative exclude paths later.
209+
root = None
210+
for directory in self.directories:
211+
if path.is_relative_to(directory):
212+
root = directory
213+
break
214+
if root is None:
215+
return False
216+
217+
# Files matching an exclude pattern are not in the code base.
218+
#
219+
# Use GitIgnoreSpec to match git behavior in weird corner cases.
220+
# Convert relative paths to match .gitignore subdirectory behavior.
221+
spec = pathspec.GitIgnoreSpec.from_lines(self.exclude_patterns)
222+
try:
223+
relative_path = path.relative_to(root)
224+
if spec.match_file(relative_path):
225+
return False
226+
except ValueError:
227+
pass
228+
229+
return True
230+
231+
def __iter__(self):
232+
"""
233+
Iterate over all files in the code base by walking each directory.
234+
"""
235+
for directory in self.directories:
236+
for path in Path(directory).rglob("*"):
237+
if self.__contains__(path):
238+
yield str(path)

0 commit comments

Comments
 (0)