Skip to content

Commit c411278

Browse files
committed
process_repo endpoint: clone the repository and build both the code graph and the git graph
1 parent a6c182f commit c411278

File tree

3 files changed

+80
-53
lines changed

3 files changed

+80
-53
lines changed

code_graph/analyzers/source_analyzer.py

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
2-
import tempfile
2+
import shutil
3+
import subprocess
34
import concurrent.futures
45

56
from git import Repo
@@ -19,13 +20,11 @@
1920
'.py': PythonAnalyzer()}
2021

2122
class SourceAnalyzer():
22-
def __init__(self, host: str = 'localhost', port: int = 6379,
23-
username: Optional[str] = None, password: Optional[str] = None) -> None:
24-
25-
self.host = host
26-
self.port = port
27-
self.username = username
28-
self.password = password
23+
def __init__(self) -> None:
24+
self.host = os.getenv('FALKORDB_HOST')
25+
self.port = os.getenv('FALKORDB_PORT')
26+
self.username = os.getenv('FALKORDB_USERNAME')
27+
self.password = os.getenv('FALKORDB_PASSWORD')
2928

3029
def first_pass(self, ignore: List[str], executor: concurrent.futures.Executor) -> None:
3130
"""
@@ -126,34 +125,59 @@ def analyze_sources(self, ignore: List[str]) -> None:
126125
# Second pass analysis of the source code
127126
self.second_pass(ignore, executor)
128127

129-
def analyze_github_repository(self, url: str) -> None:
128+
def analyze_github_repository(
    self,
    url: str,
    repo_path: Path,
    repo_name: str,
    ignore: Optional[List[str]] = None
) -> None:
    """
    Clone a Git repository and build its code graph.

    Any existing directory at `repo_path` is deleted, the repository at
    `url` is cloned into it with `git clone`, and the sources are analyzed
    while the process working directory is temporarily switched to the
    clone.

    Args:
        url: The URL of the Git repository to clone
        repo_path: Local directory to clone into; removed first if it exists
        repo_name: Name given to the graph created for this repository
        ignore: Entries forwarded to analyze_sources to skip during
            analysis; defaults to an empty list

    Raises:
        subprocess.CalledProcessError: If git clone exits with a non-zero status
        OSError: If there are filesystem operation errors
    """

    # Avoid a shared mutable default argument.
    if ignore is None:
        ignore = []

    # Delete local repository if exists, so the clone starts from a clean slate
    if repo_path.exists():
        shutil.rmtree(repo_path)

    # Create directory (git clone accepts an existing empty target directory)
    repo_path.mkdir(parents=True, exist_ok=True)

    # Prepare the git clone command. "--" ends option parsing so that a
    # caller-supplied URL starting with "-" cannot be read as a git option.
    command = ["git", "clone", "--", url, str(repo_path)]

    # Run the git clone command and wait for it to finish; check=True
    # raises CalledProcessError on failure.
    subprocess.run(command, check=True, capture_output=True, text=True)

    # Store original working directory
    original_dir = Path.cwd()

    # Change working directory to local repository.
    # NOTE(review): os.chdir affects the whole process, so concurrent
    # requests would interfere with each other — confirm callers serialize.
    os.chdir(repo_path)

    try:
        # Initialize the graph and analyzer
        self.graph = Graph(repo_name, self.host, self.port, self.username,
                           self.password)

        # Analyze repository
        self.analyze_sources(ignore)

        logger.info(f"Successfully processed repository: {repo_name}")

    finally:
        # Ensure we always return to the original directory
        os.chdir(original_dir)
157181

158182
def analyze_local_folder(self, path: str, ignore: Optional[List[str]] = []) -> Graph:
159183
"""

code_graph/git_utils/git_utils.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,9 @@ def stop_monitor_effects():
108108
print("monitor thread exited")
109109

110110
# build a graph capturing the git commit history
111-
def build_commit_graph(path: str, ignore_list: Optional[List[str]] = []) -> GitGraph:
112-
print(f"Processing git history at: {path}")
113-
print(f"ignoring the following paths: {ignore_list}")
114-
111+
def build_commit_graph(path: str, repo_name: str, ignore_list: Optional[List[str]] = []) -> GitGraph:
115112
repo = Repo(path)
116113

117-
repo_name = os.path.split(os.path.normpath(path))[-1]
118-
119114
# Clone graph into a temporary graph
120115
g = Graph(repo_name).clone(repo_name + "_tmp")
121116
git_graph = GitGraph(GitRepoName(repo_name))

main.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -112,48 +112,56 @@ def get_neighbors():
112112

113113
@app.route('/process_repo', methods=['POST'])
114114
def process_repo():
115-
# Get JSON data from the request
116-
data = request.get_json()
115+
"""
116+
Process a GitHub repository.
117117
118-
# Process the data
118+
Expected JSON payload:
119+
{
120+
"repo_url": "string",
121+
"ignore": ["string"] # optional
122+
}
123+
124+
Returns:
125+
JSON response with processing status
126+
"""
127+
128+
data = request.get_json()
119129
repo_url = data.get('repo_url')
120130
if repo_url is None:
121131
return jsonify({'status': f'Missing mandatory parameter "repo_url"'}), 400
122132
logger.debug(f'Received repo_url: {repo_url}')
123133

124-
# Validate URL
134+
ignore = data.get('ignore', [])
135+
136+
# Validate and normalize URL
125137
try:
126138
urlparse(repo_url)
127139
except ValueError:
128140
return jsonify({'status': 'Invalid repository URL'}), 400
129141

130-
# Extract Organization and Repo name from URL
131-
res = extract_org_name_from_url(repo_url)
132-
if res is None:
133-
return jsonify({'status': f'Failed to process repo_url: {repo_url}'}), 400
134-
135-
org, name = extract_org_name_from_url(repo_url)
136-
logger.debug(f'Org: {org}, name: {name}')
137-
138142
# Convert repo_url to git URL
139143
git_url = repo_url + '.git'
140-
logger.debug(f'git_url: {git_url}')
144+
parsed_url = urlparse(git_url)
145+
logging.debug(f"Processing git URL: {git_url}")
146+
147+
repo_name = parsed_url.path.rstrip('.git').split('/')[-1]
148+
if not repo_name:
149+
raise ValueError(f"Could not extract repository name from URL: {url}")
150+
151+
base_path = Path("./repositories")
152+
repo_path = base_path / repo_name
153+
logging.debug(f"Repository name: {repo_name}")
141154

142155
# Create source code analyzer
143-
analyzer = SourceAnalyzer(host = FALKORDB_HOST,
144-
port = FALKORDB_PORT,
145-
username = FALKORDB_USERNAME,
146-
password = FALKORDB_PASSWORD)
156+
analyzer = SourceAnalyzer()
147157

148158
try:
149-
analyzer.analyze_repository(git_url)
159+
analyzer.analyze_github_repository(git_url, repo_path, repo_name, ignore)
160+
build_commit_graph(repo_path, repo_name, ignore)
150161
except Exception as e:
151162
logger.error(f'An error occurred: {e}')
152163
return jsonify({'status': f'Failed to process repository: {git_url}'}), 400
153164

154-
repo_name = f'{org}/{name}'
155-
save_repository_metadata(git_url, repo_name)
156-
157165
# Create a response
158166
response = {
159167
'status': 'success',

0 commit comments

Comments
 (0)