Skip to content

Commit 0388b1b

Browse files
committed
switch commit
1 parent 139a982 commit 0388b1b

File tree

3 files changed

+236
-42
lines changed

3 files changed

+236
-42
lines changed

code_graph/git_utils/git_graph.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import logging
3+
from git import Commit
34
from falkordb import FalkorDB, Node
45
from typing import List, Optional
56

@@ -40,15 +41,18 @@ def _commit_from_node(self, node:Node) -> dict:
4041
'author': node.properties['author'],
4142
'message': node.properties['message']}
4243

43-
def add_commit(self, commit_hash: str, author: str, message: str, date: int) -> None:
44+
def add_commit(self, commit: Commit) -> None:
4445
"""
4546
Add a new commit to the graph
4647
"""
47-
48-
logging.info(f"Adding commit {commit_hash}: {message}")
48+
date = commit.committed_date
49+
author = commit.author.name
50+
hexsha = commit.hexsha
51+
message = commit.message
52+
logging.info(f"Adding commit {hexsha}: {message}")
4953

5054
q = "MERGE (c:Commit {hash: $hash, author: $author, message: $message, date: $date})"
51-
params = {'hash': commit_hash, 'author': author, 'message': message, 'date': date}
55+
params = {'hash': hexsha, 'author': author, 'message': message, 'date': date}
5256
self.g.query(q, params)
5357

5458
def list_commits(self) -> List[Node]:
@@ -79,6 +83,18 @@ def get_commits(self, hashes: List[str]) -> List[dict]:
7983
logging.info(f"retrived commits: {commits}")
8084
return commits
8185

86+
def get_child_commit(self, parent) -> Optional[dict]:
87+
q = """MATCH (c:Commit {hash: $parent})-[:CHILD]->(child: Commit)
88+
RETURN child"""
89+
90+
res = self.g.query(q, {'parent': parent}).result_set
91+
92+
if len(res) > 0:
93+
assert(len(res) == 1)
94+
return self._commit_from_node(res[0][0])
95+
96+
return None
97+
8298
def connect_commits(self, child: str, parent: str) -> None:
8399
"""
84100
connect commits via both PARENT and CHILD edges
@@ -119,7 +135,7 @@ def set_child_transition(self, child: str, parent: str, queries: [str], params:
119135
q = """MATCH (parent :Commit {hash: $parent})-[e:CHILD]->(child :Commit {hash: $child})
120136
SET e.queries = $queries, e.params = $params"""
121137

122-
_params = {'child': child, 'parent': parent, 'queries': queries}
138+
_params = {'child': child, 'parent': parent, 'queries': queries, 'params': params}
123139

124140
self.g.query(q, _params)
125141

code_graph/git_utils/git_utils.py

Lines changed: 119 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,9 @@ def build_commit_graph(
7878
GitGraph: Graph object representing the commit history.
7979
"""
8080

81-
# Clone graph into a temporary graph
81+
# Copy the graph into a temporary graph
8282
logging.info(f"Cloning source graph {repo_name} -> {repo_name}_tmp")
83+
# Will be deleted at the end of this function
8384
g = Graph(repo_name).clone(repo_name + "_tmp")
8485
g.enable_backlog()
8586

@@ -88,25 +89,31 @@ def build_commit_graph(
8889
supported_types = analyzer.supported_types()
8990

9091
# Initialize with the current commit
92+
# Save current git for later restoration
9193
repo = Repo('.')
9294
current_commit = repo.head.commit
9395
current_commit_hexsha = current_commit.hexsha
9496

95-
# add commit to the git graph
96-
git_graph.add_commit(current_commit.hexsha, current_commit.author.name,
97-
current_commit.message, current_commit.committed_date)
97+
# Add commit to the git graph
98+
git_graph.add_commit(current_commit)
9899

99-
while len(current_commit.parents) > 0:
100-
prev_commit = current_commit.parents[0]
100+
#--------------------------------------------------------------------------
101+
# Process git history going backwards
102+
#--------------------------------------------------------------------------
103+
104+
logging.info("Computing transition queries moving backwards")
105+
106+
child_commit = current_commit
107+
while len(child_commit.parents) > 0:
108+
parent_commit = child_commit.parents[0]
101109

102110
# add commit to the git graph
103-
git_graph.add_commit(prev_commit.hexsha, prev_commit.author.name,
104-
prev_commit.message, prev_commit.committed_date)
111+
git_graph.add_commit(parent_commit)
105112

106113
# connect child parent commits relation
107-
git_graph.connect_commits(current_commit.hexsha, prev_commit.hexsha)
114+
git_graph.connect_commits(child_commit.hexsha, parent_commit.hexsha)
108115

109-
# represents the changes going backward!
116+
# Represents the changes going backward!
110117
# e.g. which files need to be deleted when moving back one commit
111118
#
112119
# if we were to switch "direction" going forward
@@ -116,18 +123,18 @@ def build_commit_graph(
116123

117124
# Process file changes in this commit
118125
logging.info(f"""Computing diff between
119-
child {current_commit.hexsha}: {current_commit.message}
120-
and {prev_commit.hexsha}: {prev_commit.message}""")
126+
child {child_commit.hexsha}: {child_commit.message}
127+
and {parent_commit.hexsha}: {parent_commit.message}""")
121128

122-
diff = current_commit.diff(prev_commit)
129+
diff = child_commit.diff(parent_commit)
123130
added, deleted, modified = classify_changes(diff, ignore_list)
124131

125-
# Use the repo's git interface to checkout the prev commit
126-
logging.info(f"Checking out commit: {prev_commit.hexsha}")
127-
repo.git.checkout(prev_commit.hexsha)
132+
# Checkout prev commit
133+
logging.info(f"Checking out commit: {parent_commit.hexsha}")
134+
repo.git.checkout(parent_commit.hexsha)
128135

129136
#-----------------------------------------------------------------------
130-
# apply changes
137+
# Apply changes going backwards
131138
#-----------------------------------------------------------------------
132139

133140
# apply deletions
@@ -153,33 +160,107 @@ def build_commit_graph(
153160
analyzer.analyze_file(new_file, g)
154161

155162
queries, params = g.clear_backlog()
163+
164+
# Save transition queries to the git graph
156165
if len(queries) > 0:
157166
assert(len(queries) == len(params))
158167

159168
# Convert parameters from dict to JSON formatted string
160169
params = [json.dumps(p) for p in params]
161170

162-
# log transitions
171+
# Log transitions
163172
logging.debug(f"""Save graph transition from
164-
commit: {current_commit.hexsha}
173+
commit: {child_commit.hexsha}
165174
to
166-
commit: {prev_commit.hexsha}
175+
commit: {parent_commit.hexsha}
167176
Queries: {queries}
168177
Parameters: {params}
169178
""")
170179

171-
git_graph.set_parent_transition(current_commit.hexsha,
172-
prev_commit.hexsha, queries, params)
180+
git_graph.set_parent_transition(child_commit.hexsha,
181+
parent_commit.hexsha, queries, params)
173182
# advance to the next commit
174-
current_commit = prev_commit
183+
child_commit = parent_commit
175184

176-
logging.debug("Done processing repository commit history")
185+
#--------------------------------------------------------------------------
186+
# Process git history going forward
187+
#--------------------------------------------------------------------------
188+
189+
logging.info("Computing transition queries moving forward")
190+
parent_commit = child_commit
191+
while parent_commit.hexsha != current_commit_hexsha:
192+
child_commit = git_graph.get_child_commit(parent_commit.hexsha)
193+
child_commit = repo.commit(child_commit['hash'])
194+
195+
# Represents the changes going forward
196+
# e.g. which files need to be deleted when moving forward one commit
197+
198+
# Process file changes in this commit
199+
logging.info(f"""Computing diff between
200+
child {parent_commit.hexsha}: {parent_commit.message}
201+
and {child_commit.hexsha}: {child_commit.message}""")
202+
203+
diff = parent_commit.diff(child_commit)
204+
added, deleted, modified = classify_changes(diff, ignore_list)
177205

178-
# clean up
206+
# Checkout child commit
207+
logging.info(f"Checking out commit: {child_commit.hexsha}")
208+
repo.git.checkout(child_commit.hexsha)
209+
210+
#-----------------------------------------------------------------------
211+
# Apply changes going forward
212+
#-----------------------------------------------------------------------
213+
214+
# apply deletions
215+
# TODO: a bit of a waste, compute in previous loop
216+
deleted_files = []
217+
for deleted_file_path in deleted:
218+
_ext = os.path.splitext(deleted_file_path)[1]
219+
if _ext in supported_types:
220+
_path = os.path.dirname(deleted_file_path)
221+
_name = os.path.basename(deleted_file_path)
222+
deleted_files.append(
223+
{'path': _path, 'name': _name, 'ext' : _ext})
224+
225+
# remove deleted files from the graph
226+
if len(deleted_files) > 0:
227+
logging.info(f"Removing deleted files: {deleted_files}")
228+
g.delete_files(deleted_files)
229+
230+
if len(added) > 0:
231+
for new_file in added:
232+
# A new file has been added
233+
logging.info(f"Introducing a new source file: {new_file}")
234+
analyzer.analyze_file(new_file, g)
235+
236+
queries, params = g.clear_backlog()
237+
238+
# Save transition queries to the git graph
239+
if len(queries) > 0:
240+
assert(len(queries) == len(params))
241+
242+
# Convert parameters from dict to JSON formatted string
243+
params = [json.dumps(p) for p in params]
244+
245+
# Log transitions
246+
logging.debug(f"""Save graph transition from
247+
commit: {parent_commit.hexsha}
248+
to
249+
commit: {child_commit.hexsha}
250+
Queries: {queries}
251+
Parameters: {params}
252+
""")
253+
254+
git_graph.set_child_transition(child_commit.hexsha,
255+
parent_commit.hexsha, queries, params)
256+
# advance to the child_commit
257+
parent_commit = child_commit
258+
259+
logging.debug("Done processing repository commit history")
179260

180-
# Restore original commit
181-
logging.debug(f"Restoring repo to its original commit: {current_commit_hexsha}")
182-
repo.git.checkout(current_commit_hexsha)
261+
#--------------------------------------------------------------------------
262+
# Clean up
263+
#--------------------------------------------------------------------------
183264

184265
# Delete temporary graph
185266
g.disable_backlog()
@@ -267,14 +348,20 @@ def switch_commit(repo: str, to: str) -> dict[str, dict[str, list]]:
267348
current_commit, new_commit = (commits if commits[0]['hash'] == current_hash else reversed(commits))
268349

269350
# Determine the direction of the switch (forward or backward in the commit history)
351+
child_commit = None
352+
parent_commit = None
270353
if current_commit['date'] > new_commit['date']:
271-
logging.info(f"Moving backward from {current_commit['hash']} to {new_commit['hash']}")
354+
child_commit = current_commit
355+
parent_commit = new_commit
356+
logging.info(f"Moving backward from {child_commit['hash']} to {parent_commit['hash']}")
272357
# Get the transitions (queries and parameters) for moving backward
273-
queries, params = git_graph.get_parent_transitions(current_commit['hash'], new_commit['hash'])
358+
queries, params = git_graph.get_parent_transitions(child_commit['hash'], parent_commit['hash'])
274359
else:
275-
logging.info(f"Moving forward from {current_commit['hash']} to {new_commit['hash']}")
360+
child_commit = new_commit
361+
parent_commit = current_commit
362+
logging.info(f"Moving forward from {parent_commit['hash']} to {child_commit['hash']}")
276363
# Get the transitions (queries and parameters) for moving forward
277-
queries, params = git_graph.get_child_transitions(current_commit['hash'], new_commit['hash'])
364+
queries, params = git_graph.get_child_transitions(child_commit['hash'], parent_commit['hash'])
278365

279366
# Apply each transition query with its respective parameters
280367
for q, p in zip(queries, params):

0 commit comments

Comments
 (0)