Skip to content

Commit 52ceb15

Browse files
committed
Move scan of file history when creating merges to bundling code
This makes creating merges faster when a large number of files is involved (up to 100 times faster in the worst case observed so far).
1 parent 34416bb commit 52ceb15

File tree

4 files changed

+51
-101
lines changed

4 files changed

+51
-101
lines changed

cinnabar/cmd/fsck.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,8 +506,7 @@ def fsck(args):
506506
GitHgHelper.seen(b'hg2git',
507507
hg_file)):
508508
if full_file_check:
509-
file = store.file(hg_file, hg_fileparents, git_parents,
510-
store.manifest_path(path))
509+
file = store.file(hg_file, hg_fileparents)
511510
valid = file.node == file.sha1
512511
else:
513512
valid = GitHgHelper.check_file(hg_file,

cinnabar/githg.py

Lines changed: 3 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
6060
)
6161
from .helper import GitHgHelper
6262
from .util import progress_iter
63-
from .dag import gitdag
6463
from cinnabar import util
6564
from cinnabar.util import fsdecode
6665

@@ -86,10 +85,7 @@ def _invalid_if_new(file):
8685
'Please open an issue with details.')
8786

8887
@staticmethod
89-
def set_parents(file, parent1=NULL_NODE_ID, parent2=NULL_NODE_ID,
90-
git_manifest_parents=None, path=None):
91-
assert git_manifest_parents is not None and path is not None
92-
88+
def set_parents(file, parent1=NULL_NODE_ID, parent2=NULL_NODE_ID):
9389
# Remove null nodes
9490
parents = tuple(p for p in (parent1, parent2) if p != NULL_NODE_ID)
9591
orig_parents = parents
@@ -101,55 +97,10 @@ def set_parents(file, parent1=NULL_NODE_ID, parent2=NULL_NODE_ID,
10197
if len(parents) == 2:
10298
FileFindParents._invalid_if_new(file)
10399
elif len(parents) == 1:
104-
if git_manifest_parents is not None:
105-
if len(git_manifest_parents) != 2:
106-
FileFindParents._invalid_if_new(file)
107100
parents = (NULL_NODE_ID, parents[0])
108-
elif git_manifest_parents is not None:
109-
if len(git_manifest_parents) == 0:
110-
FileFindParents._invalid_if_new(file)
111101
elif len(parents) == 2:
112-
if git_manifest_parents is not None:
113-
if len(git_manifest_parents) != 2:
114-
FileFindParents._invalid_if_new(file)
115102
if parents[0] == parents[1]:
116103
parents = parents[:1]
117-
elif (git_manifest_parents is not None and
118-
(file.node == NULL_NODE_ID or check_enabled('files'))):
119-
# Checking if one parent is the ancestor of another is slow.
120-
# So, unless we're actually creating this file, skip over
121-
# this by default, the fallback will work just fine.
122-
file_dag = gitdag()
123-
mapping = {}
124-
path = GitHgStore.manifest_metadata_path(path)
125-
for sha1, tree, fparents in GitHgHelper.rev_list(
126-
b'--parents', b'--boundary', b'--topo-order',
127-
b'--full-history', b'--reverse',
128-
b'%s...%s' % git_manifest_parents, b'--',
129-
path):
130-
if sha1.startswith(b'-'):
131-
sha1 = sha1[1:]
132-
node = [
133-
s
134-
for mode, typ, s, p in
135-
Git.ls_tree(sha1, path)
136-
]
137-
if not node:
138-
continue
139-
node = node[0]
140-
mapping[sha1] = node
141-
file_dag.add(node, tuple(mapping[p]
142-
for p in fparents
143-
if p in mapping))
144-
145-
file_dag.tag_nodes_and_parents((parents[0],), 'a')
146-
if file_dag._tags.get(parents[1]) == 'a':
147-
parents = parents[:1]
148-
else:
149-
file_dag._tags.clear()
150-
file_dag.tag_nodes_and_parents((parents[1],), 'b')
151-
if file_dag._tags.get(parents[0]) == 'b':
152-
parents = parents[1:]
153104

154105
file.parents = parents
155106
if file.node != NULL_NODE_ID and file.node != file.sha1:
@@ -1045,8 +996,7 @@ def cached_changeset_ref(self, sha1):
1045996
def file_meta(self, sha1):
1046997
return GitHgHelper.file_meta(sha1)
1047998

1048-
def file(self, sha1, file_parents=None, git_manifest_parents=None,
1049-
path=None):
999+
def file(self, sha1, file_parents=None):
10501000
if sha1 == HG_EMPTY_FILE:
10511001
content = b''
10521002
else:
@@ -1058,10 +1008,7 @@ def file(self, sha1, file_parents=None, git_manifest_parents=None,
10581008
file.metadata = meta
10591009
file.content = content
10601010
if file_parents is not None:
1061-
FileFindParents.set_parents(
1062-
file, *file_parents,
1063-
git_manifest_parents=git_manifest_parents,
1064-
path=path)
1011+
FileFindParents.set_parents(file, *file_parents)
10651012
return file
10661013

10671014
def git_file_ref(self, sha1):

cinnabar/hg/bundle.py

Lines changed: 46 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
except ImportError:
55
from urllib import quote as quote_from_bytes
66
from urllib import unquote as unquote_to_bytes
7+
from cinnabar.dag import gitdag
78
from cinnabar.githg import (
89
Changeset,
910
FileFindParents,
@@ -160,8 +161,7 @@ def create_hg_manifest(self, commit, parents):
160161
if not parents:
161162
for line in Git.ls_tree(commit, recursive=True):
162163
mode, typ, sha1, path = line
163-
node = self.create_file(sha1, git_manifest_parents=(),
164-
path=path)
164+
node = self.create_file(sha1)
165165
manifest.add(path, node, self.ATTR[mode], modified=True)
166166
changeset_files.append(path)
167167

@@ -181,7 +181,22 @@ def create_hg_manifest(self, commit, parents):
181181
self.manifest_ref(parent2_node))
182182

183183
# TODO: this would benefit from less git queries
184-
files = [(path, mode, sha1) for mode, _, sha1, path in
184+
file_dags = {}
185+
for m, tree, mparents in GitHgHelper.rev_list(
186+
b'--parents', b'--topo-order',
187+
b'--full-history', b'--reverse',
188+
b'%s...%s' % git_manifests):
189+
for p in mparents:
190+
for path, sha1_after, sha1_before in manifest_diff(p, m):
191+
path = GitHgStore.manifest_path(path)
192+
if path not in file_dags:
193+
file_dags[path] = gitdag()
194+
dag = file_dags[path]
195+
if sha1_before == NULL_NODE_ID:
196+
dag.add(sha1_after, ())
197+
else:
198+
dag.add(sha1_after, (sha1_before,))
199+
files = [(p, mode, sha1) for mode, _, sha1, p in
185200
Git.ls_tree(commit, recursive=True)]
186201
manifests = sorted_merge(parent_manifest, parent2_manifest,
187202
key=lambda i: i.path, non_key=lambda i: i)
@@ -210,15 +225,25 @@ def create_hg_manifest(self, commit, parents):
210225
if self._merge_warn == 1:
211226
logging.warning('This may take a while...')
212227
self._merge_warn = 2
213-
file_parents = (manifest_line_p1.sha1,
214-
manifest_line_p2.sha1)
228+
file_parents = ()
229+
dag = file_dags.pop(path)
230+
if dag:
231+
dag.tag_nodes_and_parents(
232+
(manifest_line_p1.sha1,), 'a')
233+
if dag._tags.get(manifest_line_p2.sha1) == 'a':
234+
file_parents = (manifest_line_p1.sha1,)
235+
else:
236+
dag._tags.clear()
237+
dag.tag_nodes_and_parents(
238+
(manifest_line_p2.sha1,), 'b')
239+
if dag._tags.get(manifest_line_p1.sha1) == 'b':
240+
file_parents = (manifest_line_p2.sha1,)
241+
if not file_parents:
242+
file_parents = (manifest_line_p1.sha1,
243+
manifest_line_p2.sha1)
215244

216245
assert file_parents is not None
217-
f = self._create_file_internal(
218-
sha1, *file_parents,
219-
git_manifest_parents=git_manifests,
220-
path=path
221-
)
246+
f = self._create_file_internal(sha1, *file_parents)
222247
file_parents = tuple(p for p in (f.parent1, f.parent2)
223248
if p != NULL_NODE_ID)
224249
merged = len(file_parents) == 2
@@ -277,31 +302,17 @@ def process_diff(diff):
277302
if sha1_before == sha1_after:
278303
node = manifest_line.sha1
279304
else:
280-
node = self.create_file(
281-
sha1_after, manifest_line.sha1,
282-
git_manifest_parents=(
283-
self.manifest_ref(parent_node),),
284-
path=path)
305+
node = self.create_file(sha1_after, manifest_line.sha1)
285306
elif status in b'RC':
286307
if sha1_after != EMPTY_BLOB:
287308
node = self.create_copy(
288309
(path2, parent_lines[path2].sha1), sha1_after,
289-
git_manifest_parents=(
290-
self.manifest_ref(parent_node),),
291310
path=path)
292311
else:
293-
node = self.create_file(
294-
sha1_after,
295-
git_manifest_parents=(
296-
self.manifest_ref(parent_node),),
297-
path=path)
312+
node = self.create_file(sha1_after)
298313
else:
299314
assert status == b'A'
300-
node = self.create_file(
301-
sha1_after,
302-
git_manifest_parents=(
303-
self.manifest_ref(parent_node),),
304-
path=path)
315+
node = self.create_file(sha1_after)
305316
manifest.add(path, node, attr, modified=True)
306317
changeset_files.append(path)
307318
manifest.parents = (parent_node,)
@@ -364,14 +375,10 @@ def create_hg_metadata(self, commit, parents):
364375
raise Exception('Changeset mismatch')
365376

366377
def _create_file_internal(self, sha1, parent1=NULL_NODE_ID,
367-
parent2=NULL_NODE_ID,
368-
git_manifest_parents=None, path=None):
378+
parent2=NULL_NODE_ID):
369379
hg_file = File()
370380
hg_file.content = GitHgHelper.cat_file(b'blob', sha1)
371-
FileFindParents.set_parents(
372-
hg_file, parent1, parent2,
373-
git_manifest_parents=git_manifest_parents,
374-
path=path)
381+
FileFindParents.set_parents(hg_file, parent1, parent2)
375382
node = hg_file.node = hg_file.sha1
376383
GitHgHelper.set(b'file', node, sha1)
377384
return hg_file
@@ -381,14 +388,11 @@ def _store_file_internal(self, hg_file):
381388
self._pushed.add(node)
382389
return node
383390

384-
def create_file(self, sha1, parent1=NULL_NODE_ID, parent2=NULL_NODE_ID,
385-
git_manifest_parents=None, path=None):
386-
hg_file = self._create_file_internal(sha1, parent1, parent2,
387-
git_manifest_parents, path)
391+
def create_file(self, sha1, parent1=NULL_NODE_ID, parent2=NULL_NODE_ID):
392+
hg_file = self._create_file_internal(sha1, parent1, parent2)
388393
return self._store_file_internal(hg_file)
389394

390-
def create_copy(self, hg_source, sha1, git_manifest_parents=None,
391-
path=None):
395+
def create_copy(self, hg_source, sha1, path=None):
392396
path, rev = hg_source
393397
hg_file = File()
394398
hg_file.metadata = {
@@ -476,7 +480,7 @@ def bundle_data(store, commits):
476480
for path, hg_file, hg_fileparents in changes:
477481
if hg_file != NULL_NODE_ID:
478482
files[store.manifest_path(path)].append(
479-
(hg_file, hg_fileparents, changeset, parents))
483+
(hg_file, hg_fileparents, changeset))
480484

481485
yield None
482486

@@ -485,12 +489,12 @@ def iter_files(files):
485489
for count_names, path in enumerate(sorted(files), 1):
486490
yield (count_chunks, count_names), path
487491
nodes = set()
488-
for node, parents, changeset, mn_parents in files[path]:
492+
for node, parents, changeset in files[path]:
489493
if node in nodes:
490494
continue
491495
count_chunks += 1
492496
nodes.add(node)
493-
file = store.file(node, parents, mn_parents, path)
497+
file = store.file(node, parents)
494498
file.changeset = changeset
495499
assert file.node == file.sha1
496500
yield (count_chunks, count_names), file

git-core

Submodule git-core updated from af6b65d to de49261

0 commit comments

Comments
 (0)