Mercurial > hg > mercurial-crew-with-dirclash
comparison hgext/convert/subversion.py @ 4965:4106dde15aed
Merge with crew
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Sat, 21 Jul 2007 16:44:38 -0500 |
parents | c79e3fa6dc29 |
children | b6c3abdbe0eb |
comparison
equal
deleted
inserted
replaced
4964:ee983d0dbea8 | 4965:4106dde15aed |
---|---|
1 # Subversion 1.4/1.5 Python API backend | 1 # Subversion 1.4/1.5 Python API backend |
2 # | 2 # |
3 # Copyright(C) 2007 Daniel Holth et al | 3 # Copyright(C) 2007 Daniel Holth et al |
4 | 4 # |
5 import pprint | 5 # Configuration options: |
6 # | |
7 # convert.svn.trunk | |
8 # Relative path to the trunk (default: "trunk") | |
9 # convert.svn.branches | |
10 # Relative path to tree of branches (default: "branches") | |
11 # | |
12 # Set these in a hgrc, or on the command line as follows: | |
13 # | |
14 # hg convert --config convert.svn.trunk=wackoname [...] | |
15 | |
6 import locale | 16 import locale |
7 | 17 import os |
18 import cPickle as pickle | |
8 from mercurial import util | 19 from mercurial import util |
9 | 20 |
10 # Subversion stuff. Works best with very recent Python SVN bindings | 21 # Subversion stuff. Works best with very recent Python SVN bindings |
11 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing | 22 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing |
12 # these bindings. | 23 # these bindings. |
25 except ImportError: | 36 except ImportError: |
26 pass | 37 pass |
27 | 38 |
28 class CompatibilityException(Exception): pass | 39 class CompatibilityException(Exception): pass |
29 | 40 |
41 class changedpath(object): | |
42 def __init__(self, p): | |
43 self.copyfrom_path = p.copyfrom_path | |
44 self.copyfrom_rev = p.copyfrom_rev | |
45 self.action = p.action | |
46 | |
30 # SVN conversion code stolen from bzr-svn and tailor | 47 # SVN conversion code stolen from bzr-svn and tailor |
31 class convert_svn(converter_source): | 48 class convert_svn(converter_source): |
32 def __init__(self, ui, url, rev=None): | 49 def __init__(self, ui, url, rev=None): |
33 super(convert_svn, self).__init__(ui, url, rev=rev) | 50 super(convert_svn, self).__init__(ui, url, rev=rev) |
34 | 51 |
49 except ValueError: | 66 except ValueError: |
50 raise util.Abort('svn: revision %s is not an integer' % rev) | 67 raise util.Abort('svn: revision %s is not an integer' % rev) |
51 try: | 68 try: |
52 # Support file://path@rev syntax. Useful e.g. to convert | 69 # Support file://path@rev syntax. Useful e.g. to convert |
53 # deleted branches. | 70 # deleted branches. |
54 url, latest = url.rsplit("@", 1) | 71 at = url.rfind('@') |
55 latest = int(latest) | 72 if at >= 0: |
73 latest = int(url[at+1:]) | |
74 url = url[:at] | |
56 except ValueError, e: | 75 except ValueError, e: |
57 pass | 76 pass |
58 self.url = url | 77 self.url = url |
59 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8 | 78 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8 |
60 try: | 79 try: |
61 self.transport = transport.SvnRaTransport(url = url) | 80 self.transport = transport.SvnRaTransport(url=url) |
62 self.ra = self.transport.ra | 81 self.ra = self.transport.ra |
63 self.ctx = svn.client.create_context() | 82 self.ctx = self.transport.client |
64 self.base = svn.ra.get_repos_root(self.ra) | 83 self.base = svn.ra.get_repos_root(self.ra) |
65 self.module = self.url[len(self.base):] | 84 self.module = self.url[len(self.base):] |
66 self.modulemap = {} # revision, module | 85 self.modulemap = {} # revision, module |
67 self.commits = {} | 86 self.commits = {} |
68 self.files = {} | 87 self.files = {} |
86 lastrevnum = lastrevs.setdefault(module, revnum) | 105 lastrevnum = lastrevs.setdefault(module, revnum) |
87 if revnum > lastrevnum: | 106 if revnum > lastrevnum: |
88 lastrevs[module] = revnum | 107 lastrevs[module] = revnum |
89 self.lastrevs = lastrevs | 108 self.lastrevs = lastrevs |
90 | 109 |
110 def exists(self, path, optrev): | |
111 try: | |
112 return svn.client.ls(self.url.rstrip('/') + '/' + path, | |
113 optrev, False, self.ctx) | |
114 except SubversionException, err: | |
115 return [] | |
116 | |
91 def getheads(self): | 117 def getheads(self): |
92 # detect standard /branches, /tags, /trunk layout | 118 # detect standard /branches, /tags, /trunk layout |
93 optrev = svn.core.svn_opt_revision_t() | 119 optrev = svn.core.svn_opt_revision_t() |
94 optrev.kind = svn.core.svn_opt_revision_number | 120 optrev.kind = svn.core.svn_opt_revision_number |
95 optrev.value.number = self.last_changed | 121 optrev.value.number = self.last_changed |
96 rpath = self.url.strip('/') | 122 rpath = self.url.strip('/') |
97 paths = svn.client.ls(rpath, optrev, False, self.ctx) | 123 cfgtrunk = self.ui.config('convert', 'svn.trunk') |
98 if 'branches' in paths and 'trunk' in paths: | 124 cfgbranches = self.ui.config('convert', 'svn.branches') |
99 self.module += '/trunk' | 125 trunk = (cfgtrunk or 'trunk').strip('/') |
126 branches = (cfgbranches or 'branches').strip('/') | |
127 if self.exists(trunk, optrev) and self.exists(branches, optrev): | |
128 self.ui.note('found trunk at %r and branches at %r\n' % | |
129 (trunk, branches)) | |
130 oldmodule = self.module | |
131 self.module += '/' + trunk | |
100 lt = self.latest(self.module, self.last_changed) | 132 lt = self.latest(self.module, self.last_changed) |
101 self.head = self.revid(lt) | 133 self.head = self.revid(lt) |
102 self.heads = [self.head] | 134 self.heads = [self.head] |
103 branches = svn.client.ls(rpath + '/branches', optrev, False, self.ctx) | 135 branchnames = svn.client.ls(rpath + '/' + branches, optrev, False, |
104 for branch in branches.keys(): | 136 self.ctx) |
105 module = '/branches/' + branch | 137 for branch in branchnames.keys(): |
138 if oldmodule: | |
139 module = '/' + oldmodule + '/' + branches + '/' + branch | |
140 else: | |
141 module = '/' + branches + '/' + branch | |
106 brevnum = self.latest(module, self.last_changed) | 142 brevnum = self.latest(module, self.last_changed) |
107 brev = self.revid(brevnum, module) | 143 brev = self.revid(brevnum, module) |
108 self.ui.note('found branch %s at %d\n' % (branch, brevnum)) | 144 self.ui.note('found branch %s at %d\n' % (branch, brevnum)) |
109 self.heads.append(brev) | 145 self.heads.append(brev) |
146 elif cfgtrunk or cfgbranches: | |
147 raise util.Abort(_('trunk/branch layout expected, ' | |
148 'but not found')) | |
110 else: | 149 else: |
150 self.ui.note('working with one branch\n') | |
111 self.heads = [self.head] | 151 self.heads = [self.head] |
112 return self.heads | 152 return self.heads |
113 | 153 |
114 def getfile(self, file, rev): | 154 def getfile(self, file, rev): |
115 data, mode = self._getfile(file, rev) | 155 data, mode = self._getfile(file, rev) |
116 self.modecache[(file, rev)] = mode | 156 self.modecache[(file, rev)] = mode |
117 return data | 157 return data |
118 | 158 |
119 def getmode(self, file, rev): | 159 def getmode(self, file, rev): |
120 return self.modecache[(file, rev)] | 160 return self.modecache[(file, rev)] |
121 | 161 |
122 def getchanges(self, rev): | 162 def getchanges(self, rev): |
123 self.modecache = {} | 163 self.modecache = {} |
124 files = self.files[rev] | 164 files = self.files[rev] |
138 commit = self.commits[rev] | 178 commit = self.commits[rev] |
139 # caller caches the result, so free it here to release memory | 179 # caller caches the result, so free it here to release memory |
140 del self.commits[rev] | 180 del self.commits[rev] |
141 return commit | 181 return commit |
142 | 182 |
183 def get_log(self, paths, start, end, limit=0, discover_changed_paths=True, | |
184 strict_node_history=False): | |
185 '''wrapper for svn.ra.get_log. | |
186 on a large repository, svn.ra.get_log pins huge amounts of | |
187 memory that cannot be recovered. work around it by forking | |
188 and writing results over a pipe.''' | |
189 | |
190 def child(fp): | |
191 protocol = -1 | |
192 def receiver(orig_paths, revnum, author, date, message, pool): | |
193 if orig_paths is not None: | |
194 for k, v in orig_paths.iteritems(): | |
195 orig_paths[k] = changedpath(v) | |
196 pickle.dump((orig_paths, revnum, author, date, message), | |
197 fp, protocol) | |
198 | |
199 try: | |
200 # Use an ra of our own so that our parent can consume | |
201 # our results without confusing the server. | |
202 t = transport.SvnRaTransport(url=self.url) | |
203 svn.ra.get_log(t.ra, paths, start, end, limit, | |
204 discover_changed_paths, | |
205 strict_node_history, | |
206 receiver) | |
207 except SubversionException, (_, num): | |
208 self.ui.print_exc() | |
209 pickle.dump(num, fp, protocol) | |
210 else: | |
211 pickle.dump(None, fp, protocol) | |
212 fp.close() | |
213 | |
214 def parent(fp): | |
215 while True: | |
216 entry = pickle.load(fp) | |
217 try: | |
218 orig_paths, revnum, author, date, message = entry | |
219 except: | |
220 if entry is None: | |
221 break | |
222 raise SubversionException("child raised exception", entry) | |
223 yield entry | |
224 | |
225 rfd, wfd = os.pipe() | |
226 pid = os.fork() | |
227 if pid: | |
228 os.close(wfd) | |
229 for p in parent(os.fdopen(rfd, 'rb')): | |
230 yield p | |
231 ret = os.waitpid(pid, 0)[1] | |
232 if ret: | |
233 raise util.Abort(_('get_log %s') % util.explain_exit(ret)) | |
234 else: | |
235 os.close(rfd) | |
236 child(os.fdopen(wfd, 'wb')) | |
237 os._exit(0) | |
238 | |
143 def gettags(self): | 239 def gettags(self): |
144 tags = {} | 240 tags = {} |
145 def parselogentry(*arg, **args): | |
146 orig_paths, revnum, author, date, message, pool = arg | |
147 for path in orig_paths: | |
148 if not path.startswith('/tags/'): | |
149 continue | |
150 ent = orig_paths[path] | |
151 source = ent.copyfrom_path | |
152 rev = ent.copyfrom_rev | |
153 tag = path.split('/', 2)[2] | |
154 tags[tag] = self.revid(rev, module=source) | |
155 | |
156 start = self.revnum(self.head) | 241 start = self.revnum(self.head) |
157 try: | 242 try: |
158 svn.ra.get_log(self.ra, ['/tags'], 0, start, 0, True, False, | 243 for entry in self.get_log(['/tags'], 0, start): |
159 parselogentry) | 244 orig_paths, revnum, author, date, message = entry |
160 return tags | 245 for path in orig_paths: |
161 except SubversionException: | 246 if not path.startswith('/tags/'): |
247 continue | |
248 ent = orig_paths[path] | |
249 source = ent.copyfrom_path | |
250 rev = ent.copyfrom_rev | |
251 tag = path.split('/', 2)[2] | |
252 tags[tag] = self.revid(rev, module=source) | |
253 except SubversionException, (_, num): | |
162 self.ui.note('no tags found at revision %d\n' % start) | 254 self.ui.note('no tags found at revision %d\n' % start) |
163 return {} | 255 return tags |
164 | 256 |
165 # -- helper functions -- | 257 # -- helper functions -- |
166 | 258 |
167 def revid(self, revnum, module=None): | 259 def revid(self, revnum, module=None): |
168 if not module: | 260 if not module: |
191 dirent = svn.ra.stat(self.ra, path.strip('/'), stop) | 283 dirent = svn.ra.stat(self.ra, path.strip('/'), stop) |
192 self.reparent(self.module) | 284 self.reparent(self.module) |
193 except SubversionException: | 285 except SubversionException: |
194 dirent = None | 286 dirent = None |
195 if not dirent: | 287 if not dirent: |
196 raise util.Abort('%s not found up to revision %d' \ | 288 print self.base, path |
197 % (path, stop)) | 289 raise util.Abort('%s not found up to revision %d' % (path, stop)) |
198 | 290 |
199 return dirent.created_rev | 291 return dirent.created_rev |
200 | 292 |
201 def get_blacklist(self): | 293 def get_blacklist(self): |
202 """Avoid certain revision numbers. | 294 """Avoid certain revision numbers. |
240 | 332 |
241 # The path is outside our tracked tree... | 333 # The path is outside our tracked tree... |
242 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module)) | 334 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module)) |
243 return None | 335 return None |
244 | 336 |
245 received = [] | |
246 # svn.ra.get_log requires no other calls to the ra until it completes, | |
247 # so we just collect the log entries and parse them afterwards | |
248 def receivelog(*arg, **args): | |
249 received.append(arg) | |
250 | |
251 self.child_cset = None | 337 self.child_cset = None |
252 def parselogentry(*arg, **args): | 338 def parselogentry(orig_paths, revnum, author, date, message): |
253 orig_paths, revnum, author, date, message, pool = arg | 339 self.ui.debug("parsing revision %d (%d changes)\n" % |
254 | 340 (revnum, len(orig_paths))) |
255 if self.is_blacklisted(revnum): | |
256 self.ui.note('skipping blacklisted revision %d\n' % revnum) | |
257 return | |
258 | |
259 self.ui.debug("parsing revision %d\n" % revnum) | |
260 | |
261 if orig_paths is None: | |
262 self.ui.debug('revision %d has no entries\n' % revnum) | |
263 return | |
264 | 341 |
265 if revnum in self.modulemap: | 342 if revnum in self.modulemap: |
266 new_module = self.modulemap[revnum] | 343 new_module = self.modulemap[revnum] |
267 if new_module != self.module: | 344 if new_module != self.module: |
268 self.module = new_module | 345 self.module = new_module |
284 if branch == 'trunk': | 361 if branch == 'trunk': |
285 branch = '' | 362 branch = '' |
286 except IndexError: | 363 except IndexError: |
287 branch = None | 364 branch = None |
288 | 365 |
289 paths = orig_paths.keys() | 366 orig_paths = orig_paths.items() |
290 paths.sort() | 367 orig_paths.sort() |
291 for path in paths: | 368 for path, ent in orig_paths: |
292 # self.ui.write("path %s\n" % path) | 369 # self.ui.write("path %s\n" % path) |
293 if path == self.module: # Follow branching back in history | 370 if path == self.module: # Follow branching back in history |
294 ent = orig_paths[path] | |
295 if ent: | 371 if ent: |
296 if ent.copyfrom_path: | 372 if ent.copyfrom_path: |
297 # ent.copyfrom_rev may not be the actual last revision | 373 # ent.copyfrom_rev may not be the actual last revision |
298 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev) | 374 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev) |
299 self.modulemap[prev] = ent.copyfrom_path | 375 self.modulemap[prev] = ent.copyfrom_path |
308 if entrypath is None: | 384 if entrypath is None: |
309 # Outside our area of interest | 385 # Outside our area of interest |
310 self.ui.debug("boring@%s: %s\n" % (revnum, path)) | 386 self.ui.debug("boring@%s: %s\n" % (revnum, path)) |
311 continue | 387 continue |
312 entry = entrypath.decode(self.encoding) | 388 entry = entrypath.decode(self.encoding) |
313 ent = orig_paths[path] | |
314 | 389 |
315 kind = svn.ra.check_path(self.ra, entrypath, revnum) | 390 kind = svn.ra.check_path(self.ra, entrypath, revnum) |
316 if kind == svn.core.svn_node_file: | 391 if kind == svn.core.svn_node_file: |
317 if ent.copyfrom_path: | 392 if ent.copyfrom_path: |
318 copyfrom_path = get_entry_from_path(ent.copyfrom_path) | 393 copyfrom_path = get_entry_from_path(ent.copyfrom_path) |
371 # print "Deleted/moved non-file:", revnum, path, ent | 446 # print "Deleted/moved non-file:", revnum, path, ent |
372 # children = self._find_children(path, revnum - 1) | 447 # children = self._find_children(path, revnum - 1) |
373 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action) | 448 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action) |
374 # Sometimes this is tricky. For example: in | 449 # Sometimes this is tricky. For example: in |
375 # The Subversion Repository revision 6940 a dir | 450 # The Subversion Repository revision 6940 a dir |
376 # was copied and one of its files was deleted | 451 # was copied and one of its files was deleted |
377 # from the new location in the same commit. This | 452 # from the new location in the same commit. This |
378 # code can't deal with that yet. | 453 # code can't deal with that yet. |
379 if ent.action == 'C': | 454 if ent.action == 'C': |
380 children = self._find_children(path, fromrev) | 455 children = self._find_children(path, fromrev) |
381 else: | 456 else: |
385 children = [s.replace(oroot,nroot) for s in children] | 460 children = [s.replace(oroot,nroot) for s in children] |
386 # Mark all [files, not directories] as deleted. | 461 # Mark all [files, not directories] as deleted. |
387 for child in children: | 462 for child in children: |
388 # Can we move a child directory and its | 463 # Can we move a child directory and its |
389 # parent in the same commit? (probably can). Could | 464 # parent in the same commit? (probably can). Could |
390 # cause problems if instead of revnum -1, | 465 # cause problems if instead of revnum -1, |
391 # we have to look in (copyfrom_path, revnum - 1) | 466 # we have to look in (copyfrom_path, revnum - 1) |
392 entrypath = get_entry_from_path("/" + child, module=old_module) | 467 entrypath = get_entry_from_path("/" + child, module=old_module) |
393 if entrypath: | 468 if entrypath: |
394 entry = self.recode(entrypath.decode(self.encoding)) | 469 entry = self.recode(entrypath.decode(self.encoding)) |
395 if entry in copies: | 470 if entry in copies: |
415 children = self._find_children(path, revnum) | 490 children = self._find_children(path, revnum) |
416 children.sort() | 491 children.sort() |
417 for child in children: | 492 for child in children: |
418 # Can we move a child directory and its | 493 # Can we move a child directory and its |
419 # parent in the same commit? (probably can). Could | 494 # parent in the same commit? (probably can). Could |
420 # cause problems if instead of revnum -1, | 495 # cause problems if instead of revnum -1, |
421 # we have to look in (copyfrom_path, revnum - 1) | 496 # we have to look in (copyfrom_path, revnum - 1) |
422 entrypath = get_entry_from_path("/" + child, module=self.module) | 497 entrypath = get_entry_from_path("/" + child, module=self.module) |
423 # print child, self.module, entrypath | 498 # print child, self.module, entrypath |
424 if entrypath: | 499 if entrypath: |
425 # Need to filter out directories here... | 500 # Need to filter out directories here... |
464 copies[self.recode(copyto_entry)] = self.recode(entry) | 539 copies[self.recode(copyto_entry)] = self.recode(entry) |
465 # copy from quux splort/quuxfile | 540 # copy from quux splort/quuxfile |
466 | 541 |
467 self.modulemap[revnum] = self.module # track backwards in time | 542 self.modulemap[revnum] = self.module # track backwards in time |
468 # a list of (filename, id) where id lets us retrieve the file. | 543 # a list of (filename, id) where id lets us retrieve the file. |
469 # eg in git, id is the object hash. for svn it'll be the | 544 # eg in git, id is the object hash. for svn it'll be the |
470 self.files[rev] = zip(entries, [rev] * len(entries)) | 545 self.files[rev] = zip(entries, [rev] * len(entries)) |
471 if not entries: | 546 if not entries: |
472 return | 547 return |
473 | 548 |
474 # Example SVN datetime. Includes microseconds. | 549 # Example SVN datetime. Includes microseconds. |
478 | 553 |
479 log = message and self.recode(message) | 554 log = message and self.recode(message) |
480 author = author and self.recode(author) or '' | 555 author = author and self.recode(author) or '' |
481 | 556 |
482 cset = commit(author=author, | 557 cset = commit(author=author, |
483 date=util.datestr(date), | 558 date=util.datestr(date), |
484 desc=log, | 559 desc=log, |
485 parents=parents, | 560 parents=parents, |
486 copies=copies, | 561 copies=copies, |
487 branch=branch, | 562 branch=branch, |
488 rev=rev.encode('utf-8')) | 563 rev=rev.encode('utf-8')) |
489 | 564 |
490 self.commits[rev] = cset | 565 self.commits[rev] = cset |
491 if self.child_cset and not self.child_cset.parents: | 566 if self.child_cset and not self.child_cset.parents: |
492 self.child_cset.parents = [rev] | 567 self.child_cset.parents = [rev] |
493 self.child_cset = cset | 568 self.child_cset = cset |
494 | 569 |
495 self.ui.note('fetching revision log for "%s" from %d to %d\n' % \ | 570 self.ui.note('fetching revision log for "%s" from %d to %d\n' % |
496 (self.module, from_revnum, to_revnum)) | 571 (self.module, from_revnum, to_revnum)) |
497 | 572 |
498 try: | 573 try: |
499 discover_changed_paths = True | 574 discover_changed_paths = True |
500 strict_node_history = False | 575 strict_node_history = False |
501 svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum, 0, | 576 for entry in self.get_log([self.module], from_revnum, to_revnum): |
502 discover_changed_paths, strict_node_history, | 577 orig_paths, revnum, author, date, message = entry |
503 receivelog) | 578 if self.is_blacklisted(revnum): |
504 for entry in received: | 579 self.ui.note('skipping blacklisted revision %d\n' % revnum) |
505 parselogentry(*entry) | 580 continue |
581 if orig_paths is None: | |
582 self.ui.debug('revision %d has no entries\n' % revnum) | |
583 continue | |
584 parselogentry(orig_paths, revnum, author, date, message) | |
506 except SubversionException, (_, num): | 585 except SubversionException, (_, num): |
507 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION: | 586 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION: |
508 raise NoSuchRevision(branch=self, | 587 raise NoSuchRevision(branch=self, |
509 revision="Revision number %d" % to_revnum) | 588 revision="Revision number %d" % to_revnum) |
510 raise | 589 raise |
511 | 590 |
512 def _getfile(self, file, rev): | 591 def _getfile(self, file, rev): |
513 io = StringIO() | 592 io = StringIO() |
565 # python binding for getdir is broken up to at least 1.4.3 | 644 # python binding for getdir is broken up to at least 1.4.3 |
566 raise CompatibilityException() | 645 raise CompatibilityException() |
567 dirents = getdir[0] | 646 dirents = getdir[0] |
568 if type(dirents) == int: | 647 if type(dirents) == int: |
569 # got here once due to infinite recursion bug | 648 # got here once due to infinite recursion bug |
570 # pprint.pprint(getdir) | |
571 return | 649 return |
572 c = dirents.keys() | 650 c = dirents.keys() |
573 c.sort() | 651 c.sort() |
574 for child in c: | 652 for child in c: |
575 dirent = dirents[child] | 653 dirent = dirents[child] |