comparison hgext/convert/subversion.py @ 4965:4106dde15aed

Merge with crew
author Matt Mackall <mpm@selenic.com>
date Sat, 21 Jul 2007 16:44:38 -0500
parents c79e3fa6dc29
children b6c3abdbe0eb
comparison
legend: equal | deleted | inserted | replaced
left column (old): 4964:ee983d0dbea8 — right column (new): 4965:4106dde15aed
1 # Subversion 1.4/1.5 Python API backend 1 # Subversion 1.4/1.5 Python API backend
2 # 2 #
3 # Copyright(C) 2007 Daniel Holth et al 3 # Copyright(C) 2007 Daniel Holth et al
4 4 #
5 import pprint 5 # Configuration options:
6 #
7 # convert.svn.trunk
8 # Relative path to the trunk (default: "trunk")
9 # convert.svn.branches
10 # Relative path to tree of branches (default: "branches")
11 #
12 # Set these in a hgrc, or on the command line as follows:
13 #
14 # hg convert --config convert.svn.trunk=wackoname [...]
15
6 import locale 16 import locale
7 17 import os
18 import cPickle as pickle
8 from mercurial import util 19 from mercurial import util
9 20
10 # Subversion stuff. Works best with very recent Python SVN bindings 21 # Subversion stuff. Works best with very recent Python SVN bindings
11 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing 22 # e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
12 # these bindings. 23 # these bindings.
25 except ImportError: 36 except ImportError:
26 pass 37 pass
27 38
28 class CompatibilityException(Exception): pass 39 class CompatibilityException(Exception): pass
29 40
41 class changedpath(object):
42 def __init__(self, p):
43 self.copyfrom_path = p.copyfrom_path
44 self.copyfrom_rev = p.copyfrom_rev
45 self.action = p.action
46
30 # SVN conversion code stolen from bzr-svn and tailor 47 # SVN conversion code stolen from bzr-svn and tailor
31 class convert_svn(converter_source): 48 class convert_svn(converter_source):
32 def __init__(self, ui, url, rev=None): 49 def __init__(self, ui, url, rev=None):
33 super(convert_svn, self).__init__(ui, url, rev=rev) 50 super(convert_svn, self).__init__(ui, url, rev=rev)
34 51
49 except ValueError: 66 except ValueError:
50 raise util.Abort('svn: revision %s is not an integer' % rev) 67 raise util.Abort('svn: revision %s is not an integer' % rev)
51 try: 68 try:
52 # Support file://path@rev syntax. Useful e.g. to convert 69 # Support file://path@rev syntax. Useful e.g. to convert
53 # deleted branches. 70 # deleted branches.
54 url, latest = url.rsplit("@", 1) 71 at = url.rfind('@')
55 latest = int(latest) 72 if at >= 0:
73 latest = int(url[at+1:])
74 url = url[:at]
56 except ValueError, e: 75 except ValueError, e:
57 pass 76 pass
58 self.url = url 77 self.url = url
59 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8 78 self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
60 try: 79 try:
61 self.transport = transport.SvnRaTransport(url = url) 80 self.transport = transport.SvnRaTransport(url=url)
62 self.ra = self.transport.ra 81 self.ra = self.transport.ra
63 self.ctx = svn.client.create_context() 82 self.ctx = self.transport.client
64 self.base = svn.ra.get_repos_root(self.ra) 83 self.base = svn.ra.get_repos_root(self.ra)
65 self.module = self.url[len(self.base):] 84 self.module = self.url[len(self.base):]
66 self.modulemap = {} # revision, module 85 self.modulemap = {} # revision, module
67 self.commits = {} 86 self.commits = {}
68 self.files = {} 87 self.files = {}
86 lastrevnum = lastrevs.setdefault(module, revnum) 105 lastrevnum = lastrevs.setdefault(module, revnum)
87 if revnum > lastrevnum: 106 if revnum > lastrevnum:
88 lastrevs[module] = revnum 107 lastrevs[module] = revnum
89 self.lastrevs = lastrevs 108 self.lastrevs = lastrevs
90 109
110 def exists(self, path, optrev):
111 try:
112 return svn.client.ls(self.url.rstrip('/') + '/' + path,
113 optrev, False, self.ctx)
114 except SubversionException, err:
115 return []
116
91 def getheads(self): 117 def getheads(self):
92 # detect standard /branches, /tags, /trunk layout 118 # detect standard /branches, /tags, /trunk layout
93 optrev = svn.core.svn_opt_revision_t() 119 optrev = svn.core.svn_opt_revision_t()
94 optrev.kind = svn.core.svn_opt_revision_number 120 optrev.kind = svn.core.svn_opt_revision_number
95 optrev.value.number = self.last_changed 121 optrev.value.number = self.last_changed
96 rpath = self.url.strip('/') 122 rpath = self.url.strip('/')
97 paths = svn.client.ls(rpath, optrev, False, self.ctx) 123 cfgtrunk = self.ui.config('convert', 'svn.trunk')
98 if 'branches' in paths and 'trunk' in paths: 124 cfgbranches = self.ui.config('convert', 'svn.branches')
99 self.module += '/trunk' 125 trunk = (cfgtrunk or 'trunk').strip('/')
126 branches = (cfgbranches or 'branches').strip('/')
127 if self.exists(trunk, optrev) and self.exists(branches, optrev):
128 self.ui.note('found trunk at %r and branches at %r\n' %
129 (trunk, branches))
130 oldmodule = self.module
131 self.module += '/' + trunk
100 lt = self.latest(self.module, self.last_changed) 132 lt = self.latest(self.module, self.last_changed)
101 self.head = self.revid(lt) 133 self.head = self.revid(lt)
102 self.heads = [self.head] 134 self.heads = [self.head]
103 branches = svn.client.ls(rpath + '/branches', optrev, False, self.ctx) 135 branchnames = svn.client.ls(rpath + '/' + branches, optrev, False,
104 for branch in branches.keys(): 136 self.ctx)
105 module = '/branches/' + branch 137 for branch in branchnames.keys():
138 if oldmodule:
139 module = '/' + oldmodule + '/' + branches + '/' + branch
140 else:
141 module = '/' + branches + '/' + branch
106 brevnum = self.latest(module, self.last_changed) 142 brevnum = self.latest(module, self.last_changed)
107 brev = self.revid(brevnum, module) 143 brev = self.revid(brevnum, module)
108 self.ui.note('found branch %s at %d\n' % (branch, brevnum)) 144 self.ui.note('found branch %s at %d\n' % (branch, brevnum))
109 self.heads.append(brev) 145 self.heads.append(brev)
146 elif cfgtrunk or cfgbranches:
147 raise util.Abort(_('trunk/branch layout expected, '
148 'but not found'))
110 else: 149 else:
150 self.ui.note('working with one branch\n')
111 self.heads = [self.head] 151 self.heads = [self.head]
112 return self.heads 152 return self.heads
113 153
114 def getfile(self, file, rev): 154 def getfile(self, file, rev):
115 data, mode = self._getfile(file, rev) 155 data, mode = self._getfile(file, rev)
116 self.modecache[(file, rev)] = mode 156 self.modecache[(file, rev)] = mode
117 return data 157 return data
118 158
119 def getmode(self, file, rev): 159 def getmode(self, file, rev):
120 return self.modecache[(file, rev)] 160 return self.modecache[(file, rev)]
121 161
122 def getchanges(self, rev): 162 def getchanges(self, rev):
123 self.modecache = {} 163 self.modecache = {}
124 files = self.files[rev] 164 files = self.files[rev]
138 commit = self.commits[rev] 178 commit = self.commits[rev]
139 # caller caches the result, so free it here to release memory 179 # caller caches the result, so free it here to release memory
140 del self.commits[rev] 180 del self.commits[rev]
141 return commit 181 return commit
142 182
183 def get_log(self, paths, start, end, limit=0, discover_changed_paths=True,
184 strict_node_history=False):
185 '''wrapper for svn.ra.get_log.
186 on a large repository, svn.ra.get_log pins huge amounts of
187 memory that cannot be recovered. work around it by forking
188 and writing results over a pipe.'''
189
190 def child(fp):
191 protocol = -1
192 def receiver(orig_paths, revnum, author, date, message, pool):
193 if orig_paths is not None:
194 for k, v in orig_paths.iteritems():
195 orig_paths[k] = changedpath(v)
196 pickle.dump((orig_paths, revnum, author, date, message),
197 fp, protocol)
198
199 try:
200 # Use an ra of our own so that our parent can consume
201 # our results without confusing the server.
202 t = transport.SvnRaTransport(url=self.url)
203 svn.ra.get_log(t.ra, paths, start, end, limit,
204 discover_changed_paths,
205 strict_node_history,
206 receiver)
207 except SubversionException, (_, num):
208 self.ui.print_exc()
209 pickle.dump(num, fp, protocol)
210 else:
211 pickle.dump(None, fp, protocol)
212 fp.close()
213
214 def parent(fp):
215 while True:
216 entry = pickle.load(fp)
217 try:
218 orig_paths, revnum, author, date, message = entry
219 except:
220 if entry is None:
221 break
222 raise SubversionException("child raised exception", entry)
223 yield entry
224
225 rfd, wfd = os.pipe()
226 pid = os.fork()
227 if pid:
228 os.close(wfd)
229 for p in parent(os.fdopen(rfd, 'rb')):
230 yield p
231 ret = os.waitpid(pid, 0)[1]
232 if ret:
233 raise util.Abort(_('get_log %s') % util.explain_exit(ret))
234 else:
235 os.close(rfd)
236 child(os.fdopen(wfd, 'wb'))
237 os._exit(0)
238
143 def gettags(self): 239 def gettags(self):
144 tags = {} 240 tags = {}
145 def parselogentry(*arg, **args):
146 orig_paths, revnum, author, date, message, pool = arg
147 for path in orig_paths:
148 if not path.startswith('/tags/'):
149 continue
150 ent = orig_paths[path]
151 source = ent.copyfrom_path
152 rev = ent.copyfrom_rev
153 tag = path.split('/', 2)[2]
154 tags[tag] = self.revid(rev, module=source)
155
156 start = self.revnum(self.head) 241 start = self.revnum(self.head)
157 try: 242 try:
158 svn.ra.get_log(self.ra, ['/tags'], 0, start, 0, True, False, 243 for entry in self.get_log(['/tags'], 0, start):
159 parselogentry) 244 orig_paths, revnum, author, date, message = entry
160 return tags 245 for path in orig_paths:
161 except SubversionException: 246 if not path.startswith('/tags/'):
247 continue
248 ent = orig_paths[path]
249 source = ent.copyfrom_path
250 rev = ent.copyfrom_rev
251 tag = path.split('/', 2)[2]
252 tags[tag] = self.revid(rev, module=source)
253 except SubversionException, (_, num):
162 self.ui.note('no tags found at revision %d\n' % start) 254 self.ui.note('no tags found at revision %d\n' % start)
163 return {} 255 return tags
164 256
165 # -- helper functions -- 257 # -- helper functions --
166 258
167 def revid(self, revnum, module=None): 259 def revid(self, revnum, module=None):
168 if not module: 260 if not module:
191 dirent = svn.ra.stat(self.ra, path.strip('/'), stop) 283 dirent = svn.ra.stat(self.ra, path.strip('/'), stop)
192 self.reparent(self.module) 284 self.reparent(self.module)
193 except SubversionException: 285 except SubversionException:
194 dirent = None 286 dirent = None
195 if not dirent: 287 if not dirent:
196 raise util.Abort('%s not found up to revision %d' \ 288 print self.base, path
197 % (path, stop)) 289 raise util.Abort('%s not found up to revision %d' % (path, stop))
198 290
199 return dirent.created_rev 291 return dirent.created_rev
200 292
201 def get_blacklist(self): 293 def get_blacklist(self):
202 """Avoid certain revision numbers. 294 """Avoid certain revision numbers.
240 332
241 # The path is outside our tracked tree... 333 # The path is outside our tracked tree...
242 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module)) 334 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
243 return None 335 return None
244 336
245 received = []
246 # svn.ra.get_log requires no other calls to the ra until it completes,
247 # so we just collect the log entries and parse them afterwards
248 def receivelog(*arg, **args):
249 received.append(arg)
250
251 self.child_cset = None 337 self.child_cset = None
252 def parselogentry(*arg, **args): 338 def parselogentry(orig_paths, revnum, author, date, message):
253 orig_paths, revnum, author, date, message, pool = arg 339 self.ui.debug("parsing revision %d (%d changes)\n" %
254 340 (revnum, len(orig_paths)))
255 if self.is_blacklisted(revnum):
256 self.ui.note('skipping blacklisted revision %d\n' % revnum)
257 return
258
259 self.ui.debug("parsing revision %d\n" % revnum)
260
261 if orig_paths is None:
262 self.ui.debug('revision %d has no entries\n' % revnum)
263 return
264 341
265 if revnum in self.modulemap: 342 if revnum in self.modulemap:
266 new_module = self.modulemap[revnum] 343 new_module = self.modulemap[revnum]
267 if new_module != self.module: 344 if new_module != self.module:
268 self.module = new_module 345 self.module = new_module
284 if branch == 'trunk': 361 if branch == 'trunk':
285 branch = '' 362 branch = ''
286 except IndexError: 363 except IndexError:
287 branch = None 364 branch = None
288 365
289 paths = orig_paths.keys() 366 orig_paths = orig_paths.items()
290 paths.sort() 367 orig_paths.sort()
291 for path in paths: 368 for path, ent in orig_paths:
292 # self.ui.write("path %s\n" % path) 369 # self.ui.write("path %s\n" % path)
293 if path == self.module: # Follow branching back in history 370 if path == self.module: # Follow branching back in history
294 ent = orig_paths[path]
295 if ent: 371 if ent:
296 if ent.copyfrom_path: 372 if ent.copyfrom_path:
297 # ent.copyfrom_rev may not be the actual last revision 373 # ent.copyfrom_rev may not be the actual last revision
298 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev) 374 prev = self.latest(ent.copyfrom_path, ent.copyfrom_rev)
299 self.modulemap[prev] = ent.copyfrom_path 375 self.modulemap[prev] = ent.copyfrom_path
308 if entrypath is None: 384 if entrypath is None:
309 # Outside our area of interest 385 # Outside our area of interest
310 self.ui.debug("boring@%s: %s\n" % (revnum, path)) 386 self.ui.debug("boring@%s: %s\n" % (revnum, path))
311 continue 387 continue
312 entry = entrypath.decode(self.encoding) 388 entry = entrypath.decode(self.encoding)
313 ent = orig_paths[path]
314 389
315 kind = svn.ra.check_path(self.ra, entrypath, revnum) 390 kind = svn.ra.check_path(self.ra, entrypath, revnum)
316 if kind == svn.core.svn_node_file: 391 if kind == svn.core.svn_node_file:
317 if ent.copyfrom_path: 392 if ent.copyfrom_path:
318 copyfrom_path = get_entry_from_path(ent.copyfrom_path) 393 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
371 # print "Deleted/moved non-file:", revnum, path, ent 446 # print "Deleted/moved non-file:", revnum, path, ent
372 # children = self._find_children(path, revnum - 1) 447 # children = self._find_children(path, revnum - 1)
373 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action) 448 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
374 # Sometimes this is tricky. For example: in 449 # Sometimes this is tricky. For example: in
375 # The Subversion Repository revision 6940 a dir 450 # The Subversion Repository revision 6940 a dir
376 # was copied and one of its files was deleted 451 # was copied and one of its files was deleted
377 # from the new location in the same commit. This 452 # from the new location in the same commit. This
378 # code can't deal with that yet. 453 # code can't deal with that yet.
379 if ent.action == 'C': 454 if ent.action == 'C':
380 children = self._find_children(path, fromrev) 455 children = self._find_children(path, fromrev)
381 else: 456 else:
385 children = [s.replace(oroot,nroot) for s in children] 460 children = [s.replace(oroot,nroot) for s in children]
386 # Mark all [files, not directories] as deleted. 461 # Mark all [files, not directories] as deleted.
387 for child in children: 462 for child in children:
388 # Can we move a child directory and its 463 # Can we move a child directory and its
389 # parent in the same commit? (probably can). Could 464 # parent in the same commit? (probably can). Could
390 # cause problems if instead of revnum -1, 465 # cause problems if instead of revnum -1,
391 # we have to look in (copyfrom_path, revnum - 1) 466 # we have to look in (copyfrom_path, revnum - 1)
392 entrypath = get_entry_from_path("/" + child, module=old_module) 467 entrypath = get_entry_from_path("/" + child, module=old_module)
393 if entrypath: 468 if entrypath:
394 entry = self.recode(entrypath.decode(self.encoding)) 469 entry = self.recode(entrypath.decode(self.encoding))
395 if entry in copies: 470 if entry in copies:
415 children = self._find_children(path, revnum) 490 children = self._find_children(path, revnum)
416 children.sort() 491 children.sort()
417 for child in children: 492 for child in children:
418 # Can we move a child directory and its 493 # Can we move a child directory and its
419 # parent in the same commit? (probably can). Could 494 # parent in the same commit? (probably can). Could
420 # cause problems if instead of revnum -1, 495 # cause problems if instead of revnum -1,
421 # we have to look in (copyfrom_path, revnum - 1) 496 # we have to look in (copyfrom_path, revnum - 1)
422 entrypath = get_entry_from_path("/" + child, module=self.module) 497 entrypath = get_entry_from_path("/" + child, module=self.module)
423 # print child, self.module, entrypath 498 # print child, self.module, entrypath
424 if entrypath: 499 if entrypath:
425 # Need to filter out directories here... 500 # Need to filter out directories here...
464 copies[self.recode(copyto_entry)] = self.recode(entry) 539 copies[self.recode(copyto_entry)] = self.recode(entry)
465 # copy from quux splort/quuxfile 540 # copy from quux splort/quuxfile
466 541
467 self.modulemap[revnum] = self.module # track backwards in time 542 self.modulemap[revnum] = self.module # track backwards in time
468 # a list of (filename, id) where id lets us retrieve the file. 543 # a list of (filename, id) where id lets us retrieve the file.
469 # eg in git, id is the object hash. for svn it'll be the 544 # eg in git, id is the object hash. for svn it'll be the
470 self.files[rev] = zip(entries, [rev] * len(entries)) 545 self.files[rev] = zip(entries, [rev] * len(entries))
471 if not entries: 546 if not entries:
472 return 547 return
473 548
474 # Example SVN datetime. Includes microseconds. 549 # Example SVN datetime. Includes microseconds.
478 553
479 log = message and self.recode(message) 554 log = message and self.recode(message)
480 author = author and self.recode(author) or '' 555 author = author and self.recode(author) or ''
481 556
482 cset = commit(author=author, 557 cset = commit(author=author,
483 date=util.datestr(date), 558 date=util.datestr(date),
484 desc=log, 559 desc=log,
485 parents=parents, 560 parents=parents,
486 copies=copies, 561 copies=copies,
487 branch=branch, 562 branch=branch,
488 rev=rev.encode('utf-8')) 563 rev=rev.encode('utf-8'))
489 564
490 self.commits[rev] = cset 565 self.commits[rev] = cset
491 if self.child_cset and not self.child_cset.parents: 566 if self.child_cset and not self.child_cset.parents:
492 self.child_cset.parents = [rev] 567 self.child_cset.parents = [rev]
493 self.child_cset = cset 568 self.child_cset = cset
494 569
495 self.ui.note('fetching revision log for "%s" from %d to %d\n' % \ 570 self.ui.note('fetching revision log for "%s" from %d to %d\n' %
496 (self.module, from_revnum, to_revnum)) 571 (self.module, from_revnum, to_revnum))
497 572
498 try: 573 try:
499 discover_changed_paths = True 574 discover_changed_paths = True
500 strict_node_history = False 575 strict_node_history = False
501 svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum, 0, 576 for entry in self.get_log([self.module], from_revnum, to_revnum):
502 discover_changed_paths, strict_node_history, 577 orig_paths, revnum, author, date, message = entry
503 receivelog) 578 if self.is_blacklisted(revnum):
504 for entry in received: 579 self.ui.note('skipping blacklisted revision %d\n' % revnum)
505 parselogentry(*entry) 580 continue
581 if orig_paths is None:
582 self.ui.debug('revision %d has no entries\n' % revnum)
583 continue
584 parselogentry(orig_paths, revnum, author, date, message)
506 except SubversionException, (_, num): 585 except SubversionException, (_, num):
507 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION: 586 if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
508 raise NoSuchRevision(branch=self, 587 raise NoSuchRevision(branch=self,
509 revision="Revision number %d" % to_revnum) 588 revision="Revision number %d" % to_revnum)
510 raise 589 raise
511 590
512 def _getfile(self, file, rev): 591 def _getfile(self, file, rev):
513 io = StringIO() 592 io = StringIO()
565 # python binding for getdir is broken up to at least 1.4.3 644 # python binding for getdir is broken up to at least 1.4.3
566 raise CompatibilityException() 645 raise CompatibilityException()
567 dirents = getdir[0] 646 dirents = getdir[0]
568 if type(dirents) == int: 647 if type(dirents) == int:
569 # got here once due to infinite recursion bug 648 # got here once due to infinite recursion bug
570 # pprint.pprint(getdir)
571 return 649 return
572 c = dirents.keys() 650 c = dirents.keys()
573 c.sort() 651 c.sort()
574 for child in c: 652 for child in c:
575 dirent = dirents[child] 653 dirent = dirents[child]