comparison hgext/convert/subversion.py @ 5075:514c06098e9c

convert: svn: pull up the path-to-file expansion code into a separate function. This is part of the process of deferring path expansion until getchanges. Copy detection also needs to be moved out of the commit object.
author Brendan Cully <brendan@kublai.com>
date Sun, 05 Aug 2007 11:30:52 -0700
parents 4cef0b93969d
children ef338e34a906
comparison
equal deleted inserted replaced
5074:4cef0b93969d 5075:514c06098e9c
324 def reparent(self, module): 324 def reparent(self, module):
325 svn_url = self.base + module 325 svn_url = self.base + module
326 self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding)) 326 self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding))
327 svn.ra.reparent(self.ra, svn_url.encode(self.encoding)) 327 svn.ra.reparent(self.ra, svn_url.encode(self.encoding))
328 328
329 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347): 329 def expandpaths(self, rev, paths, parents):
330 def get_entry_from_path(path, module=self.module): 330 def get_entry_from_path(path, module=self.module):
331 # Given the repository url of this wc, say 331 # Given the repository url of this wc, say
332 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch" 332 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
333 # extract the "entry" portion (a relative path) from what 333 # extract the "entry" portion (a relative path) from what
334 # svn log --xml says, ie 334 # svn log --xml says, ie
335 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py" 335 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
336 # that is to say "tests/PloneTestCase.py" 336 # that is to say "tests/PloneTestCase.py"
337
338 if path.startswith(module): 337 if path.startswith(module):
339 relative = path[len(module):] 338 relative = path[len(module):]
340 if relative.startswith('/'): 339 if relative.startswith('/'):
341 return relative[1:] 340 return relative[1:]
342 else: 341 else:
343 return relative 342 return relative
344 343
345 # The path is outside our tracked tree... 344 # The path is outside our tracked tree...
346 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module)) 345 self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
347 return None 346 return None
348 347
348 entries = []
349 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
350 copies = {}
351 revnum = self.revnum(rev)
352
353 for path, ent in paths:
354 # self.ui.write("path %s\n" % path)
355 entrypath = get_entry_from_path(path, module=self.module)
356 entry = entrypath.decode(self.encoding)
357
358 kind = svn.ra.check_path(self.ra, entrypath, revnum)
359 if kind == svn.core.svn_node_file:
360 if ent.copyfrom_path:
361 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
362 if copyfrom_path:
363 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
364 # It's probably important for hg that the source
365 # exists in the revision's parent, not just the
366 # ent.copyfrom_rev
367 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
368 if fromkind != 0:
369 copies[self.recode(entry)] = self.recode(copyfrom_path)
370 entries.append(self.recode(entry))
371 elif kind == 0: # gone, but had better be a deleted *file*
372 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
373
374 # if a branch is created but entries are removed in the same
375 # changeset, get the right fromrev
376 if parents:
377 uuid, old_module, fromrev = self.revsplit(parents[0])
378 else:
379 fromrev = revnum - 1
380 # might always need to be revnum - 1 in these 3 lines?
381 old_module = self.modulemap.get(fromrev, self.module)
382
383 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
384 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
385
386 def lookup_parts(p):
387 rc = None
388 parts = p.split("/")
389 for i in range(len(parts)):
390 part = "/".join(parts[:i])
391 info = part, copyfrom.get(part, None)
392 if info[1] is not None:
393 self.ui.debug("Found parent directory %s\n" % info[1])
394 rc = info
395 return rc
396
397 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
398
399 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
400
401 # need to remove fragment from lookup_parts and replace with copyfrom_path
402 if frompath is not None:
403 self.ui.debug("munge-o-matic\n")
404 self.ui.debug(entrypath + '\n')
405 self.ui.debug(entrypath[len(frompath):] + '\n')
406 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
407 fromrev = froment.copyfrom_rev
408 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
409
410 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
411 if fromkind == svn.core.svn_node_file: # a deleted file
412 entries.append(self.recode(entry))
413 elif fromkind == svn.core.svn_node_dir:
414 # print "Deleted/moved non-file:", revnum, path, ent
415 # children = self._find_children(path, revnum - 1)
416 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
417 # Sometimes this is tricky. For example: in
418 # The Subversion Repository revision 6940 a dir
419 # was copied and one of its files was deleted
420 # from the new location in the same commit. This
421 # code can't deal with that yet.
422 if ent.action == 'C':
423 children = self._find_children(path, fromrev)
424 else:
425 oroot = entrypath.strip('/')
426 nroot = path.strip('/')
427 children = self._find_children(oroot, fromrev)
428 children = [s.replace(oroot,nroot) for s in children]
429 # Mark all [files, not directories] as deleted.
430 for child in children:
431 # Can we move a child directory and its
432 # parent in the same commit? (probably can). Could
433 # cause problems if instead of revnum -1,
434 # we have to look in (copyfrom_path, revnum - 1)
435 entrypath = get_entry_from_path("/" + child, module=old_module)
436 if entrypath:
437 entry = self.recode(entrypath.decode(self.encoding))
438 if entry in copies:
439 # deleted file within a copy
440 del copies[entry]
441 else:
442 entries.append(entry)
443 else:
444 self.ui.debug('unknown path in revision %d: %s\n' % \
445 (revnum, path))
446 elif kind == svn.core.svn_node_dir:
447 # Should probably synthesize normal file entries
448 # and handle as above to clean up copy/rename handling.
449
450 # If the directory just had a prop change,
451 # then we shouldn't need to look for its children.
452 # Also this could create duplicate entries. Not sure
453 # whether this will matter. Maybe should make entries a set.
454 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
455 # This will fail if a directory was copied
456 # from another branch and then some of its files
457 # were deleted in the same transaction.
458 children = self._find_children(path, revnum)
459 children.sort()
460 for child in children:
461 # Can we move a child directory and its
462 # parent in the same commit? (probably can). Could
463 # cause problems if instead of revnum -1,
464 # we have to look in (copyfrom_path, revnum - 1)
465 entrypath = get_entry_from_path("/" + child, module=self.module)
466 # print child, self.module, entrypath
467 if entrypath:
468 # Need to filter out directories here...
469 kind = svn.ra.check_path(self.ra, entrypath, revnum)
470 if kind != svn.core.svn_node_dir:
471 entries.append(self.recode(entrypath))
472
473 # Copies here (must copy all from source)
474 # Probably not a real problem for us if
475 # source does not exist
476
477 # Can do this with the copy command "hg copy"
478 # if ent.copyfrom_path:
479 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
480 # module=self.module)
481 # copyto_entry = entrypath
482 #
483 # print "copy directory", copyfrom_entry, 'to', copyto_entry
484 #
485 # copies.append((copyfrom_entry, copyto_entry))
486
487 if ent.copyfrom_path:
488 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
489 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
490 if copyfrom_entry:
491 copyfrom[path] = ent
492 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
493
494 # Good, /probably/ a regular copy. Really should check
495 # to see whether the parent revision actually contains
496 # the directory in question.
497 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
498 children.sort()
499 for child in children:
500 entrypath = get_entry_from_path("/" + child, module=self.module)
501 if entrypath:
502 entry = entrypath.decode(self.encoding)
503 # print "COPY COPY From", copyfrom_entry, entry
504 copyto_path = path + entry[len(copyfrom_entry):]
505 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
506 # print "COPY", entry, "COPY To", copyto_entry
507 copies[self.recode(copyto_entry)] = self.recode(entry)
508 # copy from quux splort/quuxfile
509
510 return (entries, copies)
511
512 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
349 self.child_cset = None 513 self.child_cset = None
350 def parselogentry(orig_paths, revnum, author, date, message): 514 def parselogentry(orig_paths, revnum, author, date, message):
351 self.ui.debug("parsing revision %d (%d changes)\n" % 515 self.ui.debug("parsing revision %d (%d changes)\n" %
352 (revnum, len(orig_paths))) 516 (revnum, len(orig_paths)))
353 517
355 new_module = self.modulemap[revnum] 519 new_module = self.modulemap[revnum]
356 if new_module != self.module: 520 if new_module != self.module:
357 self.module = new_module 521 self.module = new_module
358 self.reparent(self.module) 522 self.reparent(self.module)
359 523
360 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
361 copies = {}
362 entries = []
363 rev = self.revid(revnum) 524 rev = self.revid(revnum)
364 parents = []
365
366 # branch log might return entries for a parent we already have 525 # branch log might return entries for a parent we already have
367 if (rev in self.commits or 526 if (rev in self.commits or
368 (revnum < self.lastrevs.get(self.module, 0))): 527 (revnum < self.lastrevs.get(self.module, 0))):
369 return 528 return
370 529
371 try: 530 parents = []
372 branch = self.module.split("/")[-1]
373 if branch == 'trunk':
374 branch = ''
375 except IndexError:
376 branch = None
377
378 orig_paths = orig_paths.items() 531 orig_paths = orig_paths.items()
379 orig_paths.sort() 532 orig_paths.sort()
380 533
381 # check whether this revision is the start of a branch 534 # check whether this revision is the start of a branch
382 path, ent = orig_paths and orig_paths[0] or (None, None) 535 path, ent = orig_paths and orig_paths[0] or (None, None)
389 self.ui.note('found parent of branch %s at %d: %s\n' % \ 542 self.ui.note('found parent of branch %s at %d: %s\n' % \
390 (self.module, prev, ent.copyfrom_path)) 543 (self.module, prev, ent.copyfrom_path))
391 else: 544 else:
392 self.ui.debug("No copyfrom path, don't know what to do.\n") 545 self.ui.debug("No copyfrom path, don't know what to do.\n")
393 546
547 self.modulemap[revnum] = self.module # track backwards in time
548
549 paths = []
550 # filter out unrelated paths
394 for path, ent in orig_paths: 551 for path, ent in orig_paths:
395 # self.ui.write("path %s\n" % path) 552 if not path.startswith(self.module):
396 entrypath = get_entry_from_path(path, module=self.module)
397 # self.ui.write("entrypath %s\n" % entrypath)
398 if entrypath is None:
399 # Outside our area of interest
400 self.ui.debug("boring@%s: %s\n" % (revnum, path)) 553 self.ui.debug("boring@%s: %s\n" % (revnum, path))
401 continue 554 continue
402 entry = entrypath.decode(self.encoding) 555 paths.append((path, ent))
403 556
404 kind = svn.ra.check_path(self.ra, entrypath, revnum) 557 entries, copies = self.expandpaths(rev, paths, parents)
405 if kind == svn.core.svn_node_file:
406 if ent.copyfrom_path:
407 copyfrom_path = get_entry_from_path(ent.copyfrom_path)
408 if copyfrom_path:
409 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
410 # It's probably important for hg that the source
411 # exists in the revision's parent, not just the
412 # ent.copyfrom_rev
413 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
414 if fromkind != 0:
415 copies[self.recode(entry)] = self.recode(copyfrom_path)
416 entries.append(self.recode(entry))
417 elif kind == 0: # gone, but had better be a deleted *file*
418 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
419
420 # if a branch is created but entries are removed in the same
421 # changeset, get the right fromrev
422 if parents:
423 uuid, old_module, fromrev = self.revsplit(parents[0])
424 else:
425 fromrev = revnum - 1
426 # might always need to be revnum - 1 in these 3 lines?
427 old_module = self.modulemap.get(fromrev, self.module)
428
429 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
430 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
431
432 def lookup_parts(p):
433 rc = None
434 parts = p.split("/")
435 for i in range(len(parts)):
436 part = "/".join(parts[:i])
437 info = part, copyfrom.get(part, None)
438 if info[1] is not None:
439 self.ui.debug("Found parent directory %s\n" % info[1])
440 rc = info
441 return rc
442
443 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
444
445 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
446
447 # need to remove fragment from lookup_parts and replace with copyfrom_path
448 if frompath is not None:
449 self.ui.debug("munge-o-matic\n")
450 self.ui.debug(entrypath + '\n')
451 self.ui.debug(entrypath[len(frompath):] + '\n')
452 entrypath = froment.copyfrom_path + entrypath[len(frompath):]
453 fromrev = froment.copyfrom_rev
454 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
455
456 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
457 if fromkind == svn.core.svn_node_file: # a deleted file
458 entries.append(self.recode(entry))
459 elif fromkind == svn.core.svn_node_dir:
460 # print "Deleted/moved non-file:", revnum, path, ent
461 # children = self._find_children(path, revnum - 1)
462 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
463 # Sometimes this is tricky. For example: in
464 # The Subversion Repository revision 6940 a dir
465 # was copied and one of its files was deleted
466 # from the new location in the same commit. This
467 # code can't deal with that yet.
468 if ent.action == 'C':
469 children = self._find_children(path, fromrev)
470 else:
471 oroot = entrypath.strip('/')
472 nroot = path.strip('/')
473 children = self._find_children(oroot, fromrev)
474 children = [s.replace(oroot,nroot) for s in children]
475 # Mark all [files, not directories] as deleted.
476 for child in children:
477 # Can we move a child directory and its
478 # parent in the same commit? (probably can). Could
479 # cause problems if instead of revnum -1,
480 # we have to look in (copyfrom_path, revnum - 1)
481 entrypath = get_entry_from_path("/" + child, module=old_module)
482 if entrypath:
483 entry = self.recode(entrypath.decode(self.encoding))
484 if entry in copies:
485 # deleted file within a copy
486 del copies[entry]
487 else:
488 entries.append(entry)
489 else:
490 self.ui.debug('unknown path in revision %d: %s\n' % \
491 (revnum, path))
492 elif kind == svn.core.svn_node_dir:
493 # Should probably synthesize normal file entries
494 # and handle as above to clean up copy/rename handling.
495
496 # If the directory just had a prop change,
497 # then we shouldn't need to look for its children.
498 # Also this could create duplicate entries. Not sure
499 # whether this will matter. Maybe should make entries a set.
500 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
501 # This will fail if a directory was copied
502 # from another branch and then some of its files
503 # were deleted in the same transaction.
504 children = self._find_children(path, revnum)
505 children.sort()
506 for child in children:
507 # Can we move a child directory and its
508 # parent in the same commit? (probably can). Could
509 # cause problems if instead of revnum -1,
510 # we have to look in (copyfrom_path, revnum - 1)
511 entrypath = get_entry_from_path("/" + child, module=self.module)
512 # print child, self.module, entrypath
513 if entrypath:
514 # Need to filter out directories here...
515 kind = svn.ra.check_path(self.ra, entrypath, revnum)
516 if kind != svn.core.svn_node_dir:
517 entries.append(self.recode(entrypath))
518
519 # Copies here (must copy all from source)
520 # Probably not a real problem for us if
521 # source does not exist
522
523 # Can do this with the copy command "hg copy"
524 # if ent.copyfrom_path:
525 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
526 # module=self.module)
527 # copyto_entry = entrypath
528 #
529 # print "copy directory", copyfrom_entry, 'to', copyto_entry
530 #
531 # copies.append((copyfrom_entry, copyto_entry))
532
533 if ent.copyfrom_path:
534 copyfrom_path = ent.copyfrom_path.decode(self.encoding)
535 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
536 if copyfrom_entry:
537 copyfrom[path] = ent
538 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
539
540 # Good, /probably/ a regular copy. Really should check
541 # to see whether the parent revision actually contains
542 # the directory in question.
543 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
544 children.sort()
545 for child in children:
546 entrypath = get_entry_from_path("/" + child, module=self.module)
547 if entrypath:
548 entry = entrypath.decode(self.encoding)
549 # print "COPY COPY From", copyfrom_entry, entry
550 copyto_path = path + entry[len(copyfrom_entry):]
551 copyto_entry = get_entry_from_path(copyto_path, module=self.module)
552 # print "COPY", entry, "COPY To", copyto_entry
553 copies[self.recode(copyto_entry)] = self.recode(entry)
554 # copy from quux splort/quuxfile
555
556 self.modulemap[revnum] = self.module # track backwards in time
557 # a list of (filename, id) where id lets us retrieve the file. 558 # a list of (filename, id) where id lets us retrieve the file.
558 # eg in git, id is the object hash. for svn it'll be the 559 # eg in git, id is the object hash. for svn it'll be the
559 self.files[rev] = zip(entries, [rev] * len(entries)) 560 self.files[rev] = zip(entries, [rev] * len(entries))
560 if not entries: 561 if not entries:
561 return 562 return
565 # '2007-01-04T17:35:00.902377Z' 566 # '2007-01-04T17:35:00.902377Z'
566 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"]) 567 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
567 568
568 log = message and self.recode(message) 569 log = message and self.recode(message)
569 author = author and self.recode(author) or '' 570 author = author and self.recode(author) or ''
571 try:
572 branch = self.module.split("/")[-1]
573 if branch == 'trunk':
574 branch = ''
575 except IndexError:
576 branch = None
570 577
571 cset = commit(author=author, 578 cset = commit(author=author,
572 date=util.datestr(date), 579 date=util.datestr(date),
573 desc=log, 580 desc=log,
574 parents=parents, 581 parents=parents,