hgext/convert/subversion.py
changeset 5075 514c06098e9c
parent 5074 4cef0b93969d
child 5076 ef338e34a906
equal deleted inserted replaced
5074:4cef0b93969d 5075:514c06098e9c
   324     def reparent(self, module):
   324     def reparent(self, module):
   325         svn_url = self.base + module
   325         svn_url = self.base + module
   326         self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding))
   326         self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding))
   327         svn.ra.reparent(self.ra, svn_url.encode(self.encoding))
   327         svn.ra.reparent(self.ra, svn_url.encode(self.encoding))
   328 
   328 
   329     def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
   329     def expandpaths(self, rev, paths, parents):
   330         def get_entry_from_path(path, module=self.module):
   330         def get_entry_from_path(path, module=self.module):
   331             # Given the repository url of this wc, say
   331             # Given the repository url of this wc, say
   332             #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
   332             #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
   333             # extract the "entry" portion (a relative path) from what
   333             # extract the "entry" portion (a relative path) from what
   334             # svn log --xml says, ie
   334             # svn log --xml says, ie
   335             #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
   335             #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
   336             # that is to say "tests/PloneTestCase.py"
   336             # that is to say "tests/PloneTestCase.py"
   337 
       
   338             if path.startswith(module):
   337             if path.startswith(module):
   339                 relative = path[len(module):]
   338                 relative = path[len(module):]
   340                 if relative.startswith('/'):
   339                 if relative.startswith('/'):
   341                     return relative[1:]
   340                     return relative[1:]
   342                 else:
   341                 else:
   343                     return relative
   342                     return relative
   344 
   343 
   345             # The path is outside our tracked tree...
   344             # The path is outside our tracked tree...
   346             self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
   345             self.ui.debug('%r is not under %r, ignoring\n' % (path, module))
   347             return None
   346             return None
   348 
   347 
       
   348         entries = []
       
   349         copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
       
   350         copies = {}
       
   351         revnum = self.revnum(rev)
       
   352 
       
   353         for path, ent in paths:
       
   354             # self.ui.write("path %s\n" % path)
       
   355             entrypath = get_entry_from_path(path, module=self.module)
       
   356             entry = entrypath.decode(self.encoding)
       
   357 
       
   358             kind = svn.ra.check_path(self.ra, entrypath, revnum)
       
   359             if kind == svn.core.svn_node_file:
       
   360                 if ent.copyfrom_path:
       
   361                     copyfrom_path = get_entry_from_path(ent.copyfrom_path)
       
   362                     if copyfrom_path:
       
   363                         self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
       
   364                         # It's probably important for hg that the source
       
   365                         # exists in the revision's parent, not just the
       
   366                         # ent.copyfrom_rev
       
   367                         fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
       
   368                         if fromkind != 0:
       
   369                             copies[self.recode(entry)] = self.recode(copyfrom_path)
       
   370                 entries.append(self.recode(entry))
       
   371             elif kind == 0: # gone, but had better be a deleted *file*
       
   372                 self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
       
   373 
       
   374                 # if a branch is created but entries are removed in the same
       
   375                 # changeset, get the right fromrev
       
   376                 if parents:
       
   377                     uuid, old_module, fromrev = self.revsplit(parents[0])
       
   378                 else:
       
   379                     fromrev = revnum - 1
       
   380                     # might always need to be revnum - 1 in these 3 lines?
       
   381                     old_module = self.modulemap.get(fromrev, self.module)
       
   382 
       
   383                 basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
       
   384                 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
       
   385 
       
   386                 def lookup_parts(p):
       
   387                     rc = None
       
   388                     parts = p.split("/")
       
   389                     for i in range(len(parts)):
       
   390                         part = "/".join(parts[:i])
       
   391                         info = part, copyfrom.get(part, None)
       
   392                         if info[1] is not None:
       
   393                             self.ui.debug("Found parent directory %s\n" % info[1])
       
   394                             rc = info
       
   395                     return rc
       
   396 
       
   397                 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
       
   398 
       
   399                 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
       
   400 
       
   401                 # need to remove fragment from lookup_parts and replace with copyfrom_path
       
   402                 if frompath is not None:
       
   403                     self.ui.debug("munge-o-matic\n")
       
   404                     self.ui.debug(entrypath + '\n')
       
   405                     self.ui.debug(entrypath[len(frompath):] + '\n')
       
   406                     entrypath = froment.copyfrom_path + entrypath[len(frompath):]
       
   407                     fromrev = froment.copyfrom_rev
       
   408                     self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
       
   409 
       
   410                 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
       
   411                 if fromkind == svn.core.svn_node_file:   # a deleted file
       
   412                     entries.append(self.recode(entry))
       
   413                 elif fromkind == svn.core.svn_node_dir:
       
   414                     # print "Deleted/moved non-file:", revnum, path, ent
       
   415                     # children = self._find_children(path, revnum - 1)
       
   416                     # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
       
   417                     # Sometimes this is tricky. For example: in
       
   418                     # The Subversion Repository revision 6940 a dir
       
   419                     # was copied and one of its files was deleted
       
   420                     # from the new location in the same commit. This
       
   421                     # code can't deal with that yet.
       
   422                     if ent.action == 'C':
       
   423                         children = self._find_children(path, fromrev)
       
   424                     else:
       
   425                         oroot = entrypath.strip('/')
       
   426                         nroot = path.strip('/')
       
   427                         children = self._find_children(oroot, fromrev)
       
   428                         children = [s.replace(oroot,nroot) for s in children]
       
   429                     # Mark all [files, not directories] as deleted.
       
   430                     for child in children:
       
   431                         # Can we move a child directory and its
       
   432                         # parent in the same commit? (probably can). Could
       
   433                         # cause problems if instead of revnum -1,
       
   434                         # we have to look in (copyfrom_path, revnum - 1)
       
   435                         entrypath = get_entry_from_path("/" + child, module=old_module)
       
   436                         if entrypath:
       
   437                             entry = self.recode(entrypath.decode(self.encoding))
       
   438                             if entry in copies:
       
   439                                 # deleted file within a copy
       
   440                                 del copies[entry]
       
   441                             else:
       
   442                                 entries.append(entry)
       
   443                 else:
       
   444                     self.ui.debug('unknown path in revision %d: %s\n' % \
       
   445                                   (revnum, path))
       
   446             elif kind == svn.core.svn_node_dir:
       
   447                 # Should probably synthesize normal file entries
       
   448                 # and handle as above to clean up copy/rename handling.
       
   449 
       
   450                 # If the directory just had a prop change,
       
   451                 # then we shouldn't need to look for its children.
       
   452                 # Also this could create duplicate entries. Not sure
       
   453                 # whether this will matter. Maybe should make entries a set.
       
   454                 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
       
   455                 # This will fail if a directory was copied
       
   456                 # from another branch and then some of its files
       
   457                 # were deleted in the same transaction.
       
   458                 children = self._find_children(path, revnum)
       
   459                 children.sort()
       
   460                 for child in children:
       
   461                     # Can we move a child directory and its
       
   462                     # parent in the same commit? (probably can). Could
       
   463                     # cause problems if instead of revnum -1,
       
   464                     # we have to look in (copyfrom_path, revnum - 1)
       
   465                     entrypath = get_entry_from_path("/" + child, module=self.module)
       
   466                     # print child, self.module, entrypath
       
   467                     if entrypath:
       
   468                         # Need to filter out directories here...
       
   469                         kind = svn.ra.check_path(self.ra, entrypath, revnum)
       
   470                         if kind != svn.core.svn_node_dir:
       
   471                             entries.append(self.recode(entrypath))
       
   472 
       
   473                 # Copies here (must copy all from source)
       
   474                 # Probably not a real problem for us if
       
   475                 # source does not exist
       
   476 
       
   477                 # Can do this with the copy command "hg copy"
       
   478                 # if ent.copyfrom_path:
       
   479                 #     copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
       
   480                 #             module=self.module)
       
   481                 #     copyto_entry = entrypath
       
   482                 #
       
   483                 #     print "copy directory", copyfrom_entry, 'to', copyto_entry
       
   484                 #
       
   485                 #     copies.append((copyfrom_entry, copyto_entry))
       
   486 
       
   487                 if ent.copyfrom_path:
       
   488                     copyfrom_path = ent.copyfrom_path.decode(self.encoding)
       
   489                     copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
       
   490                     if copyfrom_entry:
       
   491                         copyfrom[path] = ent
       
   492                         self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
       
   493 
       
   494                         # Good, /probably/ a regular copy. Really should check
       
   495                         # to see whether the parent revision actually contains
       
   496                         # the directory in question.
       
   497                         children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
       
   498                         children.sort()
       
   499                         for child in children:
       
   500                             entrypath = get_entry_from_path("/" + child, module=self.module)
       
   501                             if entrypath:
       
   502                                 entry = entrypath.decode(self.encoding)
       
   503                                 # print "COPY COPY From", copyfrom_entry, entry
       
   504                                 copyto_path = path + entry[len(copyfrom_entry):]
       
   505                                 copyto_entry =  get_entry_from_path(copyto_path, module=self.module)
       
   506                                 # print "COPY", entry, "COPY To", copyto_entry
       
   507                                 copies[self.recode(copyto_entry)] = self.recode(entry)
       
   508                                 # copy from quux splort/quuxfile
       
   509 
       
   510         return (entries, copies)
       
   511 
       
   512     def _fetch_revisions(self, from_revnum = 0, to_revnum = 347):
   349         self.child_cset = None
   513         self.child_cset = None
   350         def parselogentry(orig_paths, revnum, author, date, message):
   514         def parselogentry(orig_paths, revnum, author, date, message):
   351             self.ui.debug("parsing revision %d (%d changes)\n" %
   515             self.ui.debug("parsing revision %d (%d changes)\n" %
   352                           (revnum, len(orig_paths)))
   516                           (revnum, len(orig_paths)))
   353 
   517 
   355                 new_module = self.modulemap[revnum]
   519                 new_module = self.modulemap[revnum]
   356                 if new_module != self.module:
   520                 if new_module != self.module:
   357                     self.module = new_module
   521                     self.module = new_module
   358                     self.reparent(self.module)
   522                     self.reparent(self.module)
   359 
   523 
   360             copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
       
   361             copies = {}
       
   362             entries = []
       
   363             rev = self.revid(revnum)
   524             rev = self.revid(revnum)
   364             parents = []
       
   365 
       
   366             # branch log might return entries for a parent we already have
   525             # branch log might return entries for a parent we already have
   367             if (rev in self.commits or
   526             if (rev in self.commits or
   368                 (revnum < self.lastrevs.get(self.module, 0))):
   527                 (revnum < self.lastrevs.get(self.module, 0))):
   369                 return
   528                 return
   370 
   529 
   371             try:
   530             parents = []
   372                 branch = self.module.split("/")[-1]
       
   373                 if branch == 'trunk':
       
   374                     branch = ''
       
   375             except IndexError:
       
   376                 branch = None
       
   377 
       
   378             orig_paths = orig_paths.items()
   531             orig_paths = orig_paths.items()
   379             orig_paths.sort()
   532             orig_paths.sort()
   380             
   533             
   381             # check whether this revision is the start of a branch
   534             # check whether this revision is the start of a branch
   382             path, ent = orig_paths and orig_paths[0] or (None, None)
   535             path, ent = orig_paths and orig_paths[0] or (None, None)
   389                     self.ui.note('found parent of branch %s at %d: %s\n' % \
   542                     self.ui.note('found parent of branch %s at %d: %s\n' % \
   390                                      (self.module, prev, ent.copyfrom_path))
   543                                      (self.module, prev, ent.copyfrom_path))
   391                 else:
   544                 else:
   392                     self.ui.debug("No copyfrom path, don't know what to do.\n")
   545                     self.ui.debug("No copyfrom path, don't know what to do.\n")
   393 
   546 
       
   547             self.modulemap[revnum] = self.module # track backwards in time
       
   548 
       
   549             paths = []
       
   550             # filter out unrelated paths
   394             for path, ent in orig_paths:
   551             for path, ent in orig_paths:
   395                 # self.ui.write("path %s\n" % path)
   552                 if not path.startswith(self.module):
   396                 entrypath = get_entry_from_path(path, module=self.module)
       
   397                 # self.ui.write("entrypath %s\n" % entrypath)
       
   398                 if entrypath is None:
       
   399                     # Outside our area of interest
       
   400                     self.ui.debug("boring@%s: %s\n" % (revnum, path))
   553                     self.ui.debug("boring@%s: %s\n" % (revnum, path))
   401                     continue
   554                     continue
   402                 entry = entrypath.decode(self.encoding)
   555                 paths.append((path, ent))
   403 
   556 
   404                 kind = svn.ra.check_path(self.ra, entrypath, revnum)
   557             entries, copies = self.expandpaths(rev, paths, parents)
   405                 if kind == svn.core.svn_node_file:
       
   406                     if ent.copyfrom_path:
       
   407                         copyfrom_path = get_entry_from_path(ent.copyfrom_path)
       
   408                         if copyfrom_path:
       
   409                             self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
       
   410                             # It's probably important for hg that the source
       
   411                             # exists in the revision's parent, not just the
       
   412                             # ent.copyfrom_rev
       
   413                             fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
       
   414                             if fromkind != 0:
       
   415                                 copies[self.recode(entry)] = self.recode(copyfrom_path)
       
   416                     entries.append(self.recode(entry))
       
   417                 elif kind == 0: # gone, but had better be a deleted *file*
       
   418                     self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
       
   419 
       
   420                     # if a branch is created but entries are removed in the same
       
   421                     # changeset, get the right fromrev
       
   422                     if parents:
       
   423                         uuid, old_module, fromrev = self.revsplit(parents[0])
       
   424                     else:
       
   425                         fromrev = revnum - 1
       
   426                         # might always need to be revnum - 1 in these 3 lines?
       
   427                         old_module = self.modulemap.get(fromrev, self.module)
       
   428 
       
   429                     basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
       
   430                     entrypath = old_module + "/" + get_entry_from_path(path, module=self.module)
       
   431 
       
   432                     def lookup_parts(p):
       
   433                         rc = None
       
   434                         parts = p.split("/")
       
   435                         for i in range(len(parts)):
       
   436                             part = "/".join(parts[:i])
       
   437                             info = part, copyfrom.get(part, None)
       
   438                             if info[1] is not None:
       
   439                                 self.ui.debug("Found parent directory %s\n" % info[1])
       
   440                                 rc = info
       
   441                         return rc
       
   442 
       
   443                     self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
       
   444 
       
   445                     frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
       
   446 
       
   447                     # need to remove fragment from lookup_parts and replace with copyfrom_path
       
   448                     if frompath is not None:
       
   449                         self.ui.debug("munge-o-matic\n")
       
   450                         self.ui.debug(entrypath + '\n')
       
   451                         self.ui.debug(entrypath[len(frompath):] + '\n')
       
   452                         entrypath = froment.copyfrom_path + entrypath[len(frompath):]
       
   453                         fromrev = froment.copyfrom_rev
       
   454                         self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
       
   455 
       
   456                     fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
       
   457                     if fromkind == svn.core.svn_node_file:   # a deleted file
       
   458                         entries.append(self.recode(entry))
       
   459                     elif fromkind == svn.core.svn_node_dir:
       
   460                         # print "Deleted/moved non-file:", revnum, path, ent
       
   461                         # children = self._find_children(path, revnum - 1)
       
   462                         # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
       
   463                         # Sometimes this is tricky. For example: in
       
   464                         # The Subversion Repository revision 6940 a dir
       
   465                         # was copied and one of its files was deleted
       
   466                         # from the new location in the same commit. This
       
   467                         # code can't deal with that yet.
       
   468                         if ent.action == 'C':
       
   469                             children = self._find_children(path, fromrev)
       
   470                         else:
       
   471                             oroot = entrypath.strip('/')
       
   472                             nroot = path.strip('/')
       
   473                             children = self._find_children(oroot, fromrev)
       
   474                             children = [s.replace(oroot,nroot) for s in children]
       
   475                         # Mark all [files, not directories] as deleted.
       
   476                         for child in children:
       
   477                             # Can we move a child directory and its
       
   478                             # parent in the same commit? (probably can). Could
       
   479                             # cause problems if instead of revnum -1,
       
   480                             # we have to look in (copyfrom_path, revnum - 1)
       
   481                             entrypath = get_entry_from_path("/" + child, module=old_module)
       
   482                             if entrypath:
       
   483                                 entry = self.recode(entrypath.decode(self.encoding))
       
   484                                 if entry in copies:
       
   485                                     # deleted file within a copy
       
   486                                     del copies[entry]
       
   487                                 else:
       
   488                                     entries.append(entry)
       
   489                     else:
       
   490                         self.ui.debug('unknown path in revision %d: %s\n' % \
       
   491                                       (revnum, path))
       
   492                 elif kind == svn.core.svn_node_dir:
       
   493                     # Should probably synthesize normal file entries
       
   494                     # and handle as above to clean up copy/rename handling.
       
   495 
       
   496                     # If the directory just had a prop change,
       
   497                     # then we shouldn't need to look for its children.
       
   498                     # Also this could create duplicate entries. Not sure
       
   499                     # whether this will matter. Maybe should make entries a set.
       
   500                     # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
       
   501                     # This will fail if a directory was copied
       
   502                     # from another branch and then some of its files
       
   503                     # were deleted in the same transaction.
       
   504                     children = self._find_children(path, revnum)
       
   505                     children.sort()
       
   506                     for child in children:
       
   507                         # Can we move a child directory and its
       
   508                         # parent in the same commit? (probably can). Could
       
   509                         # cause problems if instead of revnum -1,
       
   510                         # we have to look in (copyfrom_path, revnum - 1)
       
   511                         entrypath = get_entry_from_path("/" + child, module=self.module)
       
   512                         # print child, self.module, entrypath
       
   513                         if entrypath:
       
   514                             # Need to filter out directories here...
       
   515                             kind = svn.ra.check_path(self.ra, entrypath, revnum)
       
   516                             if kind != svn.core.svn_node_dir:
       
   517                                 entries.append(self.recode(entrypath))
       
   518 
       
   519                     # Copies here (must copy all from source)
       
   520                     # Probably not a real problem for us if
       
   521                     # source does not exist
       
   522 
       
   523                     # Can do this with the copy command "hg copy"
       
   524                     # if ent.copyfrom_path:
       
   525                     #     copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
       
   526                     #             module=self.module)
       
   527                     #     copyto_entry = entrypath
       
   528                     #
       
   529                     #     print "copy directory", copyfrom_entry, 'to', copyto_entry
       
   530                     #
       
   531                     #     copies.append((copyfrom_entry, copyto_entry))
       
   532 
       
   533                     if ent.copyfrom_path:
       
   534                         copyfrom_path = ent.copyfrom_path.decode(self.encoding)
       
   535                         copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
       
   536                         if copyfrom_entry:
       
   537                             copyfrom[path] = ent
       
   538                             self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
       
   539 
       
   540                             # Good, /probably/ a regular copy. Really should check
       
   541                             # to see whether the parent revision actually contains
       
   542                             # the directory in question.
       
   543                             children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
       
   544                             children.sort()
       
   545                             for child in children:
       
   546                                 entrypath = get_entry_from_path("/" + child, module=self.module)
       
   547                                 if entrypath:
       
   548                                     entry = entrypath.decode(self.encoding)
       
   549                                     # print "COPY COPY From", copyfrom_entry, entry
       
   550                                     copyto_path = path + entry[len(copyfrom_entry):]
       
   551                                     copyto_entry =  get_entry_from_path(copyto_path, module=self.module)
       
   552                                     # print "COPY", entry, "COPY To", copyto_entry
       
   553                                     copies[self.recode(copyto_entry)] = self.recode(entry)
       
   554                                     # copy from quux splort/quuxfile
       
   555 
       
   556             self.modulemap[revnum] = self.module # track backwards in time
       
   557             # a list of (filename, id) where id lets us retrieve the file.
   558             # a list of (filename, id) where id lets us retrieve the file.
   558             # eg in git, id is the object hash. for svn it'll be the
   559             # eg in git, id is the object hash. for svn it'll be the
   559             self.files[rev] = zip(entries, [rev] * len(entries))
   560             self.files[rev] = zip(entries, [rev] * len(entries))
   560             if not entries:
   561             if not entries:
   561                 return
   562                 return
   565             # '2007-01-04T17:35:00.902377Z'
   566             # '2007-01-04T17:35:00.902377Z'
   566             date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
   567             date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
   567 
   568 
   568             log = message and self.recode(message)
   569             log = message and self.recode(message)
   569             author = author and self.recode(author) or ''
   570             author = author and self.recode(author) or ''
       
   571             try:
       
   572                 branch = self.module.split("/")[-1]
       
   573                 if branch == 'trunk':
       
   574                     branch = ''
       
   575             except IndexError:
       
   576                 branch = None
   570 
   577 
   571             cset = commit(author=author,
   578             cset = commit(author=author,
   572                           date=util.datestr(date),
   579                           date=util.datestr(date),
   573                           desc=log,
   580                           desc=log,
   574                           parents=parents,
   581                           parents=parents,