hgext/convert/__init__.py
changeset 4513 ac2fe196ac9b
parent 4512 91709ba3cc88
child 4515 86a66cce9566
equal deleted inserted replaced
4512:91709ba3cc88 4513:ac2fe196ac9b
     1 #!/usr/bin/env python
     1 # convert.py Foreign SCM converter
     2 #
     2 #
     3 # This is a generalized framework for converting between SCM
     3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
     4 # repository formats.
       
     5 #
     4 #
     6 # To use, run:
     5 # This software may be used and distributed according to the terms
     7 #
     6 # of the GNU General Public License, incorporated herein by reference.
     8 # convert-repo <source> [<dest> [<mapfile>]]
       
     9 #
       
    10 # Currently accepted source formats: git, cvs
       
    11 # Currently accepted destination formats: hg
       
    12 #
       
    13 # If destination isn't given, a new Mercurial repo named <src>-hg will
       
    14 # be created. If <mapfile> isn't given, it will be put in a default
       
    15 # location (<dest>/.hg/shamap by default)
       
    16 #
       
    17 # The <mapfile> is a simple text file that maps each source commit ID to
       
    18 # the destination ID for that revision, like so:
       
    19 #
       
    20 # <source ID> <destination ID>
       
    21 #
       
    22 # If the file doesn't exist, it's automatically created.  It's updated
       
    23 # on each commit copied, so convert-repo can be interrupted and can
       
    24 # be run repeatedly to copy new commits.
       
    25 
     7 
    26 import sys, os, zlib, sha, time, re, locale, socket
     8 import sys, os, zlib, sha, time, re, locale, socket
    27 os.environ["HGENCODING"] = "utf-8"
     9 from mercurial import hg, ui, util, commands
    28 from mercurial import hg, ui, util, fancyopts
    10 
    29 
    11 commands.norepo += " convert"
    30 class Abort(Exception): pass
    12 
    31 class NoRepo(Exception): pass
    13 class NoRepo(Exception): pass
    32 
    14 
    33 class commit(object):
    15 class commit(object):
    34     def __init__(self, **parts):
    16     def __init__(self, **parts):
    35         for x in "author date desc parents".split():
    17         for x in "author date desc parents".split():
    36             if not x in parts:
    18             if not x in parts:
    37                 abort("commit missing field %s\n" % x)
    19                 raise util.Abort("commit missing field %s\n" % x)
    38         self.__dict__.update(parts)
    20         self.__dict__.update(parts)
    39 
       
    40 quiet = 0
       
    41 def status(msg):
       
    42     if not quiet: sys.stdout.write(str(msg))
       
    43 
       
    44 def warn(msg):
       
    45     sys.stderr.write(str(msg))
       
    46 
       
    47 def abort(msg):
       
    48     raise Abort(msg)
       
    49 
    21 
    50 def recode(s):
    22 def recode(s):
    51     try:
    23     try:
    52         return s.decode("utf-8").encode("utf-8")
    24         return s.decode("utf-8").encode("utf-8")
    53     except:
    25     except:
    57             return s.decode("utf-8", "replace").encode("utf-8")
    29             return s.decode("utf-8", "replace").encode("utf-8")
    58 
    30 
    59 class converter_source(object):
    31 class converter_source(object):
    60     """Conversion source interface"""
    32     """Conversion source interface"""
    61 
    33 
    62     def __init__(self, path):
    34     def __init__(self, ui, path):
    63         """Initialize conversion source (or raise NoRepo("message")
    35         """Initialize conversion source (or raise NoRepo("message")
    64         exception if path is not a valid repository)"""
    36         exception if path is not a valid repository)"""
    65         raise NotImplementedError()
    37         raise NotImplementedError()
    66 
    38 
    67     def getheads(self):
    39     def getheads(self):
    92         raise NotImplementedError()
    64         raise NotImplementedError()
    93 
    65 
    94 class converter_sink(object):
    66 class converter_sink(object):
    95     """Conversion sink (target) interface"""
    67     """Conversion sink (target) interface"""
    96 
    68 
    97     def __init__(self, path):
    69     def __init__(self, ui, path):
    98         """Initialize conversion sink (or raise NoRepo("message")
    70         """Initialize conversion sink (or raise NoRepo("message")
    99         exception if path is not a valid repository)"""
    71         exception if path is not a valid repository)"""
   100         raise NotImplementedError()
    72         raise NotImplementedError()
   101 
    73 
   102     def getheads(self):
    74     def getheads(self):
   137         raise NotImplementedError()
   109         raise NotImplementedError()
   138 
   110 
   139 
   111 
   140 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
   112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
   141 class convert_cvs(converter_source):
   113 class convert_cvs(converter_source):
   142     def __init__(self, path):
   114     def __init__(self, ui, path):
   143         self.path = path
   115         self.path = path
       
   116         self.ui = ui
   144         cvs = os.path.join(path, "CVS")
   117         cvs = os.path.join(path, "CVS")
   145         if not os.path.exists(cvs):
   118         if not os.path.exists(cvs):
   146             raise NoRepo("couldn't open CVS repo %s" % path)
   119             raise NoRepo("couldn't open CVS repo %s" % path)
   147 
   120 
   148         self.changeset = {}
   121         self.changeset = {}
   221         root = self.cvsroot
   194         root = self.cvsroot
   222         conntype = None
   195         conntype = None
   223         user, host = None, None
   196         user, host = None, None
   224         cmd = ['cvs', 'server']
   197         cmd = ['cvs', 'server']
   225 
   198 
   226         status("connecting to %s\n" % root)
   199         self.ui.status("connecting to %s\n" % root)
   227 
   200 
   228         if root.startswith(":pserver:"):
   201         if root.startswith(":pserver:"):
   229             root = root[9:]
   202             root = root[9:]
   230             m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', root)
   203             m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', root)
   231             if m:
   204             if m:
   294                           " Merged Removed\n")
   267                           " Merged Removed\n")
   295         self.writep.write("valid-requests\n")
   268         self.writep.write("valid-requests\n")
   296         self.writep.flush()
   269         self.writep.flush()
   297         r = self.readp.readline()
   270         r = self.readp.readline()
   298         if not r.startswith("Valid-requests"):
   271         if not r.startswith("Valid-requests"):
   299             abort("server sucks\n")
   272             raise util.Abort("server sucks\n")
   300         if "UseUnchanged" in r:
   273         if "UseUnchanged" in r:
   301             self.writep.write("UseUnchanged\n")
   274             self.writep.write("UseUnchanged\n")
   302             self.writep.flush()
   275             self.writep.flush()
   303             r = self.readp.readline()
   276             r = self.readp.readline()
   304 
   277 
   334                 data = self.readp.read(count)
   307                 data = self.readp.read(count)
   335             else:
   308             else:
   336                 if line == "ok\n":
   309                 if line == "ok\n":
   337                     return (data, "x" in mode and "x" or "")
   310                     return (data, "x" in mode and "x" or "")
   338                 elif line.startswith("E "):
   311                 elif line.startswith("E "):
   339                     warn("cvs server: %s\n" % line[2:])
   312                     self.ui.warn("cvs server: %s\n" % line[2:])
   340                 elif line.startswith("Remove"):
   313                 elif line.startswith("Remove"):
   341                     l = self.readp.readline()
   314                     l = self.readp.readline()
   342                     l = self.readp.readline()
   315                     l = self.readp.readline()
   343                     if l != "ok\n":
   316                     if l != "ok\n":
   344                         abort("unknown CVS response: %s\n" % l)
   317                         raise util.Abort("unknown CVS response: %s\n" % l)
   345                 else:
   318                 else:
   346                     abort("unknown CVS response: %s\n" % line)
   319                     raise util.Abort("unknown CVS response: %s\n" % line)
   347 
   320 
   348     def getfile(self, file, rev):
   321     def getfile(self, file, rev):
   349         data, mode = self._getfile(file, rev)
   322         data, mode = self._getfile(file, rev)
   350         self.modecache[(file, rev)] = mode
   323         self.modecache[(file, rev)] = mode
   351         return data
   324         return data
   368 
   341 
   369     def gettags(self):
   342     def gettags(self):
   370         return self.tags
   343         return self.tags
   371 
   344 
   372 class convert_git(converter_source):
   345 class convert_git(converter_source):
   373     def __init__(self, path):
   346     def __init__(self, ui, path):
   374         if os.path.isdir(path + "/.git"):
   347         if os.path.isdir(path + "/.git"):
   375             path += "/.git"
   348             path += "/.git"
   376         self.path = path
   349         self.path = path
       
   350         self.ui = ui
   377         if not os.path.exists(path + "/objects"):
   351         if not os.path.exists(path + "/objects"):
   378             raise NoRepo("couldn't open GIT repo %s" % path)
   352             raise NoRepo("couldn't open GIT repo %s" % path)
   379 
   353 
   380     def getheads(self):
   354     def getheads(self):
   381         fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
   355         fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
   454             tags[tag] = node
   428             tags[tag] = node
   455 
   429 
   456         return tags
   430         return tags
   457 
   431 
   458 class convert_mercurial(converter_sink):
   432 class convert_mercurial(converter_sink):
   459     def __init__(self, path):
   433     def __init__(self, ui, path):
   460         self.path = path
   434         self.path = path
   461         u = ui.ui()
   435         self.ui = ui
   462         try:
   436         try:
   463             self.repo = hg.repository(u, path)
   437             self.repo = hg.repository(self.ui, path)
   464         except:
   438         except:
   465             raise NoRepo("could open hg repo %s" % path)
   439             raise NoRepo("could open hg repo %s" % path)
   466 
   440 
   467     def mapfile(self):
   441     def mapfile(self):
   468         return os.path.join(self.path, ".hg", "shamap")
   442         return os.path.join(self.path, ".hg", "shamap")
   528             newlines.append("%s %s\n" % (tags[tag], tag))
   502             newlines.append("%s %s\n" % (tags[tag], tag))
   529 
   503 
   530         newlines.sort()
   504         newlines.sort()
   531 
   505 
   532         if newlines != oldlines:
   506         if newlines != oldlines:
   533             status("updating tags\n")
   507             self.ui.status("updating tags\n")
   534             f = self.repo.wfile(".hgtags", "w")
   508             f = self.repo.wfile(".hgtags", "w")
   535             f.write("".join(newlines))
   509             f.write("".join(newlines))
   536             f.close()
   510             f.close()
   537             if not oldlines: self.repo.add([".hgtags"])
   511             if not oldlines: self.repo.add([".hgtags"])
   538             date = "%s 0" % int(time.mktime(time.gmtime()))
   512             date = "%s 0" % int(time.mktime(time.gmtime()))
   540                                 date, self.repo.changelog.tip(), hg.nullid)
   514                                 date, self.repo.changelog.tip(), hg.nullid)
   541             return hg.hex(self.repo.changelog.tip())
   515             return hg.hex(self.repo.changelog.tip())
   542 
   516 
   543 converters = [convert_cvs, convert_git, convert_mercurial]
   517 converters = [convert_cvs, convert_git, convert_mercurial]
   544 
   518 
   545 def converter(path):
   519 def converter(ui, path):
   546     if not os.path.isdir(path):
   520     if not os.path.isdir(path):
   547         abort("%s: not a directory\n" % path)
   521         raise util.Abort("%s: not a directory\n" % path)
   548     for c in converters:
   522     for c in converters:
   549         try:
   523         try:
   550             return c(path)
   524             return c(ui, path)
   551         except NoRepo:
   525         except NoRepo:
   552             pass
   526             pass
   553     abort("%s: unknown repository type\n" % path)
   527     raise util.Abort("%s: unknown repository type\n" % path)
   554 
   528 
   555 class convert(object):
   529 class convert(object):
   556     def __init__(self, source, dest, mapfile, opts):
   530     def __init__(self, ui, source, dest, mapfile, opts):
   557 
   531 
   558         self.source = source
   532         self.source = source
   559         self.dest = dest
   533         self.dest = dest
       
   534         self.ui = ui
   560         self.mapfile = mapfile
   535         self.mapfile = mapfile
   561         self.opts = opts
   536         self.opts = opts
   562         self.commitcache = {}
   537         self.commitcache = {}
   563 
   538 
   564         self.map = {}
   539         self.map = {}
   625                     s.append(n)
   600                     s.append(n)
   626                 if n in children:
   601                 if n in children:
   627                     for c in children[n]:
   602                     for c in children[n]:
   628                         visit.insert(0, c)
   603                         visit.insert(0, c)
   629 
   604 
   630         if opts.get('datesort'):
   605         if self.opts.get('datesort'):
   631             depth = {}
   606             depth = {}
   632             for n in s:
   607             for n in s:
   633                 depth[n] = 0
   608                 depth[n] = 0
   634                 pl = [p for p in self.commitcache[n].parents if p not in self.map]
   609                 pl = [p for p in self.commitcache[n].parents if p not in self.map]
   635                 if pl:
   610                 if pl:
   658         f = [f for f,v in files]
   633         f = [f for f,v in files]
   659         self.map[rev] = self.dest.putcommit(f, r, c)
   634         self.map[rev] = self.dest.putcommit(f, r, c)
   660         file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
   635         file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
   661 
   636 
   662     def convert(self):
   637     def convert(self):
   663         status("scanning source...\n")
   638         self.ui.status("scanning source...\n")
   664         heads = self.source.getheads()
   639         heads = self.source.getheads()
   665         parents = self.walktree(heads)
   640         parents = self.walktree(heads)
   666         status("sorting...\n")
   641         self.ui.status("sorting...\n")
   667         t = self.toposort(parents)
   642         t = self.toposort(parents)
   668         num = len(t)
   643         num = len(t)
   669         c = None
   644         c = None
   670 
   645 
   671         status("converting...\n")
   646         self.ui.status("converting...\n")
   672         for c in t:
   647         for c in t:
   673             num -= 1
   648             num -= 1
   674             desc = self.commitcache[c].desc
   649             desc = self.commitcache[c].desc
   675             if "\n" in desc:
   650             if "\n" in desc:
   676                 desc = desc.splitlines()[0]
   651                 desc = desc.splitlines()[0]
   677             status("%d %s\n" % (num, desc))
   652             self.ui.status("%d %s\n" % (num, desc))
   678             self.copy(c)
   653             self.copy(c)
   679 
   654 
   680         tags = self.source.gettags()
   655         tags = self.source.gettags()
   681         ctags = {}
   656         ctags = {}
   682         for k in tags:
   657         for k in tags:
   689             # write another hash correspondence to override the previous
   664             # write another hash correspondence to override the previous
   690             # one so we don't end up with extra tag heads
   665             # one so we don't end up with extra tag heads
   691             if nrev:
   666             if nrev:
   692                 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
   667                 file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
   693 
   668 
   694 def command(src, dest=None, mapfile=None, **opts):
   669 def _convert(ui, src, dest=None, mapfile=None, **opts):
   695     srcc = converter(src)
   670     '''Convert a foreign SCM repository to a Mercurial one.
       
   671 
       
   672     Accepted source formats:
       
   673     - GIT
       
   674     - CVS
       
   675 
       
   676     Accepted destination formats:
       
   677     - Mercurial
       
   678 
       
   679     If destination isn't given, a new Mercurial repo named <src>-hg will
       
   680     be created. If <mapfile> isn't given, it will be put in a default
       
   681     location (<dest>/.hg/shamap by default)
       
   682 
       
   683     The <mapfile> is a simple text file that maps each source commit ID to
       
   684     the destination ID for that revision, like so:
       
   685 
       
   686     <source ID> <destination ID>
       
   687 
       
   688     If the file doesn't exist, it's automatically created.  It's updated
       
   689     on each commit copied, so convert-repo can be interrupted and can
       
   690     be run repeatedly to copy new commits.
       
   691     '''
       
   692 
       
   693     srcc = converter(ui, src)
   696     if not hasattr(srcc, "getcommit"):
   694     if not hasattr(srcc, "getcommit"):
   697         abort("%s: can't read from this repo type\n" % src)
   695         raise util.Abort("%s: can't read from this repo type\n" % src)
   698 
   696 
   699     if not dest:
   697     if not dest:
   700         dest = src + "-hg"
   698         dest = src + "-hg"
   701         status("assuming destination %s\n" % dest)
   699         ui.status("assuming destination %s\n" % dest)
   702         if not os.path.isdir(dest):
   700         if not os.path.isdir(dest):
   703             status("creating repository %s\n" % dest)
   701             ui.status("creating repository %s\n" % dest)
   704             os.system("hg init " + dest)
   702             os.system("hg init " + dest)
   705     destc = converter(dest)
   703     destc = converter(ui, dest)
   706     if not hasattr(destc, "putcommit"):
   704     if not hasattr(destc, "putcommit"):
   707         abort("%s: can't write to this repo type\n" % src)
   705         raise util.Abort("%s: can't write to this repo type\n" % src)
   708 
   706 
   709     if not mapfile:
   707     if not mapfile:
   710         try:
   708         try:
   711             mapfile = destc.mapfile()
   709             mapfile = destc.mapfile()
   712         except:
   710         except:
   713             mapfile = os.path.join(destc, "map")
   711             mapfile = os.path.join(destc, "map")
   714 
   712 
   715     c = convert(srcc, destc, mapfile, opts)
   713     c = convert(ui, srcc, destc, mapfile, opts)
   716     c.convert()
   714     c.convert()
   717 
   715 
   718 options = [('q', 'quiet', None, 'suppress output'),
   716 cmdtable = {
   719            ('', 'datesort', None, 'try to sort changesets by date')]
   717     "convert": (_convert,
   720 opts = {}
   718                 [('', 'datesort', None, 'try to sort changesets by date')],
   721 args = fancyopts.fancyopts(sys.argv[1:], options, opts)
   719                 'hg convert [OPTIONS] <src> [dst [map]]'),
   722 
   720 }
   723 if opts['quiet']:
       
   724     quiet = 1
       
   725 
       
   726 try:
       
   727     command(*args, **opts)
       
   728 except Abort, inst:
       
   729     warn(inst)
       
   730 except KeyboardInterrupt:
       
   731     status("interrupted\n")