hgext/convert/__init__.py
changeset 4534 cc9b79216a76
parent 4532 c3a78a49d7f0
child 4588 9855939d0c82
equal deleted inserted replaced
4533:36abb07c79d4 4534:cc9b79216a76
     3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
     3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
     4 #
     4 #
     5 # This software may be used and distributed according to the terms
     5 # This software may be used and distributed according to the terms
     6 # of the GNU General Public License, incorporated herein by reference.
     6 # of the GNU General Public License, incorporated herein by reference.
     7 
     7 
     8 import sys, os, zlib, sha, time, re, locale, socket
     8 from common import NoRepo
       
     9 from cvs import convert_cvs
       
    10 from git import convert_git
       
    11 from hg import convert_mercurial
       
    12 
       
    13 import os
     9 from mercurial import hg, ui, util, commands
    14 from mercurial import hg, ui, util, commands
    10 
    15 
    11 commands.norepo += " convert"
    16 commands.norepo += " convert"
    12 
       
    13 class NoRepo(Exception): pass
       
    14 
       
    15 class commit(object):
       
    16     def __init__(self, **parts):
       
    17         for x in "author date desc parents".split():
       
    18             if not x in parts:
       
    19                 raise util.Abort("commit missing field %s" % x)
       
    20         self.__dict__.update(parts)
       
    21 
       
    22 def recode(s):
       
    23     try:
       
    24         return s.decode("utf-8").encode("utf-8")
       
    25     except:
       
    26         try:
       
    27             return s.decode("latin-1").encode("utf-8")
       
    28         except:
       
    29             return s.decode("utf-8", "replace").encode("utf-8")
       
    30 
       
    31 class converter_source(object):
       
    32     """Conversion source interface"""
       
    33 
       
    34     def __init__(self, ui, path):
       
    35         """Initialize conversion source (or raise NoRepo("message")
       
    36         exception if path is not a valid repository)"""
       
    37         raise NotImplementedError()
       
    38 
       
    39     def getheads(self):
       
    40         """Return a list of this repository's heads"""
       
    41         raise NotImplementedError()
       
    42 
       
    43     def getfile(self, name, rev):
       
    44         """Return file contents as a string"""
       
    45         raise NotImplementedError()
       
    46 
       
    47     def getmode(self, name, rev):
       
    48         """Return file mode, eg. '', 'x', or 'l'"""
       
    49         raise NotImplementedError()
       
    50 
       
    51     def getchanges(self, version):
       
    52         """Return sorted list of (filename, id) tuples for all files changed in rev.
       
    53 
       
    54         id just tells us which revision to return in getfile(), e.g. in
       
    55         git it's an object hash."""
       
    56         raise NotImplementedError()
       
    57 
       
    58     def getcommit(self, version):
       
    59         """Return the commit object for version"""
       
    60         raise NotImplementedError()
       
    61 
       
    62     def gettags(self):
       
    63         """Return the tags as a dictionary of name: revision"""
       
    64         raise NotImplementedError()
       
    65 
       
    66 class converter_sink(object):
       
    67     """Conversion sink (target) interface"""
       
    68 
       
    69     def __init__(self, ui, path):
       
    70         """Initialize conversion sink (or raise NoRepo("message")
       
    71         exception if path is not a valid repository)"""
       
    72         raise NotImplementedError()
       
    73 
       
    74     def getheads(self):
       
    75         """Return a list of this repository's heads"""
       
    76         raise NotImplementedError()
       
    77 
       
    78     def mapfile(self):
       
    79         """Path to a file that will contain lines
       
    80         source_rev_id sink_rev_id
       
    81         mapping equivalent revision identifiers for each system."""
       
    82         raise NotImplementedError()
       
    83 
       
    84     def putfile(self, f, e, data):
       
    85         """Put file for next putcommit().
       
    86         f: path to file
       
    87         e: '', 'x', or 'l' (regular file, executable, or symlink)
       
    88         data: file contents"""
       
    89         raise NotImplementedError()
       
    90 
       
    91     def delfile(self, f):
       
    92         """Delete file for next putcommit().
       
    93         f: path to file"""
       
    94         raise NotImplementedError()
       
    95 
       
    96     def putcommit(self, files, parents, commit):
       
    97         """Create a revision with all changed files listed in 'files'
       
    98         and having listed parents. 'commit' is a commit object containing
       
    99         at a minimum the author, date, and message for this changeset.
       
   100         Called after putfile() and delfile() calls. Note that the sink
       
   101         repository is not told to update itself to a particular revision
       
   102         (or even what that revision would be) before it receives the
       
   103         file data."""
       
   104         raise NotImplementedError()
       
   105 
       
   106     def puttags(self, tags):
       
   107         """Put tags into sink.
       
   108         tags: {tagname: sink_rev_id, ...}"""
       
   109         raise NotImplementedError()
       
   110 
       
   111 
       
   112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
       
   113 class convert_cvs(converter_source):
       
   114     def __init__(self, ui, path):
       
   115         self.path = path
       
   116         self.ui = ui
       
   117         cvs = os.path.join(path, "CVS")
       
   118         if not os.path.exists(cvs):
       
   119             raise NoRepo("couldn't open CVS repo %s" % path)
       
   120 
       
   121         self.changeset = {}
       
   122         self.files = {}
       
   123         self.tags = {}
       
   124         self.lastbranch = {}
       
   125         self.parent = {}
       
   126         self.socket = None
       
   127         self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
       
   128         self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
       
   129         self.encoding = locale.getpreferredencoding()
       
   130         self._parse()
       
   131         self._connect()
       
   132 
       
   133     def _parse(self):
       
   134         if self.changeset:
       
   135             return
       
   136 
       
   137         d = os.getcwd()
       
   138         try:
       
   139             os.chdir(self.path)
       
   140             id = None
       
   141             state = 0
       
   142             for l in os.popen("cvsps -A -u --cvs-direct -q"):
       
   143                 if state == 0: # header
       
   144                     if l.startswith("PatchSet"):
       
   145                         id = l[9:-2]
       
   146                     elif l.startswith("Date"):
       
   147                         date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
       
   148                         date = util.datestr(date)
       
   149                     elif l.startswith("Branch"):
       
   150                         branch = l[8:-1]
       
   151                         self.parent[id] = self.lastbranch.get(branch, 'bad')
       
   152                         self.lastbranch[branch] = id
       
   153                     elif l.startswith("Ancestor branch"):
       
   154                         ancestor = l[17:-1]
       
   155                         self.parent[id] = self.lastbranch[ancestor]
       
   156                     elif l.startswith("Author"):
       
   157                         author = self.recode(l[8:-1])
       
   158                     elif l.startswith("Tag: "):
       
   159                         t = l[5:-1].rstrip()
       
   160                         if t != "(none)":
       
   161                             self.tags[t] = id
       
   162                     elif l.startswith("Log:"):
       
   163                         state = 1
       
   164                         log = ""
       
   165                 elif state == 1: # log
       
   166                     if l == "Members: \n":
       
   167                         files = {}
       
   168                         log = self.recode(log[:-1])
       
   169                         if log.isspace():
       
   170                             log = "*** empty log message ***\n"
       
   171                         state = 2
       
   172                     else:
       
   173                         log += l
       
   174                 elif state == 2:
       
   175                     if l == "\n": #
       
   176                         state = 0
       
   177                         p = [self.parent[id]]
       
   178                         if id == "1":
       
   179                             p = []
       
   180                         if branch == "HEAD":
       
   181                             branch = ""
       
   182                         c = commit(author=author, date=date, parents=p,
       
   183                                    desc=log, branch=branch)
       
   184                         self.changeset[id] = c
       
   185                         self.files[id] = files
       
   186                     else:
       
   187                         colon = l.rfind(':')
       
   188                         file = l[1:colon]
       
   189                         rev = l[colon+1:-2]
       
   190                         rev = rev.split("->")[1]
       
   191                         files[file] = rev
       
   192 
       
   193             self.heads = self.lastbranch.values()
       
   194         finally:
       
   195             os.chdir(d)
       
   196 
       
   197     def _connect(self):
       
   198         root = self.cvsroot
       
   199         conntype = None
       
   200         user, host = None, None
       
   201         cmd = ['cvs', 'server']
       
   202 
       
   203         self.ui.status("connecting to %s\n" % root)
       
   204 
       
   205         if root.startswith(":pserver:"):
       
   206             root = root[9:]
       
   207             m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
       
   208                          root)
       
   209             if m:
       
   210                 conntype = "pserver"
       
   211                 user, passw, serv, port, root = m.groups()
       
   212                 if not user:
       
   213                     user = "anonymous"
       
   214                 rr = ":pserver:" + user + "@" + serv + ":" +  root
       
   215                 if port:
       
   216                     rr2, port = "-", int(port)
       
   217                 else:
       
   218                     rr2, port = rr, 2401
       
   219                 rr += str(port)
       
   220 
       
   221                 if not passw:
       
   222                     passw = "A"
       
   223                     pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
       
   224                     for l in pf:
       
   225                         # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
       
   226                         m = re.match(r'(/\d+\s+/)?(.*)', l)
       
   227                         l = m.group(2)
       
   228                         w, p = l.split(' ', 1)
       
   229                         if w in [rr, rr2]:
       
   230                             passw = p
       
   231                             break
       
   232                     pf.close()
       
   233 
       
   234                 sck = socket.socket()
       
   235                 sck.connect((serv, port))
       
   236                 sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
       
   237                                     "END AUTH REQUEST", ""]))
       
   238                 if sck.recv(128) != "I LOVE YOU\n":
       
   239                     raise NoRepo("CVS pserver authentication failed")
       
   240 
       
   241                 self.writep = self.readp = sck.makefile('r+')
       
   242 
       
   243         if not conntype and root.startswith(":local:"):
       
   244             conntype = "local"
       
   245             root = root[7:]
       
   246 
       
   247         if not conntype:
       
   248             # :ext:user@host/home/user/path/to/cvsroot
       
   249             if root.startswith(":ext:"):
       
   250                 root = root[5:]
       
   251             m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
       
   252             if not m:
       
   253                 conntype = "local"
       
   254             else:
       
   255                 conntype = "rsh"
       
   256                 user, host, root = m.group(1), m.group(2), m.group(3)
       
   257 
       
   258         if conntype != "pserver":
       
   259             if conntype == "rsh":
       
   260                 rsh = os.environ.get("CVS_RSH" or "rsh")
       
   261                 if user:
       
   262                     cmd = [rsh, '-l', user, host] + cmd
       
   263                 else:
       
   264                     cmd = [rsh, host] + cmd
       
   265 
       
   266             self.writep, self.readp = os.popen2(cmd)
       
   267 
       
   268         self.realroot = root
       
   269 
       
   270         self.writep.write("Root %s\n" % root)
       
   271         self.writep.write("Valid-responses ok error Valid-requests Mode"
       
   272                           " M Mbinary E Checked-in Created Updated"
       
   273                           " Merged Removed\n")
       
   274         self.writep.write("valid-requests\n")
       
   275         self.writep.flush()
       
   276         r = self.readp.readline()
       
   277         if not r.startswith("Valid-requests"):
       
   278             raise util.Abort("server sucks")
       
   279         if "UseUnchanged" in r:
       
   280             self.writep.write("UseUnchanged\n")
       
   281             self.writep.flush()
       
   282             r = self.readp.readline()
       
   283 
       
   284     def getheads(self):
       
   285         return self.heads
       
   286 
       
   287     def _getfile(self, name, rev):
       
   288         if rev.endswith("(DEAD)"):
       
   289             raise IOError
       
   290 
       
   291         args = ("-N -P -kk -r %s --" % rev).split()
       
   292         args.append(os.path.join(self.cvsrepo, name))
       
   293         for x in args:
       
   294             self.writep.write("Argument %s\n" % x)
       
   295         self.writep.write("Directory .\n%s\nco\n" % self.realroot)
       
   296         self.writep.flush()
       
   297 
       
   298         data = ""
       
   299         while 1:
       
   300             line = self.readp.readline()
       
   301             if line.startswith("Created ") or line.startswith("Updated "):
       
   302                 self.readp.readline() # path
       
   303                 self.readp.readline() # entries
       
   304                 mode = self.readp.readline()[:-1]
       
   305                 count = int(self.readp.readline()[:-1])
       
   306                 data = self.readp.read(count)
       
   307             elif line.startswith(" "):
       
   308                 data += line[1:]
       
   309             elif line.startswith("M "):
       
   310                 pass
       
   311             elif line.startswith("Mbinary "):
       
   312                 count = int(self.readp.readline()[:-1])
       
   313                 data = self.readp.read(count)
       
   314             else:
       
   315                 if line == "ok\n":
       
   316                     return (data, "x" in mode and "x" or "")
       
   317                 elif line.startswith("E "):
       
   318                     self.ui.warn("cvs server: %s\n" % line[2:])
       
   319                 elif line.startswith("Remove"):
       
   320                     l = self.readp.readline()
       
   321                     l = self.readp.readline()
       
   322                     if l != "ok\n":
       
   323                         raise util.Abort("unknown CVS response: %s" % l)
       
   324                 else:
       
   325                     raise util.Abort("unknown CVS response: %s" % line)
       
   326 
       
   327     def getfile(self, file, rev):
       
   328         data, mode = self._getfile(file, rev)
       
   329         self.modecache[(file, rev)] = mode
       
   330         return data
       
   331 
       
   332     def getmode(self, file, rev):
       
   333         return self.modecache[(file, rev)]
       
   334 
       
   335     def getchanges(self, rev):
       
   336         self.modecache = {}
       
   337         files = self.files[rev]
       
   338         cl = files.items()
       
   339         cl.sort()
       
   340         return cl
       
   341 
       
   342     def recode(self, text):
       
   343         return text.decode(self.encoding, "replace").encode("utf-8")
       
   344 
       
   345     def getcommit(self, rev):
       
   346         return self.changeset[rev]
       
   347 
       
   348     def gettags(self):
       
   349         return self.tags
       
   350 
       
   351 class convert_git(converter_source):
       
   352     def __init__(self, ui, path):
       
   353         if os.path.isdir(path + "/.git"):
       
   354             path += "/.git"
       
   355         self.path = path
       
   356         self.ui = ui
       
   357         if not os.path.exists(path + "/objects"):
       
   358             raise NoRepo("couldn't open GIT repo %s" % path)
       
   359 
       
   360     def getheads(self):
       
   361         fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
       
   362         return [fh.read()[:-1]]
       
   363 
       
   364     def catfile(self, rev, type):
       
   365         if rev == "0" * 40: raise IOError()
       
   366         fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
       
   367                       % (self.path, type, rev))
       
   368         return fh.read()
       
   369 
       
   370     def getfile(self, name, rev):
       
   371         return self.catfile(rev, "blob")
       
   372 
       
   373     def getmode(self, name, rev):
       
   374         return self.modecache[(name, rev)]
       
   375 
       
   376     def getchanges(self, version):
       
   377         self.modecache = {}
       
   378         fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
       
   379                       % (self.path, version))
       
   380         changes = []
       
   381         for l in fh:
       
   382             if "\t" not in l: continue
       
   383             m, f = l[:-1].split("\t")
       
   384             m = m.split()
       
   385             h = m[3]
       
   386             p = (m[1] == "100755")
       
   387             s = (m[1] == "120000")
       
   388             self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
       
   389             changes.append((f, h))
       
   390         return changes
       
   391 
       
   392     def getcommit(self, version):
       
   393         c = self.catfile(version, "commit") # read the commit hash
       
   394         end = c.find("\n\n")
       
   395         message = c[end+2:]
       
   396         message = recode(message)
       
   397         l = c[:end].splitlines()
       
   398         manifest = l[0].split()[1]
       
   399         parents = []
       
   400         for e in l[1:]:
       
   401             n, v = e.split(" ", 1)
       
   402             if n == "author":
       
   403                 p = v.split()
       
   404                 tm, tz = p[-2:]
       
   405                 author = " ".join(p[:-2])
       
   406                 if author[0] == "<": author = author[1:-1]
       
   407                 author = recode(author)
       
   408             if n == "committer":
       
   409                 p = v.split()
       
   410                 tm, tz = p[-2:]
       
   411                 committer = " ".join(p[:-2])
       
   412                 if committer[0] == "<": committer = committer[1:-1]
       
   413                 committer = recode(committer)
       
   414                 message += "\ncommitter: %s\n" % committer
       
   415             if n == "parent": parents.append(v)
       
   416 
       
   417         tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
       
   418         tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
       
   419         date = tm + " " + str(tz)
       
   420 
       
   421         c = commit(parents=parents, date=date, author=author, desc=message)
       
   422         return c
       
   423 
       
   424     def gettags(self):
       
   425         tags = {}
       
   426         fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
       
   427         prefix = 'refs/tags/'
       
   428         for line in fh:
       
   429             line = line.strip()
       
   430             if not line.endswith("^{}"):
       
   431                 continue
       
   432             node, tag = line.split(None, 1)
       
   433             if not tag.startswith(prefix):
       
   434                 continue
       
   435             tag = tag[len(prefix):-3]
       
   436             tags[tag] = node
       
   437 
       
   438         return tags
       
   439 
       
   440 class convert_mercurial(converter_sink):
       
   441     def __init__(self, ui, path):
       
   442         self.path = path
       
   443         self.ui = ui
       
   444         try:
       
   445             self.repo = hg.repository(self.ui, path)
       
   446         except:
       
   447             raise NoRepo("could open hg repo %s" % path)
       
   448 
       
   449     def mapfile(self):
       
   450         return os.path.join(self.path, ".hg", "shamap")
       
   451 
       
   452     def getheads(self):
       
   453         h = self.repo.changelog.heads()
       
   454         return [ hg.hex(x) for x in h ]
       
   455 
       
   456     def putfile(self, f, e, data):
       
   457         self.repo.wwrite(f, data, e)
       
   458         if self.repo.dirstate.state(f) == '?':
       
   459             self.repo.dirstate.update([f], "a")
       
   460 
       
   461     def delfile(self, f):
       
   462         try:
       
   463             os.unlink(self.repo.wjoin(f))
       
   464             #self.repo.remove([f])
       
   465         except:
       
   466             pass
       
   467 
       
   468     def putcommit(self, files, parents, commit):
       
   469         seen = {}
       
   470         pl = []
       
   471         for p in parents:
       
   472             if p not in seen:
       
   473                 pl.append(p)
       
   474                 seen[p] = 1
       
   475         parents = pl
       
   476 
       
   477         if len(parents) < 2: parents.append("0" * 40)
       
   478         if len(parents) < 2: parents.append("0" * 40)
       
   479         p2 = parents.pop(0)
       
   480 
       
   481         text = commit.desc
       
   482         extra = {}
       
   483         try:
       
   484             extra["branch"] = commit.branch
       
   485         except AttributeError:
       
   486             pass
       
   487 
       
   488         while parents:
       
   489             p1 = p2
       
   490             p2 = parents.pop(0)
       
   491             a = self.repo.rawcommit(files, text, commit.author, commit.date,
       
   492                                     hg.bin(p1), hg.bin(p2), extra=extra)
       
   493             text = "(octopus merge fixup)\n"
       
   494             p2 = hg.hex(self.repo.changelog.tip())
       
   495 
       
   496         return p2
       
   497 
       
   498     def puttags(self, tags):
       
   499         try:
       
   500             old = self.repo.wfile(".hgtags").read()
       
   501             oldlines = old.splitlines(1)
       
   502             oldlines.sort()
       
   503         except:
       
   504             oldlines = []
       
   505 
       
   506         k = tags.keys()
       
   507         k.sort()
       
   508         newlines = []
       
   509         for tag in k:
       
   510             newlines.append("%s %s\n" % (tags[tag], tag))
       
   511 
       
   512         newlines.sort()
       
   513 
       
   514         if newlines != oldlines:
       
   515             self.ui.status("updating tags\n")
       
   516             f = self.repo.wfile(".hgtags", "w")
       
   517             f.write("".join(newlines))
       
   518             f.close()
       
   519             if not oldlines: self.repo.add([".hgtags"])
       
   520             date = "%s 0" % int(time.mktime(time.gmtime()))
       
   521             self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
       
   522                                 date, self.repo.changelog.tip(), hg.nullid)
       
   523             return hg.hex(self.repo.changelog.tip())
       
   524 
    17 
   525 converters = [convert_cvs, convert_git, convert_mercurial]
    18 converters = [convert_cvs, convert_git, convert_mercurial]
   526 
    19 
   527 def converter(ui, path):
    20 def converter(ui, path):
   528     if not os.path.isdir(path):
    21     if not os.path.isdir(path):