# HG changeset patch # User Brendan Cully # Date 1181531327 25200 # Node ID cc9b79216a769237f1048e83705b8826c15f0061 # Parent 36abb07c79d4ea808f2145f9a4d3e13ade9296be Split convert extension into common and repository type modules diff --git a/hgext/convert/__init__.py b/hgext/convert/__init__.py --- a/hgext/convert/__init__.py +++ b/hgext/convert/__init__.py @@ -5,523 +5,16 @@ # This software may be used and distributed according to the terms # of the GNU General Public License, incorporated herein by reference. -import sys, os, zlib, sha, time, re, locale, socket +from common import NoRepo +from cvs import convert_cvs +from git import convert_git +from hg import convert_mercurial + +import os from mercurial import hg, ui, util, commands commands.norepo += " convert" -class NoRepo(Exception): pass - -class commit(object): - def __init__(self, **parts): - for x in "author date desc parents".split(): - if not x in parts: - raise util.Abort("commit missing field %s" % x) - self.__dict__.update(parts) - -def recode(s): - try: - return s.decode("utf-8").encode("utf-8") - except: - try: - return s.decode("latin-1").encode("utf-8") - except: - return s.decode("utf-8", "replace").encode("utf-8") - -class converter_source(object): - """Conversion source interface""" - - def __init__(self, ui, path): - """Initialize conversion source (or raise NoRepo("message") - exception if path is not a valid repository)""" - raise NotImplementedError() - - def getheads(self): - """Return a list of this repository's heads""" - raise NotImplementedError() - - def getfile(self, name, rev): - """Return file contents as a string""" - raise NotImplementedError() - - def getmode(self, name, rev): - """Return file mode, eg. '', 'x', or 'l'""" - raise NotImplementedError() - - def getchanges(self, version): - """Return sorted list of (filename, id) tuples for all files changed in rev. - - id just tells us which revision to return in getfile(), e.g. in - git it's an object hash.""" - raise NotImplementedError() - - def getcommit(self, version): - """Return the commit object for version""" - raise NotImplementedError() - - def gettags(self): - """Return the tags as a dictionary of name: revision""" - raise NotImplementedError() - -class converter_sink(object): - """Conversion sink (target) interface""" - - def __init__(self, ui, path): - """Initialize conversion sink (or raise NoRepo("message") - exception if path is not a valid repository)""" - raise NotImplementedError() - - def getheads(self): - """Return a list of this repository's heads""" - raise NotImplementedError() - - def mapfile(self): - """Path to a file that will contain lines - source_rev_id sink_rev_id - mapping equivalent revision identifiers for each system.""" - raise NotImplementedError() - - def putfile(self, f, e, data): - """Put file for next putcommit(). - f: path to file - e: '', 'x', or 'l' (regular file, executable, or symlink) - data: file contents""" - raise NotImplementedError() - - def delfile(self, f): - """Delete file for next putcommit(). - f: path to file""" - raise NotImplementedError() - - def putcommit(self, files, parents, commit): - """Create a revision with all changed files listed in 'files' - and having listed parents. 'commit' is a commit object containing - at a minimum the author, date, and message for this changeset. - Called after putfile() and delfile() calls. Note that the sink - repository is not told to update itself to a particular revision - (or even what that revision would be) before it receives the - file data.""" - raise NotImplementedError() - - def puttags(self, tags): - """Put tags into sink. - tags: {tagname: sink_rev_id, ...}""" - raise NotImplementedError() - - -# CVS conversion code inspired by hg-cvs-import and git-cvsimport -class convert_cvs(converter_source): - def __init__(self, ui, path): - self.path = path - self.ui = ui - cvs = os.path.join(path, "CVS") - if not os.path.exists(cvs): - raise NoRepo("couldn't open CVS repo %s" % path) - - self.changeset = {} - self.files = {} - self.tags = {} - self.lastbranch = {} - self.parent = {} - self.socket = None - self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1] - self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1] - self.encoding = locale.getpreferredencoding() - self._parse() - self._connect() - - def _parse(self): - if self.changeset: - return - - d = os.getcwd() - try: - os.chdir(self.path) - id = None - state = 0 - for l in os.popen("cvsps -A -u --cvs-direct -q"): - if state == 0: # header - if l.startswith("PatchSet"): - id = l[9:-2] - elif l.startswith("Date"): - date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"]) - date = util.datestr(date) - elif l.startswith("Branch"): - branch = l[8:-1] - self.parent[id] = self.lastbranch.get(branch, 'bad') - self.lastbranch[branch] = id - elif l.startswith("Ancestor branch"): - ancestor = l[17:-1] - self.parent[id] = self.lastbranch[ancestor] - elif l.startswith("Author"): - author = self.recode(l[8:-1]) - elif l.startswith("Tag: "): - t = l[5:-1].rstrip() - if t != "(none)": - self.tags[t] = id - elif l.startswith("Log:"): - state = 1 - log = "" - elif state == 1: # log - if l == "Members: \n": - files = {} - log = self.recode(log[:-1]) - if log.isspace(): - log = "*** empty log message ***\n" - state = 2 - else: - log += l - elif state == 2: - if l == "\n": # - state = 0 - p = [self.parent[id]] - if id == "1": - p = [] - if branch == "HEAD": - branch = "" - c = commit(author=author, date=date, parents=p, - desc=log, branch=branch) - self.changeset[id] = c - self.files[id] = files - else: - colon = l.rfind(':') - file = l[1:colon] - rev = l[colon+1:-2] - rev = rev.split("->")[1] - files[file] = rev - - self.heads = self.lastbranch.values() - finally: - os.chdir(d) - - def _connect(self): - root = self.cvsroot - conntype = None - user, host = None, None - cmd = ['cvs', 'server'] - - self.ui.status("connecting to %s\n" % root) - - if root.startswith(":pserver:"): - root = root[9:] - m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', - root) - if m: - conntype = "pserver" - user, passw, serv, port, root = m.groups() - if not user: - user = "anonymous" - rr = ":pserver:" + user + "@" + serv + ":" + root - if port: - rr2, port = "-", int(port) - else: - rr2, port = rr, 2401 - rr += str(port) - - if not passw: - passw = "A" - pf = open(os.path.join(os.environ["HOME"], ".cvspass")) - for l in pf: - # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah/dev/null" - % (self.path, type, rev)) - return fh.read() - - def getfile(self, name, rev): - return self.catfile(rev, "blob") - - def getmode(self, name, rev): - return self.modecache[(name, rev)] - - def getchanges(self, version): - self.modecache = {} - fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" - % (self.path, version)) - changes = [] - for l in fh: - if "\t" not in l: continue - m, f = l[:-1].split("\t") - m = m.split() - h = m[3] - p = (m[1] == "100755") - s = (m[1] == "120000") - self.modecache[(f, h)] = (p and "x") or (s and "l") or "" - changes.append((f, h)) - return changes - - def getcommit(self, version): - c = self.catfile(version, "commit") # read the commit hash - end = c.find("\n\n") - message = c[end+2:] - message = recode(message) - l = c[:end].splitlines() - manifest = l[0].split()[1] - parents = [] - for e in l[1:]: - n, v = e.split(" ", 1) - if n == "author": - p = v.split() - tm, tz = p[-2:] - author = " ".join(p[:-2]) - if author[0] == "<": author = author[1:-1] - author = recode(author) - if n == "committer": - p = v.split() - tm, tz = p[-2:] - committer = " ".join(p[:-2]) - if committer[0] == "<": committer = committer[1:-1] - committer = recode(committer) - message += "\ncommitter: %s\n" % committer - if n == "parent": parents.append(v) - - tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:] - tz = -int(tzs) * (int(tzh) * 3600 + int(tzm)) - date = tm + " " + str(tz) - - c = commit(parents=parents, date=date, author=author, desc=message) - return c - - def gettags(self): - tags = {} - fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path) - prefix = 'refs/tags/' - for line in fh: - line = line.strip() - if not line.endswith("^{}"): - continue - node, tag = line.split(None, 1) - if not tag.startswith(prefix): - continue - tag = tag[len(prefix):-3] - tags[tag] = node - - return tags - -class convert_mercurial(converter_sink): - def __init__(self, ui, path): - self.path = path - self.ui = ui - try: - self.repo = hg.repository(self.ui, path) - except: - raise NoRepo("could open hg repo %s" % path) - - def mapfile(self): - return os.path.join(self.path, ".hg", "shamap") - - def getheads(self): - h = self.repo.changelog.heads() - return [ hg.hex(x) for x in h ] - - def putfile(self, f, e, data): - self.repo.wwrite(f, data, e) - if self.repo.dirstate.state(f) == '?': - self.repo.dirstate.update([f], "a") - - def delfile(self, f): - try: - os.unlink(self.repo.wjoin(f)) - #self.repo.remove([f]) - except: - pass - - def putcommit(self, files, parents, commit): - seen = {} - pl = [] - for p in parents: - if p not in seen: - pl.append(p) - seen[p] = 1 - parents = pl - - if len(parents) < 2: parents.append("0" * 40) - if len(parents) < 2: parents.append("0" * 40) - p2 = parents.pop(0) - - text = commit.desc - extra = {} - try: - extra["branch"] = commit.branch - except AttributeError: - pass - - while parents: - p1 = p2 - p2 = parents.pop(0) - a = self.repo.rawcommit(files, text, commit.author, commit.date, - hg.bin(p1), hg.bin(p2), extra=extra) - text = "(octopus merge fixup)\n" - p2 = hg.hex(self.repo.changelog.tip()) - - return p2 - - def puttags(self, tags): - try: - old = self.repo.wfile(".hgtags").read() - oldlines = old.splitlines(1) - oldlines.sort() - except: - oldlines = [] - - k = tags.keys() - k.sort() - newlines = [] - for tag in k: - newlines.append("%s %s\n" % (tags[tag], tag)) - - newlines.sort() - - if newlines != oldlines: - self.ui.status("updating tags\n") - f = self.repo.wfile(".hgtags", "w") - f.write("".join(newlines)) - f.close() - if not oldlines: self.repo.add([".hgtags"]) - date = "%s 0" % int(time.mktime(time.gmtime())) - self.repo.rawcommit([".hgtags"], "update tags", "convert-repo", - date, self.repo.changelog.tip(), hg.nullid) - return hg.hex(self.repo.changelog.tip()) - converters = [convert_cvs, convert_git, convert_mercurial] def converter(ui, path): diff --git a/hgext/convert/__init__.py b/hgext/convert/common.py copy from hgext/convert/__init__.py copy to hgext/convert/common.py --- a/hgext/convert/__init__.py +++ b/hgext/convert/common.py @@ -1,14 +1,4 @@ -# convert.py Foreign SCM converter -# -# Copyright 2005, 2006 Matt Mackall -# -# This software may be used and distributed according to the terms -# of the GNU General Public License, incorporated herein by reference. - -import sys, os, zlib, sha, time, re, locale, socket -from mercurial import hg, ui, util, commands - -commands.norepo += " convert" +# common code for the convert extension class NoRepo(Exception): pass @@ -19,15 +9,6 @@ class commit(object): raise util.Abort("commit missing field %s" % x) self.__dict__.update(parts) -def recode(s): - try: - return s.decode("utf-8").encode("utf-8") - except: - try: - return s.decode("latin-1").encode("utf-8") - except: - return s.decode("utf-8", "replace").encode("utf-8") - class converter_source(object): """Conversion source interface""" @@ -107,643 +88,3 @@ class converter_sink(object): """Put tags into sink. tags: {tagname: sink_rev_id, ...}""" raise NotImplementedError() - - -# CVS conversion code inspired by hg-cvs-import and git-cvsimport -class convert_cvs(converter_source): - def __init__(self, ui, path): - self.path = path - self.ui = ui - cvs = os.path.join(path, "CVS") - if not os.path.exists(cvs): - raise NoRepo("couldn't open CVS repo %s" % path) - - self.changeset = {} - self.files = {} - self.tags = {} - self.lastbranch = {} - self.parent = {} - self.socket = None - self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1] - self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1] - self.encoding = locale.getpreferredencoding() - self._parse() - self._connect() - - def _parse(self): - if self.changeset: - return - - d = os.getcwd() - try: - os.chdir(self.path) - id = None - state = 0 - for l in os.popen("cvsps -A -u --cvs-direct -q"): - if state == 0: # header - if l.startswith("PatchSet"): - id = l[9:-2] - elif l.startswith("Date"): - date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"]) - date = util.datestr(date) - elif l.startswith("Branch"): - branch = l[8:-1] - self.parent[id] = self.lastbranch.get(branch, 'bad') - self.lastbranch[branch] = id - elif l.startswith("Ancestor branch"): - ancestor = l[17:-1] - self.parent[id] = self.lastbranch[ancestor] - elif l.startswith("Author"): - author = self.recode(l[8:-1]) - elif l.startswith("Tag: "): - t = l[5:-1].rstrip() - if t != "(none)": - self.tags[t] = id - elif l.startswith("Log:"): - state = 1 - log = "" - elif state == 1: # log - if l == "Members: \n": - files = {} - log = self.recode(log[:-1]) - if log.isspace(): - log = "*** empty log message ***\n" - state = 2 - else: - log += l - elif state == 2: - if l == "\n": # - state = 0 - p = [self.parent[id]] - if id == "1": - p = [] - if branch == "HEAD": - branch = "" - c = commit(author=author, date=date, parents=p, - desc=log, branch=branch) - self.changeset[id] = c - self.files[id] = files - else: - colon = l.rfind(':') - file = l[1:colon] - rev = l[colon+1:-2] - rev = rev.split("->")[1] - files[file] = rev - - self.heads = self.lastbranch.values() - finally: - os.chdir(d) - - def _connect(self): - root = self.cvsroot - conntype = None - user, host = None, None - cmd = ['cvs', 'server'] - - self.ui.status("connecting to %s\n" % root) - - if root.startswith(":pserver:"): - root = root[9:] - m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', - root) - if m: - conntype = "pserver" - user, passw, serv, port, root = m.groups() - if not user: - user = "anonymous" - rr = ":pserver:" + user + "@" + serv + ":" + root - if port: - rr2, port = "-", int(port) - else: - rr2, port = rr, 2401 - rr += str(port) - - if not passw: - passw = "A" - pf = open(os.path.join(os.environ["HOME"], ".cvspass")) - for l in pf: - # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah/dev/null" - % (self.path, type, rev)) - return fh.read() - - def getfile(self, name, rev): - return self.catfile(rev, "blob") - - def getmode(self, name, rev): - return self.modecache[(name, rev)] - - def getchanges(self, version): - self.modecache = {} - fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" - % (self.path, version)) - changes = [] - for l in fh: - if "\t" not in l: continue - m, f = l[:-1].split("\t") - m = m.split() - h = m[3] - p = (m[1] == "100755") - s = (m[1] == "120000") - self.modecache[(f, h)] = (p and "x") or (s and "l") or "" - changes.append((f, h)) - return changes - - def getcommit(self, version): - c = self.catfile(version, "commit") # read the commit hash - end = c.find("\n\n") - message = c[end+2:] - message = recode(message) - l = c[:end].splitlines() - manifest = l[0].split()[1] - parents = [] - for e in l[1:]: - n, v = e.split(" ", 1) - if n == "author": - p = v.split() - tm, tz = p[-2:] - author = " ".join(p[:-2]) - if author[0] == "<": author = author[1:-1] - author = recode(author) - if n == "committer": - p = v.split() - tm, tz = p[-2:] - committer = " ".join(p[:-2]) - if committer[0] == "<": committer = committer[1:-1] - committer = recode(committer) - message += "\ncommitter: %s\n" % committer - if n == "parent": parents.append(v) - - tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:] - tz = -int(tzs) * (int(tzh) * 3600 + int(tzm)) - date = tm + " " + str(tz) - - c = commit(parents=parents, date=date, author=author, desc=message) - return c - - def gettags(self): - tags = {} - fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path) - prefix = 'refs/tags/' - for line in fh: - line = line.strip() - if not line.endswith("^{}"): - continue - node, tag = line.split(None, 1) - if not tag.startswith(prefix): - continue - tag = tag[len(prefix):-3] - tags[tag] = node - - return tags - -class convert_mercurial(converter_sink): - def __init__(self, ui, path): - self.path = path - self.ui = ui - try: - self.repo = hg.repository(self.ui, path) - except: - raise NoRepo("could open hg repo %s" % path) - - def mapfile(self): - return os.path.join(self.path, ".hg", "shamap") - - def getheads(self): - h = self.repo.changelog.heads() - return [ hg.hex(x) for x in h ] - - def putfile(self, f, e, data): - self.repo.wwrite(f, data, e) - if self.repo.dirstate.state(f) == '?': - self.repo.dirstate.update([f], "a") - - def delfile(self, f): - try: - os.unlink(self.repo.wjoin(f)) - #self.repo.remove([f]) - except: - pass - - def putcommit(self, files, parents, commit): - seen = {} - pl = [] - for p in parents: - if p not in seen: - pl.append(p) - seen[p] = 1 - parents = pl - - if len(parents) < 2: parents.append("0" * 40) - if len(parents) < 2: parents.append("0" * 40) - p2 = parents.pop(0) - - text = commit.desc - extra = {} - try: - extra["branch"] = commit.branch - except AttributeError: - pass - - while parents: - p1 = p2 - p2 = parents.pop(0) - a = self.repo.rawcommit(files, text, commit.author, commit.date, - hg.bin(p1), hg.bin(p2), extra=extra) - text = "(octopus merge fixup)\n" - p2 = hg.hex(self.repo.changelog.tip()) - - return p2 - - def puttags(self, tags): - try: - old = self.repo.wfile(".hgtags").read() - oldlines = old.splitlines(1) - oldlines.sort() - except: - oldlines = [] - - k = tags.keys() - k.sort() - newlines = [] - for tag in k: - newlines.append("%s %s\n" % (tags[tag], tag)) - - newlines.sort() - - if newlines != oldlines: - self.ui.status("updating tags\n") - f = self.repo.wfile(".hgtags", "w") - f.write("".join(newlines)) - f.close() - if not oldlines: self.repo.add([".hgtags"]) - date = "%s 0" % int(time.mktime(time.gmtime())) - self.repo.rawcommit([".hgtags"], "update tags", "convert-repo", - date, self.repo.changelog.tip(), hg.nullid) - return hg.hex(self.repo.changelog.tip()) - -converters = [convert_cvs, convert_git, convert_mercurial] - -def converter(ui, path): - if not os.path.isdir(path): - raise util.Abort("%s: not a directory" % path) - for c in converters: - try: - return c(ui, path) - except NoRepo: - pass - raise util.Abort("%s: unknown repository type" % path) - -class convert(object): - def __init__(self, ui, source, dest, mapfile, opts): - - self.source = source - self.dest = dest - self.ui = ui - self.mapfile = mapfile - self.opts = opts - self.commitcache = {} - - self.map = {} - try: - for l in file(self.mapfile): - sv, dv = l[:-1].split() - self.map[sv] = dv - except IOError: - pass - - def walktree(self, heads): - visit = heads - known = {} - parents = {} - while visit: - n = visit.pop(0) - if n in known or n in self.map: continue - known[n] = 1 - self.commitcache[n] = self.source.getcommit(n) - cp = self.commitcache[n].parents - for p in cp: - parents.setdefault(n, []).append(p) - visit.append(p) - - return parents - - def toposort(self, parents): - visit = parents.keys() - seen = {} - children = {} - - while visit: - n = visit.pop(0) - if n in seen: continue - seen[n] = 1 - pc = 0 - if n in parents: - for p in parents[n]: - if p not in self.map: pc += 1 - visit.append(p) - children.setdefault(p, []).append(n) - if not pc: root = n - - s = [] - removed = {} - visit = children.keys() - while visit: - n = visit.pop(0) - if n in removed: continue - dep = 0 - if n in parents: - for p in parents[n]: - if p in self.map: continue - if p not in removed: - # we're still dependent - visit.append(n) - dep = 1 - break - - if not dep: - # all n's parents are in the list - removed[n] = 1 - if n not in self.map: - s.append(n) - if n in children: - for c in children[n]: - visit.insert(0, c) - - if self.opts.get('datesort'): - depth = {} - for n in s: - depth[n] = 0 - pl = [p for p in self.commitcache[n].parents - if p not in self.map] - if pl: - depth[n] = max([depth[p] for p in pl]) + 1 - - s = [(depth[n], self.commitcache[n].date, n) for n in s] - s.sort() - s = [e[2] for e in s] - - return s - - def copy(self, rev): - c = self.commitcache[rev] - files = self.source.getchanges(rev) - - for f, v in files: - try: - data = self.source.getfile(f, v) - except IOError, inst: - self.dest.delfile(f) - else: - e = self.source.getmode(f, v) - self.dest.putfile(f, e, data) - - r = [self.map[v] for v in c.parents] - f = [f for f, v in files] - self.map[rev] = self.dest.putcommit(f, r, c) - file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev])) - - def convert(self): - self.ui.status("scanning source...\n") - heads = self.source.getheads() - parents = self.walktree(heads) - self.ui.status("sorting...\n") - t = self.toposort(parents) - num = len(t) - c = None - - self.ui.status("converting...\n") - for c in t: - num -= 1 - desc = self.commitcache[c].desc - if "\n" in desc: - desc = desc.splitlines()[0] - self.ui.status("%d %s\n" % (num, desc)) - self.copy(c) - - tags = self.source.gettags() - ctags = {} - for k in tags: - v = tags[k] - if v in self.map: - ctags[k] = self.map[v] - - if c and ctags: - nrev = self.dest.puttags(ctags) - # write another hash correspondence to override the previous - # one so we don't end up with extra tag heads - if nrev: - file(self.mapfile, "a").write("%s %s\n" % (c, nrev)) - -def _convert(ui, src, dest=None, mapfile=None, **opts): - '''Convert a foreign SCM repository to a Mercurial one. - - Accepted source formats: - - GIT - - CVS - - Accepted destination formats: - - Mercurial - - If destination isn't given, a new Mercurial repo named -hg will - be created. If isn't given, it will be put in a default - location (/.hg/shamap by default) - - The is a simple text file that maps each source commit ID to - the destination ID for that revision, like so: - - - - If the file doesn't exist, it's automatically created. It's updated - on each commit copied, so convert-repo can be interrupted and can - be run repeatedly to copy new commits. - ''' - - srcc = converter(ui, src) - if not hasattr(srcc, "getcommit"): - raise util.Abort("%s: can't read from this repo type" % src) - - if not dest: - dest = src + "-hg" - ui.status("assuming destination %s\n" % dest) - - # Try to be smart and initalize things when required - if os.path.isdir(dest): - if len(os.listdir(dest)) > 0: - try: - hg.repository(ui, dest) - ui.status("destination %s is a Mercurial repository\n" % dest) - except hg.RepoError: - raise util.Abort( - "destination directory %s is not empty.\n" - "Please specify an empty directory to be initialized\n" - "or an already initialized mercurial repository" - % dest) - else: - ui.status("initializing destination %s repository\n" % dest) - hg.repository(ui, dest, create=True) - elif os.path.exists(dest): - raise util.Abort("destination %s exists and is not a directory" % dest) - else: - ui.status("initializing destination %s repository\n" % dest) - hg.repository(ui, dest, create=True) - - destc = converter(ui, dest) - if not hasattr(destc, "putcommit"): - raise util.Abort("%s: can't write to this repo type" % src) - - if not mapfile: - try: - mapfile = destc.mapfile() - except: - mapfile = os.path.join(destc, "map") - - c = convert(ui, srcc, destc, mapfile, opts) - c.convert() - -cmdtable = { - "convert": - (_convert, - [('', 'datesort', None, 'try to sort changesets by date')], - 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'), -} diff --git a/hgext/convert/__init__.py b/hgext/convert/cvs.py copy from hgext/convert/__init__.py copy to hgext/convert/cvs.py --- a/hgext/convert/__init__.py +++ b/hgext/convert/cvs.py @@ -1,115 +1,10 @@ -# convert.py Foreign SCM converter -# -# Copyright 2005, 2006 Matt Mackall -# -# This software may be used and distributed according to the terms -# of the GNU General Public License, incorporated herein by reference. - -import sys, os, zlib, sha, time, re, locale, socket -from mercurial import hg, ui, util, commands - -commands.norepo += " convert" - -class NoRepo(Exception): pass - -class commit(object): - def __init__(self, **parts): - for x in "author date desc parents".split(): - if not x in parts: - raise util.Abort("commit missing field %s" % x) - self.__dict__.update(parts) - -def recode(s): - try: - return s.decode("utf-8").encode("utf-8") - except: - try: - return s.decode("latin-1").encode("utf-8") - except: - return s.decode("utf-8", "replace").encode("utf-8") - -class converter_source(object): - """Conversion source interface""" - - def __init__(self, ui, path): - """Initialize conversion source (or raise NoRepo("message") - exception if path is not a valid repository)""" - raise NotImplementedError() - - def getheads(self): - """Return a list of this repository's heads""" - raise NotImplementedError() - - def getfile(self, name, rev): - """Return file contents as a string""" - raise NotImplementedError() - - def getmode(self, name, rev): - """Return file mode, eg. '', 'x', or 'l'""" - raise NotImplementedError() - - def getchanges(self, version): - """Return sorted list of (filename, id) tuples for all files changed in rev. +# CVS conversion code inspired by hg-cvs-import and git-cvsimport - id just tells us which revision to return in getfile(), e.g. in - git it's an object hash.""" - raise NotImplementedError() - - def getcommit(self, version): - """Return the commit object for version""" - raise NotImplementedError() - - def gettags(self): - """Return the tags as a dictionary of name: revision""" - raise NotImplementedError() - -class converter_sink(object): - """Conversion sink (target) interface""" - - def __init__(self, ui, path): - """Initialize conversion sink (or raise NoRepo("message") - exception if path is not a valid repository)""" - raise NotImplementedError() - - def getheads(self): - """Return a list of this repository's heads""" - raise NotImplementedError() - - def mapfile(self): - """Path to a file that will contain lines - source_rev_id sink_rev_id - mapping equivalent revision identifiers for each system.""" - raise NotImplementedError() +import os, locale, re, socket +from mercurial import util - def putfile(self, f, e, data): - """Put file for next putcommit(). - f: path to file - e: '', 'x', or 'l' (regular file, executable, or symlink) - data: file contents""" - raise NotImplementedError() - - def delfile(self, f): - """Delete file for next putcommit(). - f: path to file""" - raise NotImplementedError() +from common import NoRepo, commit, converter_source - def putcommit(self, files, parents, commit): - """Create a revision with all changed files listed in 'files' - and having listed parents. 'commit' is a commit object containing - at a minimum the author, date, and message for this changeset. - Called after putfile() and delfile() calls. Note that the sink - repository is not told to update itself to a particular revision - (or even what that revision would be) before it receives the - file data.""" - raise NotImplementedError() - - def puttags(self, tags): - """Put tags into sink. - tags: {tagname: sink_rev_id, ...}""" - raise NotImplementedError() - - -# CVS conversion code inspired by hg-cvs-import and git-cvsimport class convert_cvs(converter_source): def __init__(self, ui, path): self.path = path @@ -347,403 +242,3 @@ class convert_cvs(converter_source): def gettags(self): return self.tags - -class convert_git(converter_source): - def __init__(self, ui, path): - if os.path.isdir(path + "/.git"): - path += "/.git" - self.path = path - self.ui = ui - if not os.path.exists(path + "/objects"): - raise NoRepo("couldn't open GIT repo %s" % path) - - def getheads(self): - fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path) - return [fh.read()[:-1]] - - def catfile(self, rev, type): - if rev == "0" * 40: raise IOError() - fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" - % (self.path, type, rev)) - return fh.read() - - def getfile(self, name, rev): - return self.catfile(rev, "blob") - - def getmode(self, name, rev): - return self.modecache[(name, rev)] - - def getchanges(self, version): - self.modecache = {} - fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" - % (self.path, version)) - changes = [] - for l in fh: - if "\t" not in l: continue - m, f = l[:-1].split("\t") - m = m.split() - h = m[3] - p = (m[1] == "100755") - s = (m[1] == "120000") - self.modecache[(f, h)] = (p and "x") or (s and "l") or "" - changes.append((f, h)) - return changes - - def getcommit(self, version): - c = self.catfile(version, "commit") # read the commit hash - end = c.find("\n\n") - message = c[end+2:] - message = recode(message) - l = c[:end].splitlines() - manifest = l[0].split()[1] - parents = [] - for e in l[1:]: - n, v = e.split(" ", 1) - if n == "author": - p = v.split() - tm, tz = p[-2:] - author = " ".join(p[:-2]) - if author[0] == "<": author = author[1:-1] - author = recode(author) - if n == "committer": - p = v.split() - tm, tz = p[-2:] - committer = " ".join(p[:-2]) - if committer[0] == "<": committer = committer[1:-1] - committer = recode(committer) - message += "\ncommitter: %s\n" % committer - if n == "parent": parents.append(v) - - tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:] - tz = -int(tzs) * (int(tzh) * 3600 + int(tzm)) - date = tm + " " + str(tz) - - c = commit(parents=parents, date=date, author=author, desc=message) - return c - - def gettags(self): - tags = {} - fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path) - prefix = 'refs/tags/' - for line in fh: - line = line.strip() - if not line.endswith("^{}"): - continue - node, tag = line.split(None, 1) - if not tag.startswith(prefix): - continue - tag = tag[len(prefix):-3] - tags[tag] = node - - return tags - -class convert_mercurial(converter_sink): - def __init__(self, ui, path): - self.path = path - self.ui = ui - try: - self.repo = hg.repository(self.ui, path) - except: - raise NoRepo("could open hg repo %s" % path) - - def mapfile(self): - return os.path.join(self.path, ".hg", "shamap") - - def getheads(self): - h = self.repo.changelog.heads() - return [ hg.hex(x) for x in h ] - - def putfile(self, f, e, data): - self.repo.wwrite(f, data, e) - if self.repo.dirstate.state(f) == '?': - self.repo.dirstate.update([f], "a") - - def delfile(self, f): - try: - os.unlink(self.repo.wjoin(f)) - #self.repo.remove([f]) - except: - pass - - def putcommit(self, files, parents, commit): - seen = {} - pl = [] - for p in parents: - if p not in seen: - pl.append(p) - seen[p] = 1 - parents = pl - - if len(parents) < 2: parents.append("0" * 40) - if len(parents) < 2: parents.append("0" * 40) - p2 = parents.pop(0) - - text = commit.desc - extra = {} - try: - extra["branch"] = commit.branch - except AttributeError: - pass - - while parents: - p1 = p2 - p2 = parents.pop(0) - a = self.repo.rawcommit(files, text, commit.author, commit.date, - hg.bin(p1), hg.bin(p2), extra=extra) - text = "(octopus merge fixup)\n" - p2 = hg.hex(self.repo.changelog.tip()) - - return p2 - - def puttags(self, tags): - try: - old = self.repo.wfile(".hgtags").read() - oldlines = old.splitlines(1) - oldlines.sort() - except: - oldlines = [] - - k = tags.keys() - k.sort() - newlines = [] - for tag in k: - newlines.append("%s %s\n" % (tags[tag], tag)) - - newlines.sort() - - if newlines != oldlines: - self.ui.status("updating tags\n") - f = self.repo.wfile(".hgtags", "w") - f.write("".join(newlines)) - f.close() - if not oldlines: self.repo.add([".hgtags"]) - date = "%s 0" % int(time.mktime(time.gmtime())) - self.repo.rawcommit([".hgtags"], "update tags", "convert-repo", - date, self.repo.changelog.tip(), hg.nullid) - return hg.hex(self.repo.changelog.tip()) - -converters = [convert_cvs, convert_git, convert_mercurial] - -def converter(ui, path): - if not os.path.isdir(path): - raise util.Abort("%s: not a directory" % path) - for c in converters: - try: - return c(ui, path) - except NoRepo: - pass - raise util.Abort("%s: unknown repository type" % path) - -class convert(object): - def __init__(self, ui, source, dest, mapfile, opts): - - self.source = source - self.dest = dest - self.ui = ui - self.mapfile = mapfile - self.opts = opts - self.commitcache = {} - - self.map = {} - try: - for l in file(self.mapfile): - sv, dv = l[:-1].split() - self.map[sv] = dv - except IOError: - pass - - def walktree(self, heads): - visit = heads - known = {} - parents = {} - while visit: - n = visit.pop(0) - if n in known or n in self.map: continue - known[n] = 1 - self.commitcache[n] = self.source.getcommit(n) - cp = self.commitcache[n].parents - for p in cp: - parents.setdefault(n, []).append(p) - visit.append(p) - - return parents - - def toposort(self, parents): - visit = parents.keys() - seen = {} - children = {} - - while visit: - n = visit.pop(0) - if n in seen: continue - seen[n] = 1 - pc = 0 - if n in parents: - for p in parents[n]: - if p not in self.map: pc += 1 - visit.append(p) - children.setdefault(p, []).append(n) - if not pc: root = n - - s = [] - removed = {} - visit = children.keys() - while visit: - n = visit.pop(0) - if n in removed: continue - dep = 0 - if n in parents: - for p in parents[n]: - if p in self.map: continue - if p not in removed: - # we're still dependent - visit.append(n) - dep = 1 - break - - if not dep: - # all n's parents are in the list - removed[n] = 1 - if n not in self.map: - s.append(n) - if n in children: - for c in children[n]: - visit.insert(0, c) - - if self.opts.get('datesort'): - depth = {} - for n in s: - depth[n] = 0 - pl = [p for p in self.commitcache[n].parents - if p not in self.map] - if pl: - depth[n] = max([depth[p] for p in pl]) + 1 - - s = [(depth[n], self.commitcache[n].date, n) for n in s] - s.sort() - s = [e[2] for e in s] - - return s - - def copy(self, rev): - c = self.commitcache[rev] - files = self.source.getchanges(rev) - - for f, v in files: - try: - data = self.source.getfile(f, v) - except IOError, inst: - self.dest.delfile(f) - else: - e = self.source.getmode(f, v) - self.dest.putfile(f, e, data) - - r = [self.map[v] for v in c.parents] - f = [f for f, v in files] - self.map[rev] = self.dest.putcommit(f, r, c) - file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev])) - - def convert(self): - self.ui.status("scanning source...\n") - heads = self.source.getheads() - parents = self.walktree(heads) - self.ui.status("sorting...\n") - t = self.toposort(parents) - num = len(t) - c = None - - self.ui.status("converting...\n") - for c in t: - num -= 1 - desc = self.commitcache[c].desc - if "\n" in desc: - desc = desc.splitlines()[0] - self.ui.status("%d %s\n" % (num, desc)) - self.copy(c) - - tags = self.source.gettags() - ctags = {} - for k in tags: - v = tags[k] - if v in self.map: - ctags[k] = self.map[v] - - if c and ctags: - nrev = self.dest.puttags(ctags) - # write another hash correspondence to override the previous - # one so we don't end up with extra tag heads - if nrev: - file(self.mapfile, "a").write("%s %s\n" % (c, nrev)) - -def _convert(ui, src, dest=None, mapfile=None, **opts): - '''Convert a foreign SCM repository to a Mercurial one. - - Accepted source formats: - - GIT - - CVS - - Accepted destination formats: - - Mercurial - - If destination isn't given, a new Mercurial repo named -hg will - be created. If isn't given, it will be put in a default - location (/.hg/shamap by default) - - The is a simple text file that maps each source commit ID to - the destination ID for that revision, like so: - - - - If the file doesn't exist, it's automatically created. It's updated - on each commit copied, so convert-repo can be interrupted and can - be run repeatedly to copy new commits. - ''' - - srcc = converter(ui, src) - if not hasattr(srcc, "getcommit"): - raise util.Abort("%s: can't read from this repo type" % src) - - if not dest: - dest = src + "-hg" - ui.status("assuming destination %s\n" % dest) - - # Try to be smart and initalize things when required - if os.path.isdir(dest): - if len(os.listdir(dest)) > 0: - try: - hg.repository(ui, dest) - ui.status("destination %s is a Mercurial repository\n" % dest) - except hg.RepoError: - raise util.Abort( - "destination directory %s is not empty.\n" - "Please specify an empty directory to be initialized\n" - "or an already initialized mercurial repository" - % dest) - else: - ui.status("initializing destination %s repository\n" % dest) - hg.repository(ui, dest, create=True) - elif os.path.exists(dest): - raise util.Abort("destination %s exists and is not a directory" % dest) - else: - ui.status("initializing destination %s repository\n" % dest) - hg.repository(ui, dest, create=True) - - destc = converter(ui, dest) - if not hasattr(destc, "putcommit"): - raise util.Abort("%s: can't write to this repo type" % src) - - if not mapfile: - try: - mapfile = destc.mapfile() - except: - mapfile = os.path.join(destc, "map") - - c = convert(ui, srcc, destc, mapfile, opts) - c.convert() - -cmdtable = { - "convert": - (_convert, - [('', 'datesort', None, 'try to sort changesets by date')], - 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'), -} diff --git a/hgext/convert/__init__.py b/hgext/convert/git.py copy from hgext/convert/__init__.py copy to hgext/convert/git.py --- a/hgext/convert/__init__.py +++ b/hgext/convert/git.py @@ -1,23 +1,8 @@ -# convert.py Foreign SCM converter -# -# Copyright 2005, 2006 Matt Mackall -# -# This software may be used and distributed according to the terms -# of the GNU General Public License, incorporated herein by reference. - -import sys, os, zlib, sha, time, re, locale, socket -from mercurial import hg, ui, util, commands +# git support for the convert extension -commands.norepo += " convert" - -class NoRepo(Exception): pass +import os -class commit(object): - def __init__(self, **parts): - for x in "author date desc parents".split(): - if not x in parts: - raise util.Abort("commit missing field %s" % x) - self.__dict__.update(parts) +from common import NoRepo, commit, converter_source def recode(s): try: @@ -28,326 +13,6 @@ def recode(s): except: return s.decode("utf-8", "replace").encode("utf-8") -class converter_source(object): - """Conversion source interface""" - - def __init__(self, ui, path): - """Initialize conversion source (or raise NoRepo("message") - exception if path is not a valid repository)""" - raise NotImplementedError() - - def getheads(self): - """Return a list of this repository's heads""" - raise NotImplementedError() - - def getfile(self, name, rev): - """Return file contents as a string""" - raise NotImplementedError() - - def getmode(self, name, rev): - """Return file mode, eg. '', 'x', or 'l'""" - raise NotImplementedError() - - def getchanges(self, version): - """Return sorted list of (filename, id) tuples for all files changed in rev. - - id just tells us which revision to return in getfile(), e.g. in - git it's an object hash.""" - raise NotImplementedError() - - def getcommit(self, version): - """Return the commit object for version""" - raise NotImplementedError() - - def gettags(self): - """Return the tags as a dictionary of name: revision""" - raise NotImplementedError() - -class converter_sink(object): - """Conversion sink (target) interface""" - - def __init__(self, ui, path): - """Initialize conversion sink (or raise NoRepo("message") - exception if path is not a valid repository)""" - raise NotImplementedError() - - def getheads(self): - """Return a list of this repository's heads""" - raise NotImplementedError() - - def mapfile(self): - """Path to a file that will contain lines - source_rev_id sink_rev_id - mapping equivalent revision identifiers for each system.""" - raise NotImplementedError() - - def putfile(self, f, e, data): - """Put file for next putcommit(). - f: path to file - e: '', 'x', or 'l' (regular file, executable, or symlink) - data: file contents""" - raise NotImplementedError() - - def delfile(self, f): - """Delete file for next putcommit(). - f: path to file""" - raise NotImplementedError() - - def putcommit(self, files, parents, commit): - """Create a revision with all changed files listed in 'files' - and having listed parents. 'commit' is a commit object containing - at a minimum the author, date, and message for this changeset. - Called after putfile() and delfile() calls. Note that the sink - repository is not told to update itself to a particular revision - (or even what that revision would be) before it receives the - file data.""" - raise NotImplementedError() - - def puttags(self, tags): - """Put tags into sink. - tags: {tagname: sink_rev_id, ...}""" - raise NotImplementedError() - - -# CVS conversion code inspired by hg-cvs-import and git-cvsimport -class convert_cvs(converter_source): - def __init__(self, ui, path): - self.path = path - self.ui = ui - cvs = os.path.join(path, "CVS") - if not os.path.exists(cvs): - raise NoRepo("couldn't open CVS repo %s" % path) - - self.changeset = {} - self.files = {} - self.tags = {} - self.lastbranch = {} - self.parent = {} - self.socket = None - self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1] - self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1] - self.encoding = locale.getpreferredencoding() - self._parse() - self._connect() - - def _parse(self): - if self.changeset: - return - - d = os.getcwd() - try: - os.chdir(self.path) - id = None - state = 0 - for l in os.popen("cvsps -A -u --cvs-direct -q"): - if state == 0: # header - if l.startswith("PatchSet"): - id = l[9:-2] - elif l.startswith("Date"): - date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"]) - date = util.datestr(date) - elif l.startswith("Branch"): - branch = l[8:-1] - self.parent[id] = self.lastbranch.get(branch, 'bad') - self.lastbranch[branch] = id - elif l.startswith("Ancestor branch"): - ancestor = l[17:-1] - self.parent[id] = self.lastbranch[ancestor] - elif l.startswith("Author"): - author = self.recode(l[8:-1]) - elif l.startswith("Tag: "): - t = l[5:-1].rstrip() - if t != "(none)": - self.tags[t] = id - elif l.startswith("Log:"): - state = 1 - log = "" - elif state == 1: # log - if l == "Members: \n": - files = {} - log = self.recode(log[:-1]) - if log.isspace(): - log = "*** empty log message ***\n" - state = 2 - else: - log += l - elif state == 2: - if l == "\n": # - state = 0 - p = [self.parent[id]] - if id == "1": - p = [] - if branch == "HEAD": - branch = "" - c = commit(author=author, date=date, parents=p, - desc=log, branch=branch) - self.changeset[id] = c - self.files[id] = files - else: - colon = l.rfind(':') - file = l[1:colon] - rev = l[colon+1:-2] - rev = rev.split("->")[1] - files[file] = rev - - self.heads = self.lastbranch.values() - finally: - os.chdir(d) - - def _connect(self): - root = self.cvsroot - conntype = None - user, host = None, None - cmd = ['cvs', 'server'] - - self.ui.status("connecting to %s\n" % root) - - if root.startswith(":pserver:"): - root = root[9:] - m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', - root) - if m: - conntype = "pserver" - user, passw, serv, port, root = m.groups() - if not user: - user = "anonymous" - rr = ":pserver:" + user + "@" + serv + ":" + root - if port: - rr2, port = "-", int(port) - else: - rr2, port = rr, 2401 - rr += str(port) - - if not passw: - passw = "A" - pf = open(os.path.join(os.environ["HOME"], ".cvspass")) - for l in pf: - # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah-hg will - be created. If isn't given, it will be put in a default - location (/.hg/shamap by default) - - The is a simple text file that maps each source commit ID to - the destination ID for that revision, like so: - - - - If the file doesn't exist, it's automatically created. It's updated - on each commit copied, so convert-repo can be interrupted and can - be run repeatedly to copy new commits. - ''' - - srcc = converter(ui, src) - if not hasattr(srcc, "getcommit"): - raise util.Abort("%s: can't read from this repo type" % src) - - if not dest: - dest = src + "-hg" - ui.status("assuming destination %s\n" % dest) - - # Try to be smart and initalize things when required - if os.path.isdir(dest): - if len(os.listdir(dest)) > 0: - try: - hg.repository(ui, dest) - ui.status("destination %s is a Mercurial repository\n" % dest) - except hg.RepoError: - raise util.Abort( - "destination directory %s is not empty.\n" - "Please specify an empty directory to be initialized\n" - "or an already initialized mercurial repository" - % dest) - else: - ui.status("initializing destination %s repository\n" % dest) - hg.repository(ui, dest, create=True) - elif os.path.exists(dest): - raise util.Abort("destination %s exists and is not a directory" % dest) - else: - ui.status("initializing destination %s repository\n" % dest) - hg.repository(ui, dest, create=True) - - destc = converter(ui, dest) - if not hasattr(destc, "putcommit"): - raise util.Abort("%s: can't write to this repo type" % src) - - if not mapfile: - try: - mapfile = destc.mapfile() - except: - mapfile = os.path.join(destc, "map") - - c = convert(ui, srcc, destc, mapfile, opts) - c.convert() - -cmdtable = { - "convert": - (_convert, - [('', 'datesort', None, 'try to sort changesets by date')], - 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'), -} diff --git a/hgext/convert/__init__.py b/hgext/convert/hg.py copy from hgext/convert/__init__.py copy to hgext/convert/hg.py --- a/hgext/convert/__init__.py +++ b/hgext/convert/hg.py @@ -1,441 +1,9 @@ -# convert.py Foreign SCM converter -# -# Copyright 2005, 2006 Matt Mackall -# -# This software may be used and distributed according to the terms -# of the GNU General Public License, incorporated herein by reference. - -import sys, os, zlib, sha, time, re, locale, socket -from mercurial import hg, ui, util, commands - -commands.norepo += " convert" - -class NoRepo(Exception): pass - -class commit(object): - def __init__(self, **parts): - for x in "author date desc parents".split(): - if not x in parts: - raise util.Abort("commit missing field %s" % x) - self.__dict__.update(parts) - -def recode(s): - try: - return s.decode("utf-8").encode("utf-8") - except: - try: - return s.decode("latin-1").encode("utf-8") - except: - return s.decode("utf-8", "replace").encode("utf-8") - -class converter_source(object): - """Conversion source interface""" - - def __init__(self, ui, path): - """Initialize conversion source (or raise NoRepo("message") - exception if path is not a valid repository)""" - raise NotImplementedError() - - def getheads(self): - """Return a list of this repository's heads""" - raise NotImplementedError() - - def getfile(self, name, rev): - """Return file contents as a string""" - raise NotImplementedError() - - def getmode(self, name, rev): - """Return file mode, eg. '', 'x', or 'l'""" - raise NotImplementedError() - - def getchanges(self, version): - """Return sorted list of (filename, id) tuples for all files changed in rev. - - id just tells us which revision to return in getfile(), e.g. in - git it's an object hash.""" - raise NotImplementedError() - - def getcommit(self, version): - """Return the commit object for version""" - raise NotImplementedError() - - def gettags(self): - """Return the tags as a dictionary of name: revision""" - raise NotImplementedError() - -class converter_sink(object): - """Conversion sink (target) interface""" - - def __init__(self, ui, path): - """Initialize conversion sink (or raise NoRepo("message") - exception if path is not a valid repository)""" - raise NotImplementedError() - - def getheads(self): - """Return a list of this repository's heads""" - raise NotImplementedError() - - def mapfile(self): - """Path to a file that will contain lines - source_rev_id sink_rev_id - mapping equivalent revision identifiers for each system.""" - raise NotImplementedError() - - def putfile(self, f, e, data): - """Put file for next putcommit(). - f: path to file - e: '', 'x', or 'l' (regular file, executable, or symlink) - data: file contents""" - raise NotImplementedError() - - def delfile(self, f): - """Delete file for next putcommit(). - f: path to file""" - raise NotImplementedError() - - def putcommit(self, files, parents, commit): - """Create a revision with all changed files listed in 'files' - and having listed parents. 'commit' is a commit object containing - at a minimum the author, date, and message for this changeset. - Called after putfile() and delfile() calls. Note that the sink - repository is not told to update itself to a particular revision - (or even what that revision would be) before it receives the - file data.""" - raise NotImplementedError() - - def puttags(self, tags): - """Put tags into sink. - tags: {tagname: sink_rev_id, ...}""" - raise NotImplementedError() - - -# CVS conversion code inspired by hg-cvs-import and git-cvsimport -class convert_cvs(converter_source): - def __init__(self, ui, path): - self.path = path - self.ui = ui - cvs = os.path.join(path, "CVS") - if not os.path.exists(cvs): - raise NoRepo("couldn't open CVS repo %s" % path) - - self.changeset = {} - self.files = {} - self.tags = {} - self.lastbranch = {} - self.parent = {} - self.socket = None - self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1] - self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1] - self.encoding = locale.getpreferredencoding() - self._parse() - self._connect() - - def _parse(self): - if self.changeset: - return - - d = os.getcwd() - try: - os.chdir(self.path) - id = None - state = 0 - for l in os.popen("cvsps -A -u --cvs-direct -q"): - if state == 0: # header - if l.startswith("PatchSet"): - id = l[9:-2] - elif l.startswith("Date"): - date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"]) - date = util.datestr(date) - elif l.startswith("Branch"): - branch = l[8:-1] - self.parent[id] = self.lastbranch.get(branch, 'bad') - self.lastbranch[branch] = id - elif l.startswith("Ancestor branch"): - ancestor = l[17:-1] - self.parent[id] = self.lastbranch[ancestor] - elif l.startswith("Author"): - author = self.recode(l[8:-1]) - elif l.startswith("Tag: "): - t = l[5:-1].rstrip() - if t != "(none)": - self.tags[t] = id - elif l.startswith("Log:"): - state = 1 - log = "" - elif state == 1: # log - if l == "Members: \n": - files = {} - log = self.recode(log[:-1]) - if log.isspace(): - log = "*** empty log message ***\n" - state = 2 - else: - log += l - elif state == 2: - if l == "\n": # - state = 0 - p = [self.parent[id]] - if id == "1": - p = [] - if branch == "HEAD": - branch = "" - c = commit(author=author, date=date, parents=p, - desc=log, branch=branch) - self.changeset[id] = c - self.files[id] = files - else: - colon = l.rfind(':') - file = l[1:colon] - rev = l[colon+1:-2] - rev = rev.split("->")[1] - files[file] = rev - - self.heads = self.lastbranch.values() - finally: - os.chdir(d) - - def _connect(self): - root = self.cvsroot - conntype = None - user, host = None, None - cmd = ['cvs', 'server'] - - self.ui.status("connecting to %s\n" % root) +# hg backend for convert extension - if root.startswith(":pserver:"): - root = root[9:] - m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', - root) - if m: - conntype = "pserver" - user, passw, serv, port, root = m.groups() - if not user: - user = "anonymous" - rr = ":pserver:" + user + "@" + serv + ":" + root - if port: - rr2, port = "-", int(port) - else: - rr2, port = rr, 2401 - rr += str(port) - - if not passw: - passw = "A" - pf = open(os.path.join(os.environ["HOME"], ".cvspass")) - for l in pf: - # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah/dev/null" - % (self.path, type, rev)) - return fh.read() - - def getfile(self, name, rev): - return self.catfile(rev, "blob") - - def getmode(self, name, rev): - return self.modecache[(name, rev)] - - def getchanges(self, version): - self.modecache = {} - fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" - % (self.path, version)) - changes = [] - for l in fh: - if "\t" not in l: continue - m, f = l[:-1].split("\t") - m = m.split() - h = m[3] - p = (m[1] == "100755") - s = (m[1] == "120000") - self.modecache[(f, h)] = (p and "x") or (s and "l") or "" - changes.append((f, h)) - return changes - - def getcommit(self, version): - c = self.catfile(version, "commit") # read the commit hash - end = c.find("\n\n") - message = c[end+2:] - message = recode(message) - l = c[:end].splitlines() - manifest = l[0].split()[1] - parents = [] - for e in l[1:]: - n, v = e.split(" ", 1) - if n == "author": - p = v.split() - tm, tz = p[-2:] - author = " ".join(p[:-2]) - if author[0] == "<": author = author[1:-1] - author = recode(author) - if n == "committer": - p = v.split() - tm, tz = p[-2:] - committer = " ".join(p[:-2]) - if committer[0] == "<": committer = committer[1:-1] - committer = recode(committer) - message += "\ncommitter: %s\n" % committer - if n == "parent": parents.append(v) - - tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:] - tz = -int(tzs) * (int(tzh) * 3600 + int(tzm)) - date = tm + " " + str(tz) - - c = commit(parents=parents, date=date, author=author, desc=message) - return c - - def gettags(self): - tags = {} - fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path) - prefix = 'refs/tags/' - for line in fh: - line = line.strip() - if not line.endswith("^{}"): - continue - node, tag = line.split(None, 1) - if not tag.startswith(prefix): - continue - tag = tag[len(prefix):-3] - tags[tag] = node - - return tags +from common import NoRepo, converter_sink class convert_mercurial(converter_sink): def __init__(self, ui, path): @@ -521,229 +89,3 @@ class convert_mercurial(converter_sink): self.repo.rawcommit([".hgtags"], "update tags", "convert-repo", date, self.repo.changelog.tip(), hg.nullid) return hg.hex(self.repo.changelog.tip()) - -converters = [convert_cvs, convert_git, convert_mercurial] - -def converter(ui, path): - if not os.path.isdir(path): - raise util.Abort("%s: not a directory" % path) - for c in converters: - try: - return c(ui, path) - except NoRepo: - pass - raise util.Abort("%s: unknown repository type" % path) - -class convert(object): - def __init__(self, ui, source, dest, mapfile, opts): - - self.source = source - self.dest = dest - self.ui = ui - self.mapfile = mapfile - self.opts = opts - self.commitcache = {} - - self.map = {} - try: - for l in file(self.mapfile): - sv, dv = l[:-1].split() - self.map[sv] = dv - except IOError: - pass - - def walktree(self, heads): - visit = heads - known = {} - parents = {} - while visit: - n = visit.pop(0) - if n in known or n in self.map: continue - known[n] = 1 - self.commitcache[n] = self.source.getcommit(n) - cp = self.commitcache[n].parents - for p in cp: - parents.setdefault(n, []).append(p) - visit.append(p) - - return parents - - def toposort(self, parents): - visit = parents.keys() - seen = {} - children = {} - - while visit: - n = visit.pop(0) - if n in seen: continue - seen[n] = 1 - pc = 0 - if n in parents: - for p in parents[n]: - if p not in self.map: pc += 1 - visit.append(p) - children.setdefault(p, []).append(n) - if not pc: root = n - - s = [] - removed = {} - visit = children.keys() - while visit: - n = visit.pop(0) - if n in removed: continue - dep = 0 - if n in parents: - for p in parents[n]: - if p in self.map: continue - if p not in removed: - # we're still dependent - visit.append(n) - dep = 1 - break - - if not dep: - # all n's parents are in the list - removed[n] = 1 - if n not in self.map: - s.append(n) - if n in children: - for c in children[n]: - visit.insert(0, c) - - if self.opts.get('datesort'): - depth = {} - for n in s: - depth[n] = 0 - pl = [p for p in self.commitcache[n].parents - if p not in self.map] - if pl: - depth[n] = max([depth[p] for p in pl]) + 1 - - s = [(depth[n], self.commitcache[n].date, n) for n in s] - s.sort() - s = [e[2] for e in s] - - return s - - def copy(self, rev): - c = self.commitcache[rev] - files = self.source.getchanges(rev) - - for f, v in files: - try: - data = self.source.getfile(f, v) - except IOError, inst: - self.dest.delfile(f) - else: - e = self.source.getmode(f, v) - self.dest.putfile(f, e, data) - - r = [self.map[v] for v in c.parents] - f = [f for f, v in files] - self.map[rev] = self.dest.putcommit(f, r, c) - file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev])) - - def convert(self): - self.ui.status("scanning source...\n") - heads = self.source.getheads() - parents = self.walktree(heads) - self.ui.status("sorting...\n") - t = self.toposort(parents) - num = len(t) - c = None - - self.ui.status("converting...\n") - for c in t: - num -= 1 - desc = self.commitcache[c].desc - if "\n" in desc: - desc = desc.splitlines()[0] - self.ui.status("%d %s\n" % (num, desc)) - self.copy(c) - - tags = self.source.gettags() - ctags = {} - for k in tags: - v = tags[k] - if v in self.map: - ctags[k] = self.map[v] - - if c and ctags: - nrev = self.dest.puttags(ctags) - # write another hash correspondence to override the previous - # one so we don't end up with extra tag heads - if nrev: - file(self.mapfile, "a").write("%s %s\n" % (c, nrev)) - -def _convert(ui, src, dest=None, mapfile=None, **opts): - '''Convert a foreign SCM repository to a Mercurial one. - - Accepted source formats: - - GIT - - CVS - - Accepted destination formats: - - Mercurial - - If destination isn't given, a new Mercurial repo named -hg will - be created. If isn't given, it will be put in a default - location (/.hg/shamap by default) - - The is a simple text file that maps each source commit ID to - the destination ID for that revision, like so: - - - - If the file doesn't exist, it's automatically created. It's updated - on each commit copied, so convert-repo can be interrupted and can - be run repeatedly to copy new commits. - ''' - - srcc = converter(ui, src) - if not hasattr(srcc, "getcommit"): - raise util.Abort("%s: can't read from this repo type" % src) - - if not dest: - dest = src + "-hg" - ui.status("assuming destination %s\n" % dest) - - # Try to be smart and initalize things when required - if os.path.isdir(dest): - if len(os.listdir(dest)) > 0: - try: - hg.repository(ui, dest) - ui.status("destination %s is a Mercurial repository\n" % dest) - except hg.RepoError: - raise util.Abort( - "destination directory %s is not empty.\n" - "Please specify an empty directory to be initialized\n" - "or an already initialized mercurial repository" - % dest) - else: - ui.status("initializing destination %s repository\n" % dest) - hg.repository(ui, dest, create=True) - elif os.path.exists(dest): - raise util.Abort("destination %s exists and is not a directory" % dest) - else: - ui.status("initializing destination %s repository\n" % dest) - hg.repository(ui, dest, create=True) - - destc = converter(ui, dest) - if not hasattr(destc, "putcommit"): - raise util.Abort("%s: can't write to this repo type" % src) - - if not mapfile: - try: - mapfile = destc.mapfile() - except: - mapfile = os.path.join(destc, "map") - - c = convert(ui, srcc, destc, mapfile, opts) - c.convert() - -cmdtable = { - "convert": - (_convert, - [('', 'datesort', None, 'try to sort changesets by date')], - 'hg convert [OPTION]... SOURCE [DEST [MAPFILE]]'), -}