view contrib/convert-repo @ 2550:45235e492cc6

Disable automatic line endings conversion on windows The rationale behind this is that such conversion implies a particular situation in which all files in the repo are terminated by only LF. This is documented nowhere and it bit me sharply when I upgraded. Furthermore, it works on the assumption that a file containing no NULL characters are actually a text file. Therefore it cannot guarantee that no binary file will be harmed in the process. Currently, if a file already contains CRLF line endings when it is copied to the working dir from the repo, then the version in the working dir will be corrupted by an extra CR. I'm working on a patch that will turn this into a warning. But as a side effect, committing such a file back will strip it from its CR. In all case, unrequested data modification can occur under the feet of the user, which is bad(tm), ihmo.
author Raphael Marmier <raphael@marmier.net>
date Mon, 03 Jul 2006 10:18:46 -0700
parents 5cc414722587
children e6a7a6a33a62
line wrap: on
line source

#!/usr/bin/env python
#
# This is a generalized framework for converting between SCM
# repository formats.
#
# In its current form, it's hardcoded to convert incrementally between
# git and Mercurial.
#
# To use, you must first import the first git version into Mercurial,
# and establish a mapping between the git commit hash and the hash in
# Mercurial for that version. This mapping is kept in a simple text
# file with lines like so:
#
# <git hash> <mercurial hash>
#
# To convert the rest of the repo, run:
#
# convert-repo <git-dir> <hg-dir> <mapfile>
#
# This updates the mapfile on each commit copied, so it can be
# interrupted and can be run repeatedly to copy new commits.

import sys, os, zlib, sha, time
from mercurial import hg, ui, util

class convert_git:
    def __init__(self, path):
        self.path = path

    def getheads(self):
        return [file(self.path + "/HEAD").read()[:-1]]

    def catfile(self, rev, type):
        if rev == "0" * 40: raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
        return fh.read()

    def getfile(self, name, rev):
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
        changes = []
        for l in fh:
            if "\t" not in l: continue
            m, f = l[:-1].split("\t")
            m = m.split()
            h = m[3]
            p = (m[1] == "100755")
            changes.append((f, h, p))
        return changes

    def getcommit(self, version):
        c = self.catfile(version, "commit") # read the commit hash
        end = c.find("\n\n")
        message = c[end+2:]
        l = c[:end].splitlines()
        manifest = l[0].split()[1]
        parents = []
        for e in l[1:]:
            n,v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                if author[0] == "<": author = author[1:-1]
            if n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                committer = " ".join(p[:-2])
                if committer[0] == "<": committer = committer[1:-1]
                message += "\ncommitter: %s\n" % v
            if n == "parent": parents.append(v)

        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
        date = tm + " " + str(tz)
        return (parents, author, date, message)

    def gettags(self):
        tags = {}
        for f in os.listdir(self.path + "/refs/tags"):
            try:
                h = file(self.path + "/refs/tags/" + f).read().strip()
                c = self.catfile(h, "tag") # read the commit hash
                h = c.splitlines()[0].split()[1]
                tags[f] = h
            except:
                pass
        return tags

class convert_mercurial:
    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        h = self.repo.changelog.heads()
        return [ hg.hex(x) for x in h ]

    def putfile(self, f, e, data):
        self.repo.wfile(f, "w").write(data)
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        try:
            os.unlink(self.repo.wjoin(f))
            #self.repo.remove([f])
        except:
            pass

    def putcommit(self, files, parents, author, dest, text):
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        if len(parents) < 2: parents.append("0" * 40)
        if len(parents) < 2: parents.append("0" * 40)
        p2 = parents.pop(0)

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        try:
            old = self.repo.wfile(".hgtags").read()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except:
            oldlines = []

        k = tags.keys()
        k.sort()
        newlines = []
        for tag in k:
            newlines.append("%s %s\n" % (tags[tag], tag))

        newlines.sort()

        if newlines != oldlines:
            #print "updating tags"
            f = self.repo.wfile(".hgtags", "w")
            f.write("".join(newlines))
            f.close()
            if not oldlines: self.repo.add([".hgtags"])
            date = "%s 0" % int(time.mktime(time.gmtime()))
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())

class convert:
    def __init__(self, source, dest, mapfile):
        self.source = source
        self.dest = dest
        self.mapfile = mapfile
        self.commitcache = {}

        self.map = {}
        try:
            for l in file(self.mapfile):
                sv, dv = l[:-1].split()
                self.map[sv] = dv
        except IOError:
            pass

    def walktree(self, heads):
        visit = heads
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map: continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            cp = self.commitcache[n][0]
            for p in cp:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        visit = parents.keys()
        seen = {}
        children = {}

        while visit:
            n = visit.pop(0)
            if n in seen: continue
            seen[n] = 1
            pc = 0
            if n in parents:
                for p in parents[n]:
                    if p not in self.map: pc += 1
                    visit.append(p)
                    children.setdefault(p, []).append(n)
            if not pc: root = n

        s = []
        removed = {}
        visit = children.keys()
        while visit:
            n = visit.pop(0)
            if n in removed: continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map: continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        return s

    def copy(self, rev):
        p, a, d, t = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f,v,e in files:
            try:
                data = self.source.getfile(f, v)
            except IOError, inst:
                self.dest.delfile(f)
            else:
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in p]
        f = [f for f,v,e in files]
        self.map[rev] = self.dest.putcommit(f, r, a, d, t)
        file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))

    def convert(self):
        heads = self.source.getheads()
        parents = self.walktree(heads)
        t = self.toposort(parents)
        t = [n for n in t if n not in self.map]
        num = len(t)
        c = None

        for c in t:
            num -= 1
            desc = self.commitcache[c][3].splitlines()[0]
            #print num, desc
            self.copy(c)

        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            if v in self.map:
                ctags[k] = self.map[v]

        if c and ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads
            file(self.mapfile, "a").write("%s %s\n" % (c, nrev))

gitpath, hgpath, mapfile = sys.argv[1:]
if os.path.isdir(gitpath + "/.git"):
    gitpath += "/.git"

c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
c.convert()