contrib/convert-repo
author maf46@burn.cl.cam.ac.uk
Mon, 04 Jul 2005 12:38:34 -0800
changeset 616 d45d1c90032e
parent 450 9d785fd7deec
child 692 695dd9a491da
permissions -rwxr-xr-x
Fix zombie files in merge # HG changeset patch # User maf46@burn.cl.cam.ac.uk # Node ID 57667c9b93a5a743e4629d15a0e6bd76699130c3 # Parent d2994b5298fb20f87dc1d4747635b280db3c0526 Fix zombie files in merge Keir Fraser observed the following: > I made a small test case that illustrates the bug in merging changesets > with 'hg remove's in them: > > 1. Create a repository A containing files foo & bar. > 2. Create clone called B. > 3. A removes file bar, and commits this removal. > 4. B edits file foo, and commits this edit. > > Now, if B: > # hg pull ../A; hg update -m; hg commit > Then bar remains deleted. > > If A: > # hg pull ../B; hg update -m; hg commit > Then bar is resurrected! > > It looks as though, when you merge across a branch, any deletions in > your own branch are forgotten. > ... > Fixing this is a must, as zombie files are a real pain. :-) Keir later patched our local copy of hg as shown below, which fixes the problem. I've also enclosed a test which captures the test Keir outlined... Files deleted on a branch should not automatically reappear in a merge Patch notes: 1. The first chunk does not change behaviour, but cleans up the code to more closely match check of 'force' in the second chunk. I think it makes the code clearer. 2. The second chunk fixes two bugs -- i. If we choose to keep a remotely-changed locally-deleted file, then we need to 'get' that file. If we choose to delete it then no action need be taken (it is already deleted in the working manifest). Without this fix, choosing to delete would get a Python traceback. ii. The test for whether the file was remotely-created is insufficient. It is only true if f is not in the common ancestor. Otherwise the file was deleted locally, and should remain deleted. (this is the most important fix!) Index: hg/tests/test-merge6 ===================================================================

#!/usr/bin/env python
#
# This is a generalized framework for converting between SCM
# repository formats.
#
# In its current form, it's hardcoded to convert incrementally between
# git and Mercurial.
#
# To use, you must first import the first git version into Mercurial,
# and establish a mapping between the git commit hash and the hash in
# Mercurial for that version. This mapping is kept in a simple text
# file with lines like so:
#
# <git hash> <mercurial hash>
#
# To convert the rest of the repo, run:
#
# convert-repo <git-dir> <hg-dir> <mapfile>
#
# This updates the mapfile on each commit copied, so it can be
# interrupted and can be run repeatedly to copy new commits.

import sys, os, zlib, sha
from mercurial import hg, ui, util

class convert_git:
    def __init__(self, path):
        self.path = path

    def getheads(self):
        h = file(self.path + "/.git/HEAD").read()[:-1]
        return [h]

    def getfile(self, name, rev):
        a = file(self.path + ("/.git/objects/%s/%s"
                              % (rev[:2], rev[2:]))).read()
        b = zlib.decompress(a)
        if sha.sha(b).hexdigest() != rev: raise "bad hash"
        head, text = b.split('\0', 1)
        return text

    def getchanges(self, version):
        path = os.getcwd()
        os.chdir(self.path)
        fh = os.popen("git-diff-tree -m -r %s" % (version))
        os.chdir(path)
        
        changes = []
        for l in fh:
            if "\t" not in l: continue
            m, f = l[:-1].split("\t")
            m = m.split()
            h = m[3]
            p = (m[1] == "100755")
            changes.append((f, h, p))
        return changes

    def getcommit(self, version):
        c = self.getfile("", version) # read the commit hash
        end = c.find("\n\n")
        message = c[end+2:]
        l = c[:end].splitlines()
        manifest = l[0].split()[1]
        parents = []
        for e in l[1:]:
            n,v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                date = " ".join(p[-2:])
                author = " ".join(p[:-2])
                if author[0] == "<": author = author[1:-1]
            if n == "committer": 
                p = v.split()
                date = " ".join(p[-2:])
                committer = " ".join(p[:-2])
                if committer[0] == "<": committer = committer[1:-1]
                message += "\ncommitter: %s %s\n" % (committer, date)
            if n == "parent": parents.append(v)
        return (parents, author, date, message)

class convert_mercurial:
    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        h = self.repo.changelog.heads()
        h = [ hg.hex(x) for x in h ]
        return h
        
    def putfile(self, f, e, data):
        self.repo.wfile(f, "w").write(data)
        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        try:
            os.unlink(self.repo.wjoin(f))
            self.repo.remove([f])
        except:
            pass

    def putcommit(self, files, parents, author, dest, text):
        if not parents: parents = ["0" * 40]
        if len(parents) < 2: parents.append("0" * 40)

        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        p2 = parents.pop(0)
        c = self.repo.changelog.count()
        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, author, dest, 
                                hg.bin(p1), hg.bin(p2))
            text = "(octopus merge fixup)\n"

        return hg.hex(self.repo.changelog.node(c))

class convert:
    def __init__(self, source, dest, mapfile):
        self.source = source
        self.dest = dest
        self.mapfile = mapfile
        self.commitcache = {}

        self.map = {}
        for l in file(self.mapfile):
            sv, dv = l[:-1].split()
            self.map[sv] = dv

    def walktree(self, heads):
        visit = heads
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map: continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            cp = self.commitcache[n][0]
            for p in cp:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        visit = parents.keys()
        seen = {}
        children = {}
        while visit:
            n = visit.pop(0)
            if n in seen: continue
            seen[n] = 1
            pc = 0
            if n in parents:
                for p in parents[n]:
                    if p not in self.map: pc += 1
                    visit.append(p)
                    children.setdefault(p, []).append(n)
            if not pc: root = n

        s = []
        removed = {}
        visit = parents.keys()
        while visit:
            n = visit.pop(0)
            if n in removed: continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map: continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        return s

    def copy(self, rev):
        p, a, d, t = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f,v,e in files:
            try:
                data = self.source.getfile(f, v)
            except IOError, inst:
                self.dest.delfile(f)
            else:
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in p]
        f = [f for f,v,e in files]
        self.map[rev] = self.dest.putcommit(f, r, a, d, t)
        file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))

    def convert(self):
        heads = self.source.getheads()
        parents = self.walktree(heads)
        t = self.toposort(parents)
        num = len(t)

        for c in t:
            num -= 1
            if c in self.map: continue
            desc = self.commitcache[c][3].splitlines()[0]
            print num, desc
            self.copy(c)

gitpath, hgpath, mapfile = sys.argv[1:]

c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
c.convert()