Add a features list to branches.cache to detect caches of old hg versions.
The leading space in the written file makes sure that the feature list never
can match an existing version, even if the first feature can be read as hex.
Additionally old hg versions display the id with --debug, too.
#!/usr/bin/env python
#
# This is a generalized framework for converting between SCM
# repository formats.
#
# In its current form, it's hardcoded to convert incrementally between
# git and Mercurial.
#
# To use, run:
#
# convert-repo <git-dir> <hg-dir> <mapfile>
#
# (don't forget to create the <hg-dir> repository beforehand)
#
# The <mapfile> is a simple text file that maps a git commit hash to
# the hash in Mercurial for that version, like so:
#
# <git hash> <mercurial hash>
#
# If the file doesn't exist, it's automatically created. It's updated
# on each commit copied, so convert-repo can be interrupted and can
# be run repeatedly to copy new commits.
import sys, os, zlib, sha, time
os.environ["HGENCODING"] = "utf-8"
from mercurial import hg, ui, util
def recode(s):
try:
return s.decode("utf-8").encode("utf-8")
except:
try:
return s.decode("latin-1").encode("utf-8")
except:
return s.decode("utf-8", "replace").encode("utf-8")
class convert_git:
def __init__(self, path):
self.path = path
def getheads(self):
fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
return [fh.read()[:-1]]
def catfile(self, rev, type):
if rev == "0" * 40: raise IOError()
fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
return fh.read()
def getfile(self, name, rev):
return self.catfile(rev, "blob")
def getchanges(self, version):
fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
changes = []
for l in fh:
if "\t" not in l: continue
m, f = l[:-1].split("\t")
m = m.split()
h = m[3]
p = (m[1] == "100755")
changes.append((f, h, p))
return changes
def getcommit(self, version):
c = self.catfile(version, "commit") # read the commit hash
end = c.find("\n\n")
message = c[end+2:]
message = recode(message)
l = c[:end].splitlines()
manifest = l[0].split()[1]
parents = []
for e in l[1:]:
n,v = e.split(" ", 1)
if n == "author":
p = v.split()
tm, tz = p[-2:]
author = " ".join(p[:-2])
if author[0] == "<": author = author[1:-1]
author = recode(author)
if n == "committer":
p = v.split()
tm, tz = p[-2:]
committer = " ".join(p[:-2])
if committer[0] == "<": committer = committer[1:-1]
committer = recode(committer)
message += "\ncommitter: %s\n" % committer
if n == "parent": parents.append(v)
tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
date = tm + " " + str(tz)
return (parents, author, date, message)
def gettags(self):
tags = {}
for f in os.listdir(self.path + "/refs/tags"):
try:
h = file(self.path + "/refs/tags/" + f).read().strip()
c = self.catfile(h, "tag") # read the commit hash
h = c.splitlines()[0].split()[1]
tags[f] = h
except:
pass
return tags
class convert_mercurial:
def __init__(self, path):
self.path = path
u = ui.ui()
self.repo = hg.repository(u, path)
def getheads(self):
h = self.repo.changelog.heads()
return [ hg.hex(x) for x in h ]
def putfile(self, f, e, data):
self.repo.wfile(f, "w").write(data)
if self.repo.dirstate.state(f) == '?':
self.repo.dirstate.update([f], "a")
util.set_exec(self.repo.wjoin(f), e)
def delfile(self, f):
try:
os.unlink(self.repo.wjoin(f))
#self.repo.remove([f])
except:
pass
def putcommit(self, files, parents, author, dest, text):
seen = {}
pl = []
for p in parents:
if p not in seen:
pl.append(p)
seen[p] = 1
parents = pl
if len(parents) < 2: parents.append("0" * 40)
if len(parents) < 2: parents.append("0" * 40)
p2 = parents.pop(0)
while parents:
p1 = p2
p2 = parents.pop(0)
self.repo.rawcommit(files, text, author, dest,
hg.bin(p1), hg.bin(p2))
text = "(octopus merge fixup)\n"
p2 = hg.hex(self.repo.changelog.tip())
return p2
def puttags(self, tags):
try:
old = self.repo.wfile(".hgtags").read()
oldlines = old.splitlines(1)
oldlines.sort()
except:
oldlines = []
k = tags.keys()
k.sort()
newlines = []
for tag in k:
newlines.append("%s %s\n" % (tags[tag], tag))
newlines.sort()
if newlines != oldlines:
#print "updating tags"
f = self.repo.wfile(".hgtags", "w")
f.write("".join(newlines))
f.close()
if not oldlines: self.repo.add([".hgtags"])
date = "%s 0" % int(time.mktime(time.gmtime()))
self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
date, self.repo.changelog.tip(), hg.nullid)
return hg.hex(self.repo.changelog.tip())
class convert:
def __init__(self, source, dest, mapfile):
self.source = source
self.dest = dest
self.mapfile = mapfile
self.commitcache = {}
self.map = {}
try:
for l in file(self.mapfile):
sv, dv = l[:-1].split()
self.map[sv] = dv
except IOError:
pass
def walktree(self, heads):
visit = heads
known = {}
parents = {}
while visit:
n = visit.pop(0)
if n in known or n in self.map: continue
known[n] = 1
self.commitcache[n] = self.source.getcommit(n)
cp = self.commitcache[n][0]
for p in cp:
parents.setdefault(n, []).append(p)
visit.append(p)
return parents
def toposort(self, parents):
visit = parents.keys()
seen = {}
children = {}
while visit:
n = visit.pop(0)
if n in seen: continue
seen[n] = 1
pc = 0
if n in parents:
for p in parents[n]:
if p not in self.map: pc += 1
visit.append(p)
children.setdefault(p, []).append(n)
if not pc: root = n
s = []
removed = {}
visit = children.keys()
while visit:
n = visit.pop(0)
if n in removed: continue
dep = 0
if n in parents:
for p in parents[n]:
if p in self.map: continue
if p not in removed:
# we're still dependent
visit.append(n)
dep = 1
break
if not dep:
# all n's parents are in the list
removed[n] = 1
s.append(n)
if n in children:
for c in children[n]:
visit.insert(0, c)
return s
def copy(self, rev):
p, a, d, t = self.commitcache[rev]
files = self.source.getchanges(rev)
for f,v,e in files:
try:
data = self.source.getfile(f, v)
except IOError, inst:
self.dest.delfile(f)
else:
self.dest.putfile(f, e, data)
r = [self.map[v] for v in p]
f = [f for f,v,e in files]
self.map[rev] = self.dest.putcommit(f, r, a, d, t)
file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
def convert(self):
heads = self.source.getheads()
parents = self.walktree(heads)
t = self.toposort(parents)
t = [n for n in t if n not in self.map]
num = len(t)
c = None
for c in t:
num -= 1
desc = self.commitcache[c][3].splitlines()[0]
#print num, desc
self.copy(c)
tags = self.source.gettags()
ctags = {}
for k in tags:
v = tags[k]
if v in self.map:
ctags[k] = self.map[v]
if c and ctags:
nrev = self.dest.puttags(ctags)
# write another hash correspondence to override the previous
# one so we don't end up with extra tag heads
if nrev:
file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
gitpath, hgpath, mapfile = sys.argv[1:]
if os.path.isdir(gitpath + "/.git"):
gitpath += "/.git"
c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
c.convert()