comparison contrib/convert-repo @ 316:c48d069163d6

Add new convert-repo script -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 Add new convert-repo script This is the beginnings of a generalized framework for converting repositories. Currently hardwired to convert from git to hg. manifest hash: dc3b72de2c45bfdaffcc1cf71da530228793facd -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.0 (GNU/Linux) iD8DBQFCrGf/ywK+sNU5EO8RAi9EAJ0eQ++cwSgn5j2PHiTvF7r3JNiv4gCePY+X do12pUvCczyBKVCoBN7y/uI= =YtzI -----END PGP SIGNATURE-----
author mpm@selenic.com
date Sun, 12 Jun 2005 08:51:11 -0800
parents
children dfc44f3f587c
comparison
equal deleted inserted replaced
315:5f0231b29f42 316:c48d069163d6
1 #!/usr/bin/env python
2 #
3 # This is a generalized framework for converting between SCM
4 # repository formats.
5 #
6 # In its current form, it's hardcoded to convert incrementally between
7 # git and Mercurial.
8 #
9 # To use, you must first import the first git version into Mercurial,
10 # and establish a mapping between the git commit hash and the hash in
11 # Mercurial for that version. This mapping is kept in a simple text
12 # file with lines like so:
13 #
14 # <git hash> <mercurial hash>
15 #
16 # To convert the rest of the repo, run:
17 #
18 # convert-repo <git-dir> <hg-dir> <mapfile>
19 #
20 # This updates the mapfile on each commit copied, so it can be
21 # interrupted and can be run repeatedly to copy new commits.
22
23 import sys, os, zlib, sha
24 from mercurial import hg, ui
25
class convert_git(object):
    """Conversion source that reads a git repository's loose objects.

    It reports the head commit, reads individual objects, lists the files
    touched by a commit and parses commit metadata.
    """

    def __init__(self, path):
        """Remember *path*, the directory that contains ``.git``."""
        self.path = path

    def getheads(self):
        """Return a one-element list holding the contents of ``.git/HEAD``.

        Surrounding whitespace is stripped rather than blindly dropping the
        last character, so a HEAD file without a trailing newline is read
        correctly.
        """
        with open(os.path.join(self.path, ".git", "HEAD")) as fp:
            return [fp.read().strip()]

    def getfile(self, name, rev):
        """Return the payload of the loose object named *rev*.

        *name* is accepted for interface symmetry but is not used; the object
        is located purely by its hash.  The object is decompressed, its SHA-1
        is verified against *rev*, and everything after the first NUL byte
        (i.e. after the object header) is returned as a byte string.

        Raises IOError if the object file cannot be opened and ValueError if
        the decompressed content does not hash to *rev*.
        """
        # Function-scope import: hashlib replaces the removed ``sha`` module.
        import hashlib

        objpath = os.path.join(self.path, ".git", "objects",
                               rev[:2], rev[2:])
        # Binary mode: the object is zlib-compressed data, not text.
        with open(objpath, "rb") as fp:
            raw = zlib.decompress(fp.read())
        if hashlib.sha1(raw).hexdigest() != rev:
            raise ValueError("bad hash")
        _header, text = raw.split(b"\0", 1)
        return text

    def getchanges(self, version):
        """Return the files changed by commit *version*.

        Runs ``git-diff-tree -m -r`` inside the repository directory and
        returns a list of ``(filename, hash, is_exec)`` tuples, where *hash*
        is the fourth whitespace-separated field of the raw diff line and
        *is_exec* is true when the second field is ``100755``.  Output lines
        without a tab (such as the commit-id lines) are skipped.

        Raises OSError if the command cannot be started and RuntimeError if
        it exits with a non-zero status.
        """
        import subprocess

        # An argument list avoids shell interpretation of *version*, and
        # ``cwd=`` replaces the chdir/chdir-back dance, which left the
        # process in the wrong directory if starting the command failed.
        # NOTE(review): newer git installs expose this as ``git diff-tree``;
        # confirm the target environment before changing the command name.
        proc = subprocess.Popen(["git-diff-tree", "-m", "-r", version],
                                cwd=self.path, stdout=subprocess.PIPE,
                                universal_newlines=True)
        output = proc.communicate()[0]
        if proc.returncode != 0:
            raise RuntimeError("git-diff-tree failed for %s (exit status %d)"
                               % (version, proc.returncode))

        changes = []
        for line in output.splitlines():
            if "\t" not in line:
                continue
            meta, fname = line.split("\t", 1)
            fields = meta.split()
            changes.append((fname, fields[3], fields[1] == "100755"))
        return changes

    def getcommit(self, version):
        """Parse commit *version* into ``(parents, author, date, message)``.

        *parents* is the list of values from ``parent`` header lines, *date*
        is the last two words of the ``author`` line, and *author* is the
        rest of that line (with the angle brackets removed when it consists
        solely of a bracketed address).  *message* is everything after the
        first blank line.

        Raises ValueError if the commit has no ``author`` header.
        """
        raw = self.getfile("", version)
        if not isinstance(raw, str):
            # Python 3: getfile() returns bytes; decode so the header can be
            # compared against ordinary strings.  Undecodable bytes are
            # replaced rather than aborting the conversion.
            raw = raw.decode("utf-8", "replace")

        # partition() copes with a commit that has no blank line at all,
        # where find() returning -1 used to mis-slice header and message.
        header, _sep, message = raw.partition("\n\n")

        parents = []
        author = date = None
        for line in header.splitlines():
            key, _sep, value = line.partition(" ")
            if key == "author":
                words = value.split()
                date = " ".join(words[-2:])
                author = " ".join(words[:-2])
                if author.startswith("<"):
                    author = author[1:-1]
            elif key == "parent":
                parents.append(value)

        if author is None:
            raise ValueError("commit %s has no author line" % version)
        return (parents, author, date, message)
74
class convert_mercurial(object):
    """Conversion sink that writes files and commits into a Mercurial repo."""

    def __init__(self, path):
        """Open the Mercurial repository located at *path*."""
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the changelog heads as a list of hex strings."""
        return [hg.hex(node) for node in self.repo.changelog.heads()]

    def putfile(self, f, e, data):
        """Write *data* to working-directory file *f* and set its exec flag.

        *e* is passed straight to ``hg.set_exec`` as the desired flag.
        """
        # NOTE(review): assumes wfile() returns a file-like object with
        # close() -- confirm against the Mercurial version in use.  Closing
        # explicitly makes sure the data is on disk before the flag is set.
        fp = self.repo.wfile(f, "w")
        try:
            fp.write(data)
        finally:
            fp.close()
        hg.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Best-effort removal of *f* from the working directory and repo.

        Failures are ignored on purpose (for example when the file is
        already gone), but only ordinary exceptions are swallowed so that
        KeyboardInterrupt and SystemExit still propagate.
        """
        try:
            os.unlink(self.repo.wjoin(f))
            self.repo.remove([f])
        except Exception:
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit *files* and return the hex id of the new changelog tip.

        *parents* holds at most two hex parent ids; missing parents default
        to forty zeros.  *author*, *dest* and *text* are forwarded unchanged
        to ``repo.rawcommit``.

        Raises ValueError when more than two parents are given.
        """
        # Reject unsupported merges before touching the repository.
        if len(parents) > 2:
            raise ValueError("the dreaded octopus merge!")

        nullid = "0" * 40
        p1 = parents[0] if len(parents) > 0 else nullid
        p2 = parents[1] if len(parents) > 1 else nullid
        self.repo.rawcommit(files, text, author, dest,
                            hg.bin(p1), hg.bin(p2))

        return hg.hex(self.repo.changelog.tip())
96
97 def putcommit(self, files, parents, author, dest, text):
98 p1, p2 = "0"*40, "0"*40
99 if len(parents) > 0: p1 = parents[0]
100 if len(parents) > 1: p2 = parents[1]
101 if len(parents) > 2: raise "the dreaded octopus merge!"
102 self.repo.rawcommit(files, text, author, dest,
103 hg.bin(p1), hg.bin(p2))
104
105 return hg.hex(self.repo.changelog.tip())
106
107 class convert:
108 def __init__(self, source, dest, mapfile):
109 self.source = source
110 self.dest = dest
111 self.mapfile = mapfile
112 self.commitcache = {}
113
114 self.map = {}
115 for l in file(self.mapfile):
116 sv, dv = l[:-1].split()
117 self.map[sv] = dv
118
119 def walktree(self, heads):
120 visit = heads
121 known = {}
122 parents = {}
123 while visit:
124 n = visit.pop(0)
125 if n in known or n in self.map: continue
126 known[n] = 1
127 self.commitcache[n] = self.source.getcommit(n)
128 cp = self.commitcache[n][0]
129 for p in cp:
130 parents.setdefault(n, []).append(p)
131 visit.append(p)
132
133 return parents
134
135 def toposort(self, parents):
136 visit = parents.keys()
137 seen = {}
138 children = {}
139 while visit:
140 n = visit.pop(0)
141 if n in seen: continue
142 seen[n] = 1
143 pc = 0
144 if n in parents:
145 for p in parents[n]:
146 if p not in self.map: pc += 1
147 visit.append(p)
148 children.setdefault(p, []).append(n)
149 if not pc: root = n
150
151 s = []
152 removed = {}
153 visit = parents.keys()
154 while visit:
155 n = visit.pop(0)
156 if n in removed: continue
157 dep = 0
158 if n in parents:
159 for p in parents[n]:
160 if p in self.map: continue
161 if p not in removed:
162 # we're still dependent
163 visit.append(n)
164 dep = 1
165 break
166
167 if not dep:
168 # all n's parents are in the list
169 removed[n] = 1
170 s.append(n)
171 if n in children:
172 for c in children[n]:
173 visit.insert(0, c)
174
175 return s
176
177 def copy(self, rev):
178 p, a, d, t = self.commitcache[rev]
179 files = self.source.getchanges(rev)
180
181 for f,v,e in files:
182 try:
183 data = self.source.getfile(f, v)
184 except IOError, inst:
185 self.dest.delfile(f)
186 else:
187 self.dest.putfile(f, e, data)
188
189 r = [self.map[v] for v in p]
190 f = [f for f,v,e in files]
191 self.map[rev] = self.dest.putcommit(f, r, a, d, t)
192 file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
193
194 def convert(self):
195 heads = self.source.getheads()
196 parents = self.walktree(heads)
197 t = self.toposort(parents)
198 num = len(t)
199
200 for c in t:
201 num -= 1
202 if c in self.map: continue
203 desc = self.commitcache[c][3].splitlines()[0]
204 print num, desc
205 self.copy(c)
206
def main(argv=None):
    """Run the conversion described by the command-line arguments.

    *argv* defaults to ``sys.argv[1:]`` and must hold exactly three items:
    the git directory, the Mercurial directory and the map file.  Returns 0
    after a completed conversion, or 1 (after printing a usage line to
    stderr) when the argument count is wrong.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        sys.stderr.write("usage: convert-repo <git-dir> <hg-dir> <mapfile>\n")
        return 1

    gitpath, hgpath, mapfile = args
    c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
    c.convert()
    return 0


if __name__ == "__main__":
    sys.exit(main())