Mercurial > hg > mercurial-crew-with-dirclash
annotate contrib/convert-repo @ 3755:05120e210c65
Use unsigned version format.
This way we can use one additional bit, and when encountering invalid revlogs
with the first bit set we don't produce Python warnings or strange error messages.
author | Thomas Arendsen Hein <thomas@intevation.de> |
---|---|
date | Fri, 01 Dec 2006 23:27:53 +0100 |
parents | e6a7a6a33a62 |
children | 158fce02dc40 |
rev | line source |
---|---|
316 | 1 #!/usr/bin/env python |
2 # | |
3 # This is a generalized framework for converting between SCM | |
4 # repository formats. | |
5 # | |
6 # In its current form, it's hardcoded to convert incrementally between | |
7 # git and Mercurial. | |
8 # | |
9 # To use, you must first import the first git version into Mercurial, | |
10 # and establish a mapping between the git commit hash and the hash in | |
11 # Mercurial for that version. This mapping is kept in a simple text | |
12 # file with lines like so: | |
13 # | |
14 # <git hash> <mercurial hash> | |
15 # | |
16 # To convert the rest of the repo, run: | |
17 # | |
18 # convert-repo <git-dir> <hg-dir> <mapfile> | |
19 # | |
20 # This updates the mapfile on each commit copied, so it can be | |
21 # interrupted and can be run repeatedly to copy new commits. | |
22 | |
694 | 23 import sys, os, zlib, sha, time |
1715
40346aa66b0f
Revert convert-repo changes
Matt Mackall <mpm@selenic.com>
parents:
1656
diff
changeset
|
24 from mercurial import hg, ui, util |
316 | 25 |
class convert_git:
    """Conversion source that reads commits, files and tags from git.

    Repository contents are obtained by running the git command line
    tools (git-rev-parse, git-cat-file, git-diff-tree) with GIT_DIR set
    to `path`; tag refs are read directly from `path`/refs/tags.
    """

    def __init__(self, path):
        # path is used verbatim as GIT_DIR and as the parent of refs/tags
        self.path = path

    def getheads(self):
        """Return a one-element list with the hash that HEAD resolves to."""
        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        try:
            # drop the trailing newline of the command output
            return [fh.read()[:-1]]
        finally:
            fh.close()

    def catfile(self, rev, type):
        """Return the output of git-cat-file for object `rev` of kind `type`.

        Raises IOError for the all-zero hash; getfile() callers treat
        that as "file was deleted".  Errors printed by git itself are
        discarded (stderr goes to /dev/null), so a failed lookup yields
        whatever git wrote to stdout.
        """
        if rev == "0" * 40:
            raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
                      % (self.path, type, rev))
        try:
            return fh.read()
        finally:
            fh.close()

    def getfile(self, name, rev):
        """Return the blob `rev`; `name` is accepted but not used."""
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        """Return a list of (filename, hash, is_exec) changed by `version`.

        Parsed from git-diff-tree output: only lines containing a tab
        are considered.  Of the whitespace-separated fields before the
        tab, the fourth is taken as the file's hash and the second is
        compared with "100755" to derive the executable flag.
        """
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
                      % (self.path, version))
        changes = []
        try:
            for l in fh:
                if "\t" not in l:
                    continue
                m, f = l[:-1].split("\t")
                m = m.split()
                changes.append((f, m[3], m[1] == "100755"))
        finally:
            fh.close()
        return changes

    def getcommit(self, version):
        """Return (parents, author, date, message) for commit `version`.

        `date` is "<seconds> <offset>", where <seconds> is copied from
        the commit header and <offset> is the header's +HHMM/-HHMM zone
        converted to seconds with the sign inverted.  The values come
        from whichever of the author/committer lines appears last.  The
        raw committer line is appended to the message.
        """
        c = self.catfile(version, "commit")
        end = c.find("\n\n")
        message = c[end+2:]
        header = c[:end].splitlines()
        parents = []
        # header[0] is skipped, as in the original parsing loop
        for e in header[1:]:
            if " " not in e:
                # not a "<name> <value>" line; nothing to extract
                continue
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                # a bare "<email>" with no name: strip the brackets
                if author.startswith("<"):
                    author = author[1:-1]
            elif n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                message += "\ncommitter: %s\n" % v
            elif n == "parent":
                parents.append(v)

        # tz looks like "+0530": sign, two hour digits, two minute digits
        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
        # minutes must be scaled to seconds just like the hours
        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm) * 60)
        date = tm + " " + str(tz)
        return (parents, author, date, message)

    def gettags(self):
        """Return a dict mapping tag name to the hash its tag object names.

        Each file in refs/tags is read as a tag object hash; the second
        word of the first line of that tag object becomes the value.
        Refs that cannot be read or do not yield such a line are
        skipped.
        """
        tags = {}
        tagdir = self.path + "/refs/tags"
        for f in os.listdir(tagdir):
            try:
                fh = open(tagdir + "/" + f)
                try:
                    h = fh.read().strip()
                finally:
                    fh.close()
                c = self.catfile(h, "tag")
                tags[f] = c.splitlines()[0].split()[1]
            except (IOError, OSError, IndexError):
                # unreadable ref, or no usable tag object output
                pass
        return tags
92 | |
class convert_mercurial:
    """Conversion sink that writes files, commits and tags into Mercurial.

    Wraps the repository opened with hg.repository() at `path`; files
    are written into its working directory and commits are created
    with repo.rawcommit().
    """

    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the changelog heads as hex strings."""
        return [hg.hex(x) for x in self.repo.changelog.heads()]

    def putfile(self, f, e, data):
        """Write `data` to working file `f` and apply exec flag `e`.

        A file whose dirstate state is '?' is marked as added.
        """
        fh = self.repo.wfile(f, "w")
        try:
            fh.write(data)
        finally:
            # close before touching the file's mode below
            fh.close()
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Remove working file `f`; a failing unlink is ignored."""
        try:
            os.unlink(self.repo.wjoin(f))
        except OSError:
            # best effort, e.g. the file is already gone
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit `files` and return the resulting revision as hex.

        `parents` is a list of hex hashes; duplicates are dropped
        (first occurrence wins) and the list is padded with the null
        hash to two entries.  With more than two parents, one
        rawcommit() is issued per extra parent, each taking the
        previous result as its first parent; the follow-up commits use
        the text "(octopus merge fixup)".  `dest` is passed to
        rawcommit() in the date position; convert.copy() supplies the
        commit date there.
        """
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        # always hand rawcommit two parents
        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            text = "(octopus merge fixup)\n"
            # chain the next merge (if any) onto what was just committed
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from `tags` (name -> hex hash) if it changed.

        When the sorted "<hash> <name>" lines differ from the sorted
        current contents, the file is rewritten (and added if it had no
        previous lines) and committed on top of the changelog tip; the
        hex hash of the new tip is returned.  Otherwise nothing is
        written and None is returned.
        """
        try:
            fh = self.repo.wfile(".hgtags")
            try:
                old = fh.read()
            finally:
                fh.close()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except (IOError, OSError):
            # no readable .hgtags yet
            oldlines = []

        newlines = ["%s %s\n" % (tags[tag], tag) for tag in tags]
        newlines.sort()

        if newlines != oldlines:
            f = self.repo.wfile(".hgtags", "w")
            f.write("".join(newlines))
            f.close()
            if not oldlines:
                self.repo.add([".hgtags"])
            # current time with a zero offset; time.time() is already
            # seconds since the epoch, no struct_time round trip needed
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())
694 | 166 |
class convert:
    """Copy commits that are not yet in the map file from source to dest.

    `source` must provide getheads, getcommit, getchanges, getfile and
    gettags; `dest` must provide putfile, delfile, putcommit and
    puttags.  `mapfile` is a text file of "<source hash> <dest hash>"
    lines that is read at construction and appended to after every
    commit copied.
    """

    def __init__(self, source, dest, mapfile):
        self.source = source
        self.dest = dest
        self.mapfile = mapfile
        self.commitcache = {}

        self.map = {}
        try:
            fh = open(self.mapfile)
        except IOError:
            # no map file yet: start with an empty mapping
            return
        try:
            for l in fh:
                fields = l.split()
                if not fields:
                    continue
                sv, dv = fields
                # later lines override earlier ones for the same source hash
                self.map[sv] = dv
        finally:
            fh.close()

    def _appendmap(self, src, dst):
        # Append one "<source hash> <dest hash>" line to the map file.
        fh = open(self.mapfile, "a")
        try:
            fh.write("%s %s\n" % (src, dst))
        finally:
            fh.close()

    def walktree(self, heads):
        """Return {commit: [parents]} for unmapped commits reachable from heads.

        Every commit visited is fetched with source.getcommit() and
        stored in self.commitcache.  Commits already in self.map are
        not descended into.  Commits without parents get no key.
        """
        visit = list(heads)
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map:
                continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            for p in self.commitcache[n][0]:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        """Return commits ordered so that parents come before children.

        `parents` is the mapping produced by walktree().  Parents that
        are in self.map count as already satisfied.  The result can
        include commits that are in self.map; convert() filters those.
        """
        # first pass: invert the parent mapping
        visit = list(parents)
        seen = {}
        children = {}
        while visit:
            n = visit.pop(0)
            if n in seen:
                continue
            seen[n] = 1
            for p in parents.get(n, ()):
                visit.append(p)
                children.setdefault(p, []).append(n)

        # second pass: emit a commit once all unmapped parents are emitted
        s = []
        removed = {}
        visit = list(children)
        while visit:
            n = visit.pop(0)
            if n in removed:
                continue
            dep = 0
            for p in parents.get(n, ()):
                if p in self.map:
                    continue
                if p not in removed:
                    # we're still dependent: retry after the rest
                    visit.append(n)
                    dep = 1
                    break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                s.append(n)
                # children of n are looked at next
                for c in children.get(n, ()):
                    visit.insert(0, c)

        return s

    def copy(self, rev):
        """Replay commit `rev` into dest and record the new hash.

        For each changed file, an IOError from source.getfile() means
        the file is deleted in dest; otherwise its data is written.
        The resulting hash is stored in self.map and appended to the
        map file.
        """
        p, a, d, t = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f, v, e in files:
            try:
                data = self.source.getfile(f, v)
            except IOError:
                self.dest.delfile(f)
            else:
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in p]
        names = [f for f, v, e in files]
        self.map[rev] = self.dest.putcommit(names, r, a, d, t)
        self._appendmap(rev, self.map[rev])

    def convert(self):
        """Copy all unmapped commits in topological order, then the tags."""
        heads = self.source.getheads()
        parents = self.walktree(heads)
        t = self.toposort(parents)
        t = [n for n in t if n not in self.map]

        # c ends up as the last commit copied, or None if there was none
        c = None
        for c in t:
            self.copy(c)

        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            # only tags whose target has been converted can be carried over
            if v in self.map:
                ctags[k] = self.map[v]

        if c and ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads; skipped when
            # the sink reports no new revision
            if nrev:
                self._appendmap(c, nrev)
694 | 284 |
def main(argv):
    """Run a git -> Mercurial conversion for the given argument list.

    `argv` must be [git-dir, hg-dir, mapfile].  If git-dir contains a
    ".git" directory, that directory is used as the git repository.
    Returns 0 after converting, or 2 (after printing a usage line to
    stderr) when the number of arguments is wrong.
    """
    if len(argv) != 3:
        sys.stderr.write("usage: convert-repo <git-dir> <hg-dir> <mapfile>\n")
        return 2

    gitpath, hgpath, mapfile = argv
    if os.path.isdir(gitpath + "/.git"):
        gitpath += "/.git"

    c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
    c.convert()
    return 0

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))