Mercurial > hg > mercurial-crew-with-dirclash
annotate contrib/convert-repo @ 4168:bbfe5a3fc80c
Add a features list to branches.cache to detect caches of old hg versions.
The leading space in the written file makes sure that the feature list never
can match an existing version, even if the first feature can be read as hex.
Additionally old hg versions display the id with --debug, too.
author | Thomas Arendsen Hein <thomas@intevation.de> |
---|---|
date | Fri, 09 Mar 2007 19:12:03 +0100 |
parents | 645e1dd4b8ae |
children | 0fab73b3f453 |
rev | line source |
---|---|
316 | 1 #!/usr/bin/env python |
2 # | |
3 # This is a generalized framework for converting between SCM | |
4 # repository formats. | |
5 # | |
6 # In its current form, it's hardcoded to convert incrementally between | |
7 # git and Mercurial. | |
8 # | |
3916
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
9 # To use, run: |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
10 # |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
11 # convert-repo <git-dir> <hg-dir> <mapfile> |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
12 # |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
13 # (don't forget to create the <hg-dir> repository beforehand) |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
14 # |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
15 # The <mapfile> is a simple text file that maps a git commit hash to |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
16 # the hash in Mercurial for that version, like so: |
316 | 17 # |
18 # <git hash> <mercurial hash> | |
19 # | |
3916
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
20 # If the file doesn't exist, it's automatically created. It's updated |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
21 # on each commit copied, so convert-repo can be interrupted and can |
645e1dd4b8ae
convert-repo: update usage information
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3911
diff
changeset
|
22 # be run repeatedly to copy new commits. |
316 | 23 |
694 | 24 import sys, os, zlib, sha, time |
3825
158fce02dc40
Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents:
2649
diff
changeset
|
25 |
158fce02dc40
Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents:
2649
diff
changeset
|
26 os.environ["HGENCODING"] = "utf-8" |
158fce02dc40
Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents:
2649
diff
changeset
|
27 |
1715
40346aa66b0f
Revert convert-repo changes
Matt Mackall <mpm@selenic.com>
parents:
1656
diff
changeset
|
28 from mercurial import hg, ui, util |
316 | 29 |
3825
158fce02dc40
Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents:
2649
diff
changeset
|
def recode(s):
    """Return the byte string *s* re-encoded as UTF-8.

    git does not record which charset a commit was written in, so the
    input is tried as UTF-8 first and as Latin-1 second.  Only
    encoding/decoding failures trigger a fallback; any other exception
    (including KeyboardInterrupt) propagates to the caller.
    """
    try:
        # already valid UTF-8: the round trip returns it unchanged
        return s.decode("utf-8").encode("utf-8")
    except UnicodeError:
        pass

    try:
        return s.decode("latin-1").encode("utf-8")
    except UnicodeError:
        # last resort: keep what decodes as UTF-8 and substitute the
        # replacement character for everything that does not
        return s.decode("utf-8", "replace").encode("utf-8")
158fce02dc40
Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents:
2649
diff
changeset
|
38 |
class convert_git:
    """Conversion source: reads commits, file contents and tags from git.

    Every query shells out to a git plumbing command with GIT_DIR set to
    ``path``.
    """

    def __init__(self, path):
        # path is interpolated into shell command lines as-is
        self.path = path

    def getheads(self):
        """Return a one-element list with the output of
        ``git-rev-parse --verify HEAD``."""
        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        try:
            # strip the final character (the newline ending the output)
            return [fh.read()[:-1]]
        finally:
            fh.close()

    def catfile(self, rev, type):
        """Return the output of ``git-cat-file <type> <rev>``.

        Raises IOError when *rev* is the all-zero hash.  git's error
        output is discarded, so a failed lookup simply yields whatever
        was written to stdout.
        """
        # presumably the null hash is what git-diff-tree reports for a
        # deleted file; the caller treats IOError as "file is gone"
        if rev == "0" * 40: raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type, rev))
        try:
            return fh.read()
        finally:
            fh.close()

    def getfile(self, name, rev):
        """Return the blob contents for hash *rev* (*name* is not used)."""
        return self.catfile(rev, "blob")

    def getchanges(self, version):
        """Return [(filename, hash, is_executable), ...] for commit *version*.

        Parsed from ``git-diff-tree --root -m -r``; lines without a tab
        are skipped.
        """
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s" % (self.path, version))
        changes = []
        try:
            for l in fh:
                if "\t" not in l: continue
                m, f = l[:-1].split("\t")
                m = m.split()
                h = m[3]                  # fourth field: hash of the new content
                p = (m[1] == "100755")    # second field: mode of the new content
                changes.append((f, h, p))
        finally:
            fh.close()
        return changes

    def _tzoffset(self, tz):
        """Convert a git zone such as "+0530" into an offset in seconds.

        The sign is inverted relative to git's notation, so "+0530"
        yields -19800 and "-0800" yields 28800.
        """
        sign = int(tz[-5:-4] + "1")
        hours = int(tz[-4:-2])
        minutes = int(tz[-2:])
        # minutes must be scaled to seconds just like the hours are
        return -sign * (hours * 3600 + minutes * 60)

    def getcommit(self, version):
        """Return (parents, author, date, message) for commit *version*.

        *date* is the string "<timestamp> <offset>".  The timestamp and
        zone are taken from whichever of the author/committer header
        lines comes last.  The committer is appended to the message as a
        "committer: ..." line.
        """
        c = self.catfile(version, "commit")
        # headers and message are separated by the first blank line
        end = c.find("\n\n")
        message = recode(c[end+2:])
        l = c[:end].splitlines()
        manifest = l[0].split()[1]  # second word of the first header line; not used further
        parents = []
        for e in l[1:]:
            n, v = e.split(" ", 1)
            if n == "author":
                p = v.split()
                # the last two words are the timestamp and the zone
                tm, tz = p[-2:]
                author = " ".join(p[:-2])
                if author[0] == "<": author = author[1:-1]
                author = recode(author)
            if n == "committer":
                p = v.split()
                tm, tz = p[-2:]
                committer = " ".join(p[:-2])
                if committer[0] == "<": committer = committer[1:-1]
                committer = recode(committer)
                message += "\ncommitter: %s\n" % committer
            if n == "parent": parents.append(v)

        date = tm + " " + str(self._tzoffset(tz))
        return (parents, author, date, message)

    def gettags(self):
        """Return {tag name: hash} for the entries under refs/tags.

        Each ref is read as a tag object and mapped to the second word of
        that object's first line.  Refs that cannot be read, or for which
        git-cat-file yields no tag object, are skipped.
        """
        tags = {}
        for f in os.listdir(self.path + "/refs/tags"):
            try:
                fh = open(self.path + "/refs/tags/" + f)
                try:
                    h = fh.read().strip()
                finally:
                    fh.close()
                c = self.catfile(h, "tag")  # read the commit hash
                tags[f] = c.splitlines()[0].split()[1]
            except (IOError, OSError, IndexError):
                # best effort: ignore this ref and carry on with the rest
                continue
        return tags
108 | |
class convert_mercurial:
    """Conversion destination: writes files, commits and tags into an
    existing Mercurial repository opened at ``path``."""

    def __init__(self, path):
        self.path = path
        u = ui.ui()
        self.repo = hg.repository(u, path)

    def getheads(self):
        """Return the changelog heads as hex strings."""
        h = self.repo.changelog.heads()
        return [ hg.hex(x) for x in h ]

    def putfile(self, f, e, data):
        """Write *data* to working-directory file *f* and set its exec
        flag to *e*; a file in dirstate state '?' is marked as added."""
        fh = self.repo.wfile(f, "w")
        try:
            fh.write(data)
        finally:
            # close explicitly so the data is on disk before committing
            fh.close()
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

        util.set_exec(self.repo.wjoin(f), e)

    def delfile(self, f):
        """Remove *f* from the working directory, ignoring a failed
        unlink.  The dirstate is not touched here."""
        try:
            os.unlink(self.repo.wjoin(f))
        except OSError:
            pass

    def putcommit(self, files, parents, author, dest, text):
        """Commit *files* and return the hex hash of the resulting tip.

        parents -- hex parent hashes; duplicates are dropped and the list
                   is padded with the null hash up to two entries
        dest    -- forwarded to rawcommit in the date position (the
                   caller passes the commit's date string)
        text    -- commit message

        With more than two parents one commit is made per extra parent,
        each merging the previous tip with the next parent under the
        message "(octopus merge fixup)".
        """
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, author, dest,
                                hg.bin(p1), hg.bin(p2))
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from *tags* ({name: hex hash}) and commit it.

        Returns the hex hash of the new tip when .hgtags changed, and
        None when the file already had exactly these lines.
        """
        try:
            old = self.repo.wfile(".hgtags").read()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except (IOError, OSError):
            # no readable .hgtags yet
            oldlines = []

        newlines = ["%s %s\n" % (tags[tag], tag) for tag in tags]
        newlines.sort()

        if newlines == oldlines:
            return None

        f = self.repo.wfile(".hgtags", "w")
        f.write("".join(newlines))
        f.close()
        if not oldlines: self.repo.add([".hgtags"])
        # current time as seconds since the epoch, with a zero offset;
        # time.mktime(time.gmtime()) would misread the UTC struct as
        # local time and skew the stamp by the local UTC offset
        date = "%s 0" % int(time.time())
        self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                            date, self.repo.changelog.tip(), hg.nullid)
        return hg.hex(self.repo.changelog.tip())
694 | 182 |
class convert:
    """Drive an incremental conversion from *source* to *dest*.

    *mapfile* holds one "<source hash> <dest hash>" pair per line.  It is
    read at start-up and appended to after every converted commit, so an
    interrupted run can be resumed.
    """

    def __init__(self, source, dest, mapfile):
        self.source = source
        self.dest = dest
        self.mapfile = mapfile
        self.commitcache = {}

        self.map = {}
        try:
            fh = open(self.mapfile)
            try:
                for l in fh:
                    # split() discards the line ending itself, so a final
                    # line without a newline keeps its last character
                    fields = l.split()
                    if not fields: continue
                    sv, dv = fields
                    self.map[sv] = dv
            finally:
                fh.close()
        except IOError:
            # no map file yet: start with an empty map
            pass

    def _mapappend(self, sv, dv):
        """Append one "<source> <dest>" line to the map file and close it."""
        fh = open(self.mapfile, "a")
        try:
            fh.write("%s %s\n" % (sv, dv))
        finally:
            fh.close()

    def walktree(self, heads):
        """Walk back from *heads* and return {commit: [parents]} for the
        commits not in the map.  Every visited commit is stored in
        self.commitcache.
        """
        # NOTE(review): a commit without parents gets no key here, so a
        # history consisting of a single root commit yields {} -- confirm
        # whether that case needs handling
        visit = list(heads)  # work on a copy; the caller's list stays intact
        known = {}
        parents = {}
        while visit:
            n = visit.pop(0)
            if n in known or n in self.map: continue
            known[n] = 1
            self.commitcache[n] = self.source.getcommit(n)
            cp = self.commitcache[n][0]
            for p in cp:
                parents.setdefault(n, []).append(p)
                visit.append(p)

        return parents

    def toposort(self, parents):
        """Return the commits of *parents* ordered so that each one comes
        after all of its parents that are not in the map.  The result may
        also contain already-mapped parents.
        """
        visit = list(parents)
        seen = {}
        children = {}

        # first pass: invert the parent links into a children table
        while visit:
            n = visit.pop(0)
            if n in seen: continue
            seen[n] = 1
            if n in parents:
                for p in parents[n]:
                    visit.append(p)
                    children.setdefault(p, []).append(n)

        # second pass: emit a commit once none of its unconverted parents
        # is still pending
        s = []
        removed = {}
        visit = list(children)
        while visit:
            n = visit.pop(0)
            if n in removed: continue
            dep = 0
            if n in parents:
                for p in parents[n]:
                    if p in self.map: continue
                    if p not in removed:
                        # we're still dependent
                        visit.append(n)
                        dep = 1
                        break

            if not dep:
                # all n's parents are in the list
                removed[n] = 1
                s.append(n)
                if n in children:
                    for c in children[n]:
                        visit.insert(0, c)

        return s

    def copy(self, rev):
        """Convert commit *rev* and record the new hash in the map and
        the map file."""
        p, a, d, t = self.commitcache[rev]
        files = self.source.getchanges(rev)

        for f, v, e in files:
            try:
                data = self.source.getfile(f, v)
            except IOError:
                # the source has no content for this file: remove it
                self.dest.delfile(f)
            else:
                self.dest.putfile(f, e, data)

        r = [self.map[v] for v in p]
        names = [f for f, v, e in files]
        self.map[rev] = self.dest.putcommit(names, r, a, d, t)
        self._mapappend(rev, self.map[rev])

    def convert(self):
        """Convert every commit not yet in the map, then update the tags."""
        heads = self.source.getheads()
        parents = self.walktree(heads)
        t = self.toposort(parents)
        t = [n for n in t if n not in self.map]
        c = None

        for c in t:
            self.copy(c)

        tags = self.source.gettags()
        ctags = {}
        for k in tags:
            v = tags[k]
            if v in self.map:
                ctags[k] = self.map[v]

        # tags are only touched when at least one commit was converted
        if c and ctags:
            nrev = self.dest.puttags(ctags)
            # write another hash correspondence to override the previous
            # one so we don't end up with extra tag heads
            if nrev:
                self._mapappend(c, nrev)
694 | 301 |
# Command-line entry point: convert-repo <git-dir> <hg-dir> <mapfile>
if len(sys.argv) != 4:
    # report a wrong argument count instead of failing on the unpacking
    sys.stderr.write("usage: convert-repo <git-dir> <hg-dir> <mapfile>\n")
    sys.exit(1)

gitpath, hgpath, mapfile = sys.argv[1:]
# accept a working tree as well as a bare GIT_DIR
if os.path.isdir(gitpath + "/.git"):
    gitpath += "/.git"

c = convert(convert_git(gitpath), convert_mercurial(hgpath), mapfile)
c.convert()