comparison hgext/convert/__init__.py @ 4534:cc9b79216a76

Split convert extension into common and repository type modules
author Brendan Cully <brendan@kublai.com>
date Sun, 10 Jun 2007 20:08:47 -0700
parents c3a78a49d7f0
children 9855939d0c82
comparison
equal deleted inserted replaced
4533:36abb07c79d4 4534:cc9b79216a76
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # 4 #
5 # This software may be used and distributed according to the terms 5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference. 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 import sys, os, zlib, sha, time, re, locale, socket 8 from common import NoRepo
9 from cvs import convert_cvs
10 from git import convert_git
11 from hg import convert_mercurial
12
13 import os
9 from mercurial import hg, ui, util, commands 14 from mercurial import hg, ui, util, commands
10 15
# Append this extension's command name to mercurial.commands.norepo.
# NOTE(review): presumably this marks "convert" as runnable without a
# local repository -- confirm against mercurial.commands.
commands.norepo += " convert"
12
class NoRepo(Exception):
    """Raised by converter classes when the given path is not a valid
    repository of their type."""
14
class commit(object):
    """Plain record describing one changeset.

    Every keyword argument becomes an attribute of the instance. The
    author, date, desc and parents fields are mandatory; util.Abort is
    raised when one of them is missing."""

    def __init__(self, **parts):
        for field in ("author", "date", "desc", "parents"):
            if field not in parts:
                raise util.Abort("commit missing field %s" % field)
        vars(self).update(parts)
21
def recode(s):
    """Return the byte string s re-encoded as UTF-8.

    s is decoded as UTF-8 first and as Latin-1 second; the first
    decoding that succeeds is encoded back to UTF-8. If both fail, s
    is decoded as UTF-8 with undecodable bytes replaced.

    Only Unicode errors trigger the fallback, so an interrupt or any
    unrelated error raised while decoding is propagated instead of
    being silently swallowed."""
    for encoding in ("utf-8", "latin-1"):
        try:
            return s.decode(encoding).encode("utf-8")
        except UnicodeError:
            pass
    return s.decode("utf-8", "replace").encode("utf-8")
30
class converter_source(object):
    """Interface that every conversion source has to implement"""

    def __init__(self, ui, path):
        """Open the source repository found at path.

        Implementations raise NoRepo("message") when path is not a
        valid repository."""
        raise NotImplementedError

    def getheads(self):
        """Return the heads of this repository as a list"""
        raise NotImplementedError

    def getfile(self, name, rev):
        """Return the contents of a file as a string"""
        raise NotImplementedError

    def getmode(self, name, rev):
        """Return the mode of a file, eg. '', 'x', or 'l'"""
        raise NotImplementedError

    def getchanges(self, version):
        """Return a sorted list of (filename, id) tuples, one for each
        file changed in the given revision.

        The id only tells getfile() which revision to return; in git,
        for example, it is an object hash."""
        raise NotImplementedError

    def getcommit(self, version):
        """Return the commit object describing version"""
        raise NotImplementedError

    def gettags(self):
        """Return the tags as a dictionary mapping name to revision"""
        raise NotImplementedError
65
class converter_sink(object):
    """Interface that every conversion sink (target) has to implement"""

    def __init__(self, ui, path):
        """Open the sink repository found at path.

        Implementations raise NoRepo("message") when path is not a
        valid repository."""
        raise NotImplementedError

    def getheads(self):
        """Return the heads of this repository as a list"""
        raise NotImplementedError

    def mapfile(self):
        """Return the path to a file that will contain lines of the form
        source_rev_id sink_rev_id
        mapping equivalent revision identifiers of the two systems."""
        raise NotImplementedError

    def putfile(self, f, e, data):
        """Store a file for the next putcommit().
        f: path to file
        e: '', 'x', or 'l' (regular file, executable, or symlink)
        data: file contents"""
        raise NotImplementedError

    def delfile(self, f):
        """Remove a file for the next putcommit().
        f: path to file"""
        raise NotImplementedError

    def putcommit(self, files, parents, commit):
        """Create a revision containing every changed file listed in
        'files' and having the listed parents. 'commit' is a commit
        object carrying at least the author, date, and message of the
        changeset. This is called after the putfile() and delfile()
        calls. Note that the sink repository is not told to update
        itself to a particular revision (or even what that revision
        would be) before it receives the file data."""
        raise NotImplementedError

    def puttags(self, tags):
        """Store tags in the sink.
        tags: {tagname: sink_rev_id, ...}"""
        raise NotImplementedError
110
111
112 # CVS conversion code inspired by hg-cvs-import and git-cvsimport
class convert_cvs(converter_source):
    """Conversion source backed by a CVS checkout.

    The list of changesets comes from running cvsps inside the
    checkout; file contents are fetched by talking the CVS
    client/server protocol to the root named in CVS/Root."""

    def __init__(self, ui, path):
        """Open the CVS checkout at path.

        Raises NoRepo when path has no "CVS" entry. Otherwise the cvsps
        output is parsed and the server connection is established
        before returning."""
        self.path = path
        self.ui = ui
        cvs = os.path.join(path, "CVS")
        if not os.path.exists(cvs):
            raise NoRepo("couldn't open CVS repo %s" % path)

        self.changeset = {}
        self.files = {}
        self.tags = {}
        self.lastbranch = {}
        self.parent = {}
        self.socket = None
        self.cvsroot = self._readadmin(cvs, "Root")
        self.cvsrepo = self._readadmin(cvs, "Repository")
        self.encoding = locale.getpreferredencoding()
        self._parse()
        self._connect()

    def _readadmin(self, cvsdir, name):
        """Return the contents of cvsdir/name without its last character
        (the trailing newline), closing the file afterwards."""
        fp = open(os.path.join(cvsdir, name))
        try:
            return fp.read()[:-1]
        finally:
            fp.close()

    def _parse(self):
        """Run cvsps in the checkout and fill in self.changeset,
        self.files, self.tags, self.parent, self.lastbranch and
        self.heads from its output.

        Does nothing when self.changeset is already populated. The
        current directory is restored even if parsing fails."""
        if self.changeset:
            return

        cwd = os.getcwd()
        try:
            os.chdir(self.path)
            cset = None
            # 0: patchset header, 1: log message, 2: member list
            state = 0
            for line in os.popen("cvsps -A -u --cvs-direct -q"):
                if state == 0: # header
                    if line.startswith("PatchSet"):
                        # drop the "PatchSet " prefix and the last two
                        # characters of the line
                        cset = line[9:-2]
                    elif line.startswith("Date"):
                        date = util.parsedate(line[6:-1],
                                              ["%Y/%m/%d %H:%M:%S"])
                        date = util.datestr(date)
                    elif line.startswith("Branch"):
                        branch = line[8:-1]
                        # the parent is the previous patchset seen on
                        # this branch ('bad' when there is none yet)
                        self.parent[cset] = self.lastbranch.get(branch, 'bad')
                        self.lastbranch[branch] = cset
                    elif line.startswith("Ancestor branch"):
                        ancestor = line[17:-1]
                        self.parent[cset] = self.lastbranch[ancestor]
                    elif line.startswith("Author"):
                        author = self.recode(line[8:-1])
                    elif line.startswith("Tag: "):
                        tag = line[5:-1].rstrip()
                        if tag != "(none)":
                            self.tags[tag] = cset
                    elif line.startswith("Log:"):
                        state = 1
                        log = ""
                elif state == 1: # log
                    if line == "Members: \n":
                        files = {}
                        log = self.recode(log[:-1])
                        if log.isspace():
                            log = "*** empty log message ***\n"
                        state = 2
                    else:
                        log += line
                elif state == 2:
                    if line == "\n":
                        # a blank line ends the member list: record the
                        # patchset and go back to reading headers
                        state = 0
                        parents = [self.parent[cset]]
                        if cset == "1":
                            parents = []
                        if branch == "HEAD":
                            branch = ""
                        self.changeset[cset] = commit(author=author,
                                                      date=date,
                                                      parents=parents,
                                                      desc=log,
                                                      branch=branch)
                        self.files[cset] = files
                    else:
                        # member line: the name is everything between the
                        # first character and the last colon, the new
                        # revision follows "->"
                        colon = line.rfind(':')
                        name = line[1:colon]
                        rev = line[colon+1:-2]
                        rev = rev.split("->")[1]
                        files[name] = rev

            self.heads = self.lastbranch.values()
        finally:
            os.chdir(cwd)

    def _connect(self):
        """Open a connection to the CVS server named by self.cvsroot.

        A ":pserver:" root is reached over a socket after password
        authentication; any other root is served by a "cvs server"
        subprocess, run through $CVS_RSH (default "rsh") when the root
        names a remote host. Sets self.readp, self.writep and
        self.realroot, then negotiates the valid requests.

        Raises NoRepo when pserver authentication fails and util.Abort
        when the server does not answer with Valid-requests."""
        root = self.cvsroot
        conntype = None
        user, host = None, None
        cmd = ['cvs', 'server']

        self.ui.status("connecting to %s\n" % root)

        if root.startswith(":pserver:"):
            root = root[9:]
            m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
                         root)
            if m:
                conntype = "pserver"
                user, passw, serv, port, root = m.groups()
                if not user:
                    user = "anonymous"
                rr = ":pserver:" + user + "@" + serv + ":" + root
                if port:
                    rr2, port = "-", int(port)
                else:
                    rr2, port = rr, 2401
                # NOTE(review): this appends the port number after the
                # root path; kept as is -- confirm against the .cvspass
                # key format
                rr += str(port)

                if not passw:
                    passw = "A"
                    pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
                    try:
                        for entry in pf:
                            # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
                            entry = re.match(r'(/\d+\s+/)?(.*)',
                                             entry).group(2)
                            if ' ' not in entry:
                                # no "root password" pair on this line
                                continue
                            w, p = entry.split(' ', 1)
                            if w in [rr, rr2]:
                                passw = p
                                break
                    finally:
                        pf.close()

                sck = socket.socket()
                sck.connect((serv, port))
                sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
                                    "END AUTH REQUEST", ""]))
                if sck.recv(128) != "I LOVE YOU\n":
                    raise NoRepo("CVS pserver authentication failed")

                self.writep = self.readp = sck.makefile('r+')

        if not conntype and root.startswith(":local:"):
            conntype = "local"
            root = root[7:]

        if not conntype:
            # :ext:user@host/home/user/path/to/cvsroot
            if root.startswith(":ext:"):
                root = root[5:]
            m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
            if not m:
                conntype = "local"
            else:
                conntype = "rsh"
                user, host, root = m.group(1), m.group(2), m.group(3)

        if conntype != "pserver":
            if conntype == "rsh":
                # fall back to "rsh" when CVS_RSH is unset or empty
                rsh = os.environ.get("CVS_RSH") or "rsh"
                if user:
                    cmd = [rsh, '-l', user, host] + cmd
                else:
                    cmd = [rsh, host] + cmd

            self.writep, self.readp = os.popen2(cmd)

        self.realroot = root

        self.writep.write("Root %s\n" % root)
        self.writep.write("Valid-responses ok error Valid-requests Mode"
                          " M Mbinary E Checked-in Created Updated"
                          " Merged Removed\n")
        self.writep.write("valid-requests\n")
        self.writep.flush()
        r = self.readp.readline()
        if not r.startswith("Valid-requests"):
            raise util.Abort("server sucks")
        if "UseUnchanged" in r:
            self.writep.write("UseUnchanged\n")
            self.writep.flush()
            r = self.readp.readline()

    def getheads(self):
        """Return the last patchset id seen on each branch"""
        return self.heads

    def _getfile(self, name, rev):
        """Check out revision rev of name from the server.

        Returns (data, mode) where mode is "x" when the mode line sent
        by the server contains an "x" and "" otherwise. Raises IOError
        for revisions marked "(DEAD)" and util.Abort on a response
        that is not understood."""
        if rev.endswith("(DEAD)"):
            raise IOError

        args = ("-N -P -kk -r %s --" % rev).split()
        args.append(os.path.join(self.cvsrepo, name))
        for arg in args:
            self.writep.write("Argument %s\n" % arg)
        self.writep.write("Directory .\n%s\nco\n" % self.realroot)
        self.writep.flush()

        data = ""
        # stays empty when the server never sends a mode line
        mode = ""
        while 1:
            line = self.readp.readline()
            if line.startswith("Created ") or line.startswith("Updated "):
                self.readp.readline() # path
                self.readp.readline() # entries
                mode = self.readp.readline()[:-1]
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            elif line.startswith(" "):
                data += line[1:]
            elif line.startswith("M "):
                pass
            elif line.startswith("Mbinary "):
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            elif line == "ok\n":
                if "x" in mode:
                    return (data, "x")
                return (data, "")
            elif line.startswith("E "):
                self.ui.warn("cvs server: %s\n" % line[2:])
            elif line.startswith("Remove"):
                # skip one line; the one after it must be "ok"
                self.readp.readline()
                reply = self.readp.readline()
                if reply != "ok\n":
                    raise util.Abort("unknown CVS response: %s" % reply)
            else:
                raise util.Abort("unknown CVS response: %s" % line)

    def getfile(self, file, rev):
        """Return the contents of file at rev and remember its mode for
        a later getmode() call.

        The mode cache is created by getchanges()."""
        data, mode = self._getfile(file, rev)
        self.modecache[(file, rev)] = mode
        return data

    def getmode(self, file, rev):
        """Return the mode recorded by getfile() for (file, rev)"""
        return self.modecache[(file, rev)]

    def getchanges(self, rev):
        """Return the sorted list of (filename, revision) pairs of
        patchset rev and reset the mode cache."""
        self.modecache = {}
        changes = list(self.files[rev].items())
        changes.sort()
        return changes

    def recode(self, text):
        """Convert text from the preferred locale encoding to UTF-8,
        replacing characters that cannot be decoded."""
        return text.decode(self.encoding, "replace").encode("utf-8")

    def getcommit(self, rev):
        """Return the commit object parsed for patchset rev"""
        return self.changeset[rev]

    def gettags(self):
        """Return the {tag: patchset id} dictionary built by _parse()"""
        return self.tags
350
class convert_git(converter_source):
    """Conversion source reading a git repository through the git
    command line tools.

    NOTE(review): self.path is interpolated into shell command lines
    unquoted (quoted only in gettags()); paths containing spaces or
    shell metacharacters will not work as intended."""

    def __init__(self, ui, path):
        """Open the git repository at path.

        When path contains a ".git" directory, that directory is used.
        Raises NoRepo when the resulting path has no "objects" entry."""
        if os.path.isdir(path + "/.git"):
            path += "/.git"
        self.path = path
        self.ui = ui
        if not os.path.exists(path + "/objects"):
            raise NoRepo("couldn't open GIT repo %s" % path)

    def getheads(self):
        """Return a one-element list holding the output of
        git-rev-parse for HEAD, minus its last character."""
        fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        return [fh.read()[:-1]]

    def catfile(self, rev, type):
        """Return the output of git-cat-file for object rev of the
        given type. Raises IOError for the all-zero hash."""
        if rev == "0" * 40:
            raise IOError()
        fh = os.popen("GIT_DIR=%s git-cat-file %s %s 2>/dev/null"
                      % (self.path, type, rev))
        return fh.read()

    def getfile(self, name, rev):
        """Return the blob rev; name is not used"""
        return self.catfile(rev, "blob")

    def getmode(self, name, rev):
        """Return the mode recorded by getchanges() for (name, rev)"""
        return self.modecache[(name, rev)]

    def getchanges(self, version):
        """Return the (filename, hash) pairs reported by git-diff-tree
        for version, in output order, and rebuild the mode cache.

        The cached mode is "x" for mode 100755, "l" for 120000 and ""
        otherwise."""
        self.modecache = {}
        fh = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
                      % (self.path, version))
        changes = []
        for line in fh:
            # only lines with a tab describe a changed file
            if "\t" not in line:
                continue
            meta, name = line[:-1].split("\t")
            fields = meta.split()
            newmode, newhash = fields[1], fields[3]
            if newmode == "100755":
                mode = "x"
            elif newmode == "120000":
                mode = "l"
            else:
                mode = ""
            self.modecache[(name, newhash)] = mode
            changes.append((name, newhash))
        return changes

    def _splitident(self, value):
        """Split the value of an author/committer header into
        (name, time, timezone).

        The last two whitespace separated fields are the time and the
        timezone; the rest is the name, recoded to UTF-8. A name that
        starts with "<" loses its first and last character."""
        fields = value.split()
        tm, tz = fields[-2:]
        name = " ".join(fields[:-2])
        # startswith() also copes with an empty name
        if name.startswith("<"):
            name = name[1:-1]
        return recode(name), tm, tz

    def getcommit(self, version):
        """Return the commit object for the git commit version.

        The description is the commit message followed by a
        "committer: ..." line for each committer header. The date is
        built from the time and timezone of the last author/committer
        header read: "<time> <offset>", the offset being the +HHMM
        timezone converted to seconds with its sign inverted."""
        raw = self.catfile(version, "commit") # read the commit hash
        end = raw.find("\n\n")
        message = recode(raw[end+2:])
        parents = []
        # the first header line is skipped
        for header in raw[:end].splitlines()[1:]:
            key, value = header.split(" ", 1)
            if key == "author":
                author, tm, tz = self._splitident(value)
            elif key == "committer":
                committer, tm, tz = self._splitident(value)
                message += "\ncommitter: %s\n" % committer
            elif key == "parent":
                parents.append(value)

        sign = int(tz[-5:-4] + "1")
        hours, minutes = int(tz[-4:-2]), int(tz[-2:])
        # minutes have to be scaled to seconds as well
        offset = -sign * (hours * 3600 + minutes * 60)
        date = tm + " " + str(offset)

        return commit(parents=parents, date=date, author=author,
                      desc=message)

    def gettags(self):
        """Return a {tag name: node} dictionary built from the
        "refs/tags/...^{}" lines printed by git-ls-remote --tags."""
        tags = {}
        fh = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
        prefix = 'refs/tags/'
        for line in fh:
            line = line.strip()
            if not line.endswith("^{}"):
                continue
            node, tag = line.split(None, 1)
            if not tag.startswith(prefix):
                continue
            # strip the prefix and the trailing "^{}"
            tags[tag[len(prefix):-3]] = node

        return tags
439
class convert_mercurial(converter_sink):
    """Conversion sink writing into a Mercurial repository"""

    def __init__(self, ui, path):
        """Open the Mercurial repository at path.

        Raises NoRepo when hg.repository() fails."""
        self.path = path
        self.ui = ui
        try:
            self.repo = hg.repository(self.ui, path)
        except Exception:
            raise NoRepo("couldn't open hg repo %s" % path)

    def mapfile(self):
        """Return the path of the revision map, <path>/.hg/shamap"""
        return os.path.join(self.path, ".hg", "shamap")

    def getheads(self):
        """Return the changelog heads as hex strings"""
        return [hg.hex(node) for node in self.repo.changelog.heads()]

    def putfile(self, f, e, data):
        """Write data to working file f with flags e, and mark the file
        as added when the dirstate reports it as unknown ('?')."""
        self.repo.wwrite(f, data, e)
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

    def delfile(self, f):
        """Remove working file f from disk, ignoring a failing unlink.

        Only the file itself is removed; repo.remove() is not called."""
        try:
            os.unlink(self.repo.wjoin(f))
        except OSError:
            pass

    def putcommit(self, files, parents, commit):
        """Commit files with the given parents and return the hex id of
        the resulting tip.

        Duplicate parents are dropped (first occurrence wins) and the
        list is padded with null ids up to two entries. With more than
        two parents, one commit is made per extra parent, each on top
        of the previous result, with the description
        "(octopus merge fixup)\\n". The commit's branch attribute, when
        present, is passed along as extra["branch"]."""
        seen = {}
        unique = []
        for p in parents:
            if p not in seen:
                unique.append(p)
                seen[p] = 1
        parents = unique

        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        text = commit.desc
        extra = {}
        try:
            extra["branch"] = commit.branch
        except AttributeError:
            pass

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, commit.author, commit.date,
                                hg.bin(p1), hg.bin(p2), extra=extra)
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite .hgtags from tags ({tagname: sink_rev_id, ...}) and
        commit it when its sorted content differs from the current one.

        Returns the hex id of the new tip when a commit was made and
        None otherwise."""
        try:
            old = self.repo.wfile(".hgtags").read()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except (IOError, OSError):
            # no readable .hgtags yet
            oldlines = []

        newlines = ["%s %s\n" % (tags[tag], tag) for tag in tags]
        newlines.sort()

        if newlines != oldlines:
            self.ui.status("updating tags\n")
            f = self.repo.wfile(".hgtags", "w")
            f.write("".join(newlines))
            f.close()
            if not oldlines:
                self.repo.add([".hgtags"])
            # current time as seconds since the epoch, UTC offset 0;
            # time.mktime(time.gmtime()) would be off by the local
            # timezone since mktime() expects local time
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())
524 17
# The converter classes known to this extension, in list order.
converters = [convert_cvs, convert_git, convert_mercurial]
526 19
527 def converter(ui, path): 20 def converter(ui, path):
528 if not os.path.isdir(path): 21 if not os.path.isdir(path):