hgext/convert/git.py
author Alexis S. L. Carvalho <alexis@cecm.usp.br>
Mon, 24 Sep 2007 19:00:11 -0300
changeset 5335 88e931f74e8b
parent 5233 9d7052f17d77
child 5336 24de027551c1
permissions -rw-r--r--
convert_git: avoid returning two entries for the same file in getchanges This could happen in merge changesets if the merged file was different from both parents.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4534
cc9b79216a76 Split convert extension into common and repository type modules
Brendan Cully <brendan@kublai.com>
parents: 4532
diff changeset
     1
# git support for the convert extension
3825
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
     2
4534
cc9b79216a76 Split convert extension into common and repository type modules
Brendan Cully <brendan@kublai.com>
parents: 4532
diff changeset
     3
import os
5233
9d7052f17d77 convert: fix /dev/null redirections under Windows
Patrick Mezard <pmezard@gmail.com>
parents: 5229
diff changeset
     4
from mercurial import util
3947
0fab73b3f453 convert-repo: add some smarts
Matt Mackall <mpm@selenic.com>
parents: 3916
diff changeset
     5
4534
cc9b79216a76 Split convert extension into common and repository type modules
Brendan Cully <brendan@kublai.com>
parents: 4532
diff changeset
     6
from common import NoRepo, commit, converter_source
3955
9af4b853ed4d convert-repo: add CVS branch support
Matt Mackall <mpm@selenic.com>
parents: 3954
diff changeset
     7
3825
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
     8
def recode(s):
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
     9
    try:
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    10
        return s.decode("utf-8").encode("utf-8")
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    11
    except:
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    12
        try:
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    13
            return s.decode("latin-1").encode("utf-8")
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    14
        except:
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    15
            return s.decode("utf-8", "replace").encode("utf-8")
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    16
4447
af013ae3ca10 use documented convert-repo interface
Daniel Holth <dholth@fastmail.fm>
parents: 4446
diff changeset
    17
class convert_git(converter_source):
5229
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    18
    # Windows does not support GIT_DIR= construct while other systems
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    19
    # cannot remove environment variable. Just assume none have
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    20
    # both issues.
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    21
    if hasattr(os, 'unsetenv'):
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    22
        def gitcmd(self, s):
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    23
            prevgitdir = os.environ.get('GIT_DIR')
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    24
            os.environ['GIT_DIR'] = self.path
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    25
            try:
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    26
                return os.popen(s)
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    27
            finally:
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    28
                if prevgitdir is None:
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    29
                    del os.environ['GIT_DIR']
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    30
                else:
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    31
                    os.environ['GIT_DIR'] = prevgitdir
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    32
    else:
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    33
        def gitcmd(self, s):
149742a628fd convert: fix issue702 about GIT_DIR= construct unsupported under Windows.
Patrick Mezard <pmezard@gmail.com>
parents: 5228
diff changeset
    34
            return os.popen('GIT_DIR=%s %s' % (self.path, s))
5228
39e6deaa8b81 convert: gitcmd wrapper for os.popen
Brendan Cully <brendan@kublai.com>
parents: 4721
diff changeset
    35
    
4513
ac2fe196ac9b Turns convert.py into a real extension
Edouard Gomez <ed.gomez@free.fr>
parents: 4512
diff changeset
    36
    def __init__(self, ui, path):
3947
0fab73b3f453 convert-repo: add some smarts
Matt Mackall <mpm@selenic.com>
parents: 3916
diff changeset
    37
        if os.path.isdir(path + "/.git"):
0fab73b3f453 convert-repo: add some smarts
Matt Mackall <mpm@selenic.com>
parents: 3916
diff changeset
    38
            path += "/.git"
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    39
        self.path = path
4513
ac2fe196ac9b Turns convert.py into a real extension
Edouard Gomez <ed.gomez@free.fr>
parents: 4512
diff changeset
    40
        self.ui = ui
4111
06d65498f73b convert-repo: use .git/objects/ rather than .git/HEAD to detect git repos
Matt Mackall <mpm@selenic.com>
parents: 4082
diff changeset
    41
        if not os.path.exists(path + "/objects"):
3954
fad134931327 convert-repo: add basic CVS import support
Matt Mackall <mpm@selenic.com>
parents: 3948
diff changeset
    42
            raise NoRepo("couldn't open GIT repo %s" % path)
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    43
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    44
    def getheads(self):
5228
39e6deaa8b81 convert: gitcmd wrapper for os.popen
Brendan Cully <brendan@kublai.com>
parents: 4721
diff changeset
    45
        fh = self.gitcmd("git-rev-parse --verify HEAD")
2649
e6a7a6a33a62 make convert-repo deal with git symbolic refs.
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 2093
diff changeset
    46
        return [fh.read()[:-1]]
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    47
692
695dd9a491da convert-repo: deal with packed git and other fixes
mpm@selenic.com
parents: 450
diff changeset
    48
    def catfile(self, rev, type):
695dd9a491da convert-repo: deal with packed git and other fixes
mpm@selenic.com
parents: 450
diff changeset
    49
        if rev == "0" * 40: raise IOError()
5233
9d7052f17d77 convert: fix /dev/null redirections under Windows
Patrick Mezard <pmezard@gmail.com>
parents: 5229
diff changeset
    50
        fh = self.gitcmd("git-cat-file %s %s 2>%s" % (type, rev,
9d7052f17d77 convert: fix /dev/null redirections under Windows
Patrick Mezard <pmezard@gmail.com>
parents: 5229
diff changeset
    51
                                                      util.nulldev))
692
695dd9a491da convert-repo: deal with packed git and other fixes
mpm@selenic.com
parents: 450
diff changeset
    52
        return fh.read()
695dd9a491da convert-repo: deal with packed git and other fixes
mpm@selenic.com
parents: 450
diff changeset
    53
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    54
    def getfile(self, name, rev):
692
695dd9a491da convert-repo: deal with packed git and other fixes
mpm@selenic.com
parents: 450
diff changeset
    55
        return self.catfile(rev, "blob")
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    56
3957
558f52943cd2 convert-repo: add CVS exec bit support
Matt Mackall <mpm@selenic.com>
parents: 3955
diff changeset
    57
    def getmode(self, name, rev):
558f52943cd2 convert-repo: add CVS exec bit support
Matt Mackall <mpm@selenic.com>
parents: 3955
diff changeset
    58
        return self.modecache[(name, rev)]
558f52943cd2 convert-repo: add CVS exec bit support
Matt Mackall <mpm@selenic.com>
parents: 3955
diff changeset
    59
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    60
    def getchanges(self, version):
3957
558f52943cd2 convert-repo: add CVS exec bit support
Matt Mackall <mpm@selenic.com>
parents: 3955
diff changeset
    61
        self.modecache = {}
5228
39e6deaa8b81 convert: gitcmd wrapper for os.popen
Brendan Cully <brendan@kublai.com>
parents: 4721
diff changeset
    62
        fh = self.gitcmd("git-diff-tree --root -m -r %s" % version)
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    63
        changes = []
5335
88e931f74e8b convert_git: avoid returning two entries for the same file in getchanges
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 5233
diff changeset
    64
        seen = {}
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    65
        for l in fh:
5335
88e931f74e8b convert_git: avoid returning two entries for the same file in getchanges
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 5233
diff changeset
    66
            if "\t" not in l:
88e931f74e8b convert_git: avoid returning two entries for the same file in getchanges
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 5233
diff changeset
    67
                continue
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    68
            m, f = l[:-1].split("\t")
5335
88e931f74e8b convert_git: avoid returning two entries for the same file in getchanges
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 5233
diff changeset
    69
            if f in seen:
88e931f74e8b convert_git: avoid returning two entries for the same file in getchanges
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 5233
diff changeset
    70
                continue
88e931f74e8b convert_git: avoid returning two entries for the same file in getchanges
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 5233
diff changeset
    71
            seen[f] = 1
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    72
            m = m.split()
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    73
            h = m[3]
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    74
            p = (m[1] == "100755")
4082
6b2909e84203 convert-repo converts symlinks from git
Daniel Holth <dholth@fastmail.fm>
parents: 4062
diff changeset
    75
            s = (m[1] == "120000")
6b2909e84203 convert-repo converts symlinks from git
Daniel Holth <dholth@fastmail.fm>
parents: 4062
diff changeset
    76
            self.modecache[(f, h)] = (p and "x") or (s and "l") or ""
3957
558f52943cd2 convert-repo: add CVS exec bit support
Matt Mackall <mpm@selenic.com>
parents: 3955
diff changeset
    77
            changes.append((f, h))
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    78
        return changes
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    79
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    80
    def getcommit(self, version):
692
695dd9a491da convert-repo: deal with packed git and other fixes
mpm@selenic.com
parents: 450
diff changeset
    81
        c = self.catfile(version, "commit") # read the commit hash
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    82
        end = c.find("\n\n")
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    83
        message = c[end+2:]
3825
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    84
        message = recode(message)
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    85
        l = c[:end].splitlines()
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    86
        manifest = l[0].split()[1]
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    87
        parents = []
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    88
        for e in l[1:]:
4532
c3a78a49d7f0 Some small cleanups for convert extension:
Thomas Arendsen Hein <thomas@intevation.de>
parents: 4521
diff changeset
    89
            n, v = e.split(" ", 1)
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    90
            if n == "author":
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    91
                p = v.split()
1385
adb3de56635b convert-repo: Fix timezone handling
Matt Mackall <mpm@selenic.com>
parents: 1335
diff changeset
    92
                tm, tz = p[-2:]
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    93
                author = " ".join(p[:-2])
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
    94
                if author[0] == "<": author = author[1:-1]
3825
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
    95
                author = recode(author)
692
695dd9a491da convert-repo: deal with packed git and other fixes
mpm@selenic.com
parents: 450
diff changeset
    96
            if n == "committer":
431
dfc44f3f587c convert-repo fixups
mpm@selenic.com
parents: 316
diff changeset
    97
                p = v.split()
1385
adb3de56635b convert-repo: Fix timezone handling
Matt Mackall <mpm@selenic.com>
parents: 1335
diff changeset
    98
                tm, tz = p[-2:]
431
dfc44f3f587c convert-repo fixups
mpm@selenic.com
parents: 316
diff changeset
    99
                committer = " ".join(p[:-2])
dfc44f3f587c convert-repo fixups
mpm@selenic.com
parents: 316
diff changeset
   100
                if committer[0] == "<": committer = committer[1:-1]
3825
158fce02dc40 Teach convert-repo to deal with mixed charsets in git
Matt Mackall <mpm@selenic.com>
parents: 2649
diff changeset
   101
                committer = recode(committer)
3910
4bc5a2405b12 convert-repo: fix recoding of committer
Matt Mackall <mpm@selenic.com>
parents: 3825
diff changeset
   102
                message += "\ncommitter: %s\n" % committer
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
   103
            if n == "parent": parents.append(v)
1385
adb3de56635b convert-repo: Fix timezone handling
Matt Mackall <mpm@selenic.com>
parents: 1335
diff changeset
   104
adb3de56635b convert-repo: Fix timezone handling
Matt Mackall <mpm@selenic.com>
parents: 1335
diff changeset
   105
        tzs, tzh, tzm = tz[-5:-4] + "1", tz[-4:-2], tz[-2:]
2093
5cc414722587 convert-repo: fix reversed time zone offset
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1715
diff changeset
   106
        tz = -int(tzs) * (int(tzh) * 3600 + int(tzm))
1385
adb3de56635b convert-repo: Fix timezone handling
Matt Mackall <mpm@selenic.com>
parents: 1335
diff changeset
   107
        date = tm + " " + str(tz)
4721
96614af3c679 convert: "unknown" is a string
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4687
diff changeset
   108
        author = author or "unknown"
3955
9af4b853ed4d convert-repo: add CVS branch support
Matt Mackall <mpm@selenic.com>
parents: 3954
diff changeset
   109
9af4b853ed4d convert-repo: add CVS branch support
Matt Mackall <mpm@selenic.com>
parents: 3954
diff changeset
   110
        c = commit(parents=parents, date=date, author=author, desc=message)
9af4b853ed4d convert-repo: add CVS branch support
Matt Mackall <mpm@selenic.com>
parents: 3954
diff changeset
   111
        return c
316
c48d069163d6 Add new convert-repo script
mpm@selenic.com
parents:
diff changeset
   112
694
51eb248d3348 Teach convert-repo about tags
mpm@selenic.com
parents: 692
diff changeset
   113
    def gettags(self):
51eb248d3348 Teach convert-repo about tags
mpm@selenic.com
parents: 692
diff changeset
   114
        tags = {}
5233
9d7052f17d77 convert: fix /dev/null redirections under Windows
Patrick Mezard <pmezard@gmail.com>
parents: 5229
diff changeset
   115
        fh = self.gitcmd('git-ls-remote --tags "%s" 2>%s' % (self.path,
9d7052f17d77 convert: fix /dev/null redirections under Windows
Patrick Mezard <pmezard@gmail.com>
parents: 5229
diff changeset
   116
                                                             util.nulldev))
4062
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   117
        prefix = 'refs/tags/'
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   118
        for line in fh:
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   119
            line = line.strip()
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   120
            if not line.endswith("^{}"):
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   121
                continue
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   122
            node, tag = line.split(None, 1)
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   123
            if not tag.startswith(prefix):
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   124
                continue
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   125
            tag = tag[len(prefix):-3]
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   126
            tags[tag] = node
516f883e3d79 convert-repo: handle packed git tags
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 4047
diff changeset
   127
694
51eb248d3348 Teach convert-repo about tags
mpm@selenic.com
parents: 692
diff changeset
   128
        return tags