# HG changeset patch
# User Matt Mackall
# Date 1165536208 21600
# Node ID 158fce02dc4011e4dec1f7920de5ae622bad24b0
# Parent 4f056896c0934ec4b3d7cec10a251474351cb877
Teach convert-repo to deal with mixed charsets in git

diff --git a/contrib/convert-repo b/contrib/convert-repo
--- a/contrib/convert-repo
+++ b/contrib/convert-repo
@@ -21,8 +21,26 @@
 # interrupted and can be run repeatedly to copy new commits.
 
 import sys, os, zlib, sha, time
+
+os.environ["HGENCODING"] = "utf-8"
+
 from mercurial import hg, ui, util
 
+def recode(s):
+    """Return the byte string s re-encoded as UTF-8.
+
+    Git commits may mix charsets, so s is tried as UTF-8 first and as
+    Latin-1 second; the last resort decodes as UTF-8 with replacement
+    characters.  Only encoding errors trigger a fallback.
+    """
+    try:
+        return s.decode("utf-8").encode("utf-8")
+    except UnicodeError:
+        try:
+            return s.decode("latin-1").encode("utf-8")
+        except UnicodeError:
+            return s.decode("utf-8", "replace").encode("utf-8")
+
 class convert_git:
     def __init__(self, path):
         self.path = path
@@ -55,6 +73,7 @@ class convert_git:
         c = self.catfile(version, "commit") # read the commit hash
         end = c.find("\n\n")
         message = c[end+2:]
+        message = recode(message)
         l = c[:end].splitlines()
         manifest = l[0].split()[1]
         parents = []
@@ -65,11 +84,13 @@ class convert_git:
                 tm, tz = p[-2:]
                 author = " ".join(p[:-2])
                 if author[0] == "<": author = author[1:-1]
+                author = recode(author)
             if n == "committer":
                 p = v.split()
                 tm, tz = p[-2:]
                 committer = " ".join(p[:-2])
                 if committer[0] == "<": committer = committer[1:-1]
+                committer = recode(committer)
                 message += "\ncommitter: %s\n" % v
             if n == "parent": parents.append(v)
 