# HG changeset patch # User Matt Mackall # Date 1165275817 21600 # Node ID 18c93649f56380df50810a02fc91fd8761df99ef # Parent 8d603f8567aeadd709fff63157ccc3295ceeae07# Parent 8ecc9c57d8833685686c273dbe4f613d5e9d75ba Merge with crew diff --git a/doc/hg.1.txt b/doc/hg.1.txt --- a/doc/hg.1.txt +++ b/doc/hg.1.txt @@ -172,6 +172,20 @@ HGEDITOR:: (deprecated, use .hgrc) +HGENCODING:: + This overrides the default locale setting detected by Mercurial. + This setting is used to convert data including usernames, + changeset descriptions, tag names, and branches. This setting can + be overridden with the --encoding command-line option. + +HGENCODINGMODE:: + This sets Mercurial's behavior for handling unknown characters + while transcoding user inputs. The default is "strict", which + causes Mercurial to abort if it can't translate a character. Other + settings include "replace", which replaces unknown characters, and + "ignore", which drops them. This setting can be overridden with + the --encodingmode command-line option. + HGMERGE:: An executable to use for resolving merge conflicts. The program will be executed with three arguments: local file, remote file, diff --git a/hgweb.cgi b/hgweb.cgi --- a/hgweb.cgi +++ b/hgweb.cgi @@ -10,6 +10,13 @@ from mercurial.hgweb.hgweb_mod import hg from mercurial.hgweb.request import wsgiapplication import mercurial.hgweb.wsgicgi as wsgicgi +# If you'd like to serve pages with UTF-8 instead of your default +# locale charset, you can do so by uncommenting the following lines. +# Note that this will cause your .hgrc files to be interpreted in +# UTF-8 and all your repo files to be displayed using UTF-8. 
+# +# os.environ["HGENCODING"] = "UTF-8" + def make_web_app(): return hgweb("/path/to/repo", "repository name") diff --git a/hgwebdir.cgi b/hgwebdir.cgi --- a/hgwebdir.cgi +++ b/hgwebdir.cgi @@ -29,6 +29,13 @@ import mercurial.hgweb.wsgicgi as wsgicg # Alternatively you can pass a list of ('virtual/path', '/real/path') tuples # or use a dictionary with entries like 'virtual/path': '/real/path' +# If you'd like to serve pages with UTF-8 instead of your default +# locale charset, you can do so by uncommenting the following lines. +# Note that this will cause your .hgrc files to be interpreted in +# UTF-8 and all your repo files to be displayed using UTF-8. +# +# os.environ["HGENCODING"] = "UTF-8" + def make_web_app(): return hgwebdir("hgweb.config") diff --git a/mercurial/changelog.py b/mercurial/changelog.py --- a/mercurial/changelog.py +++ b/mercurial/changelog.py @@ -61,10 +61,10 @@ class changelog(revlog): if not text: return (nullid, "", (0, 0), [], "", {}) last = text.index("\n\n") - desc = text[last + 2:] + desc = util.tolocal(text[last + 2:]) l = text[:last].split('\n') manifest = bin(l[0]) - user = l[1] + user = util.tolocal(l[1]) extra_data = l[2].split(' ', 2) if len(extra_data) != 3: @@ -88,6 +88,8 @@ class changelog(revlog): def add(self, manifest, list, desc, transaction, p1=None, p2=None, user=None, date=None, extra={}): + user, desc = util.fromlocal(user), util.fromlocal(desc) + if date: parseddate = "%d %d" % util.parsedate(date) else: diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -8,7 +8,7 @@ from demandload import demandload from node import * from i18n import gettext as _ -demandload(globals(), "os re sys signal imp urllib pdb shlex") +demandload(globals(), "bisect os re sys signal imp urllib pdb shlex stat") demandload(globals(), "fancyopts ui hg util lock revlog bundlerepo") demandload(globals(), "difflib patch time") demandload(globals(), "traceback errno version atexit") 
@@ -273,7 +273,9 @@ def branches(ui, repo): if ui.quiet: ui.write("%s\n" % t) else: - ui.write("%-30s %s:%s\n" % (t, -r, hexfunc(n))) + t = util.localsub(t, 30) + t += " " * (30 - util.locallen(t)) + ui.write("%s %s:%s\n" % (t, -r, hexfunc(n))) def bundle(ui, repo, fname, dest=None, **opts): """create a changegroup file @@ -421,12 +423,28 @@ def commit(ui, repo, *pats, **opts): status = repo.status(files=fns, match=match) modified, added, removed, deleted, unknown = status[:5] files = modified + added + removed + slist = None for f in fns: - if f not in modified + added + removed: + if f not in files: + rf = repo.wjoin(f) if f in unknown: - raise util.Abort(_("file %s not tracked!") % f) - else: - raise util.Abort(_("file %s not found!") % f) + raise util.Abort(_("file %s not tracked!") % rf) + try: + mode = os.lstat(rf)[stat.ST_MODE] + except OSError: + raise util.Abort(_("file %s not found!") % rf) + if stat.S_ISDIR(mode): + name = f + '/' + if slist is None: + slist = list(files) + slist.sort() + i = bisect.bisect(slist, name) + if i >= len(slist) or not slist[i].startswith(name): + raise util.Abort(_("no match under directory %s!") + % rf) + elif not stat.S_ISREG(mode): + raise util.Abort(_("can't commit %s: " + "unsupported file type!") % rf) else: files = [] try: @@ -2210,7 +2228,9 @@ def tags(ui, repo): if ui.quiet: ui.write("%s\n" % t) else: - ui.write("%-30s %s\n" % (t, r)) + t = util.localsub(t, 30) + t += " " * (30 - util.locallen(t)) + ui.write("%s %s\n" % (t, r)) def tip(ui, repo, **opts): """show the tip revision @@ -2311,6 +2331,8 @@ globalopts = [ ('', 'config', [], _('set/override config option')), ('', 'debug', None, _('enable debugging output')), ('', 'debugger', None, _('start debugger')), + ('', 'encoding', util._encoding, _('set the charset encoding')), + ('', 'encodingmode', util._encodingmode, _('set the charset encoding mode')), ('', 'lsprof', None, _('print improved command execution profile')), ('', 'traceback', None, _('print traceback 
on exception')), ('', 'time', None, _('time how long the command takes')), @@ -2863,6 +2885,10 @@ def dispatch(args): try: cmd, func, args, options, cmdoptions = parse(u, args) + if options["encoding"]: + util._encoding = options["encoding"] + if options["encodingmode"]: + util._encodingmode = options["encodingmode"] if options["time"]: def get_times(): t = os.times() diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py --- a/mercurial/hgweb/hgweb_mod.py +++ b/mercurial/hgweb/hgweb_mod.py @@ -654,7 +654,8 @@ class hgweb(object): def run_wsgi(self, req): def header(**map): - header_file = cStringIO.StringIO(''.join(self.t("header", **map))) + header_file = cStringIO.StringIO( + ''.join(self.t("header", encoding = util._encoding, **map))) msg = mimetools.Message(header_file, 0) req.header(msg.items()) yield header_file.read() diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -198,6 +198,7 @@ class localrepository(repo.repository): self.hook('pretag', throw=True, node=hex(node), tag=name, local=local) if local: + # local tags are stored in the current charset self.opener('localtags', 'a').write('%s %s\n' % (hex(node), name)) self.hook('tag', node=hex(node), tag=name, local=local) return @@ -207,7 +208,9 @@ class localrepository(repo.repository): raise util.Abort(_('working copy of .hgtags is changed ' '(please commit .hgtags manually)')) - self.wfile('.hgtags', 'ab').write('%s %s\n' % (hex(node), name)) + # committed tags are stored in UTF-8 + line = '%s %s\n' % (hex(node), util.fromlocal(name)) + self.wfile('.hgtags', 'ab').write(line) if self.dirstate.state('.hgtags') == '?': self.add(['.hgtags']) @@ -227,7 +230,7 @@ class localrepository(repo.repository): self.ui.warn(_("%s: cannot parse entry\n") % context) return node, key = s - key = key.strip() + key = util.tolocal(key.strip()) # stored in UTF-8 try: bin_n = bin(node) except TypeError: @@ -256,6 +259,9 @@ class 
localrepository(repo.repository): f = self.opener("localtags") count = 0 for l in f: + # localtags are stored in the local character set + # while the internal tag table is stored in UTF-8 + l = util.fromlocal(l) count += 1 parsetag(l, _("localtags, line %d") % count) except IOError: @@ -316,7 +322,10 @@ class localrepository(repo.repository): self._updatebranchcache(partial, lrev+1, tiprev+1) self._writebranchcache(partial, self.changelog.tip(), tiprev) - self.branchcache = partial + # the branch cache is stored on disk as UTF-8, but in the local + # charset internally + for k, v in partial.items(): + self.branchcache[util.tolocal(k)] = v return self.branchcache def _readbranchcache(self): @@ -627,12 +636,12 @@ class localrepository(repo.repository): m2 = self.manifest.read(c2[0]) if use_dirstate: - branchname = self.workingctx().branch() + branchname = util.fromlocal(self.workingctx().branch()) else: branchname = "" if use_dirstate: - oldname = c1[5].get("branch", "") + oldname = c1[5].get("branch", "") # stored in UTF-8 if not commit and not remove and not force and p2 == nullid and \ branchname == oldname: self.ui.status(_("nothing changed\n")) diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -68,6 +68,16 @@ def checkunknown(wctx, mctx): raise util.Abort(_("untracked local file '%s' differs"\ " from remote version") % f) +def checkcollision(mctx): + "check for case folding collisions in the destination context" + folded = {} + for fn in mctx.manifest(): + fold = fn.lower() + if fold in folded: + raise util.Abort(_("case-folding collision between %s and %s") + % (fn, folded[fold])) + folded[fold] = fn + def forgetremoved(wctx, mctx): """ Forget removed files @@ -460,6 +470,8 @@ def update(repo, node, branchmerge, forc action = [] if not force: checkunknown(wc, p2) + if not util.checkfolding(repo.path): + checkcollision(p2) if not branchmerge: action += forgetremoved(wc, p2) action += manifestmerge(repo, 
wc, p2, pa, overwrite, partial) @@ -477,6 +489,7 @@ def update(repo, node, branchmerge, forc repo.dirstate.setparents(fp1, fp2) repo.hook('update', parent1=xp1, parent2=xp2, error=stats[3]) if not branchmerge: + b = util.tolocal(p2.branch()) repo.opener("branch", "w").write(p2.branch() + "\n") return stats diff --git a/mercurial/util.py b/mercurial/util.py --- a/mercurial/util.py +++ b/mercurial/util.py @@ -15,7 +15,61 @@ platform-specific details from the core. from i18n import gettext as _ from demandload import * demandload(globals(), "cStringIO errno getpass popen2 re shutil sys tempfile") -demandload(globals(), "os threading time calendar ConfigParser") +demandload(globals(), "os threading time calendar ConfigParser locale") + +_encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding() +_encodingmode = os.environ.get("HGENCODINGMODE", "strict") + +def tolocal(s): + """ + Convert a string from internal UTF-8 to local encoding + + All internal strings should be UTF-8 but some repos before the + implementation of locale support may contain latin1 or possibly + other character sets. We attempt to decode everything strictly + using UTF-8, then Latin-1, and failing that, we use UTF-8 and + replace unknown characters. + """ + for e in "utf-8 latin1".split(): + try: + u = s.decode(e) # attempt strict decoding + return u.encode(_encoding, "replace") + except UnicodeDecodeError: + pass + u = s.decode("utf-8", "replace") # last ditch + return u.encode(_encoding, "replace") + +def fromlocal(s): + """ + Convert a string from the local character encoding to UTF-8 + + We attempt to decode strings using the encoding mode set by + HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown + characters will cause an error message. Other modes include + 'replace', which replaces unknown characters with a special + Unicode character, and 'ignore', which drops the character. 
+ """ + try: + return s.decode(_encoding, _encodingmode).encode("utf-8") + except UnicodeDecodeError, inst: + sub = s[max(0, inst.start-10):inst.start+10] + raise Abort("decoding near '%s': %s!\n" % (sub, inst)) + +def locallen(s): + """Find the length in characters of a local string""" + return len(s.decode(_encoding, "replace")) + +def localsub(s, a, b=None): + try: + u = s.decode(_encoding, _encodingmode) + if b is not None: + u = u[a:b] + else: + u = u[:a] + return u.encode(_encoding, _encodingmode) + except UnicodeDecodeError, inst: + sub = s[max(0, inst.start-10):inst.start+10] + raise Abort("decoding near '%s': %s!\n" % (sub, inst)) # used by parsedate defaultdateformats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M', @@ -579,6 +633,28 @@ def groupname(gid=None): except ImportError: return None +# File system features + +def checkfolding(path): + """ + Check whether the given path is on a case-sensitive filesystem + + Requires a path (like /foo/.hg) ending with a foldable final + directory component. 
+ """ + s1 = os.stat(path) + d, b = os.path.split(path) + p2 = os.path.join(d, b.upper()) + if path == p2: + p2 = os.path.join(d, b.lower()) + try: + s2 = os.stat(p2) + if s2 == s1: + return False + return True + except: + return True + # Platform specific variants if os.name == 'nt': demandload(globals(), "msvcrt") diff --git a/templates/gitweb/header.tmpl b/templates/gitweb/header.tmpl --- a/templates/gitweb/header.tmpl +++ b/templates/gitweb/header.tmpl @@ -1,11 +1,10 @@ -Content-type: text/html +Content-type: text/html; charset={encoding} - + - diff --git a/templates/header.tmpl b/templates/header.tmpl --- a/templates/header.tmpl +++ b/templates/header.tmpl @@ -1,4 +1,4 @@ -Content-type: text/html +Content-type: text/html; charset={encoding} diff --git a/templates/raw/header.tmpl b/templates/raw/header.tmpl --- a/templates/raw/header.tmpl +++ b/templates/raw/header.tmpl @@ -1,1 +1,2 @@ -Content-type: text/plain +Content-type: text/plain; charset={encoding} + diff --git a/tests/legacy-encoding.hg b/tests/legacy-encoding.hg new file mode 100644 index 0000000000000000000000000000000000000000..d050be8ebfef9771e5e1dffdd1cb98be3a51765a GIT binary patch literal 308 zc$@(=0n7eKM=>x$T4*^jL0KkKS&)wZssI29|Nq{Z!~#^BfAAC#q7;8;Ob7rd00dBS z2CfmnAYzCiNdhncxY%lwKxhEadWM5Q0gwTvful_tG}BEzLZr!|y+=|10pi)d^!MlIv^R`l=*HH}wbP`W}4@n46g7crMv*lVk=e-{g z5P<*Q1AuV@C=Gz(P06PJCTIGMuZO~pzSwkuq&+H?bzvkIJ2VzaRtwfEo6{6l^x+hM7zT{s7Tk0KaAx-^Hn)_`8xR!i0o` Gcl}WHxQ9{z diff --git a/tests/run-tests.py b/tests/run-tests.py --- a/tests/run-tests.py +++ b/tests/run-tests.py @@ -332,6 +332,8 @@ os.environ['TZ'] = 'GMT' os.environ["HGEDITOR"] = sys.executable + ' -c "import sys; sys.exit(0)"' os.environ["HGMERGE"] = sys.executable + ' -c "import sys; sys.exit(0)"' os.environ["HGUSER"] = "test" +os.environ["HGENCODING"] = "ascii" +os.environ["HGENCODINGMODE"] = "strict" TESTDIR = os.environ["TESTDIR"] = os.getcwd() HGTMP = os.environ["HGTMP"] = tempfile.mkdtemp("", "hgtests.") diff --git a/tests/test-commit 
b/tests/test-commit --- a/tests/test-commit +++ b/tests/test-commit @@ -1,5 +1,10 @@ #!/bin/sh +cleanpath() +{ + sed -e "s:/.*\(/test/.*\):...\1:" +} + echo % commit date test hg init test cd test @@ -17,7 +22,35 @@ echo % partial commit test echo bar > bar hg add bar rm bar -hg commit -d "1000000 0" -m commit-8 2>&1 | sed -e "s:/.*\(/test/.*\):...\1:" +hg commit -d "1000000 0" -m commit-8 2>&1 | cleanpath + +hg -q revert -a --no-backup + +mkdir dir +echo boo > dir/file +hg add +hg -v commit -d '0 0' -m commit-9 dir + +echo > dir.file +hg add +hg commit -d '0 0' -m commit-10 dir dir.file 2>&1 | cleanpath + +echo >> dir/file +mkdir bleh +mkdir dir2 +cd bleh +hg commit -d '0 0' -m commit-11 . 2>&1 | cleanpath +hg commit -d '0 0' -m commit-12 ../dir ../dir2 2>&1 | cleanpath +hg -v commit -d '0 0' -m commit-13 ../dir +cd .. + +hg commit -d '0 0' -m commit-14 does-not-exist 2>&1 | cleanpath +ln -s foo baz +hg commit -d '0 0' -m commit-15 baz 2>&1 | cleanpath +touch quux +hg commit -d '0 0' -m commit-16 quux 2>&1 | cleanpath +echo >> dir/file +hg -v commit -d '0 0' -m commit-17 dir/file cd .. echo % partial subdir commit test diff --git a/tests/test-commit.out b/tests/test-commit.out --- a/tests/test-commit.out +++ b/tests/test-commit.out @@ -17,6 +17,19 @@ rollback completed % partial commit test trouble committing bar! abort: No such file or directory: .../test/bar +adding dir/file +dir/file +adding dir.file +abort: no match under directory .../test/dir! +abort: no match under directory .../test/bleh! +abort: no match under directory .../test/dir2! +dir/file +does-not-exist: No such file or directory +abort: file .../test/does-not-exist not found! +baz: unsupported file type (type is symbolic link) +abort: can't commit .../test/baz: unsupported file type! +abort: file .../test/quux not tracked! 
+dir/file % partial subdir commit test adding bar/bar adding foo/foo diff --git a/tests/test-debugcomplete.out b/tests/test-debugcomplete.out --- a/tests/test-debugcomplete.out +++ b/tests/test-debugcomplete.out @@ -87,6 +87,8 @@ rawcommit --cwd --debug --debugger +--encoding +--encodingmode --help --lsprof --noninteractive @@ -112,6 +114,8 @@ rawcommit --daemon-pipefds --debug --debugger +--encoding +--encodingmode --errorlog --help --ipv6 diff --git a/tests/test-encoding b/tests/test-encoding new file mode 100755 --- /dev/null +++ b/tests/test-encoding @@ -0,0 +1,38 @@ +#!/bin/sh + +hg init t +cd t + +# we need a repo with some legacy latin-1 changesets +hg unbundle $TESTDIR/legacy-encoding.hg +hg co + +printf "latin-1 e' encoded: \xe9" > latin-1 +printf "utf-8 e' encoded: \xc3\xa9" > utf-8 +printf "\xe9" > latin-1-tag + +echo % should fail with encoding error +echo "plain old ascii" > a +hg st +HGENCODING=ascii hg ci -l latin-1 -d "0 0" + +echo % these should work +echo "latin-1" > a +HGENCODING=latin-1 hg ci -l latin-1 -d "0 0" +echo "utf-8" > a +HGENCODING=utf-8 hg ci -l utf-8 -d "0 0" + +HGENCODING=latin-1 hg tag -d "0 0" `cat latin-1-tag` + +echo % ascii +hg --encoding ascii log +echo % latin-1 +hg --encoding latin-1 log +echo % utf-8 +hg --encoding utf-8 log +echo % ascii +HGENCODING=ascii hg tags +echo % latin-1 +HGENCODING=latin-1 hg tags +echo % utf-8 +HGENCODING=utf-8 hg tags diff --git a/tests/test-encoding.out b/tests/test-encoding.out new file mode 100644 --- /dev/null +++ b/tests/test-encoding.out @@ -0,0 +1,94 @@ +adding changesets +adding manifests +adding file changes +added 1 changesets with 1 changes to 1 files +(run 'hg update' to get a working copy) +1 files updated, 0 files merged, 0 files removed, 0 files unresolved +% should fail with encoding error +M a +? latin-1 +? latin-1-tag +? utf-8 +abort: decoding near ' encoded: é': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)! + +transaction abort! 
+rollback completed +% these should work +% ascii +changeset: 3:5edfc7acb541 +tag: tip +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: Added tag ? for changeset 91878608adb3 + +changeset: 2:91878608adb3 +tag: ? +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: utf-8 e' encoded: ? + +changeset: 1:6355cacf842e +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin-1 e' encoded: ? + +changeset: 0:60aad1dd20a9 +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin-1 e': ? + +% latin-1 +changeset: 3:5edfc7acb541 +tag: tip +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: Added tag é for changeset 91878608adb3 + +changeset: 2:91878608adb3 +tag: é +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: utf-8 e' encoded: é + +changeset: 1:6355cacf842e +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin-1 e' encoded: é + +changeset: 0:60aad1dd20a9 +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin-1 e': é + +% utf-8 +changeset: 3:5edfc7acb541 +tag: tip +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: Added tag é for changeset 91878608adb3 + +changeset: 2:91878608adb3 +tag: é +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: utf-8 e' encoded: é + +changeset: 1:6355cacf842e +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin-1 e' encoded: é + +changeset: 0:60aad1dd20a9 +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin-1 e': é + +% ascii +tip 3:5edfc7acb541 +? 2:91878608adb3 +% latin-1 +tip 3:5edfc7acb541 +é 2:91878608adb3 +% utf-8 +tip 3:5edfc7acb541 +é 2:91878608adb3