# HG changeset patch # User Alexis S. L. Carvalho # Date 1165622465 7200 # Node ID d1ce5461beed8073f5808758a047d54604c4b5ed # Parent a7b61c3b0f933f7faba6cc9a87e88abf82a14bd2 Allow the user to specify the fallback encoding for the changelog Example: use EUC-JP instead of ISO-8859-1: [ui] fallbackencoding = EUC-JP diff --git a/doc/hgrc.5.txt b/doc/hgrc.5.txt --- a/doc/hgrc.5.txt +++ b/doc/hgrc.5.txt @@ -388,6 +388,9 @@ ui:: Print debugging information. True or False. Default is False. editor;; The editor to use during a commit. Default is $EDITOR or "vi". + fallbackencoding;; + Encoding to try if it's not possible to decode the changelog using + UTF-8. Default is ISO-8859-1. ignore;; A file to read per-user ignore patterns from. This file should be in the same format as a repository-wide .hgignore file. This option diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -72,6 +72,10 @@ class localrepository(repo.repository): self.manifest = manifest.manifest(self.sopener, v) self.changelog = changelog.changelog(self.sopener, v) + fallback = self.ui.config('ui', 'fallbackencoding') + if fallback: + util._fallbackencoding = fallback + # the changelog might not have the inline index flag # on. If the format of the changelog is the same as found in # .hgrc, apply any flags found in the .hgrc as well. diff --git a/mercurial/util.py b/mercurial/util.py --- a/mercurial/util.py +++ b/mercurial/util.py @@ -19,6 +19,7 @@ demandload(globals(), "os threading time _encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding() _encodingmode = os.environ.get("HGENCODINGMODE", "strict") +_fallbackencoding = 'ISO-8859-1' def tolocal(s): """ @@ -30,7 +31,7 @@ def tolocal(s): using UTF-8, then Latin-1, and failing that, we use UTF-8 and replace unknown characters. """ - for e in "utf-8 latin1".split(): + for e in ('UTF-8', _fallbackencoding): try: u = s.decode(e) # attempt strict decoding return u.encode(_encoding, "replace") diff --git a/tests/legacy-encoding.hg b/tests/legacy-encoding.hg index d050be8ebfef9771e5e1dffdd1cb98be3a51765a..7b1a00d39063a15a9f92d6cb3d253117063b4a26 GIT binary patch literal 530 zc$@(e0`2`sM=>x$T4*^jL0KkKStSe@vj6}S|NsA+SN>kDXnhgLwKxp*_hDLw^h#BfY0002fKn)scKlgvdeSGQB8BHtlA*j4y7!8C~dxu3oj9G2XKsg_Urpan?~t=zO;a zT_o;E1t&SQ7!2r`^i?cIYB|*Fh%p@yC2mlx;4(O|jR`SCk+zYP(GH{?L^}$l6%MY^ z(hQ(S%d2}mw;GF}14W?I7(t`3J5ZE|$45v>LxBuLD9U=)H&aSZq+wUevS!iy>0AF` z6w^ZbSXa1&o>6C5+aWX~gp$`Gj1tTk0H=tStY8Hz^3~~&M0`oJjRU5}+@%11a7cbS zV>a|s>@*Nb227Lg9l#uXn;{-*?kw3#FFOpkMz!Ec#FYdiz9o&dDuK9CF3}%}iNMMg z$af(s7}9$*3Kl{&_tpVVF!>yk&hn>K3G)x%$*6iC#L#K9KAa-WCeuQG+y@4Ro&#fq UprIU>zJH6kBAh5l5{3*}fcePY6951J diff --git a/tests/test-encoding b/tests/test-encoding --- a/tests/test-encoding +++ b/tests/test-encoding @@ -47,3 +47,8 @@ echo % latin-1 HGENCODING=latin-1 hg branches echo % utf-8 HGENCODING=utf-8 hg branches + +echo '[ui]' >> .hg/hgrc +echo 'fallbackencoding = euc-jp' >> .hg/hgrc +echo % utf-8 +HGENCODING=utf-8 hg log diff --git a/tests/test-encoding.out b/tests/test-encoding.out --- a/tests/test-encoding.out +++ b/tests/test-encoding.out @@ -1,7 +1,7 @@ adding changesets adding manifests adding file changes -added 1 changesets with 1 changes to 1 files +added 2 changesets with 2 changes to 1 files (run 'hg update' to get a working copy) 1 files updated, 0 files merged, 0 files removed, 0 files unresolved % should fail with encoding error @@ -15,104 +15,153 @@ transaction abort! rollback completed % these should work % ascii -changeset: 4:d8a5d9eaf41e +changeset: 5:e4ed49b8a8f0 branch: ? tag: tip user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin1 branch -changeset: 3:5edfc7acb541 +changeset: 4:a02ca5a58e99 user: test date: Thu Jan 01 00:00:00 1970 +0000 -summary: Added tag ? for changeset 91878608adb3 +summary: Added tag ? for changeset d47908dab82f -changeset: 2:91878608adb3 +changeset: 3:d47908dab82f tag: ? user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: utf-8 e' encoded: ? -changeset: 1:6355cacf842e +changeset: 2:9db1985f3097 user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin-1 e' encoded: ? +changeset: 1:af6e0db4427c +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: euc-jp: ?????? = u'\u65e5\u672c\u8a9e' + changeset: 0:60aad1dd20a9 user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin-1 e': ? % latin-1 -changeset: 4:d8a5d9eaf41e +changeset: 5:e4ed49b8a8f0 branch: é tag: tip user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin1 branch -changeset: 3:5edfc7acb541 +changeset: 4:a02ca5a58e99 user: test date: Thu Jan 01 00:00:00 1970 +0000 -summary: Added tag é for changeset 91878608adb3 +summary: Added tag é for changeset d47908dab82f -changeset: 2:91878608adb3 +changeset: 3:d47908dab82f tag: é user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: utf-8 e' encoded: é -changeset: 1:6355cacf842e +changeset: 2:9db1985f3097 user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin-1 e' encoded: é +changeset: 1:af6e0db4427c +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: euc-jp: ÆüËܸì = u'\u65e5\u672c\u8a9e' + changeset: 0:60aad1dd20a9 user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin-1 e': é % utf-8 -changeset: 4:d8a5d9eaf41e +changeset: 5:e4ed49b8a8f0 branch: é tag: tip user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin1 branch -changeset: 3:5edfc7acb541 +changeset: 4:a02ca5a58e99 user: test date: Thu Jan 01 00:00:00 1970 +0000 -summary: Added tag é for changeset 91878608adb3 +summary: Added tag é for changeset d47908dab82f -changeset: 2:91878608adb3 +changeset: 3:d47908dab82f tag: é user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: utf-8 e' encoded: é -changeset: 1:6355cacf842e +changeset: 2:9db1985f3097 user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin-1 e' encoded: é +changeset: 1:af6e0db4427c +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: euc-jp: ÆüËܸì = u'\u65e5\u672c\u8a9e' + changeset: 0:60aad1dd20a9 user: test date: Thu Jan 01 00:00:00 1970 +0000 summary: latin-1 e': é % ascii -tip 4:d8a5d9eaf41e -? 2:91878608adb3 +tip 5:e4ed49b8a8f0 +? 3:d47908dab82f % latin-1 -tip 4:d8a5d9eaf41e -é 2:91878608adb3 +tip 5:e4ed49b8a8f0 +é 3:d47908dab82f +% utf-8 +tip 5:e4ed49b8a8f0 +é 3:d47908dab82f +% ascii +? 5:e4ed49b8a8f0 +% latin-1 +é 5:e4ed49b8a8f0 +% utf-8 +é 5:e4ed49b8a8f0 % utf-8 -tip 4:d8a5d9eaf41e -é 2:91878608adb3 -% ascii -? 4:d8a5d9eaf41e -% latin-1 -é 4:d8a5d9eaf41e -% utf-8 -é 4:d8a5d9eaf41e +changeset: 5:e4ed49b8a8f0 +branch: é +tag: tip +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin1 branch + +changeset: 4:a02ca5a58e99 +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: Added tag é for changeset d47908dab82f + +changeset: 3:d47908dab82f +tag: é +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: utf-8 e' encoded: é + +changeset: 2:9db1985f3097 +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin-1 e' encoded: é + +changeset: 1:af6e0db4427c +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: euc-jp: 日本語 = u'\u65e5\u672c\u8a9e' + +changeset: 0:60aad1dd20a9 +user: test +date: Thu Jan 01 00:00:00 1970 +0000 +summary: latin-1 e': � +