mercurial/util.py
changeset 3771 f96c158ea3a3
parent 3770 96095d9ff1f8
child 3785 1427949b8f80
equal deleted inserted replaced
3770:96095d9ff1f8 3771:f96c158ea3a3
    16 from demandload import *
    16 from demandload import *
    17 demandload(globals(), "cStringIO errno getpass popen2 re shutil sys tempfile")
    17 demandload(globals(), "cStringIO errno getpass popen2 re shutil sys tempfile")
    18 demandload(globals(), "os threading time calendar ConfigParser locale")
    18 demandload(globals(), "os threading time calendar ConfigParser locale")
    19 
    19 
    20 _encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
    20 _encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
       
    21 _encodingmode = os.environ.get("HGENCODINGMODE", "strict")
       
    22 
       
    23 def tolocal(s):
       
    24     """
       
    25     Convert a string from internal UTF-8 to local encoding
       
    26 
       
    27     All internal strings should be UTF-8 but some repos before the
       
    28     implementation of locale support may contain latin1 or possibly
       
    29     other character sets. We attempt to decode everything strictly
       
    30     using UTF-8, then Latin-1, and failing that, we use UTF-8 and
       
    31     replace unknown characters.
       
    32     """
       
    33     for e in "utf-8 latin1".split():
       
    34         try:
       
    35             u = s.decode(e) # attempt strict decoding
       
    36             return u.encode(_encoding, "replace")
       
    37         except UnicodeDecodeError:
       
    38             pass
       
    39     u = s.decode("utf-8", "replace") # last ditch
       
    40     return u.encode(_encoding, "replace")
       
    41 
       
    42 def fromlocal(s):
       
    43     """
       
    44     Convert a string from the local character encoding to UTF-8
       
    45 
       
    46     We attempt to decode strings using the encoding mode set by
       
    47     HG_ENCODINGMODE, which defaults to 'strict'. In this mode, unknown
       
    48     characters will cause an error message. Other modes include
       
    49     'replace', which replaces unknown characters with a special
       
    50     Unicode character, and 'ignore', which drops the character.
       
    51     """
       
    52     try:
       
    53         return s.decode(_encoding, _encodingmode).encode("utf-8")
       
    54     except UnicodeDecodeError, inst:
       
    55         sub = s[max(0, inst.start-10):inst.start+10]
       
    56         raise Abort("decoding near '%s': %s!\n" % (sub, inst))
       
    57 
       
    58 def locallen(s):
       
    59     """Find the length in characters of a local string"""
       
    60     return len(s.decode(_encoding, "replace"))
       
    61 
       
    62 def localsub(s, a, b=None):
       
    63     try:
       
    64         u = s.decode(_encoding, _encodingmode)
       
    65         if b is not None:
       
    66             u = u[a:b]
       
    67         else:
       
    68             u = u[:a]
       
    69         return u.encode(_encoding, _encodingmode)
       
    70     except UnicodeDecodeError, inst:
       
    71         sub = s[max(0, inst.start-10), inst.start+10]
       
    72         raise Abort("decoding near '%s': %s!\n" % (sub, inst))
    21 
    73 
    22 # used by parsedate
    74 # used by parsedate
    23 defaultdateformats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M',
    75 defaultdateformats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M',
    24                       '%a %b %d %H:%M:%S %Y')
    76                       '%a %b %d %H:%M:%S %Y')
    25 
    77