mercurial/base85.c
author Brendan Cully <brendan@kublai.com>
Fri, 06 Oct 2006 13:01:54 -0700
changeset 3279 1f2c3983a6c5
child 3287 e93c926e069e
permissions -rw-r--r--
Add a base85 codec
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
3279
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     1
/*
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     2
 base85 codec
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     3
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     4
 Copyright 2006 Brendan Cully <brendan@kublai.com>
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     5
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     8
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     9
 Largely based on git's implementation
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    10
*/
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    11
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    12
#include <Python.h>
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    13
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    14
static const char b85chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    15
	"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    16
static char b85dec[256];
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    17
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    18
static void
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    19
b85prep(void)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    20
{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    21
	int i;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    22
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    23
	memset(b85dec, 0, sizeof(b85dec));
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    24
	for (i = 0; i < sizeof(b85chars); i++)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    25
		b85dec[(int)(b85chars[i])] = i + 1;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    26
}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    27
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    28
static PyObject *
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    29
b85encode(PyObject *self, PyObject *args)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    30
{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    31
	const unsigned char *text;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    32
	PyObject *out;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    33
	char *dst;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    34
	int len, olen, i;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    35
	unsigned int acc, val, ch;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    36
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    37
	if (!PyArg_ParseTuple(args, "s#", &text, &len))
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    38
		return NULL;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    39
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    40
	olen = (len + 3) / 4 * 5;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    41
	if (!(out = PyString_FromStringAndSize(NULL, olen)))
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    42
		return NULL;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    43
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    44
	dst = PyString_AS_STRING(out);
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    45
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    46
	while (len)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    47
	{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    48
		acc = 0;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    49
		for (i = 24; i >= 0; i -= 8) {
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    50
			ch = *text++;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    51
			acc |= ch << i;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    52
			if (--len == 0)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    53
				break;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    54
		}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    55
		for (i = 4; i >= 0; i--) {
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    56
			val = acc % 85;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    57
			acc /= 85;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    58
			dst[i] = b85chars[val];
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    59
		}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    60
		dst += 5;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    61
	}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    62
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    63
	return out;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    64
}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    65
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    66
static PyObject *
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    67
b85decode(PyObject *self, PyObject *args)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    68
{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    69
	PyObject *out;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    70
	const char *text;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    71
	char *dst;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    72
	int len, i, j, olen, c;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    73
	unsigned int acc;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    74
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    75
	if (!PyArg_ParseTuple(args, "s#", &text, &len))
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    76
		return NULL;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    77
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    78
	olen = (len + 4) / 5 * 4;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    79
	if (!(out = PyString_FromStringAndSize(NULL, olen)))
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    80
		return NULL;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    81
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    82
	dst = PyString_AS_STRING(out);
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    83
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    84
	for (i = 1; len; i++)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    85
	{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    86
		acc = 0;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    87
		for (j = 0; j < 4 && --len; j++)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    88
		{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    89
			c = b85dec[(int)*text++] - 1;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    90
			if (c < 0)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    91
				return PyErr_Format(PyExc_ValueError, "Bad base85 character at position %d", i);
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    92
			acc = acc * 85 + c;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    93
		}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    94
		if (len--)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    95
		{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    96
			c = b85dec[(int)*text++] - 1;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    97
			if (c < 0)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    98
				return PyErr_Format(PyExc_ValueError, "Bad base85 character at position %d", i);
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    99
		}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   100
		else
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   101
			c = 0;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   102
		/* overflow detection: 0xffffffff == "|NsC0",
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   103
		 * "|NsC" == 0x03030303 */
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   104
		if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   105
			return PyErr_Format(PyExc_ValueError, "Bad base85 sequence at position %d", i);
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   106
		
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   107
		acc += c;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   108
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   109
		for (j = 0; j < 4; j++)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   110
		{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   111
			acc = (acc << 8) | (acc >> 24);
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   112
			*dst++ = (char)acc;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   113
		}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   114
	}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   115
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   116
	return out;
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   117
}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   118
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   119
static char base85_doc[] = "Base85 Data Encoding";
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   120
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   121
static PyMethodDef methods[] = {
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   122
	{"b85encode", b85encode, METH_VARARGS, "encode text in base85\n"},
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   123
	{"b85decode", b85decode, METH_VARARGS, "decode base85 text\n"},
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   124
	{NULL, NULL}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   125
};
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   126
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   127
PyMODINIT_FUNC initbase85(void)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   128
{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   129
	Py_InitModule3("base85", methods, base85_doc);
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   130
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   131
	b85prep();
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   132
}