mercurial/mpatch.c
author Stephen Darnell
Wed, 14 Sep 2005 12:22:20 -0500
changeset 1241 3b4f05ff3130
parent 597 e530637ea060
child 1722 681c5c211b92
permissions -rw-r--r--
Add support for cloning with hardlinks on windows. In order to use hardlinks, the win32file module is needed, and this is present in ActivePython. If it isn't present, or hardlinks are not supported on the underlying filesystem, a regular copy is used. When using hardlinks the biggest benefit is probably the saving in space, but cloning can be much quicker. For example cloning the Xen tree (non trivial) without an update goes from about 95s to 15s. Unix-like platforms should be unaffected, although should be more tolerant on filesystems that don't support hard links. (tweaked by mpm to deal with new copyfiles function) --- hg.orig/mercurial/commands.py 2005-09-13 19:32:53.000000000 -0500 +++ hg/mercurial/commands.py 2005-09-14 12:11:34.000000000 -0500 @@ -620,10 +620,6 @@ def clone(ui, source, dest=None, **opts) if other.dev() != -1: abspath = os.path.abspath(source) - copyfile = (os.stat(dest).st_dev == other.dev() - and getattr(os, 'link', None) or shutil.copy2) - if copyfile is not shutil.copy2: - ui.note("cloning by hardlink\n") # we use a lock here because if we race with commit, we can # end up with extra data in the cloned revlogs that's not @@ -638,7 +634,7 @@ def clone(ui, source, dest=None, **opts) for f in files.split(): src = os.path.join(source, ".hg", f) dst = os.path.join(dest, ".hg", f) - util.copyfiles(src, dst, copyfile) + util.copyfiles(src, dst) repo = hg.repository(ui, dest) Index: hg/mercurial/util.py =================================================================== --- hg.orig/mercurial/util.py 2005-09-08 00:15:25.000000000 -0500 +++ hg/mercurial/util.py 2005-09-14 12:16:49.000000000 -0500 @@ -12,7 +12,7 @@ platform-specific details from the core. import os, errno from demandload import * -demandload(globals(), "re cStringIO") +demandload(globals(), "re cStringIO shutil") def binary(s): """return true if a string is binary data using diff's heuristic""" @@ -217,17 +217,28 @@ def rename(src, dst): os.unlink(dst) os.rename(src, dst) -def copyfiles(src, dst, copyfile): - """Copy a directory tree, files are copied using 'copyfile'.""" +def copyfiles(src, dst, hardlink=None): + """Copy a directory tree using hardlinks if possible""" + + if hardlink is None: + hardlink = (os.stat(src).st_dev == + os.stat(os.path.dirname(dst)).st_dev) if os.path.isdir(src): os.mkdir(dst) for name in os.listdir(src): srcname = os.path.join(src, name) dstname = os.path.join(dst, name) - copyfiles(srcname, dstname, copyfile) + copyfiles(srcname, dstname, hardlink) else: - copyfile(src, dst) + if hardlink: + try: + os_link(src, dst) + except: + hardlink = False + shutil.copy2(src, dst) + else: + shutil.copy2(src, dst) def opener(base): """ @@ -244,13 +255,13 @@ def opener(base): if mode[0] != "r": try: - s = os.stat(f) + nlink = nlinks(f) except OSError: d = os.path.dirname(f) if not os.path.isdir(d): os.makedirs(d) else: - if s.st_nlink > 1: + if nlink > 1: file(f + ".tmp", "wb").write(file(f, "rb").read()) rename(f+".tmp", f) @@ -266,10 +277,41 @@ def _makelock_file(info, pathname): def _readlock_file(pathname): return file(pathname).read() +def nlinks(pathname): + """Return number of hardlinks for the given file.""" + return os.stat(pathname).st_nlink + +if hasattr(os, 'link'): + os_link = os.link +else: + def os_link(src, dst): + raise OSError(0, "Hardlinks not supported") + # Platform specific variants if os.name == 'nt': nulldev = 'NUL:' + try: # ActivePython can create hard links using win32file module + import win32file + + def os_link(src, dst): # NB will only succeed on NTFS + win32file.CreateHardLink(dst, src) + + def nlinks(pathname): + """Return number of hardlinks for the given file.""" + try: + fh = win32file.CreateFile(pathname, + win32file.GENERIC_READ, win32file.FILE_SHARE_READ, + None, win32file.OPEN_EXISTING, 0, None) + res = win32file.GetFileInformationByHandle(fh) + fh.Close() + return res[7] + except: + return os.stat(pathname).st_nlink + + except ImportError: + pass + def is_exec(f, last): return last

/*
 mpatch.c - efficient binary patching for Mercurial

 This implements a patch algorithm that's O(m + nlog n) where m is the
 size of the output and n is the number of patches.

 Given a list of binary patches, it unpacks each into a hunk list,
 then combines the hunk lists with a treewise recursion to form a
 single hunk list. This hunk list is then applied to the original
 text.

 The text (or binary) fragments are copied directly from their source
 Python objects into a preallocated output string to avoid the
 allocation of intermediate Python objects. Working memory is about 2x
 the total number of hunks.

 Copyright 2005 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
*/

#include <Python.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#ifdef _MSC_VER
#define inline __inline
typedef unsigned long uint32_t;
#else
#include <stdint.h>
#endif
static uint32_t ntohl(uint32_t x)
{
	return ((x & 0x000000ffUL) << 24) |
		((x & 0x0000ff00UL) <<  8) |
		((x & 0x00ff0000UL) >>  8) |
		((x & 0xff000000UL) >> 24);
}
#else
#include <sys/types.h>
#include <arpa/inet.h>
#endif

static char mpatch_doc[] = "Efficient binary patching.";

struct frag {
	int start, end, len;
	char *data;
};

struct flist {
	struct frag *base, *head, *tail;
};

static struct flist *lalloc(int size)
{
	struct flist *a = NULL;

	a = malloc(sizeof(struct flist));
	if (a) {
		a->base = malloc(sizeof(struct frag) * size);
		if (!a->base) {
			free(a);
			a = NULL;
		} else
			a->head = a->tail = a->base;
	}
	return a;
}

static void lfree(struct flist *a)
{
	if (a) {
		free(a->base);
		free(a);
	}
}

static int lsize(struct flist *a)
{
	return a->tail - a->head;
}

/* move hunks in source that are less cut to dest, compensating
   for changes in offset. the last hunk may be split if necessary.
*/
static int gather(struct flist *dest, struct flist *src, int cut, int offset)
{
	struct frag *d = dest->tail, *s = src->head;
	int postend, c, l;

	while (s != src->tail) {
		if (s->start + offset >= cut)
			break; /* we've gone far enough */

		postend = offset + s->start + s->len;
		if (postend <= cut) {
			/* save this hunk */
			offset += s->start + s->len - s->end;
			*d++ = *s++;
		}
		else {
			/* break up this hunk */
			c = cut - offset;
			if (s->end < c)
				c = s->end;
			l = cut - offset - s->start;
			if (s->len < l)
				l = s->len;

			offset += s->start + l - c;

			d->start = s->start;
			d->end = c;
			d->len = l;
			d->data = s->data;
			d++;
			s->start = c;
			s->len = s->len - l;
			s->data = s->data + l;

			break;
		}
	}

	dest->tail = d;
	src->head = s;
	return offset;
}

/* like gather, but with no output list */
static int discard(struct flist *src, int cut, int offset)
{
	struct frag *s = src->head;
	int postend, c, l;

	while (s != src->tail) {
		if (s->start + offset >= cut)
			break;

		postend = offset + s->start + s->len;
		if (postend <= cut) {
			offset += s->start + s->len - s->end;
			s++;
		}
		else {
			c = cut - offset;
			if (s->end < c)
				c = s->end;
			l = cut - offset - s->start;
			if (s->len < l)
				l = s->len;

			offset += s->start + l - c;
			s->start = c;
			s->len = s->len - l;
			s->data = s->data + l;

			break;
		}
	}

	src->head = s;
	return offset;
}

/* combine hunk lists a and b, while adjusting b for offset changes in a/
   this deletes a and b and returns the resultant list. */
static struct flist *combine(struct flist *a, struct flist *b)
{
	struct flist *c = NULL;
	struct frag *bh, *ct;
	int offset = 0, post;

	if (a && b)
		c = lalloc((lsize(a) + lsize(b)) * 2);

	if (c) {

		for (bh = b->head; bh != b->tail; bh++) {
			/* save old hunks */
			offset = gather(c, a, bh->start, offset);

			/* discard replaced hunks */
			post = discard(a, bh->end, offset);

			/* insert new hunk */
			ct = c->tail;
			ct->start = bh->start - offset;
			ct->end = bh->end - post;
			ct->len = bh->len;
			ct->data = bh->data;
			c->tail++;
			offset = post;
		}

		/* hold on to tail from a */
		memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
		c->tail += lsize(a);
	}

	lfree(a);
	lfree(b);
	return c;
}

/* decode a binary patch into a hunk list */
static struct flist *decode(char *bin, int len)
{
	struct flist *l;
	struct frag *lt;
	char *end = bin + len;
	char decode[12]; /* for dealing with alignment issues */

	/* assume worst case size, we won't have many of these lists */
	l = lalloc(len / 12);
	lt = l->tail;

	while (bin < end) {
		memcpy(decode, bin, 12);
		lt->start = ntohl(*(uint32_t *)decode);
		lt->end = ntohl(*(uint32_t *)(decode + 4));
		lt->len = ntohl(*(uint32_t *)(decode + 8));
		lt->data = bin + 12;
		bin += 12 + lt->len;
		lt++;
	}

	l->tail = lt;
	return l;
}

/* calculate the size of resultant text */
static int calcsize(int len, struct flist *l)
{
	int outlen = 0, last = 0;
	struct frag *f = l->head;

	while (f != l->tail) {
		outlen += f->start - last;
		last = f->end;
		outlen += f->len;
		f++;
	}

	outlen += len - last;
	return outlen;
}

static void apply(char *buf, char *orig, int len, struct flist *l)
{
	struct frag *f = l->head;
	int last = 0;
	char *p = buf;

	while (f != l->tail) {
		memcpy(p, orig + last, f->start - last);
		p += f->start - last;
		memcpy(p, f->data, f->len);
		last = f->end;
		p += f->len;
		f++;
	}
	memcpy(p, orig + last, len - last);
}

/* recursively generate a patch of all bins between start and end */
static struct flist *fold(PyObject *bins, int start, int end)
{
	int len;

	if (start + 1 == end) {
		/* trivial case, output a decoded list */
		PyObject *tmp = PyList_GetItem(bins, start);
		if (!tmp)
			return NULL;
		return decode(PyString_AsString(tmp), PyString_Size(tmp));
	}

	/* divide and conquer, memory management is elsewhere */
	len = (end - start) / 2;
	return combine(fold(bins, start, start + len),
		       fold(bins, start + len, end));
}

static PyObject *
patches(PyObject *self, PyObject *args)
{
	PyObject *text, *bins, *result;
	struct flist *patch;
	char *in, *out;
	int len, outlen;

	if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
		return NULL;

	len = PyList_Size(bins);
	if (!len) {
		/* nothing to do */
		Py_INCREF(text);
		return text;
	}

	patch = fold(bins, 0, len);
	if (!patch)
		return PyErr_NoMemory();

	outlen = calcsize(PyString_Size(text), patch);
	result = PyString_FromStringAndSize(NULL, outlen);
	if (result) {
		in = PyString_AsString(text);
		out = PyString_AsString(result);
		apply(out, in, PyString_Size(text), patch);
	}

	lfree(patch);
	return result;
}

static PyMethodDef methods[] = {
	{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
	{NULL, NULL}
};

PyMODINIT_FUNC
initmpatch(void)
{
	Py_InitModule3("mpatch", methods, mpatch_doc);
}