mercurial/bdiff.c
author Maxim Dounin <mdounin@mdounin.ru>
Sat, 27 Oct 2007 16:27:55 +0400
changeset 5483 0c43f87baba3
parent 5452 82b4ff3abbcd
permissions -rw-r--r--
Fix file-changed-to-dir and dir-to-file commits (issue660). Allow adding to dirstate files that clash with previously existing but marked for removal. Protect from reintroducing clashes by revert. This change doesn't address related issues with update. Current workaround is to do "clean" update by manually removing conflicting files/dirs from working directory.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     1
/*
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     2
 bdiff.c - efficient binary diff extension for Mercurial
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     3
2858
345bac2bc4ec update copyrights.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2600
diff changeset
     4
 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     5
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     8
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     9
 Based roughly on Python difflib
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    10
*/
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    11
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    12
#include <Python.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    13
#include <stdlib.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    14
#include <string.h>
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
    15
#include <limits.h>
867
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    16
3547
8c617d48564a add AIX to the list of compilers that don't have inline keyword
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 3369
diff changeset
    17
#if defined __hpux || defined __SUNPRO_C || defined _AIX
1759
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
    18
# define inline
2600
c4325f0a9b91 clean up trailing white space.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2577
diff changeset
    19
#endif
1759
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
    20
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    21
#ifdef _WIN32
551
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    22
#ifdef _MSC_VER
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    23
#define inline __inline
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    24
typedef unsigned long uint32_t;
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    25
#else
510
7f3fc8fd427e More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents: 495
diff changeset
    26
#include <stdint.h>
551
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    27
#endif
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    28
static uint32_t htonl(uint32_t x)
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    29
{
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    30
	return ((x & 0x000000ffUL) << 24) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    31
		((x & 0x0000ff00UL) <<  8) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    32
		((x & 0x00ff0000UL) >>  8) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    33
		((x & 0xff000000UL) >> 24);
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    34
}
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    35
#else
510
7f3fc8fd427e More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents: 495
diff changeset
    36
#include <sys/types.h>
4073
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3547
diff changeset
    37
#ifdef __BEOS__
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3547
diff changeset
    38
#include <ByteOrder.h>
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3547
diff changeset
    39
#else
597
e530637ea060 [PATCH] use <arpa/inet.h> instead of <netinet/in.h> for ntohl/htonl
mpm@selenic.com
parents: 553
diff changeset
    40
#include <arpa/inet.h>
4073
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3547
diff changeset
    41
#endif
2543
860e9c83fc59 Include inttypes.h instead of stdint.h (fixes issue299)
Thomas Arendsen Hein <thomas@intevation.de>
parents: 2483
diff changeset
    42
#include <inttypes.h>
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    43
#endif
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    44
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    45
struct line {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
    46
	int h, len, n, e;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    47
	const char *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    48
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    49
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    50
struct pos {
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    51
	int pos, len;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    52
};
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    53
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    54
struct hunk {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    55
	int a1, a2, b1, b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    56
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    57
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    58
struct hunklist {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    59
	struct hunk *base, *head;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    60
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    61
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    62
int splitlines(const char *a, int len, struct line **lr)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    63
{
5366
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5365
diff changeset
    64
	int h, i;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    65
	const char *p, *b = a;
5364
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5363
diff changeset
    66
	const char * const plast = a + len - 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    67
	struct line *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    68
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    69
	/* count the lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    70
	i = 1; /* extra line for sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    71
	for (p = a; p < a + len; p++)
5364
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5363
diff changeset
    72
		if (*p == '\n' || p == plast)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    73
			i++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    74
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
    75
	*lr = l = (struct line *)malloc(sizeof(struct line) * i);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    76
	if (!l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    77
		return -1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    78
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    79
	/* build the line array and calculate hashes */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    80
	h = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    81
	for (p = a; p < a + len; p++) {
5366
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5365
diff changeset
    82
		/* Leonid Yuriev's hash */
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5365
diff changeset
    83
                h = (h * 1664525) + *p + 1013904223;
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5365
diff changeset
    84
5364
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5363
diff changeset
    85
		if (*p == '\n' || p == plast) {
5366
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5365
diff changeset
    86
			l->h = h;
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5365
diff changeset
    87
			h = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    88
			l->len = p - b + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    89
			l->l = b;
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
    90
			l->n = INT_MAX;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    91
			l++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    92
			b = p + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    93
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    94
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    95
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    96
	/* set up a sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    97
	l->h = l->len = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    98
	l->l = a + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    99
	return i - 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   100
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   101
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   102
int inline cmp(struct line *a, struct line *b)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   103
{
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   104
	return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   105
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   106
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   107
static int equatelines(struct line *a, int an, struct line *b, int bn)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   108
{
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5366
diff changeset
   109
	int i, j, buckets = 1, t, scale;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5366
diff changeset
   110
	struct pos *h = NULL;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   111
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   112
	/* build a hash table of the next highest power of 2 */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   113
	while (buckets < bn + 1)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   114
		buckets *= 2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   115
5363
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   116
	/* try to allocate a large hash table to avoid collisions */
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5366
diff changeset
   117
	for (scale = 4; scale; scale /= 2) {
5363
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   118
		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5366
diff changeset
   119
		if (h)
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5366
diff changeset
   120
			break;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5366
diff changeset
   121
	}
5363
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   122
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   123
	if (!h)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   124
		return 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   125
5363
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   126
	buckets = buckets * scale - 1;
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   127
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   128
	/* clear the hash table */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   129
	for (i = 0; i <= buckets; i++) {
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
   130
		h[i].pos = INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   131
		h[i].len = 0;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   132
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   133
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   134
	/* add lines to the hash table chains */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   135
	for (i = bn - 1; i >= 0; i--) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   136
		/* find the equivalence class */
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
   137
		for (j = b[i].h & buckets; h[j].pos != INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   138
		     j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   139
			if (!cmp(b + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   140
				break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   141
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   142
		/* add to the head of the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   143
		b[i].n = h[j].pos;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   144
		b[i].e = j;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   145
		h[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   146
		h[j].len++; /* keep track of popularity */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   147
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   148
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   149
	/* compute popularity threshold */
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5366
diff changeset
   150
	t = (bn >= 4000) ? bn / 1000 : bn + 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   151
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   152
	/* match items in a to their equivalence class in b */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   153
	for (i = 0; i < an; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   154
		/* find the equivalence class */
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
   155
		for (j = a[i].h & buckets; h[j].pos != INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   156
		     j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   157
			if (!cmp(a + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   158
				break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   159
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   160
		a[i].e = j; /* use equivalence class for quick compare */
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   161
		if (h[j].len <= t)
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   162
			a[i].n = h[j].pos; /* point to head of match list */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   163
		else
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
   164
			a[i].n = INT_MAX; /* too popular */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   165
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   166
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   167
	/* discard hash tables */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   168
	free(h);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   169
	return 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   170
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   171
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   172
static int longest_match(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   173
			 int a1, int a2, int b1, int b2, int *omi, int *omj)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   174
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   175
	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   176
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   177
	for (i = a1; i < a2; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   178
		/* skip things before the current block */
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
   179
		for (j = a[i].n; j < b1; j = b[j].n)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   180
			;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   181
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   182
		/* loop through all lines match a[i] in b */
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
   183
		for (; j < b2; j = b[j].n) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   184
			/* does this extend an earlier match? */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   185
			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   186
				k = pos[j - 1].len + 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   187
			else
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   188
				k = 1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   189
			pos[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   190
			pos[j].len = k;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   191
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   192
			/* best match so far? */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   193
			if (k > mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   194
				mi = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   195
				mj = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   196
				mk = k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   197
			}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   198
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   199
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   200
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   201
	if (mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   202
		mi = mi - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   203
		mj = mj - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   204
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   205
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   206
	/* expand match to include neighboring popular lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   207
	while (mi - mb > a1 && mj - mb > b1 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   208
	       a[mi - mb - 1].e == b[mj - mb - 1].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   209
		mb++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   210
	while (mi + mk < a2 && mj + mk < b2 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   211
	       a[mi + mk].e == b[mj + mk].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   212
		mk++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   213
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   214
	*omi = mi - mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   215
	*omj = mj - mb;
5365
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5364
diff changeset
   216
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   217
	return mk + mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   218
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   219
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   220
static void recurse(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   221
		    int a1, int a2, int b1, int b2, struct hunklist *l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   222
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   223
	int i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   224
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   225
	/* find the longest match in this chunk */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   226
	k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   227
	if (!k)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   228
		return;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   229
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   230
	/* and recurse on the remaining chunks on either side */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   231
	recurse(a, b, pos, a1, i, b1, j, l);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   232
	l->head->a1 = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   233
	l->head->a2 = i + k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   234
	l->head->b1 = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   235
	l->head->b2 = j + k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   236
	l->head++;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   237
	recurse(a, b, pos, i + k, a2, j + k, b2, l);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   238
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   239
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   240
static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   241
{
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   242
	struct hunklist l;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   243
	struct pos *pos;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   244
	int t;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   245
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   246
	/* allocate and fill arrays */
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   247
	t = equatelines(a, an, b, bn);
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   248
	pos = (struct pos *)calloc(bn, sizeof(struct pos));
827
a61728b58dc0 Fix array overflow bug in bdiff
"Wallace, Eric S" <eric.s.wallace@intel.com>
parents: 597
diff changeset
   249
	/* we can't have more matches than lines in the shorter file */
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   250
	l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   251
	                                        ((an<bn ? an:bn) + 1));
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   252
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   253
	if (pos && l.base && t) {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   254
		/* generate the matching block list */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   255
		recurse(a, b, pos, 0, an, 0, bn, &l);
4131
1ca664c964e0 don't return uninitialized memory from bdiff.blocks()
Erling Ellingsen <erlingalf@gmail.com>
parents: 3547
diff changeset
   256
		l.head->a1 = l.head->a2 = an;
1ca664c964e0 don't return uninitialized memory from bdiff.blocks()
Erling Ellingsen <erlingalf@gmail.com>
parents: 3547
diff changeset
   257
		l.head->b1 = l.head->b2 = bn;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   258
		l.head++;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   259
	}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   260
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   261
	free(pos);
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   262
	return l;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   263
}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   264
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   265
static PyObject *blocks(PyObject *self, PyObject *args)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   266
{
435
e731d25ddab2 Fix a compile warning for bdiff
mpm@selenic.com
parents: 433
diff changeset
   267
	PyObject *sa, *sb, *rl = NULL, *m;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   268
	struct line *a, *b;
970
bf375e93073b Fix possible unitialized variable warnings
mpm@selenic.com
parents: 896
diff changeset
   269
	struct hunklist l = {NULL, NULL};
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   270
	struct hunk *h;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   271
	int an, bn, pos = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   272
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   273
	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   274
		return NULL;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   275
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   276
	an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   277
	bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   278
	if (!a || !b)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   279
		goto nomem;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   280
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   281
	l = diff(a, an, b, bn);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   282
	rl = PyList_New(l.head - l.base);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   283
	if (!l.head || !rl)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   284
		goto nomem;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   285
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   286
	for (h = l.base; h != l.head; h++) {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   287
		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   288
		PyList_SetItem(rl, pos, m);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   289
		pos++;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   290
	}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   291
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   292
nomem:
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   293
	free(a);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   294
	free(b);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   295
	free(l.base);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   296
	return rl ? rl : PyErr_NoMemory();
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   297
}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   298
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   299
static PyObject *bdiff(PyObject *self, PyObject *args)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   300
{
3333
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2858
diff changeset
   301
	char *sa, *sb;
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2858
diff changeset
   302
	PyObject *result = NULL;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   303
	struct line *al, *bl;
970
bf375e93073b Fix possible unitialized variable warnings
mpm@selenic.com
parents: 896
diff changeset
   304
	struct hunklist l = {NULL, NULL};
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   305
	struct hunk *h;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   306
	char encode[12], *rb;
3333
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2858
diff changeset
   307
	int an, bn, len = 0, la, lb;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   308
3369
4bad632913d8 python2.5 PyArg_ParseTuple fix
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 3333
diff changeset
   309
	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   310
		return NULL;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   311
3333
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2858
diff changeset
   312
	an = splitlines(sa, la, &al);
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2858
diff changeset
   313
	bn = splitlines(sb, lb, &bl);
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   314
	if (!al || !bl)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   315
		goto nomem;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   316
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   317
	l = diff(al, an, bl, bn);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   318
	if (!l.head)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   319
		goto nomem;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   320
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   321
	/* calculate length of output */
3333
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2858
diff changeset
   322
	la = lb = 0;
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   323
	for (h = l.base; h != l.head; h++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   324
		if (h->a1 != la || h->b1 != lb)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   325
			len += 12 + bl[h->b1].l - bl[lb].l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   326
		la = h->a2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   327
		lb = h->b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   328
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   329
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   330
	result = PyString_FromStringAndSize(NULL, len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   331
	if (!result)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   332
		goto nomem;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   333
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   334
	/* build binary patch */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   335
	rb = PyString_AsString(result);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   336
	la = lb = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   337
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   338
	for (h = l.base; h != l.head; h++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   339
		if (h->a1 != la || h->b1 != lb) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   340
			len = bl[h->b1].l - bl[lb].l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   341
			*(uint32_t *)(encode)     = htonl(al[la].l - al->l);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   342
			*(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   343
			*(uint32_t *)(encode + 8) = htonl(len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   344
			memcpy(rb, encode, 12);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   345
			memcpy(rb + 12, bl[lb].l, len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   346
			rb += 12 + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   347
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   348
		la = h->a2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   349
		lb = h->b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   350
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   351
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   352
nomem:
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   353
	free(al);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   354
	free(bl);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   355
	free(l.base);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   356
	return result ? result : PyErr_NoMemory();
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   357
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   358
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   359
static char mdiff_doc[] = "Efficient binary diff.";
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   360
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   361
static PyMethodDef methods[] = {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   362
	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   363
	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   364
	{NULL, NULL}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   365
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   366
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   367
PyMODINIT_FUNC initbdiff(void)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   368
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   369
	Py_InitModule3("bdiff", methods, mdiff_doc);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   370
}
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   371