mercurial/bdiff.c
author Vadim Gelfer <vadim.gelfer@gmail.com>
Fri, 07 Jul 2006 15:02:55 -0700
changeset 2577 fa76c5d609c9
parent 2543 860e9c83fc59
child 2600 c4325f0a9b91
permissions -rw-r--r--
bdiff: improve worst case behavior by 100x. on 5.8MB (244.000 lines) text file with similar lines, hash before this change made diff against empty file take 75 seconds. this change improves performance to 0.6 seconds. result is that clone of smallish repo (137MB) with some files like this takes 1 minute instead of 10 minutes. common case of diff is 10% slower now, probably because of worse cache locality. but diff does not affect overall performance in common case (less than 1% of runtime is in diff when it is working ok), so this tradeoff looks good.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     1
/*
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     2
 bdiff.c - efficient binary diff extension for Mercurial
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     3
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     4
 Copyright 2005 Matt Mackall <mpm@selenic.com>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     5
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     8
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     9
 Based roughly on Python difflib
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    10
*/
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    11
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    12
#include <Python.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    13
#include <stdlib.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    14
#include <string.h>
867
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    15
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    16
#ifdef __hpux
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    17
#define inline
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    18
#endif
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    19
1759
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
    20
#ifdef __SUNPRO_C
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
    21
# define inline
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
    22
#endif 
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
    23
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    24
#ifdef _WIN32
551
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    25
#ifdef _MSC_VER
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    26
#define inline __inline
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    27
typedef unsigned long uint32_t;
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    28
#else
510
7f3fc8fd427e More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents: 495
diff changeset
    29
#include <stdint.h>
551
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    30
#endif
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    31
static uint32_t htonl(uint32_t x)
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    32
{
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    33
	return ((x & 0x000000ffUL) << 24) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    34
		((x & 0x0000ff00UL) <<  8) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    35
		((x & 0x00ff0000UL) >>  8) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    36
		((x & 0xff000000UL) >> 24);
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    37
}
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    38
#else
510
7f3fc8fd427e More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents: 495
diff changeset
    39
#include <sys/types.h>
597
e530637ea060 [PATCH] use <arpa/inet.h> instead of <netinet/in.h> for ntohl/htonl
mpm@selenic.com
parents: 553
diff changeset
    40
#include <arpa/inet.h>
2543
860e9c83fc59 Include inttypes.h instead of stdint.h (fixes issue299)
Thomas Arendsen Hein <thomas@intevation.de>
parents: 2483
diff changeset
    41
#include <inttypes.h>
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    42
#endif
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    43
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    44
struct line {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
    45
	int h, len, n, e;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    46
	const char *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    47
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    48
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    49
struct pos {
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    50
	int pos, len;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    51
};
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    52
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    53
struct hunk {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    54
	int a1, a2, b1, b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    55
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    56
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    57
struct hunklist {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    58
	struct hunk *base, *head;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    59
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    60
1397
66fd3bc1cfcf bdiff: change spurious __inline to inline
Matt Mackall <mpm@selenic.com>
parents: 981
diff changeset
    61
static inline uint32_t rol32(uint32_t word, unsigned int shift)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    62
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    63
        return (word << shift) | (word >> (32 - shift));
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    64
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    65
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    66
int splitlines(const char *a, int len, struct line **lr)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    67
{
2577
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    68
	int g, h, i;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    69
	const char *p, *b = a;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    70
	struct line *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    71
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    72
	/* count the lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    73
	i = 1; /* extra line for sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    74
	for (p = a; p < a + len; p++)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    75
		if (*p == '\n' || p == a + len - 1)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    76
			i++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    77
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
    78
	*lr = l = (struct line *)malloc(sizeof(struct line) * i);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    79
	if (!l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    80
		return -1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    81
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    82
	/* build the line array and calculate hashes */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    83
	h = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    84
	for (p = a; p < a + len; p++) {
2577
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    85
		/*
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    86
		 * a simple hash from GNU diff, with better collision
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    87
		 * resistance from hashpjw. this slows down common
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    88
		 * case by 10%, but speeds up worst case by 100x.
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    89
		 */
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    90
		h = *p + rol32(h, 7);
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    91
		if ((g = h & 0xf0000000)) {
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    92
			h ^= g >> 24;
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    93
			h ^= g;
fa76c5d609c9 bdiff: improve worst case behavior by 100x.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2543
diff changeset
    94
		}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    95
		if (*p == '\n' || p == a + len - 1) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    96
			l->len = p - b + 1;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
    97
			l->h = h * l->len;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    98
			l->l = b;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    99
			l->n = -1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   100
			l++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   101
			b = p + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   102
			h = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   103
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   104
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   105
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   106
	/* set up a sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   107
	l->h = l->len = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   108
	l->l = a + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   109
	return i - 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   110
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   111
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   112
int inline cmp(struct line *a, struct line *b)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   113
{
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   114
	return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   115
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   116
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   117
static int equatelines(struct line *a, int an, struct line *b, int bn)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   118
{
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   119
	int i, j, buckets = 1, t;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   120
	struct pos *h;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   121
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   122
	/* build a hash table of the next highest power of 2 */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   123
	while (buckets < bn + 1)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   124
		buckets *= 2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   125
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   126
	h = (struct pos *)malloc(buckets * sizeof(struct pos));
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   127
	buckets = buckets - 1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   128
	if (!h)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   129
		return 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   130
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   131
	/* clear the hash table */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   132
	for (i = 0; i <= buckets; i++) {
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   133
		h[i].pos = -1;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   134
		h[i].len = 0;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   135
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   136
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   137
	/* add lines to the hash table chains */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   138
	for (i = bn - 1; i >= 0; i--) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   139
		/* find the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   140
		for (j = b[i].h & buckets; h[j].pos != -1;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   141
		     j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   142
			if (!cmp(b + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   143
				break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   144
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   145
		/* add to the head of the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   146
		b[i].n = h[j].pos;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   147
		b[i].e = j;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   148
		h[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   149
		h[j].len++; /* keep track of popularity */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   150
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   151
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   152
	/* compute popularity threshold */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   153
	t = (bn >= 200) ? bn / 100 : bn + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   154
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   155
	/* match items in a to their equivalence class in b */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   156
	for (i = 0; i < an; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   157
		/* find the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   158
		for (j = a[i].h & buckets; h[j].pos != -1;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   159
		     j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   160
			if (!cmp(a + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   161
				break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   162
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   163
		a[i].e = j; /* use equivalence class for quick compare */
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   164
		if (h[j].len <= t)
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   165
			a[i].n = h[j].pos; /* point to head of match list */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   166
		else
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   167
			a[i].n = -1; /* too popular */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   168
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   169
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   170
	/* discard hash tables */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   171
	free(h);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   172
	return 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   173
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   174
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   175
static int longest_match(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   176
			 int a1, int a2, int b1, int b2, int *omi, int *omj)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   177
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   178
	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   179
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   180
	for (i = a1; i < a2; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   181
		/* skip things before the current block */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   182
		for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   183
			;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   184
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   185
		/* loop through all lines match a[i] in b */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   186
		for (; j != -1 && j < b2; j = b[j].n) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   187
			/* does this extend an earlier match? */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   188
			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   189
				k = pos[j - 1].len + 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   190
			else
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   191
				k = 1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   192
			pos[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   193
			pos[j].len = k;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   194
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   195
			/* best match so far? */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   196
			if (k > mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   197
				mi = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   198
				mj = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   199
				mk = k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   200
			}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   201
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   202
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   203
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   204
	if (mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   205
		mi = mi - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   206
		mj = mj - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   207
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   208
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   209
	/* expand match to include neighboring popular lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   210
	while (mi - mb > a1 && mj - mb > b1 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   211
	       a[mi - mb - 1].e == b[mj - mb - 1].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   212
		mb++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   213
	while (mi + mk < a2 && mj + mk < b2 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   214
	       a[mi + mk].e == b[mj + mk].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   215
		mk++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   216
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   217
	*omi = mi - mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   218
	*omj = mj - mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   219
	return mk + mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   220
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   221
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   222
static void recurse(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   223
		    int a1, int a2, int b1, int b2, struct hunklist *l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   224
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   225
	int i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   226
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   227
	/* find the longest match in this chunk */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   228
	k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   229
	if (!k)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   230
		return;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   231
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   232
	/* and recurse on the remaining chunks on either side */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   233
	recurse(a, b, pos, a1, i, b1, j, l);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   234
	l->head->a1 = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   235
	l->head->a2 = i + k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   236
	l->head->b1 = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   237
	l->head->b2 = j + k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   238
	l->head++;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   239
	recurse(a, b, pos, i + k, a2, j + k, b2, l);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   240
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   241
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   242
static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   243
{
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   244
	struct hunklist l;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   245
	struct pos *pos;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   246
	int t;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   247
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   248
	/* allocate and fill arrays */
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   249
	t = equatelines(a, an, b, bn);
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   250
	pos = (struct pos *)calloc(bn, sizeof(struct pos));
827
a61728b58dc0 Fix array overflow bug in bdiff
"Wallace, Eric S" <eric.s.wallace@intel.com>
parents: 597
diff changeset
   251
	/* we can't have more matches than lines in the shorter file */
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   252
	l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   253
	                                        ((an<bn ? an:bn) + 1));
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   254
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   255
	if (pos && l.base && t) {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   256
		/* generate the matching block list */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   257
		recurse(a, b, pos, 0, an, 0, bn, &l);
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   258
		l.head->a1 = an;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   259
		l.head->b1 = bn;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   260
		l.head++;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   261
	}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   262
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   263
	free(pos);
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   264
	return l;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   265
}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   266
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   267
static PyObject *blocks(PyObject *self, PyObject *args)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   268
{
435
e731d25ddab2 Fix a compile warning for bdiff
mpm@selenic.com
parents: 433
diff changeset
   269
	PyObject *sa, *sb, *rl = NULL, *m;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   270
	struct line *a, *b;
970
bf375e93073b Fix possible unitialized variable warnings
mpm@selenic.com
parents: 896
diff changeset
   271
	struct hunklist l = {NULL, NULL};
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   272
	struct hunk *h;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   273
	int an, bn, pos = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   274
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   275
	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   276
		return NULL;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   277
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   278
	an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   279
	bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   280
	if (!a || !b)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   281
		goto nomem;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   282
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   283
	l = diff(a, an, b, bn);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   284
	rl = PyList_New(l.head - l.base);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   285
	if (!l.head || !rl)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   286
		goto nomem;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   287
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   288
	for (h = l.base; h != l.head; h++) {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   289
		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   290
		PyList_SetItem(rl, pos, m);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   291
		pos++;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   292
	}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   293
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   294
nomem:
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   295
	free(a);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   296
	free(b);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   297
	free(l.base);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   298
	return rl ? rl : PyErr_NoMemory();
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   299
}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   300
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   301
static PyObject *bdiff(PyObject *self, PyObject *args)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   302
{
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   303
	PyObject *sa, *sb, *result = NULL;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   304
	struct line *al, *bl;
970
bf375e93073b Fix possible unitialized variable warnings
mpm@selenic.com
parents: 896
diff changeset
   305
	struct hunklist l = {NULL, NULL};
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   306
	struct hunk *h;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   307
	char encode[12], *rb;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   308
	int an, bn, len = 0, la = 0, lb = 0;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   309
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   310
	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   311
		return NULL;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   312
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   313
	an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   314
	bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   315
	if (!al || !bl)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   316
		goto nomem;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   317
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   318
	l = diff(al, an, bl, bn);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   319
	if (!l.head)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   320
		goto nomem;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   321
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   322
	/* calculate length of output */
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   323
	for (h = l.base; h != l.head; h++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   324
		if (h->a1 != la || h->b1 != lb)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   325
			len += 12 + bl[h->b1].l - bl[lb].l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   326
		la = h->a2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   327
		lb = h->b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   328
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   329
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   330
	result = PyString_FromStringAndSize(NULL, len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   331
	if (!result)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   332
		goto nomem;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   333
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   334
	/* build binary patch */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   335
	rb = PyString_AsString(result);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   336
	la = lb = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   337
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   338
	for (h = l.base; h != l.head; h++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   339
		if (h->a1 != la || h->b1 != lb) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   340
			len = bl[h->b1].l - bl[lb].l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   341
			*(uint32_t *)(encode)     = htonl(al[la].l - al->l);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   342
			*(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   343
			*(uint32_t *)(encode + 8) = htonl(len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   344
			memcpy(rb, encode, 12);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   345
			memcpy(rb + 12, bl[lb].l, len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   346
			rb += 12 + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   347
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   348
		la = h->a2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   349
		lb = h->b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   350
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   351
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   352
nomem:
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   353
	free(al);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   354
	free(bl);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   355
	free(l.base);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   356
	return result ? result : PyErr_NoMemory();
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   357
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   358
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   359
static char mdiff_doc[] = "Efficient binary diff.";
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   360
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   361
static PyMethodDef methods[] = {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   362
	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   363
	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   364
	{NULL, NULL}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   365
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   366
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   367
PyMODINIT_FUNC initbdiff(void)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   368
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   369
	Py_InitModule3("bdiff", methods, mdiff_doc);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   370
}
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   371