85 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); |
89 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); |
86 } |
90 } |
87 |
91 |
88 static int equatelines(struct line *a, int an, struct line *b, int bn) |
92 static int equatelines(struct line *a, int an, struct line *b, int bn) |
89 { |
93 { |
90 int i, j, buckets = 1, t, *h, *l; |
94 int i, j, buckets = 1, t; |
|
95 struct pos *h; |
91 |
96 |
92 /* build a hash table of the next highest power of 2 */ |
97 /* build a hash table of the next highest power of 2 */ |
93 while (buckets < bn + 1) |
98 while (buckets < bn + 1) |
94 buckets *= 2; |
99 buckets *= 2; |
95 |
100 |
96 h = malloc(buckets * sizeof(int)); |
101 h = malloc(buckets * sizeof(struct pos)); |
97 l = calloc(buckets, sizeof(int)); |
|
98 buckets = buckets - 1; |
102 buckets = buckets - 1; |
99 if (!h || !l) { |
103 if (!h) |
100 free(h); |
|
101 return 0; |
104 return 0; |
102 } |
|
103 |
105 |
104 /* clear the hash table */ |
106 /* clear the hash table */ |
105 for (i = 0; i <= buckets; i++) |
107 for (i = 0; i <= buckets; i++) { |
106 h[i] = -1; |
108 h[i].pos = -1; |
|
109 h[i].len = 0; |
|
110 } |
107 |
111 |
108 /* add lines to the hash table chains */ |
112 /* add lines to the hash table chains */ |
109 for (i = bn - 1; i >= 0; i--) { |
113 for (i = bn - 1; i >= 0; i--) { |
110 /* find the equivalence class */ |
114 /* find the equivalence class */ |
111 for (j = b[i].h & buckets; h[j] != -1; j = (j + 1) & buckets) |
115 for (j = b[i].h & buckets; h[j].pos != -1; |
112 if (!cmp(b + i, b + h[j])) |
116 j = (j + 1) & buckets) |
|
117 if (!cmp(b + i, b + h[j].pos)) |
113 break; |
118 break; |
114 |
119 |
115 /* add to the head of the equivalence class */ |
120 /* add to the head of the equivalence class */ |
116 b[i].n = h[j]; |
121 b[i].n = h[j].pos; |
117 b[i].e = j; |
122 b[i].e = j; |
118 h[j] = i; |
123 h[j].pos = i; |
119 l[j]++; /* keep track of popularity */ |
124 h[j].len++; /* keep track of popularity */ |
120 } |
125 } |
121 |
126 |
122 /* compute popularity threshold */ |
127 /* compute popularity threshold */ |
123 t = (bn >= 200) ? bn / 100 : bn + 1; |
128 t = (bn >= 200) ? bn / 100 : bn + 1; |
124 |
129 |
125 /* match items in a to their equivalence class in b */ |
130 /* match items in a to their equivalence class in b */ |
126 for (i = 0; i < an; i++) { |
131 for (i = 0; i < an; i++) { |
127 /* find the equivalence class */ |
132 /* find the equivalence class */ |
128 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets) |
133 for (j = a[i].h & buckets; h[j].pos != -1; |
129 if (!cmp(a + i, b + h[j])) |
134 j = (j + 1) & buckets) |
|
135 if (!cmp(a + i, b + h[j].pos)) |
130 break; |
136 break; |
131 |
137 |
132 a[i].e = j; /* use equivalence class for quick compare */ |
138 a[i].e = j; /* use equivalence class for quick compare */ |
133 if(l[j] <= t) |
139 if(h[j].len <= t) |
134 a[i].n = h[j]; /* point to head of match list */ |
140 a[i].n = h[j].pos; /* point to head of match list */ |
135 else |
141 else |
136 a[i].n = -1; /* too popular */ |
142 a[i].n = -1; /* too popular */ |
137 } |
143 } |
138 |
144 |
139 /* discard hash tables */ |
145 /* discard hash tables */ |
140 free(h); |
146 free(h); |
141 free(l); |
|
142 return 1; |
147 return 1; |
143 } |
148 } |
144 |
149 |
145 static int longest_match(struct line *a, struct line *b, int *jpos, int *jlen, |
150 static int longest_match(struct line *a, struct line *b, struct pos *pos, |
146 int a1, int a2, int b1, int b2, int *omi, int *omj) |
151 int a1, int a2, int b1, int b2, int *omi, int *omj) |
147 { |
152 { |
148 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k; |
153 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k; |
149 |
154 |
150 for (i = a1; i < a2; i++) { |
155 for (i = a1; i < a2; i++) { |
187 *omi = mi - mb; |
192 *omi = mi - mb; |
188 *omj = mj - mb; |
193 *omj = mj - mb; |
189 return mk + mb; |
194 return mk + mb; |
190 } |
195 } |
191 |
196 |
192 static void recurse(struct line *a, struct line *b, int *jpos, int *jlen, |
197 static void recurse(struct line *a, struct line *b, struct pos *pos, |
193 int a1, int a2, int b1, int b2, struct hunklist *l) |
198 int a1, int a2, int b1, int b2, struct hunklist *l) |
194 { |
199 { |
195 int i, j, k; |
200 int i, j, k; |
196 |
201 |
197 /* find the longest match in this chunk */ |
202 /* find the longest match in this chunk */ |
198 k = longest_match(a, b, jpos, jlen, a1, a2, b1, b2, &i, &j); |
203 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j); |
199 if (!k) |
204 if (!k) |
200 return; |
205 return; |
201 |
206 |
202 /* and recurse on the remaining chunks on either side */ |
207 /* and recurse on the remaining chunks on either side */ |
203 recurse(a, b, jpos, jlen, a1, i, b1, j, l); |
208 recurse(a, b, pos, a1, i, b1, j, l); |
204 l->head->a1 = i; |
209 l->head->a1 = i; |
205 l->head->a2 = i + k; |
210 l->head->a2 = i + k; |
206 l->head->b1 = j; |
211 l->head->b1 = j; |
207 l->head->b2 = j + k; |
212 l->head->b2 = j + k; |
208 l->head++; |
213 l->head++; |
209 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l); |
214 recurse(a, b, pos, i + k, a2, j + k, b2, l); |
210 } |
215 } |
211 |
216 |
212 static struct hunklist diff(struct line *a, int an, struct line *b, int bn) |
217 static struct hunklist diff(struct line *a, int an, struct line *b, int bn) |
213 { |
218 { |
214 struct hunklist l; |
219 struct hunklist l; |
215 int *jpos, *jlen, t; |
220 struct pos *pos; |
|
221 int t; |
216 |
222 |
217 /* allocate and fill arrays */ |
223 /* allocate and fill arrays */ |
218 t = equatelines(a, an, b, bn); |
224 t = equatelines(a, an, b, bn); |
219 jpos = calloc(bn, sizeof(int)); |
225 pos = calloc(bn, sizeof(struct pos)); |
220 jlen = calloc(bn, sizeof(int)); |
|
221 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2)); |
226 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2)); |
222 |
227 |
223 if (jpos && jlen && l.base && t) { |
228 if (pos && l.base && t) { |
224 /* generate the matching block list */ |
229 /* generate the matching block list */ |
225 recurse(a, b, jpos, jlen, 0, an, 0, bn, &l); |
230 recurse(a, b, pos, 0, an, 0, bn, &l); |
226 l.head->a1 = an; |
231 l.head->a1 = an; |
227 l.head->b1 = bn; |
232 l.head->b1 = bn; |
228 l.head++; |
233 l.head++; |
229 } |
234 } |
230 |
235 |
231 free(jpos); |
236 free(pos); |
232 free(jlen); |
|
233 return l; |
237 return l; |
234 } |
238 } |
235 |
239 |
236 static PyObject *blocks(PyObject *self, PyObject *args) |
240 static PyObject *blocks(PyObject *self, PyObject *args) |
237 { |
241 { |