mercurial/mdiff.py
author mpm@selenic.com
Fri, 27 May 2005 19:38:34 -0800
changeset 184 697f05bfe976
parent 170 e6c621a825f2
child 239 75840796e8e2
child 241 afe895fcc0d0
permissions -rw-r--r--
Improved binary diff from Christopher Li. This is more intelligent/efficient by combining neighboring inserts, replaces and deletes. Passes test of converting kernel repo, but doesn't appear to substantially affect compression or performance.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
     1
#!/usr/bin/python
127
44538462d3c8 Fix braindamaged import in mdiff.
mpm@selenic.com
parents: 125
diff changeset
     2
import difflib, struct, mmap
44538462d3c8 Fix braindamaged import in mdiff.
mpm@selenic.com
parents: 125
diff changeset
     3
from mercurial.mpatch import *
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
     4
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
     5
def unidiff(a, ad, b, bd, fn):
35
9197c26a414b unidiff: punt on comparing empty files
mpm@selenic.com
parents: 0
diff changeset
     6
    if not a and not b: return ""
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
     7
    a = a.splitlines(1)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
     8
    b = b.splitlines(1)
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
     9
    l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd))
170
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
    10
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
    11
    for ln in xrange(len(l)):
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
    12
        if l[ln][-1] != '\n':
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
    13
            l[ln] += "\n\ No newline at end of file\n"
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
    14
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    15
    return "".join(l)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    16
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    17
def textdiff(a, b):
    """Return diff() of the two texts after splitting each into lines.

    Both a and b are split with their line endings kept, so the pieces
    of each text join back to the original string.
    """
    old_lines = a.splitlines(True)
    new_lines = b.splitlines(True)
    return diff(old_lines, new_lines)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    19
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    20
def sortdiff(a, b):
    """Yield the matching blocks between two ordered sequences.

    Generator producing (am, bm, size) triples meaning
    a[am:am + size] == b[bm:bm + size], in increasing order, followed by
    a final (len(a), len(b), 0) terminator -- the same shape diff() reads
    from difflib's get_matching_blocks().

    The walk only ever moves forward, skipping whichever side currently
    holds the smaller element, so it is intended for inputs whose
    elements are in ascending order.
    """
    la = lb = 0
    lena = len(a)
    lenb = len(b)
    while 1:
        am, bm = la, lb
        # Consume a run of equal elements; both indices must stay in range.
        while lb < lenb and la < lena and a[la] == b[lb]:
            la += 1
            lb += 1
        if la > am: yield (am, bm, la - am)
        # Once either side is used up there is nothing left to match, and
        # indexing it below would run off the end.
        if la >= lena or lb >= lenb: break
        # Skip elements of b that sort before the current element of a ...
        while lb < lenb and b[lb] < a[la]: lb += 1
        if lb >= lenb: break
        # ... then elements of a that sort before the current element of b.
        while la < lena and b[lb] > a[la]: la += 1
        if la >= lena: break
    yield (lena, lenb, 0)
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    35
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    36
def diff(a, b, sorted=0):
    """Build a binary diff that describes how to turn sequence a into b.

    a, b   -- sequences of strings (textdiff() passes lists of lines)
    sorted -- when true, matching blocks come from sortdiff() instead of
              difflib.SequenceMatcher

    The result is a string of concatenated hunks.  Each hunk is a 12-byte
    header packed as three big-endian longs (">lll") -- start offset and
    end offset of the replaced span within the joined text of a, and the
    length of the replacement data -- followed by that data taken from b.
    An empty string is returned when there is nothing to change.
    """
    if not a:
        # Nothing to match against: the whole of b is one insertion at 0.
        added = "".join(b)
        if not added:
            return added
        return struct.pack(">lll", 0, 0, len(added)) + added

    # offsets[i] is where a[i] starts within "".join(a); the extra last
    # entry is the total length.
    offsets = [0]
    total = 0
    for piece in a:
        total += len(piece)
        offsets.append(total)

    if sorted:
        blocks = sortdiff(a, b)
    else:
        blocks = difflib.SequenceMatcher(None, a, b).get_matching_blocks()

    hunks = []
    a_pos = b_pos = 0
    for a_match, b_match, length in blocks:
        # Everything between the previous match and this one differs.
        replacement = "".join(b[b_pos:b_match])
        if a_match > a_pos or replacement:
            header = struct.pack(">lll", offsets[a_pos], offsets[a_match],
                                 len(replacement))
            hunks.append(header + replacement)
        a_pos = a_match + length
        b_pos = b_match + length

    return "".join(hunks)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    59
120
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
    60
def patchtext(bin):
    """Return the replacement data of every hunk in a binary diff, joined.

    bin is read as a sequence of hunks, each a 12-byte header of three
    big-endian longs (">lll") followed by as many bytes of data as the
    third header field says.  The first two header fields are read but
    not used here.
    """
    header_len = 12
    pieces = []
    offset = 0
    end = len(bin)
    while offset < end:
        header = bin[offset:offset + header_len]
        start, stop, size = struct.unpack(">lll", header)
        offset += header_len
        pieces.append(bin[offset:offset + size])
        offset += size
    return "".join(pieces)
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
    69
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
    70
def patch(a, bin):
    """Return patches() applied to a with bin as the only diff.

    Thin wrapper that puts bin into a one-element list and delegates.
    patches() is not defined in this file; presumably it comes from the
    star import of mercurial.mpatch -- verify.
    """
    single = [bin]
    return patches(a, single)