# HG changeset patch
# User mpm@selenic.com
# Date 1116464767 28800
# Node ID 58039eddbddaba5dde6385bc7ee16cee777c947d
# Parent 3dde7c87e36df4da5694313a41803bc8ed358e60
# Parent 6da5cf0c4193cd9bc00f132e9069aaa8f474e17c
Merge from hgweb

diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -1,6 +1,7 @@
 .*\.orig
 .*\.rej
 .*~
+.*\.so
 .*pyc
 build/.*
 dist/
diff --git a/README b/README
--- a/README
+++ b/README
@@ -38,6 +38,8 @@ Mercurial commands:
  $ hg remove bar        # mark a file as removed
  $ hg verify            # check repo integrity
  $ hg tags              # show current tags
+ $ hg annotate [files]  # show changeset numbers for each file line
+ $ hg blame [files]     # show commit users for each file line
 
 Branching and merging:
diff --git a/hg b/hg
--- a/hg
+++ b/hg
@@ -38,6 +38,8 @@ def help():
  dumpmanifest [rev]    dump the latest or given revision of the manifest
  diff [files...]       diff working directory (or selected files)
  tags                  show current changeset tags
+ annotate [files...]   show changeset number per file line
+ blame [files...]      show commit user per file line
 """
 
 def filterfiles(list, files):
@@ -87,7 +89,8 @@ def diff(files = None, node1 = None, nod
 
 options = {}
 opts = [('v', 'verbose', None, 'verbose'),
-        ('d', 'debug', None, 'debug')]
+        ('d', 'debug', None, 'debug'),
+        ('q', 'quiet', None, 'quiet')]
 
 args = fancyopts.fancyopts(sys.argv[1:], opts, options,
                            'hg [options] [command options] [files]')
@@ -98,7 +101,7 @@ try:
 except:
     cmd = ""
 
-ui = hg.ui(options["verbose"], options["debug"])
+ui = hg.ui(options["verbose"], options["debug"], options["quiet"])
 
 if cmd == "init":
     repo = hg.repository(ui, ".", create=1)
@@ -112,8 +115,8 @@ elif cmd == "help":
 else:
     try:
         repo = hg.repository(ui=ui)
-    except:
-        print "Unable to open repository"
+    except IOError:
+        ui.warn("Unable to open repository\n")
         sys.exit(0)
 
 if cmd == "checkout" or cmd == "co":
@@ -136,6 +139,12 @@ elif cmd == "commit" or cmd == "checkin"
         repo.commit(repo.current)
 
 elif cmd == "import" or cmd == "patch":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        pass
+
     ioptions = {}
     opts = [('p', 'strip', 1, 'path strip'),
             ('b', 'base', "", 'base path'),
@@ -154,21 +163,23 @@ elif cmd == "import" or cmd == "patch":
 
         text = ""
         for l in file(pf):
-            if l[:3] == "---": break
+            if l[:4] == "--- ": break
             text += l
 
-        if os.system("patch -p%d < %s %s" % (strip, pf, quiet)):
-            raise "patch failed!"
-
         f = os.popen("lsdiff --strip %d %s" % (strip, pf))
         files = filter(None, map(lambda x: x.rstrip(), f.read().splitlines()))
         f.close()
+
+        if files:
+            if os.system("patch -p%d < %s %s" % (strip, pf, quiet)):
+                raise "patch failed!"
 
         repo.commit(repo.current, files, text)
 
 elif cmd == "status":
     (c, a, d) = repo.diffdir(repo.root, repo.current)
-    for f in c: print "C", f
-    for f in a: print "?", f
-    for f in d: print "R", f
+    for f in c: ui.status("C %s\n" % f)
+    for f in a: ui.status("? %s\n" % f)
%s\n" % f) + for f in d: ui.status("R %s\n" % f) elif cmd == "diff": revs = [] @@ -181,7 +192,7 @@ elif cmd == "diff": revs = map(lambda x: repo.lookup(x), doptions['revision']) if len(revs) > 2: - print "too many revisions to diff" + self.ui.warn("too many revisions to diff\n") sys.exit(1) if os.getcwd() != repo.root: @@ -191,9 +202,59 @@ elif cmd == "diff": diff(args, *revs) +elif cmd == "annotate": + aoptions = {} + opts = [('r', 'revision', '', 'revision')] + args = fancyopts.fancyopts(args, opts, aoptions, + 'hg annotate [-r id] [files]') + if args: + node = repo.current + if aoptions['revision']: + node = repo.changelog.lookup(aoptions['revision']) + change = repo.changelog.read(node) + mmap = repo.manifest.read(change[0]) + for f in args: + for n, l in repo.file(f).annotate(mmap[f]): + sys.stdout.write("% 6s:%s"%(n, l)) + +elif cmd == "blame": + aoptions = {} + opts = [('r', 'revision', '', 'revision')] + args = fancyopts.fancyopts(args, opts, aoptions, + 'hg blame [-r id] [files]') + if args: + bcache = {} + node = repo.current + if aoptions['revision']: + node = repo.changelog.lookup(aoptions['revision']) + change = repo.changelog.read(node) + mmap = repo.manifest.read(change[0]) + for f in args: + for n, l in repo.file(f).annotate(mmap[f]): + try: + name = bcache[n] + except KeyError: + cl = repo.changelog.read(repo.changelog.node(n)) + name = cl[1] + f = name.find('@') + if f >= 0: + name = name[:f] + bcache[n] = name + sys.stdout.write("% 10s:%s"%(name, l)) + elif cmd == "export": node = repo.lookup(args[0]) - prev = repo.changelog.parents(node)[0] + prev, other = repo.changelog.parents(node) + change = repo.changelog.read(node) + print "# HG changeset patch" + print "# User %s" % change[1] + print "# Node ID %s" % hg.hex(node) + print "# Parent %s" % hg.hex(prev) + print + if other != hg.nullid: + print "# Parent %s" % hg.hex(other) + print change[4] + diff(None, prev, node) elif cmd == "debugchangegroup": @@ -229,6 +290,11 @@ elif cmd == "history": print "description:" print changes[4] +elif cmd == "tip": + n = repo.changelog.tip() + t = repo.changelog.rev(n) + ui.status("%d:%s\n" % (t, hg.hex(n))) + elif cmd == "log": if args: r = repo.file(args[0]) @@ -271,19 +337,35 @@ elif cmd == "debughash": print f.encodepath(args[0]) elif cmd == "debugindex": + if ".hg" not in args[0]: + args[0] = ".hg/data/" + repo.file(args[0]).encodepath(args[0]) + "i" + r = hg.revlog(open, args[0], "") - print " rev offset length base linkrev"+\ + print " rev offset length base linkrev"+\ " p1 p2 nodeid" for i in range(r.count()): e = r.index[i] - print "% 6d % 9d % 7d % 5d % 7d %s.. %s.. %s.." % ( + print "% 6d % 9d % 7d % 6d % 7d %s.. %s.. %s.." 
@@ -271,19 +337,35 @@ elif cmd == "debughash":
     print f.encodepath(args[0])
 
 elif cmd == "debugindex":
+    if ".hg" not in args[0]:
+        args[0] = ".hg/data/" + repo.file(args[0]).encodepath(args[0]) + "i"
+
     r = hg.revlog(open, args[0], "")
-    print "   rev    offset  length   base linkrev"+\
+    print "   rev    offset  length    base linkrev"+\
           " p1           p2           nodeid"
     for i in range(r.count()):
         e = r.index[i]
-        print "% 6d % 9d % 7d % 5d % 7d %s.. %s.. %s.." % (
+        print "% 6d % 9d % 7d % 6d % 7d %s.. %s.. %s.." % (
             i, e[0], e[1], e[2], e[3],
             hg.hex(e[4][:5]), hg.hex(e[5][:5]), hg.hex(e[6][:5]))
 
+elif cmd == "debugindexdot":
+    if ".hg" not in args[0]:
+        args[0] = ".hg/data/" + repo.file(args[0]).encodepath(args[0]) + "i"
+
+    r = hg.revlog(open, args[0], "")
+    print "digraph G {"
+    for i in range(r.count()):
+        e = r.index[i]
+        print "\t%d -> %d" % (r.rev(e[4]), i)
+        if e[5] != hg.nullid:
+            print "\t%d -> %d" % (r.rev(e[5]), i)
+    print "}"
+
 elif cmd == "merge":
     if args:
         other = hg.repository(ui, args[0])
-        print "requesting changegroup"
+        ui.status("requesting changegroup\n")
         cg = repo.getchangegroup(other)
         repo.addchangegroup(cg)
     else:
@@ -312,79 +394,119 @@ elif cmd == "verify":
     filenodes = {}
     manifestchangeset = {}
     changesets = revisions = files = 0
+    errors = 0
 
-    print "checking changesets"
+    ui.status("checking changesets\n")
     for i in range(repo.changelog.count()):
         changesets += 1
         n = repo.changelog.node(i)
-        changes = repo.changelog.read(n)
+        for p in repo.changelog.parents(n):
+            if p not in repo.changelog.nodemap:
+                ui.warn("changeset %s has unknown parent %s\n" %
+                        (hg.short(n), hg.short(p)))
+                errors += 1
+        try:
+            changes = repo.changelog.read(n)
+        except Exception, inst:
+            ui.warn("unpacking changeset %s: %s\n" % (hg.short(n), inst))
+            errors += 1
+
         manifestchangeset[changes[0]] = n
         for f in changes[3]:
             revisions += 1
             filelinkrevs.setdefault(f, []).append(i)
 
-    print "checking manifests"
+    ui.status("checking manifests\n")
     for i in range(repo.manifest.count()):
         n = repo.manifest.node(i)
+        for p in repo.manifest.parents(n):
+            if p not in repo.manifest.nodemap:
+                ui.warn("manifest %s has unknown parent %s\n" %
+                        (hg.short(n), hg.short(p)))
+                errors += 1
         ca = repo.changelog.node(repo.manifest.linkrev(n))
         cc = manifestchangeset[n]
         if ca != cc:
-            print "manifest %s points to %s, not %s" % \
-                (hg.hex(n), hg.hex(ca), hg.hex(cc))
-        m = repo.manifest.read(n)
+            ui.warn("manifest %s points to %s, not %s\n" %
+                    (hg.hex(n), hg.hex(ca), hg.hex(cc)))
+            errors += 1
+
+        try:
+            m = repo.manifest.read(n)
+        except Exception, inst:
+            ui.warn("unpacking manifest %s: %s\n" % (hg.short(n), inst))
+            errors += 1
+
         for f, fn in m.items():
             filenodes.setdefault(f, {})[fn] = 1
 
-    print "crosschecking files in changesets and manifests"
+    ui.status("crosschecking files in changesets and manifests\n")
     for f in filenodes:
         if f not in filelinkrevs:
-            print "file %s in manifest but not in changesets"
+            ui.warn("file %s in manifest but not in changesets\n" % f)
+            errors += 1
 
     for f in filelinkrevs:
         if f not in filenodes:
-            print "file %s in changeset but not in manifest"
+            ui.warn("file %s in changeset but not in manifest\n" % f)
+            errors += 1
 
-    print "checking files"
+    ui.status("checking files\n")
    for f in filenodes:
        files += 1
        fl = repo.file(f)
-       nodes = {"\0"*20: 1}
+       nodes = { hg.nullid: 1 }
        for i in range(fl.count()):
            n = fl.node(i)
            if n not in filenodes[f]:
-               print "%s:%s not in manifests" % (f, hg.hex(n))
+               ui.warn("%s:%s not in manifests\n" % (f, hg.short(n)))
+               errors += 1
            else:
                del filenodes[f][n]
 
            flr = fl.linkrev(n)
            if flr not in filelinkrevs[f]:
-               print "%s:%s points to unexpected changeset rev %d" \
-                   % (f, hg.hex(n), fl.linkrev(n))
+               ui.warn("%s:%s points to unexpected changeset rev %d\n"
+                       % (f, hg.short(n), fl.linkrev(n)))
+               errors += 1
            else:
                filelinkrevs[f].remove(flr)
 
            # verify contents
-           t = fl.read(n)
-
+           try:
+               t = fl.read(n)
+           except Exception, inst:
+               ui.warn("unpacking file %s %s: %s\n" % (f, hg.short(n), inst))
+               errors += 1
+
            # verify parents
            (p1, p2) = fl.parents(n)
            if p1 not in nodes:
1 %s" % (f, hg.hex(n), hg.hex(p1)) + ui.warn("file %s:%s unknown parent 1 %s" % + (f, hg.short(n), hg.short(p1))) + errors += 1 if p2 not in nodes: - print "file %s:%s unknown parent %s" % (f, hg.hex(n), hg.hex(p1)) + ui.warn("file %s:%s unknown parent 2 %s" % + (f, hg.short(n), hg.short(p1))) + errors += 1 nodes[n] = 1 # cross-check for flr in filelinkrevs[f]: - print "changeset rev %d not in %s" % (flr, f) + ui.warn("changeset rev %d not in %s\n" % (flr, f)) + errors += 1 for node in filenodes[f]: - print "node %s in manifests not in %s" % (hg.hex(n), f) - + ui.warn("node %s in manifests not in %s\n" % (hg.hex(n), f)) + errors += 1 - print "%d files, %d changesets, %d total revisions" % (files, changesets, - revisions) + ui.status("%d files, %d changesets, %d total revisions\n" % + (files, changesets, revisions)) + + if errors: + ui.warn("%d integrity errors encountered!\n" % errors) + sys.exit(1) else: print "unknown command\n" diff --git a/hgweb.py b/hgweb.py diff --git a/mercurial/hg.py b/mercurial/hg.py --- a/mercurial/hg.py +++ b/mercurial/hg.py @@ -10,6 +10,7 @@ import urllib from mercurial import byterange from mercurial.transaction import * from mercurial.revlog import * +from difflib import SequenceMatcher class filelog(revlog): def __init__(self, opener, path): @@ -29,41 +30,31 @@ class filelog(revlog): def add(self, text, transaction, link, p1=None, p2=None): return self.addrevision(text, transaction, link, p1, p2) - def resolvedag(self, old, new, transaction, link): - """resolve unmerged heads in our DAG""" - if old == new: return None - a = self.ancestor(old, new) - if old == a: return None - return self.merge3(old, new, a, transaction, link) - - def merge3(self, my, other, base, transaction, link): - """perform a 3-way merge and append the result""" - def temp(prefix, node): - (fd, name) = tempfile.mkstemp(prefix) - f = os.fdopen(fd, "w") - f.write(self.revision(node)) - f.close() - return name - - a = temp("local", my) - b = temp("remote", other) - c = temp("parent", base) - - cmd = os.environ["HGMERGE"] - r = os.system("%s %s %s %s" % (cmd, a, b, c)) - if r: - raise "Merge failed, implement rollback!" 
diff --git a/hgweb.py b/hgweb.py
diff --git a/mercurial/hg.py b/mercurial/hg.py
--- a/mercurial/hg.py
+++ b/mercurial/hg.py
@@ -10,6 +10,7 @@ import urllib
 from mercurial import byterange
 from mercurial.transaction import *
 from mercurial.revlog import *
+from difflib import SequenceMatcher
 
 class filelog(revlog):
     def __init__(self, opener, path):
@@ -29,41 +30,31 @@ class filelog(revlog):
     def add(self, text, transaction, link, p1=None, p2=None):
         return self.addrevision(text, transaction, link, p1, p2)
 
-    def resolvedag(self, old, new, transaction, link):
-        """resolve unmerged heads in our DAG"""
-        if old == new: return None
-        a = self.ancestor(old, new)
-        if old == a: return None
-        return self.merge3(old, new, a, transaction, link)
-
-    def merge3(self, my, other, base, transaction, link):
-        """perform a 3-way merge and append the result"""
-        def temp(prefix, node):
-            (fd, name) = tempfile.mkstemp(prefix)
-            f = os.fdopen(fd, "w")
-            f.write(self.revision(node))
-            f.close()
-            return name
-
-        a = temp("local", my)
-        b = temp("remote", other)
-        c = temp("parent", base)
-
-        cmd = os.environ["HGMERGE"]
-        r = os.system("%s %s %s %s" % (cmd, a, b, c))
-        if r:
-            raise "Merge failed, implement rollback!"
-
-        t = open(a).read()
-        os.unlink(a)
-        os.unlink(b)
-        os.unlink(c)
-        return self.addrevision(t, transaction, link, my, other)
-
-    def merge(self, other, transaction, linkseq, link):
-        """perform a merge and resolve resulting heads"""
-        (o, n) = self.mergedag(other, transaction, linkseq)
-        return self.resolvedag(o, n, transaction, link)
+    def annotate(self, node):
+        revs = []
+        while node != nullid:
+            revs.append(node)
+            node = self.parents(node)[0]
+        revs.reverse()
+        prev = []
+        annotate = []
+        for node in revs:
+            curr = self.read(node).splitlines(1)
+            linkrev = self.linkrev(node)
+            sm = SequenceMatcher(None, prev, curr)
+            offset = 0
+            for o, m, n, s, t in sm.get_opcodes():
+                if o in ('insert', 'replace'):
+                    annotate[m+offset:n+offset] = \
+                        [(linkrev, l) for l in curr[s:t]]
+                elif o == 'delete':
+                    del annotate[m+offset:n+offset]
+                offset += (t - s) - (n - m)
+            assert len(annotate) == len(curr)
+            prev = curr
+        return annotate
 
 class manifest(revlog):
     def __init__(self, opener):
@@ -74,7 +65,7 @@ class manifest(revlog):
 
     def read(self, node):
         if self.mapcache and self.mapcache[0] == node:
-            return self.mapcache[1]
+            return self.mapcache[1].copy()
         text = self.revision(node)
         map = {}
         self.listcache = (text, text.splitlines(1))
@@ -87,7 +78,11 @@ class manifest(revlog):
     def diff(self, a, b):
         # this is sneaky, as we're not actually using a and b
         if self.listcache and len(self.listcache[0]) == len(a):
-            return mdiff.diff(self.listcache[1], self.addlist, 1)
+            d = mdiff.diff(self.listcache[1], self.addlist, 1)
+            if mdiff.patch(a, d) != b:
+                sys.stderr.write("*** sortdiff failed, falling back ***\n")
+                return mdiff.textdiff(a, b)
+            return d
         else:
             return mdiff.textdiff(a, b)
 
@@ -133,9 +128,6 @@ class changelog(revlog):
         text = "\n".join(l)
         return self.addrevision(text, transaction, self.count(), p1, p2)
 
-    def merge3(self, my, other, base):
-        pass
-
 class dircache:
     def __init__(self, opener, ui):
         self.opener = opener
@@ -306,123 +298,8 @@ class localrepository:
         return filelog(self.opener, f)
 
     def transaction(self):
-        return transaction(self.opener, self.join("journal"))
-
-    def merge(self, other):
-        tr = self.transaction()
-        changed = {}
-        new = {}
-        seqrev = self.changelog.count()
-        # some magic to allow fiddling in nested scope
-        nextrev = [seqrev]
-
-        # helpers for back-linking file revisions to local changeset
-        # revisions so we can immediately get to changeset from annotate
-        def accumulate(text):
-            # track which files are added in which changeset and the
-            # corresponding _local_ changeset revision
-            files = self.changelog.extract(text)[3]
-            for f in files:
-                changed.setdefault(f, []).append(nextrev[0])
-            nextrev[0] += 1
-
-        def seq(start):
-            while 1:
-                yield start
-                start += 1
-
-        def lseq(l):
-            for r in l:
-                yield r
-
-        # begin the import/merge of changesets
-        self.ui.status("merging new changesets\n")
-        (co, cn) = self.changelog.mergedag(other.changelog, tr,
-                                           seq(seqrev), accumulate)
-        resolverev = self.changelog.count()
-
-        # is there anything to do?
-        if co == cn:
-            tr.close()
-            return
-
-        # do we need to resolve?
-        simple = (co == self.changelog.ancestor(co, cn))
-
-        # merge all files changed by the changesets,
-        # keeping track of the new tips
-        changelist = changed.keys()
-        changelist.sort()
-        for f in changelist:
-            sys.stdout.write(".")
-            sys.stdout.flush()
-            r = self.file(f)
-            node = r.merge(other.file(f), tr, lseq(changed[f]), resolverev)
-            if node:
-                new[f] = node
-        sys.stdout.write("\n")
-
-        # begin the merge of the manifest
-        self.ui.status("merging manifests\n")
-        (mm, mo) = self.manifest.mergedag(other.manifest, tr, seq(seqrev))
-
-        # For simple merges, we don't need to resolve manifests or changesets
-        if simple:
-            tr.close()
-            return
-
-        ma = self.manifest.ancestor(mm, mo)
-
-        # resolve the manifest to point to all the merged files
-        self.ui.status("resolving manifests\n")
-        mmap = self.manifest.read(mm) # mine
-        omap = self.manifest.read(mo) # other
-        amap = self.manifest.read(ma) # ancestor
-        nmap = {}
-
-        for f, mid in mmap.iteritems():
-            if f in omap:
-                if mid != omap[f]:
-                    nmap[f] = new.get(f, mid) # use merged version
-                else:
-                    nmap[f] = new.get(f, mid) # they're the same
-                del omap[f]
-            elif f in amap:
-                if mid != amap[f]:
-                    pass # we should prompt here
-                else:
-                    pass # other deleted it
-            else:
-                nmap[f] = new.get(f, mid) # we created it
-
-        del mmap
-
-        for f, oid in omap.iteritems():
-            if f in amap:
-                if oid != amap[f]:
-                    pass # this is the nasty case, we should prompt
-                else:
-                    pass # probably safe
-            else:
-                nmap[f] = new.get(f, oid) # remote created it
-
-        del omap
-        del amap
-
-        node = self.manifest.add(nmap, tr, resolverev, mm, mo)
-
-        # Now all files and manifests are merged, we add the changed files
-        # and manifest id to the changelog
-        self.ui.status("committing merge changeset\n")
-        new = new.keys()
-        new.sort()
-        if co == cn: cn = -1
-
-        edittext = "\n"+"".join(["HG: changed %s\n" % f for f in new])
-        edittext = self.ui.edit(edittext)
-        n = self.changelog.add(node, new, edittext, tr, co, cn)
-
-        tr.close()
+        return transaction(self.opener, self.join("journal"),
+                           self.join("undo"))
 
     def commit(self, parent, update = None, text = ""):
         tr = self.transaction()
@@ -441,6 +318,7 @@ class localrepository:
         new = {}
         linkrev = self.changelog.count()
         for f in update:
+            self.ui.note(f + "\n")
             try:
                 t = file(f).read()
             except IOError:
@@ -488,6 +366,7 @@ class localrepository:
         l.sort()
         stats = []
         for f in l:
+            self.ui.note(f + "\n")
             r = self.file(f)
             t = r.revision(mmap[f])
             try:
@@ -611,43 +490,57 @@ class localrepository:
     def newer(self, nodes):
         m = {}
         nl = []
+        pm = {}
         cl = self.changelog
         t = l = cl.count()
+
+        # find the lowest numbered node
         for n in nodes:
             l = min(l, cl.rev(n))
-            for p in cl.parents(n):
-                m[p] = 1
+            m[n] = 1
 
         for i in xrange(l, t):
             n = cl.node(i)
+            if n in m: # explicitly listed
+                pm[n] = 1
+                nl.append(n)
+                continue
             for p in cl.parents(n):
-                if p in m and n not in m:
-                    m[n] = 1
+                if p in pm: # parent listed
+                    pm[n] = 1
                     nl.append(n)
+                    break
 
         return nl
 
     def getchangegroup(self, remote):
         tip = remote.branches([])[0]
+        self.ui.debug("remote tip branch is %s:%s\n" %
+                      (short(tip[0]), short(tip[1])))
         m = self.changelog.nodemap
         unknown = [tip]
         search = []
         fetch = []
 
         if tip[0] in m:
+            self.ui.note("nothing to do!\n")
            return None
 
         while unknown:
             n = unknown.pop(0)
             if n == nullid: break
             if n[1] and n[1] in m: # do we know the base?
+ self.ui.debug("found incomplete branch %s\n" % short(n[1])) search.append(n) # schedule branch range for scanning else: + if n[2] in m and n[3] in m: + if n[1] not in fetch: + self.ui.debug("found new changeset %s\n" % + short(n[1])) + fetch.append(n[1]) # earliest unknown + continue for b in remote.branches([n[2], n[3]]): - if b[0] in m: - if n[1] not in fetch: - fetch.append(n[1]) # earliest unknown - else: + if b[0] not in m: unknown.append(b) while search: @@ -657,16 +550,23 @@ class localrepository: f = 1 for i in l + [n[1]]: if i in m: - if f <= 4: + if f <= 2: + self.ui.debug("found new branch changeset %s\n" % + short(p)) fetch.append(p) else: + self.ui.debug("narrowed branch search to %s:%s\n" + % (short(p), short(i))) search.append((p, i)) break p, f = i, f * 2 for f in fetch: if f in m: - raise "already have", hex(f[:4]) + raise "already have", short(f[:4]) + + self.ui.note("adding new changesets starting at " + + " ".join([short(f) for f in fetch]) + "\n") return remote.changegroup(fetch) @@ -725,17 +625,21 @@ class localrepository: tr = self.transaction() simple = True - print "merging changesets" + self.ui.status("adding changesets\n") # pull off the changeset group + def report(x): + self.ui.debug("add changeset %s\n" % short(x)) + return self.changelog.count() + csg = getchunk() co = self.changelog.tip() - cn = self.changelog.addgroup(csg, lambda x: self.changelog.count(), tr) + cn = self.changelog.addgroup(csg, report, tr) - print "merging manifests" + self.ui.status("adding manifests\n") # pull off the manifest group mfg = getchunk() - mo = self.manifest.tip() - mm = self.manifest.addgroup(mfg, lambda x: self.changelog.rev(x), tr) + mm = self.manifest.tip() + mo = self.manifest.addgroup(mfg, lambda x: self.changelog.rev(x), tr) # do we need a resolve? 
        if self.changelog.ancestor(co, cn) != co:
@@ -743,57 +647,70 @@ class localrepository:
             resolverev = self.changelog.count()
 
         # process the files
-        print "merging files"
+        self.ui.status("adding files\n")
         new = {}
         while 1:
             f = getchunk(4)
             if not f: break
             fg = getchunk()
-
+            self.ui.debug("adding %s revisions\n" % f)
             fl = self.file(f)
             o = fl.tip()
             n = fl.addgroup(fg, lambda x: self.changelog.rev(x), tr)
             if not simple:
-                nn = fl.resolvedag(o, n, tr, resolverev)
-                if nn: new[f] = nn
+                if o == n: continue
+                # this file has changed between branches, so it must be
+                # represented in the merge changeset
+                new[f] = self.merge3(fl, f, o, n, tr, resolverev)
 
         # For simple merges, we don't need to resolve manifests or changesets
         if simple:
+            self.ui.debug("simple merge, skipping resolve\n")
             tr.close()
             return
 
         # resolve the manifest to point to all the merged files
         self.ui.status("resolving manifests\n")
         ma = self.manifest.ancestor(mm, mo)
-        mmap = self.manifest.read(mm) # mine
         omap = self.manifest.read(mo) # other
         amap = self.manifest.read(ma) # ancestor
+        mmap = self.manifest.read(mm) # mine
+        self.ui.debug("ancestor %s local %s other %s\n" %
+                      (short(ma), short(mm), short(mo)))
         nmap = {}
 
         for f, mid in mmap.iteritems():
             if f in omap:
-                if mid != omap[f]:
-                    nmap[f] = new.get(f, mid) # use merged version
+                if mid != omap[f]:
+                    self.ui.debug("%s versions differ\n" % f)
+                    if f in new: self.ui.debug("%s updated in resolve\n" % f)
+                    # use merged version or local version
+                    nmap[f] = new.get(f, mid)
                 else:
-                    nmap[f] = new.get(f, mid) # they're the same
+                    nmap[f] = mid # keep ours
                 del omap[f]
             elif f in amap:
-                if mid != amap[f]:
+                if mid != amap[f]:
+                    self.ui.debug("local changed %s which other deleted\n" % f)
                     pass # we should prompt here
                 else:
+                    self.ui.debug("other deleted %s\n" % f)
                     pass # other deleted it
             else:
-                nmap[f] = new.get(f, mid) # we created it
+                self.ui.debug("local created %s\n" % f)
+                nmap[f] = mid # we created it
 
         del mmap
 
         for f, oid in omap.iteritems():
             if f in amap:
                 if oid != amap[f]:
+                    self.ui.debug("other changed %s which we deleted\n" % f)
                     pass # this is the nasty case, we should prompt
                 else:
                     pass # probably safe
             else:
+                self.ui.debug("remote created %s\n" % f)
                 nmap[f] = new.get(f, oid) # remote created it
 
         del omap
@@ -808,18 +725,56 @@ class localrepository:
         new.sort()
         if co == cn: cn = -1
 
-        edittext = "\n"+"".join(["HG: changed %s\n" % f for f in new])
+        edittext = "\nHG: merge resolve\n" + \
+                   "".join(["HG: changed %s\n" % f for f in new])
         edittext = self.ui.edit(edittext)
         n = self.changelog.add(node, new, edittext, tr, co, cn)
 
         tr.close()
 
+    def merge3(self, fl, fn, my, other, transaction, link):
+        """perform a 3-way merge and append the result"""
+
+        def temp(prefix, node):
+            pre = "%s~%s." % (os.path.basename(fn), prefix)
+            (fd, name) = tempfile.mkstemp("", pre)
+            f = os.fdopen(fd, "w")
+            f.write(fl.revision(node))
+            f.close()
+            return name
+
+        base = fl.ancestor(my, other)
+        self.ui.note("resolving %s\n" % fn)
+        self.ui.debug("local %s remote %s ancestor %s\n" %
+                      (short(my), short(other), short(base)))
+
+        if my == base:
+            text = fl.revision(other)
+        else:
+            a = temp("local", my)
+            b = temp("remote", other)
+            c = temp("parent", base)
+
+            cmd = os.environ["HGMERGE"]
+            self.ui.debug("invoking merge with %s\n" % cmd)
+            r = os.system("%s %s %s %s" % (cmd, a, b, c))
+            if r:
+                raise "Merge failed!"
+
+            text = open(a).read()
+            os.unlink(a)
+            os.unlink(b)
+            os.unlink(c)
+
+        return fl.addrevision(text, transaction, link, my, other)
+
 class remoterepository:
     def __init__(self, ui, path):
         self.url = path.replace("hg://", "http://", 1)
         self.ui = ui
 
     def do_cmd(self, cmd, **args):
+        self.ui.debug("sending %s command\n" % cmd)
         q = {"cmd": cmd}
         q.update(args)
         qs = urllib.urlencode(q)
@@ -856,8 +811,10 @@ def repository(ui, path=None, create=0):
         return localrepository(ui, path, create)
 
 class ui:
-    def __init__(self, verbose=False, debug=False):
-        self.verbose = verbose
+    def __init__(self, verbose=False, debug=False, quiet=False):
+        self.quiet = quiet and not verbose and not debug
+        self.verbose = verbose or debug
+        self.debugflag = debug
     def write(self, *args):
         for a in args:
             sys.stdout.write(str(a))
@@ -868,13 +825,13 @@ class ui:
             if re.match(pat, r):
                 return r
     def status(self, *msg):
-        self.write(*msg)
+        if not self.quiet: self.write(*msg)
     def warn(self, msg):
         self.write(*msg)
     def note(self, msg):
         if self.verbose: self.write(*msg)
     def debug(self, msg):
-        if self.debug: self.write(*msg)
+        if self.debugflag: self.write(*msg)
     def edit(self, text):
         (fd, name) = tempfile.mkstemp("hg")
         f = os.fdopen(fd, "w")
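The new filelog.annotate above walks the first-parent chain from oldest to newest and lets difflib's SequenceMatcher attribute each inserted or replaced line to the revision that introduced it. A minimal standalone sketch of the same idea over plain strings; the offset bookkeeping collapses to a single update of (j2-j1)-(i2-i1) per opcode, which keeps later slice positions aligned after earlier length changes (function and variable names here are illustrative, not hg's):

from difflib import SequenceMatcher

def annotate(versions):
    """versions: list of (rev, text), oldest first.
    Returns [(rev, line)] for the newest text, tagging each line
    with the revision that last touched it."""
    prev, ann = [], []
    for rev, text in versions:
        curr = text.splitlines(True)
        offset = 0
        for op, i1, i2, j1, j2 in \
                SequenceMatcher(None, prev, curr).get_opcodes():
            if op in ('insert', 'replace'):
                # overwrite the changed region with freshly tagged lines
                ann[i1 + offset:i2 + offset] = [(rev, l) for l in curr[j1:j2]]
            elif op == 'delete':
                del ann[i1 + offset:i2 + offset]
            # track how much earlier edits shifted later positions
            offset += (j2 - j1) - (i2 - i1)
        assert len(ann) == len(curr)
        prev = curr
    return ann

# e.g. annotate([(0, "a\nb\n"), (1, "a\nB\nc\n")])
#      -> [(0, 'a\n'), (1, 'B\n'), (1, 'c\n')]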
diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python
-import difflib, struct
-from cStringIO import StringIO
+import difflib, struct, mmap
+
+devzero = file("/dev/zero")
 
 def unidiff(a, ad, b, bd, fn):
     if not a and not b: return ""
@@ -52,24 +53,82 @@ def diff(a, b, sorted=0):
 
     return "".join(bin)
 
-def patch(a, bin):
-    last = pos = 0
-    r = []
+# This attempts to apply a series of patches in time proportional to
+# the total size of the patches, rather than patches * len(text). This
+# means rather than shuffling strings around, we shuffle around
+# pointers to fragments with fragment lists.
+#
+# When the fragment lists get too long, we collapse them. To do this
+# efficiently, we do all our operations inside a buffer created by
+# mmap and simply use memmove. This avoids creating a bunch of large
+# temporary string buffers.
+
+def patches(a, bins):
+    if not bins: return a
+
+    plens = [len(x) for x in bins]
+    pl = sum(plens)
+    bl = len(a) + pl
+    tl = bl + bl + pl # enough for the patches and two working texts
+    b1, b2 = 0, bl
+
+    if not tl: return a
+
+    m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
+
+    # load our original text
+    m.write(a)
+    frags = [(len(a), b1)]
+
+    # copy all the patches into our segment so we can memmove from them
+    pos = b2 + bl
+    m.seek(pos)
+    for p in bins: m.write(p)
 
-    c = 0
-    while pos < len(bin):
-        p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
-        pos += 12
-        r.append(a[last:p1])
-        r.append(bin[pos:pos + l])
-        pos += l
-        last = p2
-        c += 1
-    r.append(a[last:])
+    def pull(dst, src, l): # pull l bytes from src
+        while l:
+            f = src.pop(0)
+            if f[0] > l: # do we need to split?
+                src.insert(0, (f[0] - l, f[1] + l))
+                dst.append((l, f[1]))
+                return
+            dst.append(f)
+            l -= f[0]
+
+    def collect(buf, list):
+        start = buf
+        for l, p in list:
+            m.move(buf, p, l)
+            buf += l
+        return (buf - start, start)
+
+    for plen in plens:
+        # if our list gets too long, execute it
+        if len(frags) > 128:
+            b2, b1 = b1, b2
+            frags = [collect(b1, frags)]
 
-    return "".join(r)
-
+        new = []
+        end = pos + plen
+        last = 0
+        while pos < end:
+            p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
+            pull(new, frags, p1 - last) # what didn't change
+            pull([], frags, p2 - p1)    # what got deleted
+            new.append((l, pos + 12))   # what got added
+            pos += l + 12
+            last = p2
+        frags = new + frags             # what was left at the end
 
+    t = collect(b2, frags)
+
+    return m[t[1]:t[1] + t[0]]
+
+def patch(a, bin):
+    return patches(a, [bin])
+
+try:
+    import mpatch
+    patches = mpatch.patches
+except:
+    pass
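The removed pure-Python patch() above documents the revlog delta wire format exactly: a delta is a sequence of fragments, each a big-endian (start, end, length) triple packed with ">lll" followed by length bytes of replacement data. For reference, a minimal applier for a single delta in that format; the fragment-list patches() above exists precisely to avoid this function's repeated string copying when many deltas are applied in a row (this is a sketch for illustration, not hg's code):

import struct

def apply_delta(text, delta):
    """Apply one binary delta: (start, end, len) ">lll" headers,
    each followed by len bytes of replacement data."""
    out, last, pos = [], 0, 0
    while pos < len(delta):
        p1, p2, l = struct.unpack(">lll", delta[pos:pos + 12])
        pos += 12
        out.append(text[last:p1])       # unchanged region before the hunk
        out.append(delta[pos:pos + l])  # replacement bytes
        pos += l
        last = p2                       # skip the replaced region
    out.append(text[last:])             # tail after the final hunk
    return "".join(out)

Applying n deltas this way costs O(n * len(text)) in string copies, which is the cost the mmap-based fragment lists amortize away.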
diff --git a/mercurial/mpatch.c b/mercurial/mpatch.c
new file mode 100644
--- /dev/null
+++ b/mercurial/mpatch.c
@@ -0,0 +1,293 @@
+/*
+ mpatch.c - efficient binary patching for Mercurial
+
+ This implements a patch algorithm that's O(m + nlog n) where m is the
+ size of the output and n is the number of patches.
+
+ Given a list of binary patches, it unpacks each into a hunk list,
+ then combines the hunk lists with a treewise recursion to form a
+ single hunk list. This hunk list is then applied to the original
+ text.
+
+ The text (or binary) fragments are copied directly from their source
+ Python objects into a preallocated output string to avoid the
+ allocation of intermediate Python objects. Working memory is about 2x
+ the total number of hunks.
+
+ Copyright 2005 Matt Mackall <mpm@selenic.com>
+
+ This software may be used and distributed according to the terms
+ of the GNU General Public License, incorporated herein by reference.
+*/
+
+#include <Python.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+
+static char mpatch_doc[] = "Efficient binary patching.";
+
+struct frag {
+	int start, end, len;
+	char *data;
+};
+
+struct flist {
+	struct frag *base, *head, *tail;
+};
+
+static struct flist *lalloc(int size)
+{
+	struct flist *a;
+
+	a = malloc(sizeof(struct flist));
+	a->head = a->tail = a->base = malloc(sizeof(struct frag) * size);
+	return a;
+}
+
+static void lfree(struct flist *a)
+{
+	free(a->base);
+	free(a);
+}
+
+static int lsize(struct flist *a)
+{
+	return a->tail - a->head;
+}
+
+/* move hunks in source that start before cut to dest, compensating
+   for changes in offset. the last hunk may be split if necessary.
+*/
+static int gather(struct flist *dest, struct flist *src, int cut, int offset)
+{
+	struct frag *d = dest->tail, *s = src->head;
+	int postend, c, l;
+
+	while (s != src->tail) {
+		if (s->start + offset >= cut)
+			break; /* we've gone far enough */
+
+		postend = offset + s->start + s->len;
+		if (postend <= cut) {
+			/* save this hunk */
+			offset += s->start + s->len - s->end;
+			*d++ = *s++;
+		}
+		else {
+			/* break up this hunk */
+			c = cut - offset;
+			if (s->end < c)
+				c = s->end;
+			l = cut - offset - s->start;
+			if (s->len < l)
+				l = s->len;
+
+			offset += s->start + l - c;
+
+			d->start = s->start;
+			d->end = c;
+			d->len = l;
+			d->data = s->data;
+			d++;
+			s->start = c;
+			s->len = s->len - l;
+			s->data = s->data + l;
+
+			break;
+		}
+	}
+
+	dest->tail = d;
+	src->head = s;
+	return offset;
+}
+
+/* like gather, but with no output list */
+static int discard(struct flist *src, int cut, int offset)
+{
+	struct frag *s = src->head;
+	int postend, c, l;
+
+	while (s != src->tail) {
+		if (s->start + offset >= cut)
+			break;
+
+		postend = offset + s->start + s->len;
+		if (postend <= cut) {
+			offset += s->start + s->len - s->end;
+			s++;
+		}
+		else {
+			c = cut - offset;
+			if (s->end < c)
+				c = s->end;
+			l = cut - offset - s->start;
+			if (s->len < l)
+				l = s->len;
+
+			offset += s->start + l - c;
+			s->start = c;
+			s->len = s->len - l;
+			s->data = s->data + l;
+
+			break;
+		}
+	}
+
+	src->head = s;
+	return offset;
+}
+
+/* combine hunk lists a and b, while adjusting b for offset changes in a.
+   this deletes a and b and returns the resultant list. */
+static struct flist *combine(struct flist *a, struct flist *b)
+{
+	struct flist *c;
+	struct frag *bh = b->head, *ct;
+	int offset = 0, post;
+
+	c = lalloc((lsize(a) + lsize(b)) * 2);
+
+	while (bh != b->tail) {
+		/* save old hunks */
+		offset = gather(c, a, bh->start, offset);
+
+		/* discard replaced hunks */
+		post = discard(a, bh->end, offset);
+
+		/* insert new hunk */
+		ct = c->tail;
+		ct->start = bh->start - offset;
+		ct->end = bh->end - post;
+		ct->len = bh->len;
+		ct->data = bh->data;
+		c->tail++;
+		bh++;
+		offset = post;
+	}
+
+	/* hold on to tail from a */
+	memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
+	c->tail += lsize(a);
+	lfree(a);
+	lfree(b);
+	return c;
+}
+
+/* decode a binary patch into a hunk list */
+static struct flist *decode(char *bin, int len)
+{
+	struct flist *l;
+	struct frag *lt;
+	char *end = bin + len;
+
+	/* assume worst case size, we won't have many of these lists */
+	l = lalloc(len / 12);
+	lt = l->tail;
+
+	while (bin < end) {
+		lt->start = ntohl(*(uint32_t *)bin);
+		lt->end = ntohl(*(uint32_t *)(bin + 4));
+		lt->len = ntohl(*(uint32_t *)(bin + 8));
+		lt->data = bin + 12;
+		bin += 12 + lt->len;
+		lt++;
+	}
+
+	l->tail = lt;
+	return l;
+}
+
+/* calculate the size of resultant text */
+static int calcsize(int len, struct flist *l)
+{
+	int outlen = 0, last = 0;
+	struct frag *f = l->head;
+
+	while (f != l->tail) {
+		outlen += f->start - last;
+		last = f->end;
+		outlen += f->len;
+		f++;
+	}
+
+	outlen += len - last;
+	return outlen;
+}
+
+static void apply(char *buf, char *orig, int len, struct flist *l)
+{
+	struct frag *f = l->head;
+	int last = 0;
+	char *p = buf;
+
+	while (f != l->tail) {
+		memcpy(p, orig + last, f->start - last);
+		p += f->start - last;
+		memcpy(p, f->data, f->len);
+		last = f->end;
+		p += f->len;
+		f++;
+	}
+	memcpy(p, orig + last, len - last);
+}
+
+/* recursively generate a patch of all bins between start and end */
+static struct flist *fold(PyObject *bins, int start, int end)
+{
+	int len;
+
+	if (start + 1 == end) {
+		/* trivial case, output a decoded list */
+		PyObject *tmp = PyList_GetItem(bins, start);
+		return decode(PyString_AsString(tmp), PyString_Size(tmp));
+	}
+
+	/* divide and conquer, memory management is elsewhere */
+	len = (end - start) / 2;
+	return combine(fold(bins, start, start + len),
+		       fold(bins, start + len, end));
+}
+
+static PyObject *
+patches(PyObject *self, PyObject *args)
+{
+	PyObject *text, *bins, *result;
+	struct flist *patch;
+	char *in, *out;
+	int len, outlen;
+
+	if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
+		return NULL;
+
+	len = PyList_Size(bins);
+	if (!len) {
+		/* nothing to do */
+		Py_INCREF(text);
+		return text;
+	}
+
+	patch = fold(bins, 0, len);
+	outlen = calcsize(PyString_Size(text), patch);
+	result = PyString_FromStringAndSize(NULL, outlen);
+	in = PyString_AsString(text);
+	out = PyString_AsString(result);
+	apply(out, in, PyString_Size(text), patch);
+	lfree(patch);
+
+	return result;
+}
+
+static PyMethodDef methods[] = {
+	{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
+	{NULL, NULL}
+};
+
+PyMODINIT_FUNC
+initmpatch(void)
+{
+	Py_InitModule3("mpatch", methods, mpatch_doc);
+}
+
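The doc comment in mpatch.c states the key idea: decode every patch into a hunk list, then merge the lists with a balanced ("treewise") recursion so each hunk takes part in O(log n) merges rather than O(n). The recursion shape, sketched in Python with combine() left abstract since that is where gather()/discard()'s offset adjustment and hunk splitting live (names are illustrative):

def fold(patches, combine):
    # patches: non-empty list of hunk lists, oldest patch first;
    # combine(a, b): merge two hunk lists, adjusting b's positions
    # for the offset changes a introduces
    if len(patches) == 1:
        return patches[0]
    mid = len(patches) // 2
    return combine(fold(patches[:mid], combine),
                   fold(patches[mid:], combine))

Because combine is associative over successive patches, halving the list keeps every hunk's rewrite count logarithmic, which is where the O(m + n log n) bound in the header comment comes from.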
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -13,6 +13,7 @@ from mercurial import mdiff
 
 def hex(node): return binascii.hexlify(node)
 def bin(node): return binascii.unhexlify(node)
+def short(node): return hex(node[:4])
 
 def compress(text):
     return zlib.compress(text)
@@ -28,26 +29,97 @@ def hash(text, p1, p2):
 nullid = "\0" * 20
 indexformat = ">4l20s20s20s"
 
+class lazyparser:
+    def __init__(self, data):
+        self.data = data
+        self.s = struct.calcsize(indexformat)
+        self.l = len(data)/self.s
+        self.index = [None] * self.l
+        self.map = {nullid: -1}
+
+        if 0:
+            n = 0
+            i = self.data
+            s = struct.calcsize(indexformat)
+            for f in xrange(0, len(i), s):
+                # offset, size, base, linkrev, p1, p2, nodeid
+                e = struct.unpack(indexformat, i[f:f + s])
+                self.map[e[6]] = n
+                self.index.append(e)
+                n += 1
+
+    def load(self, pos):
+        block = pos / 1000
+        i = block * 1000
+        end = min(self.l, i + 1000)
+        while i < end:
+            d = self.data[i * self.s: (i + 1) * self.s]
+            e = struct.unpack(indexformat, d)
+            self.index[i] = e
+            self.map[e[6]] = i
+            i += 1
+
+class lazyindex:
+    def __init__(self, parser):
+        self.p = parser
+    def __len__(self):
+        return len(self.p.index)
+    def __getitem__(self, pos):
+        i = self.p.index[pos]
+        if not i:
+            self.p.load(pos)
+            return self.p.index[pos]
+        return i
+    def append(self, e):
+        self.p.index.append(e)
+
+class lazymap:
+    def __init__(self, parser):
+        self.p = parser
+    def load(self, key):
+        n = self.p.data.find(key)
+        if n < 0: raise KeyError("node " + hex(key))
+        pos = n / self.p.s
+        self.p.load(pos)
+    def __contains__(self, key):
+        try:
+            self[key]
+            return True
+        except KeyError:
+            return False
+    def __iter__(self):
+        for i in xrange(self.p.l):
+            try:
+                yield self.p.index[i][6]
+            except:
+                self.p.load(i)
+                yield self.p.index[i][6]
+    def __getitem__(self, key):
+        try:
+            return self.p.map[key]
+        except KeyError:
+            try:
+                self.load(key)
+                return self.p.map[key]
+            except KeyError:
+                raise KeyError("node " + hex(key))
+    def __setitem__(self, key, val):
+        self.p.map[key] = val
+
 class revlog:
     def __init__(self, opener, indexfile, datafile):
         self.indexfile = indexfile
         self.datafile = datafile
-        self.index = []
         self.opener = opener
         self.cache = None
-        self.nodemap = {nullid: -1}
 
         # read the whole index for now, handle on-demand later
         try:
-            n = 0
             i = self.opener(self.indexfile).read()
-            s = struct.calcsize(indexformat)
-            for f in range(0, len(i), s):
-                # offset, size, base, linkrev, p1, p2, nodeid
-                e = struct.unpack(indexformat, i[f:f + s])
-                self.nodemap[e[6]] = n
-                self.index.append(e)
-                n += 1
-        except IOError: pass
+        except IOError:
+            i = ""
+
+        parser = lazyparser(i)
+        self.index = lazyindex(parser)
+        self.nodemap = lazymap(parser)
 
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
@@ -78,17 +150,11 @@ class revlog:
 
         return None
 
-    def revisions(self, list):
-        # this can be optimized to do spans, etc
-        # be stupid for now
-        for node in list:
-            yield self.revision(node)
-
     def diff(self, a, b):
         return mdiff.textdiff(a, b)
 
-    def patch(self, text, patch):
-        return mdiff.patch(text, patch)
+    def patches(self, t, pl):
+        return mdiff.patches(t, pl)
 
     def revision(self, node):
         if node == nullid: return ""
@@ -114,15 +180,18 @@ class revlog:
 
         last = self.length(base)
         text = decompress(data[:last])
+        bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
-            b = decompress(data[last:last + s])
-            text = self.patch(text, b)
+            bins.append(decompress(data[last:last + s]))
             last = last + s
+        text = mdiff.patches(text, bins)
+
         (p1, p2) = self.parents(node)
         if node != hash(text, p1, p2):
-            raise "integrity check failed on %s:%d" % (self.datafile, rev)
+            raise IOError("integrity check failed on %s:%d"
+                          % (self.datafile, rev))
 
         self.cache = (node, rev, text)
         return text
@@ -142,7 +211,10 @@ class revlog:
         start = self.start(base)
         end = self.end(t)
         prev = self.revision(self.tip())
-        data = compress(self.diff(prev, text))
+        d = self.diff(prev, text)
+        if self.patches(prev, [d]) != text:
+            raise AssertionError("diff failed")
+        data = compress(d)
         dist = end - start + len(data)
 
         # full versions are inserted when the needed deltas
@@ -205,34 +277,9 @@ class revlog:
 
         return nullid
 
-    def mergedag(self, other, transaction, linkseq, accumulate = None):
-        """combine the nodes from other's DAG into ours"""
-        old = self.tip()
-        i = self.count()
-        l = []
-
-        # merge the other revision log into our DAG
-        for r in range(other.count()):
-            id = other.node(r)
-            if id not in self.nodemap:
-                (xn, yn) = other.parents(id)
-                l.append((id, xn, yn))
-                self.nodemap[id] = i
-                i += 1
-
-        # merge node date for new nodes
-        r = other.revisions([e[0] for e in l])
-        for e in l:
-            t = r.next()
-            if accumulate: accumulate(t)
-            self.addrevision(t, transaction, linkseq.next(), e[1], e[2])
-
-        # return the unmerged heads for later resolving
-        return (old, self.tip())
-
     def group(self, linkmap):
         # given a list of changeset revs, return a set of deltas and
-        # metadata corresponding to nodes the first delta is
+        # metadata corresponding to nodes. the first delta is
         # parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         # have this parent as it has all history before these
         # changesets. parent is parent[0]
@@ -301,14 +348,12 @@ class revlog:
 
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
-            for r in range(base + 1, rev + 1):
-                b = decompress(chunks[r])
-                text = self.patch(text, b)
-            return text
+            bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)]
+            return mdiff.patches(text, bins)
 
         # build deltas
         deltas = []
-        for d in range(0, len(revs) - 1):
+        for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
@@ -349,6 +394,8 @@ class revlog:
 
         # retrieve the parent revision of the delta chain
         chain = data[24:44]
+        if not chain in self.nodemap:
+            raise "unknown base %s" % short(chain[:4])
 
         # track the base of the current delta log
         r = self.count()
@@ -374,6 +421,8 @@ class revlog:
             l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                                 data[pos:pos+84])
             link = linkmapper(cs)
+            if node in self.nodemap:
+                raise "already have %s" % hex(node[:4])
             delta = data[pos + 84:pos + l]
             pos += l
@@ -391,7 +440,7 @@ class revlog:
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
-                text = self.patch(text, delta)
+                text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise "consistency error adding group"
diff --git a/mercurial/transaction.py b/mercurial/transaction.py
--- a/mercurial/transaction.py
+++ b/mercurial/transaction.py
@@ -14,16 +14,16 @@
 import os
 
 class transaction:
-    def __init__(self, opener, journal):
+    def __init__(self, opener, journal, after = None):
         self.opener = opener
+        self.after = after
         self.entries = []
         self.map = {}
         self.journal = journal
 
         # abort here if the journal already exists
         if os.path.exists(self.journal):
-            print "journal already exists, recovering"
-            self.recover()
+            raise "journal already exists!"
 
         self.file = open(self.journal, "w")
 
@@ -43,7 +43,10 @@ class transaction:
     def close(self):
         self.file.close()
         self.entries = []
-        os.unlink(self.journal)
+        if self.after:
+            os.rename(self.journal, self.after)
+        else:
+            os.unlink(self.journal)
 
     def abort(self):
         if not self.entries: return
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -5,14 +5,15 @@
 # './setup.py install', or
 # './setup.py --help' for more options
 
-from distutils.core import setup
+from distutils.core import setup, Extension
 
 setup(name='mercurial',
-      version='0.4f',
-      author='Matt Mackall',
-      author_email='mpm@selenic.com',
-      url='http://selenic.com/mercurial',
-      description='scalable distributed SCM',
-      license='GNU GPL',
-      packages=['mercurial'],
-      scripts=['hg', 'hgweb.py'])
+      version='0.4f',
+      author='Matt Mackall',
+      author_email='mpm@selenic.com',
+      url='http://selenic.com/mercurial',
+      description='scalable distributed SCM',
+      license='GNU GPL',
+      packages=['mercurial'],
+      ext_modules=[Extension('mercurial.mpatch', ['mercurial/mpatch.c'])],
+      scripts=['hg', 'hgweb.py'])
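The transaction change above does two things: a leftover journal now raises instead of silently auto-recovering, and a successfully closed journal can be renamed aside (the new "after" argument, pointed at .hg/undo by localrepository.transaction) so the last pull remains reversible. A standalone sketch of journal-based rollback under those rules; the real class goes through an opener and records a (file, offset) entry before each revlog append, so the details here (add() format, direct open()) are assumptions for illustration:

import os

class Transaction:
    def __init__(self, journal, after=None):
        self.entries = []
        self.journal = journal
        self.after = after                  # keep journal here on success
        if os.path.exists(journal):
            raise Exception("journal already exists!")
        self.file = open(journal, "w")

    def add(self, name, offset):
        # record each file's pre-append length and flush immediately,
        # so a crash can always truncate back to a consistent state
        self.entries.append((name, offset))
        self.file.write("%s\0%d\n" % (name, offset))
        self.file.flush()

    def close(self):
        self.file.close()
        self.entries = []
        if self.after:
            os.rename(self.journal, self.after)  # becomes the undo log
        else:
            os.unlink(self.journal)

    def abort(self):
        # roll back: truncate every touched file to its recorded length
        self.file.close()
        for name, offset in self.entries:
            f = open(name, "a")
            f.truncate(offset)
            f.close()
        os.unlink(self.journal)

Renaming rather than deleting the journal is the whole trick: an undo is then just replaying the saved journal's truncations, with no extra bookkeeping during the transaction itself.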