diff --git a/mercurial/verify.py b/mercurial/verify.py new file mode 100644 --- /dev/null +++ b/mercurial/verify.py @@ -0,0 +1,200 @@ +# verify.py - repository integrity checking for Mercurial +# +# Copyright 2006 Matt Mackall +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. + +from node import * +from i18n import gettext as _ +import revlog, mdiff + +def verify(repo): + filelinkrevs = {} + filenodes = {} + changesets = revisions = files = 0 + errors = [0] + warnings = [0] + neededmanifests = {} + + def err(msg): + repo.ui.warn(msg + "\n") + errors[0] += 1 + + def warn(msg): + repo.ui.warn(msg + "\n") + warnings[0] += 1 + + def checksize(obj, name): + d = obj.checksize() + if d[0]: + err(_("%s data length off by %d bytes") % (name, d[0])) + if d[1]: + err(_("%s index contains %d extra bytes") % (name, d[1])) + + def checkversion(obj, name): + if obj.version != revlog.REVLOGV0: + if not revlogv1: + warn(_("warning: `%s' uses revlog format 1") % name) + elif revlogv1: + warn(_("warning: `%s' uses revlog format 0") % name) + + revlogv1 = repo.revlogversion != revlog.REVLOGV0 + if repo.ui.verbose or revlogv1 != repo.revlogv1: + repo.ui.status(_("repository uses revlog format %d\n") % + (revlogv1 and 1 or 0)) + + seen = {} + repo.ui.status(_("checking changesets\n")) + checksize(repo.changelog, "changelog") + + for i in range(repo.changelog.count()): + changesets += 1 + n = repo.changelog.node(i) + l = repo.changelog.linkrev(n) + if l != i: + err(_("incorrect link (%d) for changeset revision %d") %(l, i)) + if n in seen: + err(_("duplicate changeset at revision %d") % i) + seen[n] = 1 + + for p in repo.changelog.parents(n): + if p not in repo.changelog.nodemap: + err(_("changeset %s has unknown parent %s") % + (short(n), short(p))) + try: + changes = repo.changelog.read(n) + except KeyboardInterrupt: + repo.ui.warn(_("interrupted")) + raise + except Exception, inst: + err(_("unpacking changeset %s: %s") % (short(n), inst)) + continue + + neededmanifests[changes[0]] = n + + for f in changes[3]: + filelinkrevs.setdefault(f, []).append(i) + + seen = {} + repo.ui.status(_("checking manifests\n")) + checkversion(repo.manifest, "manifest") + checksize(repo.manifest, "manifest") + + for i in range(repo.manifest.count()): + n = repo.manifest.node(i) + l = repo.manifest.linkrev(n) + + if l < 0 or l >= repo.changelog.count(): + err(_("bad manifest link (%d) at revision %d") % (l, i)) + + if n in neededmanifests: + del neededmanifests[n] + + if n in seen: + err(_("duplicate manifest at revision %d") % i) + + seen[n] = 1 + + for p in repo.manifest.parents(n): + if p not in repo.manifest.nodemap: + err(_("manifest %s has unknown parent %s") % + (short(n), short(p))) + + try: + delta = mdiff.patchtext(repo.manifest.delta(n)) + except KeyboardInterrupt: + repo.ui.warn(_("interrupted")) + raise + except Exception, inst: + err(_("unpacking manifest %s: %s") % (short(n), inst)) + continue + + try: + ff = [ l.split('\0') for l in delta.splitlines() ] + for f, fn in ff: + filenodes.setdefault(f, {})[bin(fn[:40])] = 1 + except (ValueError, TypeError), inst: + err(_("broken delta in manifest %s: %s") % (short(n), inst)) + + repo.ui.status(_("crosschecking files in changesets and manifests\n")) + + for m, c in neededmanifests.items(): + err(_("Changeset %s refers to unknown manifest %s") % + (short(m), short(c))) + del neededmanifests + + for f in filenodes: + if f not in filelinkrevs: + err(_("file %s in manifest but not in changesets") % f) + + for f in filelinkrevs: + if f not in filenodes: + err(_("file %s in changeset but not in manifest") % f) + + repo.ui.status(_("checking files\n")) + ff = filenodes.keys() + ff.sort() + for f in ff: + if f == "/dev/null": + continue + files += 1 + if not f: + err(_("file without name in manifest %s") % short(n)) + continue + fl = repo.file(f) + checkversion(fl, f) + checksize(fl, f) + + nodes = {nullid: 1} + seen = {} + for i in range(fl.count()): + revisions += 1 + n = fl.node(i) + + if n in seen: + err(_("%s: duplicate revision %d") % (f, i)) + if n not in filenodes[f]: + err(_("%s: %d:%s not in manifests") % (f, i, short(n))) + else: + del filenodes[f][n] + + flr = fl.linkrev(n) + if flr not in filelinkrevs.get(f, []): + err(_("%s:%s points to unexpected changeset %d") + % (f, short(n), flr)) + else: + filelinkrevs[f].remove(flr) + + # verify contents + try: + t = fl.read(n) + except KeyboardInterrupt: + repo.ui.warn(_("interrupted")) + raise + except Exception, inst: + err(_("unpacking file %s %s: %s") % (f, short(n), inst)) + + # verify parents + (p1, p2) = fl.parents(n) + if p1 not in nodes: + err(_("file %s:%s unknown parent 1 %s") % + (f, short(n), short(p1))) + if p2 not in nodes: + err(_("file %s:%s unknown parent 2 %s") % + (f, short(n), short(p1))) + nodes[n] = 1 + + # cross-check + for node in filenodes[f]: + err(_("node %s in manifests not in %s") % (hex(node), f)) + + repo.ui.status(_("%d files, %d changesets, %d total revisions\n") % + (files, changesets, revisions)) + + if warnings[0]: + repo.ui.warn(_("%d warnings encountered!\n") % warnings[0]) + if errors[0]: + repo.ui.warn(_("%d integrity errors encountered!\n") % errors[0]) + return 1 +