# HG changeset patch
# User Matt Mackall
# Date 1130441176 25200
# Node ID 0847c45ffee6189bf3a6ee98a0a584914a4b969e
# Parent  214f42f23a3bbf04efb96ec1c02097cad11d018d
# Parent  dc1bbc456b96db68f5fde618e13b8fcf9a078dae
Merge bundle -r work from Eric Hopper

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -693,7 +693,7 @@ def clone(ui, source, dest=None, **opts)
     copy = False
     if other.dev() != -1:
         abspath = os.path.abspath(source)
-        if not opts['pull']:
+        if not opts['pull'] and not opts['rev']:
             copy = True
 
     if copy:
@@ -723,8 +723,14 @@ def clone(ui, source, dest=None, **opts)
         repo = hg.repository(ui, dest)
 
     else:
+        revs = None
+        if opts['rev']:
+            if not other.local():
+                raise util.Abort("clone -r not supported yet for remote repositories.")
+            else:
+                revs = [other.lookup(rev) for rev in opts['rev']]
         repo = hg.repository(ui, dest, create=1)
-        repo.pull(other)
+        repo.pull(other, heads = revs)
 
     f = repo.opener("hgrc", "w", text=True)
     f.write("[paths]\n")
@@ -1396,7 +1402,7 @@ def incoming(ui, repo, source="default",
     o = repo.findincoming(other)
     if not o:
         return
-    o = other.newer(o)
+    o = other.changelog.nodesbetween(o)[0]
     if opts['newest_first']:
         o.reverse()
     for n in o:
@@ -1561,7 +1567,7 @@ def outgoing(ui, repo, dest="default-pus
     dest = ui.expandpath(dest, repo.root)
     other = hg.repository(ui, dest)
     o = repo.findoutgoing(other)
-    o = repo.newer(o)
+    o = repo.changelog.nodesbetween(o)[0]
     if opts['newest_first']:
         o.reverse()
     for n in o:
@@ -1643,7 +1649,12 @@ def pull(ui, repo, source="default", **o
         ui.setconfig("ui", "remotecmd", opts['remotecmd'])
 
     other = hg.repository(ui, source)
-    r = repo.pull(other)
+    revs = None
+    if opts['rev'] and not other.local():
+        raise util.Abort("pull -r doesn't work for remote repositories yet")
+    elif opts['rev']:
+        revs = [other.lookup(rev) for rev in opts['rev']]
+    r = repo.pull(other, heads=revs)
     if not r:
         if opts['update']:
             return update(ui, repo)
@@ -2193,6 +2204,7 @@ table = {
         [('U', 'noupdate', None, _('do not update the new working directory')),
          ('e', 'ssh', "", _('specify ssh command to use')),
          ('', 'pull', None, _('use pull protocol to copy metadata')),
+         ('r', 'rev', [], _('a changeset you would like to have after cloning')),
          ('', 'remotecmd', "", _('specify hg command to run on the remote side'))],
         _('hg clone [OPTION]... SOURCE [DEST]')),
     "^commit|ci":
@@ -2304,8 +2316,9 @@ table = {
         (pull,
          [('u', 'update', None, _('update the working directory to tip after pull')),
           ('e', 'ssh', "", _('specify ssh command to use')),
+          ('r', 'rev', [], _('a specific revision you would like to pull')),
           ('', 'remotecmd', "", _('specify hg command to run on the remote side'))],
-         _('hg pull [-u] [-e FILE] [--remotecmd FILE] [SOURCE]')),
+         _('hg pull [-u] [-e FILE] [-r rev] [--remotecmd FILE] [SOURCE]')),
     "^push":
         (push,
          [('f', 'force', None, _('force push')),
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -727,32 +727,6 @@ class localrepository:
 
         return r
 
-    def newer(self, nodes):
-        m = {}
-        nl = []
-        pm = {}
-        cl = self.changelog
-        t = l = cl.count()
-
-        # find the lowest numbered node
-        for n in nodes:
-            l = min(l, cl.rev(n))
-            m[n] = 1
-
-        for i in xrange(l, t):
-            n = cl.node(i)
-            if n in m: # explicitly listed
-                pm[n] = 1
-                nl.append(n)
-                continue
-            for p in cl.parents(n):
-                if p in pm: # parent listed
-                    pm[n] = 1
-                    nl.append(n)
-                    break
-
-        return nl
-
     def findincoming(self, remote, base=None, heads=None):
         m = self.changelog.nodemap
         search = []
@@ -903,7 +877,7 @@ class localrepository:
             # this is the set of all roots we have to push
         return subset
 
-    def pull(self, remote):
+    def pull(self, remote, heads = None):
         lock = self.lock()
 
         # if we have an empty repo, fetch everything
@@ -917,7 +891,10 @@ class localrepository:
             self.ui.status(_("no changes found\n"))
             return 1
 
-        cg = remote.changegroup(fetch)
+        if heads is None:
+            cg = remote.changegroup(fetch)
+        else:
+            cg = remote.changegroupsubset(fetch, heads)
         return self.addchangegroup(cg)
 
     def push(self, remote, force=False):
@@ -945,40 +922,327 @@ class localrepository:
         cg = self.changegroup(update)
         return remote.addchangegroup(cg)
 
+    def changegroupsubset(self, bases, heads):
+        """This function generates a changegroup consisting of all the nodes
+        that are descendents of any of the bases, and ancestors of any of
+        the heads.
+
+        It is fairly complex as determining which filenodes and which
+        manifest nodes need to be included for the changeset to be complete
+        is non-trivial.
+
+        Another wrinkle is doing the reverse, figuring out which changeset in
+        the changegroup a particular filenode or manifestnode belongs to."""
+
+        # Set up some initial variables
+        # Make it easy to refer to self.changelog
+        cl = self.changelog
+        # msng is short for missing - compute the list of changesets in this
+        # changegroup.
+        msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)
+        # Some bases may turn out to be superfluous, and some heads may be
+        # too.  nodesbetween will return the minimal set of bases and heads
+        # necessary to re-create the changegroup.
+
+        # Known heads are the list of heads that it is assumed the recipient
+        # of this changegroup will know about.
+        knownheads = {}
+        # We assume that all parents of bases are known heads.
+        for n in bases:
+            for p in cl.parents(n):
+                if p != nullid:
+                    knownheads[p] = 1
+        knownheads = knownheads.keys()
+        if knownheads:
+            # Now that we know what heads are known, we can compute which
+            # changesets are known.  The recipient must know about all
+            # changesets required to reach the known heads from the null
+            # changeset.
+            has_cl_set, junk, junk = cl.nodesbetween(None, knownheads)
+            junk = None
+            # Transform the list into an ersatz set.
+            has_cl_set = dict.fromkeys(has_cl_set)
+        else:
+            # If there were no known heads, the recipient cannot be assumed to
+            # know about any changesets.
+            has_cl_set = {}
+
+        # Make it easy to refer to self.manifest
+        mnfst = self.manifest
+        # We don't know which manifests are missing yet
+        msng_mnfst_set = {}
+        # Nor do we know which filenodes are missing.
+        msng_filenode_set = {}
+
+        junk = mnfst.index[mnfst.count() - 1] # Get around a bug in lazyindex
+        junk = None
+
+        # A changeset always belongs to itself, so the changenode lookup
+        # function for a changenode is identity.
+        def identity(x):
+            return x
+
+        # A function generating function.  Sets up an environment for the
+        # inner function.
+        def cmp_by_rev_func(revlog):
+            # Compare two nodes by their revision number in the environment's
+            # revision history.  Since the revision number both represents the
+            # most efficient order to read the nodes in, and represents a
+            # topological sorting of the nodes, this function is often useful.
+            def cmp_by_rev(a, b):
+                return cmp(revlog.rev(a), revlog.rev(b))
+            return cmp_by_rev
+
+        # If we determine that a particular file or manifest node must be a
+        # node that the recipient of the changegroup will already have, we can
+        # also assume the recipient will have all the parents.  This function
+        # prunes them from the set of missing nodes.
+        def prune_parents(revlog, hasset, msngset):
+            haslst = hasset.keys()
+            haslst.sort(cmp_by_rev_func(revlog))
+            for node in haslst:
+                parentlst = [p for p in revlog.parents(node) if p != nullid]
+                while parentlst:
+                    n = parentlst.pop()
+                    if n not in hasset:
+                        hasset[n] = 1
+                        p = [p for p in revlog.parents(n) if p != nullid]
+                        parentlst.extend(p)
+            for n in hasset:
+                msngset.pop(n, None)
+
+        # This is a function generating function used to set up an environment
+        # for the inner function to execute in.
+        def manifest_and_file_collector(changedfileset):
+            # This is an information gathering function that gathers
+            # information from each changeset node that goes out as part of
+            # the changegroup.  The information gathered is a list of which
+            # manifest nodes are potentially required (the recipient may
+            # already have them) and total list of all files which were
+            # changed in any changeset in the changegroup.
+            #
+            # We also remember the first changenode we saw any manifest
+            # referenced by so we can later determine which changenode 'owns'
+            # the manifest.
+            def collect_manifests_and_files(clnode):
+                c = cl.read(clnode)
+                for f in c[3]:
+                    # This is to make sure we only have one instance of each
+                    # filename string for each filename.
+                    changedfileset.setdefault(f, f)
+                msng_mnfst_set.setdefault(c[0], clnode)
+            return collect_manifests_and_files
+
+        # Figure out which manifest nodes (of the ones we think might be part
+        # of the changegroup) the recipient must know about and remove them
+        # from the changegroup.
+        def prune_manifests():
+            has_mnfst_set = {}
+            for n in msng_mnfst_set:
+                # If a 'missing' manifest thinks it belongs to a changenode
+                # the recipient is assumed to have, obviously the recipient
+                # must have that manifest.
+                linknode = cl.node(mnfst.linkrev(n))
+                if linknode in has_cl_set:
+                    has_mnfst_set[n] = 1
+            prune_parents(mnfst, has_mnfst_set, msng_mnfst_set)
+
+        # Use the information collected in collect_manifests_and_files to say
+        # which changenode any manifestnode belongs to.
+        def lookup_manifest_link(mnfstnode):
+            return msng_mnfst_set[mnfstnode]
+
+        # A function generating function that sets up the initial environment
+        # for the inner function.
+        def filenode_collector(changedfiles):
+            next_rev = [0]
+            # This gathers information from each manifestnode included in the
+            # changegroup about which filenodes the manifest node references
+            # so we can include those in the changegroup too.
+            #
+            # It also remembers which changenode each filenode belongs to.  It
+            # does this by assuming that a filenode belongs to the changenode
+            # the first manifest that references it belongs to.
+            def collect_msng_filenodes(mnfstnode):
+                r = mnfst.rev(mnfstnode)
+                if r == next_rev[0]:
+                    # If the last rev we looked at was the one just previous,
+                    # we only need to see a diff.
+                    delta = mdiff.patchtext(mnfst.delta(mnfstnode))
+                    # For each line in the delta
+                    for dline in delta.splitlines():
+                        # get the filename and filenode for that line
+                        f, fnode = dline.split('\0')
+                        fnode = bin(fnode[:40])
+                        f = changedfiles.get(f, None)
+                        # And if the file is in the list of files we care
+                        # about.
+                        if f is not None:
+                            # Get the changenode this manifest belongs to
+                            clnode = msng_mnfst_set[mnfstnode]
+                            # Create the set of filenodes for the file if
+                            # there isn't one already.
+                            ndset = msng_filenode_set.setdefault(f, {})
+                            # And set the filenode's changelog node to the
+                            # manifest's if it hasn't been set already.
+                            ndset.setdefault(fnode, clnode)
+                else:
+                    # Otherwise we need a full manifest.
+                    m = mnfst.read(mnfstnode)
+                    # For every file we care about.
+                    for f in changedfiles:
+                        fnode = m.get(f, None)
+                        # If it's in the manifest
+                        if fnode is not None:
+                            # See comments above.
+                            clnode = msng_mnfst_set[mnfstnode]
+                            ndset = msng_filenode_set.setdefault(f, {})
+                            ndset.setdefault(fnode, clnode)
+                # Remember the revision we hope to see next.
+                next_rev[0] = r + 1
+            return collect_msng_filenodes
+
+        # We have a list of filenodes we think we need for a file, let's remove
+        # all those we know the recipient must have.
+        def prune_filenodes(f, filerevlog):
+            msngset = msng_filenode_set[f]
+            hasset = {}
+            # If a 'missing' filenode thinks it belongs to a changenode we
+            # assume the recipient must have, then the recipient must have
+            # that filenode.
+            for n in msngset:
+                clnode = cl.node(filerevlog.linkrev(n))
+                if clnode in has_cl_set:
+                    hasset[n] = 1
+            prune_parents(filerevlog, hasset, msngset)
+
+        # A function generator function that sets up a context for the
+        # inner function.
+        def lookup_filenode_link_func(fname):
+            msngset = msng_filenode_set[fname]
+            # Lookup the changenode the filenode belongs to.
+            def lookup_filenode_link(fnode):
+                return msngset[fnode]
+            return lookup_filenode_link
+
+        # Now that we have all these utility functions to help out and
+        # logically divide up the task, generate the group.
+        def gengroup():
+            # The set of changed files starts empty.
+            changedfiles = {}
+            # Create a changenode group generator that will call our functions
+            # back to lookup the owning changenode and collect information.
+            group = cl.group(msng_cl_lst, identity,
+                             manifest_and_file_collector(changedfiles))
+            for chnk in group:
+                yield chnk
+
+            # The list of manifests has been collected by the generator
+            # calling our functions back.
+            prune_manifests()
+            msng_mnfst_lst = msng_mnfst_set.keys()
+            # Sort the manifestnodes by revision number.
+            msng_mnfst_lst.sort(cmp_by_rev_func(mnfst))
+            # Create a generator for the manifestnodes that calls our lookup
+            # and data collection functions back.
+            group = mnfst.group(msng_mnfst_lst, lookup_manifest_link,
+                                filenode_collector(changedfiles))
+            for chnk in group:
+                yield chnk
+
+            # These are no longer needed, dereference and toss the memory for
+            # them.
+            msng_mnfst_lst = None
+            msng_mnfst_set.clear()
+
+            changedfiles = changedfiles.keys()
+            changedfiles.sort()
+            # Go through all our files in order sorted by name.
+            for fname in changedfiles:
+                filerevlog = self.file(fname)
+                # Toss out the filenodes that the recipient isn't really
+                # missing.
+                prune_filenodes(fname, filerevlog)
+                msng_filenode_lst = msng_filenode_set[fname].keys()
+                # If any filenodes are left, generate the group for them,
+                # otherwise don't bother.
+                if len(msng_filenode_lst) > 0:
+                    yield struct.pack(">l", len(fname) + 4) + fname
+                    # Sort the filenodes by their revision #
+                    msng_filenode_lst.sort(cmp_by_rev_func(filerevlog))
+                    # Create a group generator and only pass in a changenode
+                    # lookup function as we need to collect no information
+                    # from filenodes.
+                    group = filerevlog.group(msng_filenode_lst,
+                                             lookup_filenode_link_func(fname))
+                    for chnk in group:
+                        yield chnk
+                # Don't need this anymore, toss it to free memory.
+                del msng_filenode_set[fname]
+            # Signal that no more groups are left.
+            yield struct.pack(">l", 0)
+
+        return util.chunkbuffer(gengroup())
+
     def changegroup(self, basenodes):
-        genread = util.chunkbuffer
+        """Generate a changegroup of all nodes that we have that a recipient
+        doesn't.
+
+        This is much easier than the previous function as we can assume that
+        the recipient has any changenode we aren't sending them."""
+        cl = self.changelog
+        nodes = cl.nodesbetween(basenodes, None)[0]
+        revset = dict.fromkeys([cl.rev(n) for n in nodes])
+
+        def identity(x):
+            return x
+
+        def gennodelst(revlog):
+            for r in xrange(0, revlog.count()):
+                n = revlog.node(r)
+                if revlog.linkrev(n) in revset:
+                    yield n
+
+        def changed_file_collector(changedfileset):
+            def collect_changed_files(clnode):
+                c = cl.read(clnode)
+                for fname in c[3]:
+                    changedfileset[fname] = 1
+            return collect_changed_files
+
+        def lookuprevlink_func(revlog):
+            def lookuprevlink(n):
+                return cl.node(revlog.linkrev(n))
+            return lookuprevlink
 
         def gengroup():
-            nodes = self.newer(basenodes)
+            # construct a list of all changed files
+            changedfiles = {}
 
-            # construct the link map
-            linkmap = {}
-            for n in nodes:
-                linkmap[self.changelog.rev(n)] = n
+            for chnk in cl.group(nodes, identity,
+                                 changed_file_collector(changedfiles)):
+                yield chnk
+            changedfiles = changedfiles.keys()
+            changedfiles.sort()
 
-            # construct a list of all changed files
-            changed = {}
-            for n in nodes:
-                c = self.changelog.read(n)
-                for f in c[3]:
-                    changed[f] = 1
-            changed = changed.keys()
-            changed.sort()
+            mnfst = self.manifest
+            nodeiter = gennodelst(mnfst)
+            for chnk in mnfst.group(nodeiter, lookuprevlink_func(mnfst)):
+                yield chnk
 
-            # the changegroup is changesets + manifests + all file revs
-            revs = [ self.changelog.rev(n) for n in nodes ]
-
-            for y in self.changelog.group(linkmap): yield y
-            for y in self.manifest.group(linkmap): yield y
-            for f in changed:
-                yield struct.pack(">l", len(f) + 4) + f
-                g = self.file(f).group(linkmap)
-                for y in g:
-                    yield y
+            for fname in changedfiles:
+                filerevlog = self.file(fname)
+                nodeiter = gennodelst(filerevlog)
+                nodeiter = list(nodeiter)
+                if nodeiter:
+                    yield struct.pack(">l", len(fname) + 4) + fname
+                    lookup = lookuprevlink_func(filerevlog)
+                    for chnk in filerevlog.group(nodeiter, lookup):
+                        yield chnk
 
             yield struct.pack(">l", 0)
 
-        return genread(gengroup())
+        return util.chunkbuffer(gengroup())
 
     def addchangegroup(self, source):
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -249,6 +249,157 @@ class revlog:
                 visit.append(p)
         return reachable
 
+    def nodesbetween(self, roots=None, heads=None):
+        """Return a tuple containing three elements. Elements 1 and 2 contain
+        a final list of bases and heads after all the unreachable ones have
+        been pruned.  Element 0 contains a topologically sorted list of all
+        nodes that satisfy these constraints:
+        1. All nodes must be descended from a node in roots (the nodes on
+           roots are considered descended from themselves).
+        2. All nodes must also be ancestors of a node in heads (the nodes in
+           heads are considered to be their own ancestors).
+
+        If roots is unspecified, nullid is assumed as the only root.
+        If heads is unspecified, it is taken to be the output of the
+        heads method (i.e. a list of all nodes in the repository that
+        have no children)."""
+        nonodes = ([], [], [])
+        if roots is not None:
+            roots = list(roots)
+            if not roots:
+                return nonodes
+            lowestrev = min([self.rev(n) for n in roots])
+        else:
+            roots = [nullid] # Everybody's a descendent of nullid
+            lowestrev = -1
+        if (lowestrev == -1) and (heads is None):
+            # We want _all_ the nodes!
+            return ([self.node(r) for r in xrange(0, self.count())],
+                    [nullid], list(self.heads()))
+        if heads is None:
+            # All nodes are ancestors, so the latest ancestor is the last
+            # node.
+            highestrev = self.count() - 1
+            # Set ancestors to None to signal that every node is an ancestor.
+            ancestors = None
+            # Set heads to an empty dictionary for later discovery of heads
+            heads = {}
+        else:
+            heads = list(heads)
+            if not heads:
+                return nonodes
+            ancestors = {}
+            # Start at the top and keep marking parents until we're done.
+            nodestotag = heads[:]
+            # Turn heads into a dictionary so we can remove 'fake' heads.
+            # Also, later we will be using it to filter out the heads we can't
+            # find from roots.
+            heads = dict.fromkeys(heads, 0)
+            # Remember where the top was so we can use it as a limit later.
+            highestrev = max([self.rev(n) for n in nodestotag])
+            while nodestotag:
+                # grab a node to tag
+                n = nodestotag.pop()
+                # Never tag nullid
+                if n == nullid:
+                    continue
+                # A node's revision number represents its place in a
+                # topologically sorted list of nodes.
+                r = self.rev(n)
+                if r >= lowestrev:
+                    if n not in ancestors:
+                        # If we are possibly a descendent of one of the roots
+                        # and we haven't already been marked as an ancestor
+                        ancestors[n] = 1 # Mark as ancestor
+                        # Add non-nullid parents to list of nodes to tag.
+                        nodestotag.extend([p for p in self.parents(n) if
+                                           p != nullid])
+                    elif n in heads: # We've seen it before, is it a fake head?
+                        # So it is, real heads should not be the ancestors of
+                        # any other heads.
+                        heads.pop(n)
+            if not ancestors:
+                return nonodes
+            # Now that we have our set of ancestors, we want to remove any
+            # roots that are not ancestors.
+
+            # If one of the roots was nullid, everything is included anyway.
+            if lowestrev > -1:
+                # But, since we weren't, let's recompute the lowest rev to not
+                # include roots that aren't ancestors.
+
+                # Filter out roots that aren't ancestors of heads
+                roots = [n for n in roots if n in ancestors]
+                # Recompute the lowest revision
+                if roots:
+                    lowestrev = min([self.rev(n) for n in roots])
+                else:
+                    # No more roots?  Return empty list
+                    return nonodes
+            else:
+                # We are descending from nullid, and don't need to care about
+                # any other roots.
+                lowestrev = -1
+                roots = [nullid]
+        # Transform our roots list into a 'set' (i.e. a dictionary where the
+        # values don't matter).
+        descendents = dict.fromkeys(roots, 1)
+        # Also, keep the original roots so we can filter out roots that aren't
+        # 'real' roots (i.e. are descended from other roots).
+        roots = descendents.copy()
+        # Our topologically sorted list of output nodes.
+        orderedout = []
+        # Don't start at nullid since we don't want nullid in our output list,
+        # and if nullid shows up in descendents, empty parents will look like
+        # they're descendents.
+        for r in xrange(max(lowestrev, 0), highestrev + 1):
+            n = self.node(r)
+            isdescendent = False
+            if lowestrev == -1:  # Everybody is a descendent of nullid
+                isdescendent = True
+            elif n in descendents:
+                # n is already a descendent
+                isdescendent = True
+                # This check only needs to be done here because all the roots
+                # will start being marked as descendents before the loop.
+                if n in roots:
+                    # If n was a root, check if it's a 'real' root.
+                    p = tuple(self.parents(n))
+                    # If any of its parents are descendents, it's not a root.
+                    if (p[0] in descendents) or (p[1] in descendents):
+                        roots.pop(n)
+            else:
+                p = tuple(self.parents(n))
+                # A node is a descendent if either of its parents are
+                # descendents.  (We seeded the descendents list with the roots
+                # up there, remember?)
+                if (p[0] in descendents) or (p[1] in descendents):
+                    descendents[n] = 1
+                    isdescendent = True
+            if isdescendent and ((ancestors is None) or (n in ancestors)):
+                # Only include nodes that are both descendents and ancestors.
+                orderedout.append(n)
+                if (ancestors is not None) and (n in heads):
+                    # We're trying to figure out which heads are reachable
+                    # from roots.
+                    # Mark this head as having been reached
+                    heads[n] = 1
+                elif ancestors is None:
+                    # Otherwise, we're trying to discover the heads.
+                    # Assume this is a head because if it isn't, the next step
+                    # will eventually remove it.
+                    heads[n] = 1
+                    # But, obviously its parents aren't.
+                    for p in self.parents(n):
+                        heads.pop(p, None)
+        heads = [n for n in heads.iterkeys() if heads[n] != 0]
+        roots = roots.keys()
+        assert orderedout
+        assert roots
+        assert heads
+        return (orderedout, roots, heads)
+
     def heads(self, stop=None):
         """return the list of all nodes that have no children"""
         p = {}
@@ -482,7 +633,7 @@ class revlog:
                     #print "next x"
                     gx = x.next()
 
-    def group(self, linkmap):
+    def group(self, nodelist, lookup, infocollect = None):
         """calculate a delta group
 
         Given a list of changeset revs, return a set of deltas and
@@ -491,14 +642,8 @@ class revlog:
         have this parent as it has all history before these
         changesets. parent is parent[0]
         """
-        revs = []
-        needed = {}
-
-        # find file nodes/revs that match changeset revs
-        for i in xrange(0, self.count()):
-            if self.index[i][3] in linkmap:
-                revs.append(i)
-                needed[i] = 1
+        revs = [self.rev(n) for n in nodelist]
+        needed = dict.fromkeys(revs, 1)
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
@@ -566,6 +711,9 @@ class revlog:
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
 
+            if infocollect is not None:
+                infocollect(n)
+
             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
@@ -587,7 +735,7 @@ class revlog:
 
             d = chunks[b]
             p = self.parents(n)
-            meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
+            meta = n + p[0] + p[1] + lookup(n)
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
diff --git a/tests/test-clone-r b/tests/test-clone-r
new file mode 100755
--- /dev/null
+++ b/tests/test-clone-r
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+hg init test
+cd test
+cat >>afile <>afile <>afile <>afile <>afile <>afile <fred <>afile <