# HG changeset patch # User mason@suse.com # Date 1123859421 28800 # Node ID 953ccddd57bd8073143f3df33e0f8d3b874aa9f5 # Parent c2e77581bc841e5bf978bc51e3323d1b49916a9e dirstate walking optimizations The repo walking code introduces a number of calls to dirstate.map.copy(), significantly slowing down the walk on large trees. When a list of files is passed to the walking code, we should only look at map entries relevant to the file list passed in. dirstate.filterfiles() is added to return a subset of the dirstate map. The subset includes in files passed in, and if one of the files requested is actually a directory, it includes any files inside that directory tree. This brings the time for hg diff Makefile down from 1.7s to .3s on a linux kernel repo. Also, the diff command was unconditionally calling makewalk, leading to an extra pass through repo.changes. This patch avoids the call to makewalk when commands.diff isn't given a list of patterns, cutting the time for hg diff (with no args) in half. Index: mine/mercurial/hg.py =================================================================== diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -632,9 +632,11 @@ def diff(ui, repo, *pats, **opts): raise util.Abort("too many revisions to diff") files = [] - roots, match, results = makewalk(repo, pats, opts) - for src, abs, rel in results: - files.append(abs) + match = util.always + if pats: + roots, match, results = makewalk(repo, pats, opts) + for src, abs, rel in results: + files.append(abs) dodiff(sys.stdout, ui, repo, files, *revs, **{'match': match}) def doexport(ui, repo, changeset, seqno, total, revwidth, opts): diff --git a/mercurial/hg.py b/mercurial/hg.py --- a/mercurial/hg.py +++ b/mercurial/hg.py @@ -440,11 +440,50 @@ class dirstate: st.write(e + f) self.dirty = 0 - def walk(self, files = None, match = util.always): + def filterfiles(self, files): + ret = {} + unknown = [] + + for x in files: + if x is '.': + return self.map.copy() + if x not in self.map: + unknown.append(x) + else: + ret[x] = self.map[x] + + if not unknown: + return ret + + b = self.map.keys() + b.sort() + blen = len(b) + + for x in unknown: + bs = bisect.bisect(b, x) + if bs != 0 and b[bs-1] == x: + ret[x] = self.map[x] + continue + while bs < blen: + s = b[bs] + if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/': + ret[s] = self.map[s] + else: + break + bs += 1 + return ret + + def walk(self, files = None, match = util.always, dc=None): self.read() - dc = self.map.copy() + # walk all files by default - if not files: files = [self.root] + if not files: + files = [self.root] + if not dc: + dc = self.map.copy() + elif not dc: + dc = self.filterfiles(files) + known = {'.hg': 1} def seen(fn): if fn in known: return True @@ -482,19 +521,20 @@ class dirstate: for src, fn in util.unique(traverse()): fn = os.path.normpath(fn) if seen(fn): continue - if fn in dc: - del dc[fn] - elif self.ignore(fn): + if fn not in dc and self.ignore(fn): continue if match(fn): yield src, fn def changes(self, files = None, match = util.always): self.read() - dc = self.map.copy() + if not files: + dc = self.map.copy() + else: + dc = self.filterfiles(files) lookup, changed, added, unknown = [], [], [], [] - for src, fn in self.walk(files, match): + for src, fn in self.walk(files, match, dc=dc): try: s = os.stat(os.path.join(self.root, fn)) except: continue