comparison mercurial/hg.py @ 872:9a0af739cf55

dirstate walking optimizations The repo walking code introduces a number of calls to dirstate.map.copy(), significantly slowing down the walk on large trees. When a list of files is passed to the walking code, we should only look at map entries relevant to the file list passed in. dirstate.filterfiles() is added to return a subset of the dirstate map. The subset includes the files passed in, and if one of the files requested is actually a directory, it includes any files inside that directory tree. This brings the time for hg diff Makefile down from 1.7s to .3s on a linux kernel repo. Also, the diff command was unconditionally calling makewalk, leading to an extra pass through repo.changes. This patch avoids the call to makewalk when commands.diff isn't given a list of patterns, cutting the time for hg diff (with no args) in half. Index: mine/mercurial/hg.py ===================================================================
author mason@suse.com
date Fri, 12 Aug 2005 09:57:56 -0800
parents 6390c377a9e6
children d4cb383e7de7
comparison
equal deleted inserted replaced
859:6390c377a9e6 872:9a0af739cf55
433 f = f + "\0" + c 433 f = f + "\0" + c
434 e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f)) 434 e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f))
435 st.write(e + f) 435 st.write(e + f)
436 self.dirty = 0 436 self.dirty = 0
437 437
438 def walk(self, files = None, match = util.always): 438 def filterfiles(self, files):
439 ret = {}
440 unknown = []
441
442 for x in files:
443 if x is '.':
444 return self.map.copy()
445 if x not in self.map:
446 unknown.append(x)
447 else:
448 ret[x] = self.map[x]
449
450 if not unknown:
451 return ret
452
453 b = self.map.keys()
454 b.sort()
455 blen = len(b)
456
457 for x in unknown:
458 bs = bisect.bisect(b, x)
459 if bs != 0 and b[bs-1] == x:
460 ret[x] = self.map[x]
461 continue
462 while bs < blen:
463 s = b[bs]
464 if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
465 ret[s] = self.map[s]
466 else:
467 break
468 bs += 1
469 return ret
470
471 def walk(self, files = None, match = util.always, dc=None):
439 self.read() 472 self.read()
440 dc = self.map.copy() 473
441 # walk all files by default 474 # walk all files by default
442 if not files: files = [self.root] 475 if not files:
476 files = [self.root]
477 if not dc:
478 dc = self.map.copy()
479 elif not dc:
480 dc = self.filterfiles(files)
481
443 known = {'.hg': 1} 482 known = {'.hg': 1}
444 def seen(fn): 483 def seen(fn):
445 if fn in known: return True 484 if fn in known: return True
446 known[fn] = 1 485 known[fn] = 1
447 def traverse(): 486 def traverse():
475 # not in .hgignore 514 # not in .hgignore
476 515
477 for src, fn in util.unique(traverse()): 516 for src, fn in util.unique(traverse()):
478 fn = os.path.normpath(fn) 517 fn = os.path.normpath(fn)
479 if seen(fn): continue 518 if seen(fn): continue
480 if fn in dc: 519 if fn not in dc and self.ignore(fn):
481 del dc[fn]
482 elif self.ignore(fn):
483 continue 520 continue
484 if match(fn): 521 if match(fn):
485 yield src, fn 522 yield src, fn
486 523
487 def changes(self, files = None, match = util.always): 524 def changes(self, files = None, match = util.always):
488 self.read() 525 self.read()
489 dc = self.map.copy() 526 if not files:
527 dc = self.map.copy()
528 else:
529 dc = self.filterfiles(files)
490 lookup, changed, added, unknown = [], [], [], [] 530 lookup, changed, added, unknown = [], [], [], []
491 531
492 for src, fn in self.walk(files, match): 532 for src, fn in self.walk(files, match, dc=dc):
493 try: s = os.stat(os.path.join(self.root, fn)) 533 try: s = os.stat(os.path.join(self.root, fn))
494 except: continue 534 except: continue
495 535
496 if fn in dc: 536 if fn in dc:
497 c = dc[fn] 537 c = dc[fn]