comparison mercurial/util.py @ 1199:78ceaf83f28f

Created a class in util called chunkbuffer that buffers reads from an iterator over strings (aka chunks). Also added to util (for future use) is a generator function that iterates over a file n bytes at a time. Lastly, localrepo was changed to use this new chunkbuffer class when reading changegroups from the local repository.
author Eric Hopper <hopper@omnifarious.org>
date Sun, 04 Sep 2005 14:11:51 -0700
parents e388c4f5cec5
children 333de1d53846
comparison
equal deleted inserted replaced
1198:66f7d3946109 1199:78ceaf83f28f
10 platform-specific details from the core. 10 platform-specific details from the core.
11 """ 11 """
12 12
13 import os, errno 13 import os, errno
14 from demandload import * 14 from demandload import *
15 demandload(globals(), "re") 15 demandload(globals(), "re cStringIO")
16 16
17 def binary(s): 17 def binary(s):
18 """return true if a string is binary data using diff's heuristic""" 18 """return true if a string is binary data using diff's heuristic"""
19 if s and '\0' in s[:4096]: 19 if s and '\0' in s[:4096]:
20 return True 20 return True
350 return "killed by signal %d" % val, val 350 return "killed by signal %d" % val, val
351 elif os.WIFSTOPPED(code): 351 elif os.WIFSTOPPED(code):
352 val = os.WSTOPSIG(code) 352 val = os.WSTOPSIG(code)
353 return "stopped by signal %d" % val, val 353 return "stopped by signal %d" % val, val
354 raise ValueError("invalid exit code") 354 raise ValueError("invalid exit code")
355
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter, targetsize = 2**16):
        """in_iter is the iterator that's iterating over the input chunks.
        targetsize is how big a buffer to try to maintain.

        Raises ValueError if targetsize is not a positive integer."""
        self.in_iter = iter(in_iter)
        self.buf = ''
        # convert once; the original converted twice
        targetsize = int(targetsize)
        if targetsize <= 0:
            raise ValueError("targetsize must be greater than 0, was %d"
                             % targetsize)
        self.targetsize = targetsize
        # set once the underlying iterator has been exhausted
        self.iterempty = False

    def fillbuf(self):
        """x.fillbuf()

        Ignore the target size, and just read every chunk from the iterator
        until it's empty."""
        if not self.iterempty:
            # one join is O(total bytes); avoids both quadratic += and the
            # cStringIO collector the original used
            self.buf = ''.join([self.buf] + list(self.in_iter))
            self.iterempty = True

    def read(self, l):
        """x.read(l) -> str
        Read l bytes of data from the iterator of chunks of data. Returns less
        than l bytes if the iterator runs dry."""
        if l > len(self.buf) and not self.iterempty:
            # Clamp to a multiple of self.targetsize so we refill in large
            # steps rather than once per read
            targetsize = self.targetsize * ((l // self.targetsize) + 1)
            collector = [self.buf]
            collected = len(self.buf)
            for chunk in self.in_iter:
                collector.append(chunk)
                collected += len(chunk)
                if collected >= targetsize:
                    break
            if collected < targetsize:
                # the for loop ended because in_iter ran dry
                self.iterempty = True
            self.buf = ''.join(collector)
        s = self.buf[:l]
        # plain slicing releases the consumed prefix; the original's
        # buffer(self.buf, l) built a chain of buffer objects that kept the
        # entire accumulated string alive in memory
        self.buf = self.buf[l:]
        return s

    def __repr__(self):
        return "<%s.%s targetsize = %u buffered = %u bytes>" % \
               (self.__class__.__module__, self.__class__.__name__,
                self.targetsize, len(self.buf))
411
def filechunkiter(f, size = 65536):
    """filechunkiter(file[, size]) -> generator

    Create a generator that produces all the data in the file size (default
    65536) bytes at a time. Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or some
    other type of file that sometimes reads less data than is requested."""
    s = f.read(size)
    # the original tested `len(s) >= 0`, which is always true, so the
    # generator yielded '' forever once the file hit EOF
    while s:
        yield s
        s = f.read(size)