view contrib/darcs2hg.py @ 2577:fa76c5d609c9

bdiff: improve worst case behavior by 100x. on 5.8MB (244.000 lines) text file with similar lines, hash before this change made diff against empty file take 75 seconds. this change improves performance to 0.6 seconds. result is that clone of smallish repo (137MB) with some files like this takes 1 minute instead of 10 minutes. common case of diff is 10% slower now, probably because of worse cache locality. but diff does not affect overall performance in common case (less than 1% of runtime is in diff when it is working ok), so this tradeoff looks good.
author Vadim Gelfer <vadim.gelfer@gmail.com>
date Fri, 07 Jul 2006 15:02:55 -0700
parents 61909dfb316d
children 5ec2dded1bda
line wrap: on
line source

#!/usr/bin/env python
# Encoding: iso-8859-1
# vim: tw=80 ts=4 sw=4 noet
# -----------------------------------------------------------------------------
# Project   : Basic Darcs to Mercurial conversion script
# -----------------------------------------------------------------------------
# Author    : Sebastien Pierre <sebastien@xprima.com>
# Creation  : 24-May-2006
# Last mod  : 26-May-2006
# History   :
#             26-May-2006 - Updated
#             24-May-2006 - First implementation
# -----------------------------------------------------------------------------

import os, sys
import tempfile
import xml.dom.minidom as xml_dom
from time import strptime, mktime

DARCS_REPO = None
HG_REPO    = None

USAGE = """\
%s DARCSREPO HGREPO

    Converts the given Darcs repository to a new Mercurial repository. The given
    HGREPO must not exist, as it will be created and filled up (this will avoid
    overwriting valuable data.

""" % (os.path.basename(sys.argv[0]))

# ------------------------------------------------------------------------------
#
# Utilities
#
# ------------------------------------------------------------------------------

def cmd(text, path=None):
	"""Executes a command, in the given directory (if any), and returns the
	command result as a string."""
	cwd = None
	if path:
		path = os.path.abspath(path)
		cwd  = os.getcwd()
		os.chdir(path)
	print text
	res = os.popen(text).read()
	if path:
		os.chdir(cwd)
	return res

def writefile(path, data):
	"""Writes the given data into the given file."""
	f = file(path, "w") ; f.write(data)  ; f.close()

# ------------------------------------------------------------------------------
#
# Darcs interface
#
# ------------------------------------------------------------------------------

def darcs_changes(darcsRepo):
	"""Gets the changes list from the given darcs repository. This returns the
	chronological list of changes as (change name, change summary)."""
	changes    = cmd("darcs changes --reverse --xml-output", darcsRepo)
	doc        = xml_dom.parseString(changes)
	res        = []
	for patch_node in doc.childNodes[0].childNodes:
		name = filter(lambda n:n.nodeName == "name", patch_node.childNodes)
		comm = filter(lambda n:n.nodeName == "comment", patch_node.childNodes)
		if not name:continue
		else: name = name[0].childNodes[0].data
		if not comm: comm = ""
		else: comm = comm[0].childNodes[0].data
		author = patch_node.getAttribute("author")
		date = patch_node.getAttribute("date")
		yield author, date, name, comm

def darcs_pull(hg_repo, darcs_repo, change):
	cmd("darcs pull '%s' --all --patches='%s'" % (darcs_repo, change), hg_repo)

# ------------------------------------------------------------------------------
#
# Mercurial interface
#
# ------------------------------------------------------------------------------

def hg_commit( hg_repo, text, author, date ):
	fd, tmpfile = tempfile.mkstemp(prefix="darcs2hg_")
	writefile(tmpfile, text)
	cmd("hg add -X _darcs", hg_repo)
	cmd("hg remove -X _darcs --after", hg_repo)
	cmd("hg commit -l %s -u '%s' -d '%s 0'"  % (tmpfile, author, date), hg_repo)
	os.unlink(tmpfile)

# ------------------------------------------------------------------------------
#
# Main
#
# ------------------------------------------------------------------------------

if __name__ == "__main__":
	args = sys.argv[1:]
	# We parse the arguments
	if len(args)   == 2:
		darcs_repo = os.path.abspath(args[0])
		hg_repo    = os.path.abspath(args[1])
	else:
		print USAGE
		sys.exit(-1)
	# Initializes the target repo
	if not os.path.isdir(darcs_repo + "/_darcs"):
		print "No darcs directory found at: " + darc_repo
		sys.exit(-1)
	if not os.path.isdir(hg_repo):
		os.mkdir(hg_repo)
	else:
		print "Given HG repository must not exist. It will be created"
		sys.exit(-1)
	cmd("hg init '%s'" % (hg_repo))
	cmd("darcs initialize", hg_repo)
	# Get the changes from the Darcs repository
	for author, date, summary, description in darcs_changes(darcs_repo):
		text = summary + "\n" + description
		darcs_pull(hg_repo, darcs_repo, summary)
		epoch = int(mktime(strptime(date, '%Y%m%d%H%M%S')))
		hg_commit(hg_repo, text, author, epoch)

# EOF