Mercurial > hg > mercurial-crew-with-dirclash
comparison hgext/convert/subversion.py @ 5075:514c06098e9c
convert: svn: pull up path-to-file expansion code into a separate function.
This is part of the process for deferring path expansion until getchanges.
Copy detection also needs to be moved out of the commit object.
author | Brendan Cully <brendan@kublai.com> |
---|---|
date | Sun, 05 Aug 2007 11:30:52 -0700 |
parents | 4cef0b93969d |
children | ef338e34a906 |
comparison
equal
deleted
inserted
replaced
5074:4cef0b93969d | 5075:514c06098e9c |
---|---|
324 def reparent(self, module): | 324 def reparent(self, module): |
325 svn_url = self.base + module | 325 svn_url = self.base + module |
326 self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding)) | 326 self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding)) |
327 svn.ra.reparent(self.ra, svn_url.encode(self.encoding)) | 327 svn.ra.reparent(self.ra, svn_url.encode(self.encoding)) |
328 | 328 |
329 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347): | 329 def expandpaths(self, rev, paths, parents): |
330 def get_entry_from_path(path, module=self.module): | 330 def get_entry_from_path(path, module=self.module): |
331 # Given the repository url of this wc, say | 331 # Given the repository url of this wc, say |
332 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch" | 332 # "http://server/plone/CMFPlone/branches/Plone-2_0-branch" |
333 # extract the "entry" portion (a relative path) from what | 333 # extract the "entry" portion (a relative path) from what |
334 # svn log --xml says, ie | 334 # svn log --xml says, ie |
335 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py" | 335 # "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py" |
336 # that is to say "tests/PloneTestCase.py" | 336 # that is to say "tests/PloneTestCase.py" |
337 | |
338 if path.startswith(module): | 337 if path.startswith(module): |
339 relative = path[len(module):] | 338 relative = path[len(module):] |
340 if relative.startswith('/'): | 339 if relative.startswith('/'): |
341 return relative[1:] | 340 return relative[1:] |
342 else: | 341 else: |
343 return relative | 342 return relative |
344 | 343 |
345 # The path is outside our tracked tree... | 344 # The path is outside our tracked tree... |
346 self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module)) | 345 self.ui.debug('%r is not under %r, ignoring\n' % (path, module)) |
347 return None | 346 return None |
348 | 347 |
348 entries = [] | |
349 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions. | |
350 copies = {} | |
351 revnum = self.revnum(rev) | |
352 | |
353 for path, ent in paths: | |
354 # self.ui.write("path %s\n" % path) | |
355 entrypath = get_entry_from_path(path, module=self.module) | |
356 entry = entrypath.decode(self.encoding) | |
357 | |
358 kind = svn.ra.check_path(self.ra, entrypath, revnum) | |
359 if kind == svn.core.svn_node_file: | |
360 if ent.copyfrom_path: | |
361 copyfrom_path = get_entry_from_path(ent.copyfrom_path) | |
362 if copyfrom_path: | |
363 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev)) | |
364 # It's probably important for hg that the source | |
365 # exists in the revision's parent, not just the | |
366 # ent.copyfrom_rev | |
367 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev) | |
368 if fromkind != 0: | |
369 copies[self.recode(entry)] = self.recode(copyfrom_path) | |
370 entries.append(self.recode(entry)) | |
371 elif kind == 0: # gone, but had better be a deleted *file* | |
372 self.ui.debug("gone from %s\n" % ent.copyfrom_rev) | |
373 | |
374 # if a branch is created but entries are removed in the same | |
375 # changeset, get the right fromrev | |
376 if parents: | |
377 uuid, old_module, fromrev = self.revsplit(parents[0]) | |
378 else: | |
379 fromrev = revnum - 1 | |
380 # might always need to be revnum - 1 in these 3 lines? | |
381 old_module = self.modulemap.get(fromrev, self.module) | |
382 | |
383 basepath = old_module + "/" + get_entry_from_path(path, module=self.module) | |
384 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module) | |
385 | |
386 def lookup_parts(p): | |
387 rc = None | |
388 parts = p.split("/") | |
389 for i in range(len(parts)): | |
390 part = "/".join(parts[:i]) | |
391 info = part, copyfrom.get(part, None) | |
392 if info[1] is not None: | |
393 self.ui.debug("Found parent directory %s\n" % info[1]) | |
394 rc = info | |
395 return rc | |
396 | |
397 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath)) | |
398 | |
399 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1) | |
400 | |
401 # need to remove fragment from lookup_parts and replace with copyfrom_path | |
402 if frompath is not None: | |
403 self.ui.debug("munge-o-matic\n") | |
404 self.ui.debug(entrypath + '\n') | |
405 self.ui.debug(entrypath[len(frompath):] + '\n') | |
406 entrypath = froment.copyfrom_path + entrypath[len(frompath):] | |
407 fromrev = froment.copyfrom_rev | |
408 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath)) | |
409 | |
410 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev) | |
411 if fromkind == svn.core.svn_node_file: # a deleted file | |
412 entries.append(self.recode(entry)) | |
413 elif fromkind == svn.core.svn_node_dir: | |
414 # print "Deleted/moved non-file:", revnum, path, ent | |
415 # children = self._find_children(path, revnum - 1) | |
416 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action) | |
417 # Sometimes this is tricky. For example: in | |
418 # The Subversion Repository revision 6940 a dir | |
419 # was copied and one of its files was deleted | |
420 # from the new location in the same commit. This | |
421 # code can't deal with that yet. | |
422 if ent.action == 'C': | |
423 children = self._find_children(path, fromrev) | |
424 else: | |
425 oroot = entrypath.strip('/') | |
426 nroot = path.strip('/') | |
427 children = self._find_children(oroot, fromrev) | |
428 children = [s.replace(oroot,nroot) for s in children] | |
429 # Mark all [files, not directories] as deleted. | |
430 for child in children: | |
431 # Can we move a child directory and its | |
432 # parent in the same commit? (probably can). Could | |
433 # cause problems if instead of revnum -1, | |
434 # we have to look in (copyfrom_path, revnum - 1) | |
435 entrypath = get_entry_from_path("/" + child, module=old_module) | |
436 if entrypath: | |
437 entry = self.recode(entrypath.decode(self.encoding)) | |
438 if entry in copies: | |
439 # deleted file within a copy | |
440 del copies[entry] | |
441 else: | |
442 entries.append(entry) | |
443 else: | |
444 self.ui.debug('unknown path in revision %d: %s\n' % \ | |
445 (revnum, path)) | |
446 elif kind == svn.core.svn_node_dir: | |
447 # Should probably synthesize normal file entries | |
448 # and handle as above to clean up copy/rename handling. | |
449 | |
450 # If the directory just had a prop change, | |
451 # then we shouldn't need to look for its children. | |
452 # Also this could create duplicate entries. Not sure | |
453 # whether this will matter. Maybe should make entries a set. | |
454 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev | |
455 # This will fail if a directory was copied | |
456 # from another branch and then some of its files | |
457 # were deleted in the same transaction. | |
458 children = self._find_children(path, revnum) | |
459 children.sort() | |
460 for child in children: | |
461 # Can we move a child directory and its | |
462 # parent in the same commit? (probably can). Could | |
463 # cause problems if instead of revnum -1, | |
464 # we have to look in (copyfrom_path, revnum - 1) | |
465 entrypath = get_entry_from_path("/" + child, module=self.module) | |
466 # print child, self.module, entrypath | |
467 if entrypath: | |
468 # Need to filter out directories here... | |
469 kind = svn.ra.check_path(self.ra, entrypath, revnum) | |
470 if kind != svn.core.svn_node_dir: | |
471 entries.append(self.recode(entrypath)) | |
472 | |
473 # Copies here (must copy all from source) | |
474 # Probably not a real problem for us if | |
475 # source does not exist | |
476 | |
477 # Can do this with the copy command "hg copy" | |
478 # if ent.copyfrom_path: | |
479 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding), | |
480 # module=self.module) | |
481 # copyto_entry = entrypath | |
482 # | |
483 # print "copy directory", copyfrom_entry, 'to', copyto_entry | |
484 # | |
485 # copies.append((copyfrom_entry, copyto_entry)) | |
486 | |
487 if ent.copyfrom_path: | |
488 copyfrom_path = ent.copyfrom_path.decode(self.encoding) | |
489 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module) | |
490 if copyfrom_entry: | |
491 copyfrom[path] = ent | |
492 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path])) | |
493 | |
494 # Good, /probably/ a regular copy. Really should check | |
495 # to see whether the parent revision actually contains | |
496 # the directory in question. | |
497 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev) | |
498 children.sort() | |
499 for child in children: | |
500 entrypath = get_entry_from_path("/" + child, module=self.module) | |
501 if entrypath: | |
502 entry = entrypath.decode(self.encoding) | |
503 # print "COPY COPY From", copyfrom_entry, entry | |
504 copyto_path = path + entry[len(copyfrom_entry):] | |
505 copyto_entry = get_entry_from_path(copyto_path, module=self.module) | |
506 # print "COPY", entry, "COPY To", copyto_entry | |
507 copies[self.recode(copyto_entry)] = self.recode(entry) | |
508 # copy from quux splort/quuxfile | |
509 | |
510 return (entries, copies) | |
511 | |
512 def _fetch_revisions(self, from_revnum = 0, to_revnum = 347): | |
349 self.child_cset = None | 513 self.child_cset = None |
350 def parselogentry(orig_paths, revnum, author, date, message): | 514 def parselogentry(orig_paths, revnum, author, date, message): |
351 self.ui.debug("parsing revision %d (%d changes)\n" % | 515 self.ui.debug("parsing revision %d (%d changes)\n" % |
352 (revnum, len(orig_paths))) | 516 (revnum, len(orig_paths))) |
353 | 517 |
355 new_module = self.modulemap[revnum] | 519 new_module = self.modulemap[revnum] |
356 if new_module != self.module: | 520 if new_module != self.module: |
357 self.module = new_module | 521 self.module = new_module |
358 self.reparent(self.module) | 522 self.reparent(self.module) |
359 | 523 |
360 copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions. | |
361 copies = {} | |
362 entries = [] | |
363 rev = self.revid(revnum) | 524 rev = self.revid(revnum) |
364 parents = [] | |
365 | |
366 # branch log might return entries for a parent we already have | 525 # branch log might return entries for a parent we already have |
367 if (rev in self.commits or | 526 if (rev in self.commits or |
368 (revnum < self.lastrevs.get(self.module, 0))): | 527 (revnum < self.lastrevs.get(self.module, 0))): |
369 return | 528 return |
370 | 529 |
371 try: | 530 parents = [] |
372 branch = self.module.split("/")[-1] | |
373 if branch == 'trunk': | |
374 branch = '' | |
375 except IndexError: | |
376 branch = None | |
377 | |
378 orig_paths = orig_paths.items() | 531 orig_paths = orig_paths.items() |
379 orig_paths.sort() | 532 orig_paths.sort() |
380 | 533 |
381 # check whether this revision is the start of a branch | 534 # check whether this revision is the start of a branch |
382 path, ent = orig_paths and orig_paths[0] or (None, None) | 535 path, ent = orig_paths and orig_paths[0] or (None, None) |
389 self.ui.note('found parent of branch %s at %d: %s\n' % \ | 542 self.ui.note('found parent of branch %s at %d: %s\n' % \ |
390 (self.module, prev, ent.copyfrom_path)) | 543 (self.module, prev, ent.copyfrom_path)) |
391 else: | 544 else: |
392 self.ui.debug("No copyfrom path, don't know what to do.\n") | 545 self.ui.debug("No copyfrom path, don't know what to do.\n") |
393 | 546 |
547 self.modulemap[revnum] = self.module # track backwards in time | |
548 | |
549 paths = [] | |
550 # filter out unrelated paths | |
394 for path, ent in orig_paths: | 551 for path, ent in orig_paths: |
395 # self.ui.write("path %s\n" % path) | 552 if not path.startswith(self.module): |
396 entrypath = get_entry_from_path(path, module=self.module) | |
397 # self.ui.write("entrypath %s\n" % entrypath) | |
398 if entrypath is None: | |
399 # Outside our area of interest | |
400 self.ui.debug("boring@%s: %s\n" % (revnum, path)) | 553 self.ui.debug("boring@%s: %s\n" % (revnum, path)) |
401 continue | 554 continue |
402 entry = entrypath.decode(self.encoding) | 555 paths.append((path, ent)) |
403 | 556 |
404 kind = svn.ra.check_path(self.ra, entrypath, revnum) | 557 entries, copies = self.expandpaths(rev, paths, parents) |
405 if kind == svn.core.svn_node_file: | |
406 if ent.copyfrom_path: | |
407 copyfrom_path = get_entry_from_path(ent.copyfrom_path) | |
408 if copyfrom_path: | |
409 self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev)) | |
410 # It's probably important for hg that the source | |
411 # exists in the revision's parent, not just the | |
412 # ent.copyfrom_rev | |
413 fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev) | |
414 if fromkind != 0: | |
415 copies[self.recode(entry)] = self.recode(copyfrom_path) | |
416 entries.append(self.recode(entry)) | |
417 elif kind == 0: # gone, but had better be a deleted *file* | |
418 self.ui.debug("gone from %s\n" % ent.copyfrom_rev) | |
419 | |
420 # if a branch is created but entries are removed in the same | |
421 # changeset, get the right fromrev | |
422 if parents: | |
423 uuid, old_module, fromrev = self.revsplit(parents[0]) | |
424 else: | |
425 fromrev = revnum - 1 | |
426 # might always need to be revnum - 1 in these 3 lines? | |
427 old_module = self.modulemap.get(fromrev, self.module) | |
428 | |
429 basepath = old_module + "/" + get_entry_from_path(path, module=self.module) | |
430 entrypath = old_module + "/" + get_entry_from_path(path, module=self.module) | |
431 | |
432 def lookup_parts(p): | |
433 rc = None | |
434 parts = p.split("/") | |
435 for i in range(len(parts)): | |
436 part = "/".join(parts[:i]) | |
437 info = part, copyfrom.get(part, None) | |
438 if info[1] is not None: | |
439 self.ui.debug("Found parent directory %s\n" % info[1]) | |
440 rc = info | |
441 return rc | |
442 | |
443 self.ui.debug("base, entry %s %s\n" % (basepath, entrypath)) | |
444 | |
445 frompath, froment = lookup_parts(entrypath) or (None, revnum - 1) | |
446 | |
447 # need to remove fragment from lookup_parts and replace with copyfrom_path | |
448 if frompath is not None: | |
449 self.ui.debug("munge-o-matic\n") | |
450 self.ui.debug(entrypath + '\n') | |
451 self.ui.debug(entrypath[len(frompath):] + '\n') | |
452 entrypath = froment.copyfrom_path + entrypath[len(frompath):] | |
453 fromrev = froment.copyfrom_rev | |
454 self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath)) | |
455 | |
456 fromkind = svn.ra.check_path(self.ra, entrypath, fromrev) | |
457 if fromkind == svn.core.svn_node_file: # a deleted file | |
458 entries.append(self.recode(entry)) | |
459 elif fromkind == svn.core.svn_node_dir: | |
460 # print "Deleted/moved non-file:", revnum, path, ent | |
461 # children = self._find_children(path, revnum - 1) | |
462 # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action) | |
463 # Sometimes this is tricky. For example: in | |
464 # The Subversion Repository revision 6940 a dir | |
465 # was copied and one of its files was deleted | |
466 # from the new location in the same commit. This | |
467 # code can't deal with that yet. | |
468 if ent.action == 'C': | |
469 children = self._find_children(path, fromrev) | |
470 else: | |
471 oroot = entrypath.strip('/') | |
472 nroot = path.strip('/') | |
473 children = self._find_children(oroot, fromrev) | |
474 children = [s.replace(oroot,nroot) for s in children] | |
475 # Mark all [files, not directories] as deleted. | |
476 for child in children: | |
477 # Can we move a child directory and its | |
478 # parent in the same commit? (probably can). Could | |
479 # cause problems if instead of revnum -1, | |
480 # we have to look in (copyfrom_path, revnum - 1) | |
481 entrypath = get_entry_from_path("/" + child, module=old_module) | |
482 if entrypath: | |
483 entry = self.recode(entrypath.decode(self.encoding)) | |
484 if entry in copies: | |
485 # deleted file within a copy | |
486 del copies[entry] | |
487 else: | |
488 entries.append(entry) | |
489 else: | |
490 self.ui.debug('unknown path in revision %d: %s\n' % \ | |
491 (revnum, path)) | |
492 elif kind == svn.core.svn_node_dir: | |
493 # Should probably synthesize normal file entries | |
494 # and handle as above to clean up copy/rename handling. | |
495 | |
496 # If the directory just had a prop change, | |
497 # then we shouldn't need to look for its children. | |
498 # Also this could create duplicate entries. Not sure | |
499 # whether this will matter. Maybe should make entries a set. | |
500 # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev | |
501 # This will fail if a directory was copied | |
502 # from another branch and then some of its files | |
503 # were deleted in the same transaction. | |
504 children = self._find_children(path, revnum) | |
505 children.sort() | |
506 for child in children: | |
507 # Can we move a child directory and its | |
508 # parent in the same commit? (probably can). Could | |
509 # cause problems if instead of revnum -1, | |
510 # we have to look in (copyfrom_path, revnum - 1) | |
511 entrypath = get_entry_from_path("/" + child, module=self.module) | |
512 # print child, self.module, entrypath | |
513 if entrypath: | |
514 # Need to filter out directories here... | |
515 kind = svn.ra.check_path(self.ra, entrypath, revnum) | |
516 if kind != svn.core.svn_node_dir: | |
517 entries.append(self.recode(entrypath)) | |
518 | |
519 # Copies here (must copy all from source) | |
520 # Probably not a real problem for us if | |
521 # source does not exist | |
522 | |
523 # Can do this with the copy command "hg copy" | |
524 # if ent.copyfrom_path: | |
525 # copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding), | |
526 # module=self.module) | |
527 # copyto_entry = entrypath | |
528 # | |
529 # print "copy directory", copyfrom_entry, 'to', copyto_entry | |
530 # | |
531 # copies.append((copyfrom_entry, copyto_entry)) | |
532 | |
533 if ent.copyfrom_path: | |
534 copyfrom_path = ent.copyfrom_path.decode(self.encoding) | |
535 copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module) | |
536 if copyfrom_entry: | |
537 copyfrom[path] = ent | |
538 self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path])) | |
539 | |
540 # Good, /probably/ a regular copy. Really should check | |
541 # to see whether the parent revision actually contains | |
542 # the directory in question. | |
543 children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev) | |
544 children.sort() | |
545 for child in children: | |
546 entrypath = get_entry_from_path("/" + child, module=self.module) | |
547 if entrypath: | |
548 entry = entrypath.decode(self.encoding) | |
549 # print "COPY COPY From", copyfrom_entry, entry | |
550 copyto_path = path + entry[len(copyfrom_entry):] | |
551 copyto_entry = get_entry_from_path(copyto_path, module=self.module) | |
552 # print "COPY", entry, "COPY To", copyto_entry | |
553 copies[self.recode(copyto_entry)] = self.recode(entry) | |
554 # copy from quux splort/quuxfile | |
555 | |
556 self.modulemap[revnum] = self.module # track backwards in time | |
557 # a list of (filename, id) where id lets us retrieve the file. | 558 # a list of (filename, id) where id lets us retrieve the file. |
558 # eg in git, id is the object hash. for svn it'll be the | 559 # eg in git, id is the object hash. for svn it'll be the |
559 self.files[rev] = zip(entries, [rev] * len(entries)) | 560 self.files[rev] = zip(entries, [rev] * len(entries)) |
560 if not entries: | 561 if not entries: |
561 return | 562 return |
565 # '2007-01-04T17:35:00.902377Z' | 566 # '2007-01-04T17:35:00.902377Z' |
566 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"]) | 567 date = util.parsedate(date[:18] + " UTC", ["%Y-%m-%dT%H:%M:%S"]) |
567 | 568 |
568 log = message and self.recode(message) | 569 log = message and self.recode(message) |
569 author = author and self.recode(author) or '' | 570 author = author and self.recode(author) or '' |
571 try: | |
572 branch = self.module.split("/")[-1] | |
573 if branch == 'trunk': | |
574 branch = '' | |
575 except IndexError: | |
576 branch = None | |
570 | 577 |
571 cset = commit(author=author, | 578 cset = commit(author=author, |
572 date=util.datestr(date), | 579 date=util.datestr(date), |
573 desc=log, | 580 desc=log, |
574 parents=parents, | 581 parents=parents, |