#!/usr/bin/perl -w # # VSS to Perforce converter, phase I: extract metadata # # Copyright 1998 Perforce Software. All rights reserved. # Written by James Strickland, April 1998 # # This script parses the output of "ss properties -R root_project -O@tempfile" # to generate a list of files and their file types (including "tempobj" # for files which only have the last version stored). Each file history # is then obtained with "ss history filename". Note that there is an option to # parse corrupt VSS repositories without the -R recursive option as that won't work # in such cases - see extract_vss_metadata. # # Note: there is *no* way to obtain the history of files which are currently # deleted! The only workaround would seem to be to "ss recover" the files # before doing the conversion, then "p4 delete" them afterward. This converter # does not attempt to do that. # # It is necessary to do things this way because of the following VSS quirks: # - some revisions aren't really revisions, they're just label operations; # ss history -R output for revisions which are labels doesn't include # the project they're in and hence is ambiguous # - ss history -R does not output any information about version 1 of a file # - the only way to discover the type of a file is with the "ss filetype" # or "ss properties" command # - running "ss filetype -R" is much faster than running "ss properties -R" # but it's missing one crucial bit of information: Store only latest version # Without this the converter will try to include file revisions it can't # get, and there is no way of knowing whether the failure is due to # storing only the latest version or that SourceSafe has lost the version. # - without using -O@ SourceSafe's output is ambigous - it word wraps filenames # and if the wrap occurs at a space it does not output the space # # # The output of ss history consists of a header which specifies the filename, # then a series of multi-line data blocks starting with the line # ***************** Version n ***************** # # The rest of the lines in the data block are allowed to occur in any order, # except for lines starting with "Comment:" or "Label comment:". These must # always appear last, and are terminated by a blank line followed by the start # of another data block. require 5.0; use strict; use integer; use Time::Local; use lib '.'; use convert; use Change; use File::Path; mkpath($convert::metadata_dir); # ensure the metadata directory exists mkpath("$convert::metadata_dir/labels"); # ensure the label dir exists my $msg="can't open"; open(FILES, ">$convert::metadata_dir/files") or die $msg; # Parse the output of "ss properties" - we use a really simple scheme here # which relies solely on the fields "File:", "Type:" and # "Store only latest version:" always appearing, and appearing in that order. # Note that filenames with spaces are no problem - they are, in effect, # quoted by :\s+ on the left and \n on the right. Possible values for # "Type" are "Binary" and "Text", which translates nicely to Perforce's # "binary" and "text". Possible values for "Store only latest version:" # are "Yes" and "No". open(LOGFILE,">>logfile.log") or die "can't open logfile: $!"; # print LOGFILE "\n\nReading from: $cmd\n"; # Modified to allow more than one VSS root dir to be specified unlink("$convert::metadata_dir/ssproperties"); # otherwise VSS appends to it if ($convert::root =~ /\|/) { my $rt; foreach $rt (split(/\|/, $convert::root)) { extract_vss_metadata($rt); } } else { extract_vss_metadata($convert::root); } my %label_files; # Check for DB_File module installed - much faster for larger repositories if so my $USEDB_FILE = 0; eval{use DB_File;}; if (!$@) { $USEDB_FILE = 1; print "Using module DB_File\n"; unlink "$convert::metadata_dir/db.labels" ; tie %label_files, "DB_File", "$convert::metadata_dir/db.labels", O_RDWR|O_CREAT, 0666, $DB_HASH or die "Cannot open file '$convert::metadata_dir/db.labels': $!\n"; } open(SS,"<$convert::metadata_dir/ssproperties") or die $msg; my ($file,$type); FILE_OR_PROJECT: while( $_=myread() ) { chomp; s/\r$//; my ($field,$value) = split(/:\s*/,$_,2); next unless defined($value); if( $field eq "File") { $file = $value; if( $file =~ /[\000-\031@#%\*]/ || $file =~ /\.\.\./ ) { die "..., @, #, %, * or unprintable characters not allowed in filename '$file' - sorry, rename it and start over.\n"; } } elsif( $field eq "Type") { $type=lc($value); } elsif( $field eq "Store only latest version") { die "parse error" if(!defined($file)||!defined($type)); if($value eq "Yes") { $type="tempobj"; } $file =~ s/^\s+//; if (exclude_file($file)) { my $msg = "Ignoring file: $file\n"; print $msg; print LOGFILE $msg; } else { print FILES "$type $file\n"; } undef $file; undef $type; } } print LOGFILE "\n\nDone with: ssproperties\n"; close(FILES); close(SS); open(FILES, "<$convert::metadata_dir/files") or die $msg; # for input open(LABELS_SUMMARY, ">$convert::metadata_dir/labels_summary") or die $msg; open(CHANGES, ">$convert::metadata_dir/changes.ns") or die $msg; # ns for "not sorted" unlink("$convert::metadata_dir/changes"); # get rid of any existing change file to avoid confusion while(<FILES>) { my ($type,$file) = split(/ /,$_,2); chomp($file); read_history($file); } close(LOGFILE); if ($USEDB_FILE) { untie %label_files; } my (%label_comments, %label_warned, %label_ignored, %label_processed); my (%label_timestamp); sub read_history # expects open file descriptors CHANGES, LABELS_SUMMARY, BRANCHES { my $file = shift; my $tmpfile = "$convert::metadata_dir/_fileprops"; unlink($tmpfile); my $cmd = "ss history \"$file\" $convert::ss_options -O\@$tmpfile "; system($cmd); print LOGFILE "\n\nReading from: $cmd\n"; open(SS,"<$tmpfile"); read_comment(); # ignore everything up to the first data block # (previously I verified that it started with "History of $file ..." # except that MS word wraps long filenames with spaces so just hang it) my (@pending_labels); ITEM: while($_=myread()) { my ($op,$version,$user,$timestamp,$label,$comment); if(/^\*{17}( +Version (\d+) +\*{17})?/) { my ($version,$user,$timestamp,$label,$comment); $version = $2; $_=myread(); if(/^Label: "(.+)"$/) { $label = $1; $_=myread(); } ($user,$timestamp) = parse_user_and_timestamp($_); $_=myread(); if(/Labeled/) { # this revision isn't really a revision - it's just a label my $before = $label; if ($convert::label_ignore_regexp) { # ignore unwanted labels if ($label =~ /$convert::label_ignore_regexp/i) { if (!$label_ignored{$label}) { print "Ignoring label \"$label\"\n"; $label_ignored{$label} = 1; } $comment = read_comment(); next ITEM; } } # label transformations without a warning # labels must not contain umlauts $label =~ s/\204/ae/g; $label =~ s/\224/oe/g; $label =~ s/\201/ue/g; # delete contigous exclamation marks and append "[local] instead" if ($label =~ s/ *!!+ *//g) { $label .= "_[local]"; } # replace spaces within labels silently as they appear in almost every label $label =~ s/ /_/g; # replace more special characters silently $label =~ s@/@-@g; $label =~ tr/()/[]/; # label transformations with a warning # replace special characters if( $label =~ s%[\000-\031 @#&,<>()'!\%\*/'\\]%_%g && !$label_warned{$label}) { print "label \"$before\" changed to \"$label\"\n"; $label_warned{$label} = 1; } if( $label =~ s/\.\.\./_/g && !$label_warned{$label} ) { print "Removed ellipsis (...) in \"$label\"\n"; $label_warned{$label} = 1; } if( $label =~ /^\d+$/ ) { $label = "_$label"; print "Underscore prepended to all numeric label \"$label\"\n"; $label_warned{$label} = 1; } # Prefix label if specified $label = $convert::label_prefix . $label; push @pending_labels,$label; # assume all the comments for a particular label are the same... $comment = read_comment(); $label_comments{$label} = $comment if ! defined($label_comments{$label}); $label_timestamp{$label} = $timestamp if ! defined($label_timestamp{$label}); next ITEM; } # Labelled # this is a "real" revision - either "Checked in projectname" or "Branched" $comment = read_comment(); my $change = new Change( { 'timestamp' => $timestamp, 'author' => $user, 'change_description' => $comment, 'changelist' => [ "$version $file" ] } ); $change->put(\*CHANGES); while(@pending_labels) { $label = pop(@pending_labels); if ($label_files{"$version $file"}) { $label_files{"$version $file"} = $label_files{"$version $file"} . " " . $label; } else { $label_files{"$version $file"} = $label; } if (! defined($label_processed{$label})) { $label_processed{$label} = 1; print LABELS_SUMMARY "$label $label_timestamp{$label}\n"; # use 10 *'s as end of comments marker print LABELS_SUMMARY "$label_comments{$label}**********\n"; } } next ITEM; } else { die "parsing messed up on '$_'\n"; } } close(SS); } sub parse_user_and_timestamp { $_=shift; my $date_re = $convert::format_date; my $time_re = $convert::format_time; if ( ! m@^User: (.*)\b\s+Date:\s+(\S+)\s+Time:\s+(\S+)@ ) { # 2002-03-04/ps # A problem here most likely indicates a row of asterisks in a # comment. Probably the regexps in this file with "\*{17}" should be # changed to fix this, but I have no more time to do _and_ test # it... print "timestamp problem: $_\n"; die "can't parse timestamp" ; } my $user = $1; $user=lc($user) if($convert::lowercase_usernames); my $date_exp = $2; my $time_exp = $3; my ($day, $mon, $year, $hour, $min); my ($a, $b, $c); if (!(($a, $b, $c) = ($date_exp =~ m@^(\d+).(\d+).(\d+)@ ))) { print "date problem: $date_exp\n"; die "can't parse date" ; } if ($date_re =~ /^m.d.y$/) { $day = $b; $mon = $a; $year = $c } elsif($date_re =~ /^d.m.y$/) { $day = $a; $mon = $b; $year = $c; } else { print "date problem: $date_re\n"; die "can't parse date format" ; } # Now time component if ( !(($a, $b, $c) = ($time_exp =~ m@^(\d+).(\d+)([ap]*)@ ))) { print "time problem: $date_exp\n"; die "can't parse time" ; } if ($time_re =~ /^H:M$/) { $hour = $a; $min = $b; if($c =~ /[ap]/) { print "time problem: expected $time_re got unexpected AM/PM char\n"; die "can't parse time" ; } } elsif($time_re =~ /^H:Mp$/) { $hour = $a % 12; if($c eq 'p') { $hour += 12; } $min = $b; } else { print "time problem: $time_re\n"; die "can't parse time format" ; } # timelocal takes second, minute, hour, day, month, year # in the range 0..59, 0..59, 0..23, 1..31, 0..11, 0..99 # The two digit year has assumptions made about it such that # any time before 2037 (when the 32-bit seconds-since-1970 time # will run out) is handled correctly. i.e. 97 -> 1997, 1 -> 2001 my $timestamp=timelocal(0, $min, $hour, $day, $mon -1, $year); # convert user names because they must not contain umlauts $user =~ s/\204/ae/g; $user =~ s/\224/oe/g; $user =~ s/\201/ue/g; # # nifty table for more complicated username conversion (if desired) # my %usertable = ( "user_j" => "joe.user", # "developer_d" => "donald.developer", # ); # if ( !exists $usertable{$user} ) { # print "unknown user $user\n"; # } else { # $user = $usertable{$user}; # } return ($user,$timestamp); } sub read_comment { my $comment=""; while($_=myread()) { # convert a few umlauts from DOS-Codepage to Latin-1 # $_ =~ tr/\204\224\201\216\231\232\341//; # SAB 980503 - Comment can be terminated either by a new version # banner or by a Label separator... ## ***************** Version 28 ***************** ## User: User1 Date: 3/19/98 Time: 2:16p ## Checked in $/Projects/Common/AppUtils ## Comment: setup CoffFormat build configuration ## ## ********************** ## Label: "demo #1" ## User: User2 Date: 3/16/98 Time: 4:23a ## Labeled ## Label comment: Proposed demo. ## ## ********************** ## Label: "demo." ## User: User2 Date: 3/13/98 Time: 2:35a ## Labeled ## Label comment: Project ready for demo. ## ## ***************** Version 27 ***************** ## User: User2 Date: 2/17/98 Time: 10:27p ## Checked in $/Projects/Common/AppUtils ## Comment: Fixed location of output .lib/.bsc files. ## # if(/^\*{17} +Version (\d+) +\*{17}/) { if(/^\*{17}/) { $comment =~ s/^(Label comment|Comment): //; $comment =~ s/\n\s+$/\n/; # strip trailing blank lines pushback($_); return $comment; } $comment .= $_; } $comment =~ s/^Comment: //; return $comment; } # functions to read from SS with pushback my @linebuffer; sub myread { my $line; $line = pop @linebuffer; if (! defined($line)) { $line = <SS>; print LOGFILE "${line}" if defined($line) && $convert::debug_level > 1; } return $line; } sub pushback { push @linebuffer,shift; } sub exclude_file # True if file should be excluded { my $file = shift; if (!$convert::exclude) { return 0; } # note use of Quote Meta encapsulation and file matching at start of string if ($convert::exclude =~ /\|/) { my $pat; foreach $pat (split(/\|/, $convert::exclude)) { if ($file =~ /^\Q$pat\E/) { return 1; } } } elsif ($file =~ /^\Q$convert::exclude\E/) { return 1; } return 0; } # Procedure which extracts using a more painful method sub extract_vss_metadata { my $rt = shift; my $cmd; if (!$convert::vsscorrupt) { # This appends to the output $cmd = "ss properties -R \"$rt\" -O\@$convert::metadata_dir/ssproperties $convert::ss_options"; die "Failed to extract properties" if (system($cmd) != 0); return; } # Otherwise we know we have to be careful how we produce a tree listing of all files so as to avoid # any corrupt parts that will stop us in our tracks! my @dirs = vss_dir_tree($rt); my $dir; foreach $dir (@dirs) { if (exclude_file($dir)) { my $msg = "Ignoring directory: $dir\n"; print $msg; print LOGFILE $msg; } else { print LOGFILE "Processing Directory: $dir\n"; # Extract without the -R for recursive $cmd = "ss properties \"$dir/*\" -O\@$convert::metadata_dir/ssproperties $convert::ss_options"; die "Failed to extract properties" if (system($cmd) != 0); } } } # Recurse down the list of VSS directories and find complete directory tree sub vss_dir_tree { my $rt = shift; my @dirs; my $tmpfile = "$convert::metadata_dir/_dirlist"; unlink($tmpfile); my $cmd = "ss dir \"$rt\" $convert::ss_options -F- -O\@$tmpfile "; # Directories only die "Failed to execute $cmd\n" if (system($cmd) != 0); open(DIR, "<$tmpfile") or die "Failed to open $cmd output\n"; # my @lines = map {chomp; s/\r$//;} (<DIR>); my @lines = (<DIR>); close (DIR); my $line; foreach $line (@lines) { if ($line =~ /:$/) {} # Just ignore these lines elsif ($line =~ /\$([^\n]*)/) { my $dir = "$rt/$1"; if (exclude_file($dir)) { my $msg = "Ignoring directory: $dir\n"; print $msg; print LOGFILE $msg; } else { push @dirs, "$dir"; push @dirs, &vss_dir_tree("$dir"); } } } return @dirs; }
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#10 | 3607 | Robert Cowham |
Put use of DB_File inside eval so it doesn't matter if not installed. Updated README for Performance Tips. |
||
#9 | 3605 | Robert Cowham | Change name of logfile | ||
#8 | 3590 | Robert Cowham | Use DB_File to store label info rather than large hashes in memory. | ||
#7 | 3589 | Robert Cowham | Merge in changes from Main branch. | ||
#6 | 2133 | Robert Cowham |
Added more careful VSS tree parsing to avoid corrupt parts of a database. New option "vsscorrupt" which triggers this. |
||
#5 | 2125 | Robert Cowham |
Several improvements: - config option for date and time format (tested with US and UK) see comments in config for details. - improved error handling when running subsidiary scripts - extra config option allowing parts of a VSS repository to be excluded (e.g. if corrupt) - allow p4port/client/user to be specified in config file |
||
#4 | 2124 | Robert Cowham | Handle line wrap problems | ||
#3 | 1695 | Robert Cowham | Integrated Peter's changes in - note not fully tested... | ||
#2 | 1691 | Robert Cowham |
Latest changes, e.g. - allow restarts (from date and time) - add client name to config options |
||
#1 | 237 | Robert Cowham |
Improved version of vsstop4. Makes life easier if importing into a depot which already contains stuff you want to keep. Also handles other people updating the depot at the same time. See changes labelled RHGC, and new items in config file. |