#!/usr/bin/perl -w # # VSS to Perforce converter, phase I: extract metadata # # Copyright 1998 Perforce Software. All rights reserved. # Written by James Strickland, April 1998 # # $Id: //guest/perforce_software/vsstop4/main/readhist.pl#1 $ # # This script parses the output of "ss properties -R root_project -O@tempfile" # to generate a list of files and their file types (including "tempobj" # for files which only have the last version stored). Each file history # is then obtained with "ss history filename". Note that there is an option to # parse corrupt VSS repositories without the -R recursive option as that won't work # in such cases - see extract_vss_metadata. # # Note: there is *no* way to obtain the history of files which are currently # deleted! The only workaround would seem to be to "ss recover" the files # before doing the conversion, then "p4 delete" them afterward. This converter # does not attempt to do that. # # It is necessary to do things this way because of the following VSS quirks: # - some revisions aren't really revisions, they're just label operations; # ss history -R output for revisions which are labels doesn't include # the project they're in and hence is ambiguous # - ss history -R does not output any information about version 1 of a file # - the only way to discover the type of a file is with the "ss filetype" # or "ss properties" command # - running "ss filetype -R" is much faster than running "ss properties -R" # but it's missing one crucial bit of information: Store only latest version # Without this the converter will try to include file revisions it can't # get, and there is no way of knowing whether the failure is due to # storing only the latest version or that SourceSafe has lost the version. # - without using -O@ SourceSafe's output is ambigous - it word wraps filenames # and if the wrap occurs at a space it does not output the space # # # The output of ss history consists of a header which specifies the filename, # then a series of multi-line data blocks starting with the line # ***************** Version n ***************** # # The rest of the lines in the data block are allowed to occur in any order, # except for lines starting with "Comment:" or "Label comment:". These must # always appear last, and are terminated by a blank line followed by the start # of another data block. require 5.0; use strict; use integer; use Time::Local; use lib '.'; use convert; use Change; use File::Path; mkpath($convert::metadata_dir); # ensure the metadata directory exists mkpath("$convert::metadata_dir/labels"); # ensure the label dir exists my $msg="can't open"; open(FILES, ">$convert::metadata_dir/files") or die $msg; # Parse the output of "ss properties" - we use a really simple scheme here # which relies solely on the fields "File:", "Type:" and # "Store only latest version:" always appearing, and appearing in that order. # Note that filenames with spaces are no problem - they are, in effect, # quoted by :\s+ on the left and \n on the right. Possible values for # "Type" are "Binary" and "Text", which translates nicely to Perforce's # "binary" and "text". Possible values for "Store only latest version:" # are "Yes" and "No". open(LOGFILE,">>logfile.log") or die "can't open logfile: $!"; # print LOGFILE "\n\nReading from: $cmd\n"; # Modified to allow more than one VSS root dir to be specified unlink("$convert::metadata_dir/ssproperties"); # otherwise VSS appends to it if ($convert::root =~ /\|/) { my $rt; foreach $rt (split(/\|/, $convert::root)) { extract_vss_metadata($rt); } } else { extract_vss_metadata($convert::root); } my %label_files; open(SS,"<$convert::metadata_dir/ssproperties") or die $msg; my ($file,$type); FILE_OR_PROJECT: while( $_=myread() ) { chomp; s/\r$//; my ($field,$value) = split(/:\s*/,$_,2); next unless defined($value); if( $field eq "File") { $file = $value; if( $file =~ /[\000-\031]/ || $file =~ /\.\.\./ ) { die "... or unprintable characters not allowed in filename '$file' - sorry, rename it and start over.\n"; } } elsif( $field eq "Type") { $type = lc($value); # The following lines handle new VSS 2005 file types. $type =~ s/ auto-detect//g; $type =~ s@text: ansi/mbcs@text@g; } elsif( $field eq "Store only latest version") { die "parse error" if(!defined($file)||!defined($type)); if($value eq "Yes") { $type="tempobj"; } $file =~ s/^\s+//; if (exclude_file($file)) { my $msg = "Ignoring file: $file\n"; print $msg; print LOGFILE $msg; } else { print FILES "$type $file\n"; } undef $file; undef $type; } } print LOGFILE "\n\nDone with: ssproperties\n"; close(FILES); close(SS); open(FILES, "<$convert::metadata_dir/files") or die $msg; # for input open(LABELS_SUMMARY, ">$convert::metadata_dir/labels_summary") or die $msg; open(CHANGES, ">$convert::metadata_dir/changes.ns") or die $msg; # ns for "not sorted" unlink("$convert::metadata_dir/changes"); # get rid of any existing change file to avoid confusion print_status("Starting processing of file versions\n"); my $file_count = 0; while(<FILES>) { my ($type,$file) = split(/ /,$_,2); chomp($file); read_history($file); print "Processing file: $file_count\r"; convert::sum_log("Processing file: $file_count " . scalar(localtime())) if ($file_count % 100 == 0); $file_count++; } print "\n"; close(LOGFILE); open(LF,">$convert::metadata_dir/db.labels") or die $msg; foreach my $k (keys %label_files) { print LF "$k#" . $label_files{$k} . "\n"; } close(LF); my (%label_comments, %label_warned, %label_ignored, %label_processed); my (%label_timestamp); sub read_history # expects open file descriptors CHANGES, LABELS_SUMMARY, BRANCHES { my $file = shift; my $tmpfile = "$convert::metadata_dir/_fileprops"; unlink($tmpfile); my $cmd = "ss history \"$file\" $convert::ss_options -O\@$tmpfile "; system($cmd); print LOGFILE "\n\nReading from: $cmd\n"; open(SS,"<$tmpfile"); read_comment(); # ignore everything up to the first data block # (previously I verified that it started with "History of $file ..." # except that MS word wraps long filenames with spaces so just hang it) my (@pending_labels); ITEM: while($_=myread()) { my ($op,$version,$user,$timestamp,$label,$comment); if(/^\*{17}( +Version (\d+) +\*{17})?/) { my ($version,$user,$timestamp,$label,$comment); $version = $2; $_=myread(); if(/^Label: "(.+)"$/) { $label = $1; $_=myread(); } ($user,$timestamp) = parse_user_and_timestamp($_); $_=myread(); if(/Labeled/) { # this revision isn't really a revision - it's just a label my $before = $label; if ($convert::label_ignore_regexp) { # ignore unwanted labels if ($label =~ /$convert::label_ignore_regexp/i) { if (!$label_ignored{$label}) { print "Ignoring label \"$label\"\n"; $label_ignored{$label} = 1; } $comment = read_comment(); next ITEM; } } # label transformations without a warning # labels must not contain umlauts $label =~ s/\204/ae/g; $label =~ s/\224/oe/g; $label =~ s/\201/ue/g; # delete contigous exclamation marks and append "[local] instead" if ($label =~ s/ *!!+ *//g) { $label .= "_[local]"; } # replace spaces within labels silently as they appear in almost every label $label =~ s/ /_/g; # replace more special characters silently $label =~ s@/@-@g; $label =~ s/\?/_/g; $label =~ tr/()/[]/; # label transformations with a warning # replace special characters if( $label =~ s%[\000-\031 \?@#&,<>()\"'!\%\*/'\\:]%_%g && !$label_warned{$label}) { print "label \"$before\" changed to \"$label\"\n"; $label_warned{$label} = 1; } if( $label =~ s/\.\.\./_/g && !$label_warned{$label} ) { print "Removed ellipsis (...) in \"$label\"\n"; $label_warned{$label} = 1; } if( $label =~ /^\d+$/ && !$label_warned{$label} ) { $label = "_$label"; print "Underscore prepended to all numeric label \"$label\"\n"; $label_warned{$label} = 1; } # Prefix label if specified $label = $convert::label_prefix . $label; push @pending_labels,$label; # assume all the comments for a particular label are the same... $comment = read_comment(); $label_comments{$label} = $comment if ! defined($label_comments{$label}); $label_timestamp{$label} = $timestamp if ! defined($label_timestamp{$label}); next ITEM; } # Labelled # this is a "real" revision - either "Checked in projectname" or "Branched" $comment = read_comment(); my $change = new Change( { 'timestamp' => $timestamp, 'author' => $user, 'change_description' => $comment, 'changelist' => [ "$version $file" ] } ); $change->put(\*CHANGES); while(@pending_labels) { $label = pop(@pending_labels); if ($label_files{"$version $file"}) { $label_files{"$version $file"} = $label_files{"$version $file"} . " " . $label; } else { $label_files{"$version $file"} = $label; } if (! defined($label_processed{$label})) { $label_processed{$label} = 1; print LABELS_SUMMARY "$label $label_timestamp{$label}\n"; # use end of comments marker print LABELS_SUMMARY "$label_comments{$label}$convert::label_end_marker\n"; } } next ITEM; } else { die "parsing messed up on '$_'\n"; } } close(SS); } sub parse_user_and_timestamp { $_=shift; my $date_re = $convert::format_date; my $time_re = $convert::format_time; if ( ! m@^User: (.*)\b\s+Date:\s+(\S+)\s+Time:\s+(\S+)@ ) { # 2002-03-04/ps # A problem here most likely indicates a row of asterisks in a # comment. Probably the regexps in this file with "\*{17}" should be # changed to fix this, but I have no more time to do _and_ test # it... # Changed by RC - seems OK. print "timestamp problem: $_\n"; die "can't parse timestamp" ; } my $user = $1; $user=lc($user) if($convert::lowercase_usernames); my $date_exp = $2; my $time_exp = $3; my ($day, $mon, $year, $hour, $min); my ($a, $b, $c); if (!(($a, $b, $c) = ($date_exp =~ m@^(\d+).(\d+).(\d+)@ ))) { print "date problem: $date_exp\n"; die "can't parse date" ; } if ($date_re =~ /^m.d.y$/) { $day = $b; $mon = $a; $year = $c } elsif($date_re =~ /^d.m.y$/) { $day = $a; $mon = $b; $year = $c; } elsif($date_re =~ /^y.m.d$/) { $day = $c; $mon = $b; $year = $a; } else { print "date problem: $date_re\n"; die "can't parse date format" ; } # Now time component if ( !(($a, $b, $c) = ($time_exp =~ m@^(\d+).(\d+)([ap]*)@ ))) { print "time problem: $date_exp\n"; die "can't parse time" ; } if ($time_re =~ /^H:M$/) { $hour = $a; $min = $b; if($c =~ /[ap]/) { print "time problem: expected $time_re got unexpected AM/PM char\n"; die "can't parse time" ; } } elsif($time_re =~ /^H:Mp$/) { $hour = $a % 12; if($c eq 'p') { $hour += 12; } $min = $b; } else { print "time problem: $time_re\n"; die "can't parse time format" ; } # timelocal takes second, minute, hour, day, month, year # in the range 0..59, 0..59, 0..23, 1..31, 0..11, 0..99 # The two digit year has assumptions made about it such that # any time before 2037 (when the 32-bit seconds-since-1970 time # will run out) is handled correctly. i.e. 97 -> 1997, 1 -> 2001 my $timestamp=timelocal(0, $min, $hour, $day, $mon -1, $year); # convert user names because they must not contain umlauts $user =~ s/\204/ae/g; $user =~ s/\224/oe/g; $user =~ s/\201/ue/g; # # nifty table for more complicated username conversion (if desired) # my %usertable = ( "user_j" => "joe.user", # "developer_d" => "donald.developer", # ); # if ( !exists $usertable{$user} ) { # print "unknown user $user\n"; # } else { # $user = $usertable{$user}; # } return ($user,$timestamp); } sub read_comment { my $comment=""; while($_=myread()) { # convert a few umlauts from DOS-Codepage to Latin-1 # ? ? ? ? ? ? $_ =~ tr/\204\224\201\216\231\232\341/??????/; # SAB 980503 - Comment can be terminated either by a new version # banner or by a Label separator... ## ***************** Version 28 ***************** ## User: User1 Date: 3/19/98 Time: 2:16p ## Checked in $/Projects/Common/AppUtils ## Comment: setup CoffFormat build configuration ## ## ********************** ## Label: "demo #1" ## User: User2 Date: 3/16/98 Time: 4:23a ## Labeled ## Label comment: Proposed demo. ## ## ********************** ## Label: "demo." ## User: User2 Date: 3/13/98 Time: 2:35a ## Labeled ## Label comment: Project ready for demo. ## ## ***************** Version 27 ***************** ## User: User2 Date: 2/17/98 Time: 10:27p ## Checked in $/Projects/Common/AppUtils ## Comment: Fixed location of output .lib/.bsc files. ## if(/^\*{22}$|^\*{17} +Version (\d+) +\*{17}$/) { $comment =~ s/^(Label comment|Comment): //; $comment =~ s/\n\s+$/\n/; # strip trailing blank lines pushback($_); return $comment; } $comment .= $_; } $comment =~ s/^Comment: //; return $comment; } # functions to read from SS with pushback my @linebuffer; sub myread { my $line; $line = pop @linebuffer; if (! defined($line)) { $line = <SS>; print LOGFILE "${line}" if defined($line) && $convert::debug_level > 1; } return $line; } sub pushback { push @linebuffer,shift; } sub exclude_file # True if file should be excluded { my $file = shift; if (!$convert::exclude) { return 0; } # note use of Quote Meta encapsulation and file matching at start of string if ($convert::exclude =~ /\|/) { my $pat; foreach $pat (split(/\|/, $convert::exclude)) { if ($file =~ /^\Q$pat\E/) { return 1; } } } elsif ($file =~ /^\Q$convert::exclude\E/) { return 1; } return 0; } # Procedure which extracts using a more painful method sub extract_vss_metadata { my $rt = shift; my $cmd; if (!$convert::vsscorrupt) { # This appends to the output $cmd = "ss properties -R \"$rt\" -O\@$convert::metadata_dir/ssproperties $convert::ss_options"; die "Failed to extract properties" if (system($cmd) != 0); return; } # Otherwise we know we have to be careful how we produce a tree listing of all files so as to avoid # any corrupt parts that will stop us in our tracks! my @files = vss_dir_tree($rt); my $msg = "Number of VSS files found: $#files\n"; print_status($msg); my $file; my $i = 0; my $tmpfile = "$convert::metadata_dir/_fileprops"; foreach $file (@files) { #print LOGFILE "Processing File: $file\n"; print LOGFILE "Getting properties for file: $i\r"; $i++; unlink($tmpfile); $cmd = "ss properties \"$file\" -O\@$tmpfile $convert::ss_options"; if (system($cmd) != 0) { $msg = "Failed to get properties for:\n$file\n"; print LOGFILE $msg; if ($convert::skip_ss_get_errors) { print $msg; } else { die $msg; } } else { append_file("$convert::metadata_dir/ssproperties", $tmpfile); } } print "\n"; } sub append_file # Append second file to first { my $first = shift; my $second = shift; open(FILE, "<$second") or die "Failed to open $second\n"; my @lines = (<FILE>); close (FILE); open(FILE, ">>$first") or die "Failed to open $first\n"; my $line; foreach $line (@lines) { print FILE $line; } close (FILE); } # Recurse down the list of VSS directories and find complete directory tree sub vss_dir_tree { my $rt = shift; my @files; my $tmpfile = "$convert::metadata_dir/_dirlist"; unlink($tmpfile); my $cmd = "ss dir \"$rt\" $convert::ss_options -O\@$tmpfile "; if (system($cmd) != 0) { my $msg = "Failed to list dir $rt\n"; print LOGFILE $msg; if ($convert::skip_ss_get_errors) { print $msg; } else { die $msg; } return @files; } open(DIR, "<$tmpfile") or die "Failed to open $cmd output\n"; my @lines = (<DIR>); close (DIR); my $line; foreach $line (@lines) { if ($line =~ /:$/) {} # Just ignore these lines elsif ($line =~ /\$([^\n]*)/) { my $m = $1; my $dir = "$rt/$m"; if (exclude_file($dir)) { my $msg = "Ignoring directory: $dir\n"; print $msg; print LOGFILE $msg; } else { push @files, &vss_dir_tree("$dir"); } } elsif ($line =~ /([^\n]*)/) { my $file = $1; if ($file =~ /^([^;]*);/) { $file = $1; } if ($file !~ /\d+ item\(s\)|^\s*$/) { $file = "$rt/$file"; push @files, "$file"; } } } return @files; } sub print_status { my $msg = shift; print $msg; convert::log($msg); convert::sum_log($msg); }
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 16012 | Robert Cowham | Rename/move project to conform to new Workshop naming structure. | ||
//guest/perforce_software/utils/vsstop4/main/readhist.pl | |||||
#27 | 8593 | Robert Cowham |
- Updated to work with VSS 2005 - Switched docs to mention Strawberry Perl rather than ActiveState Perl |
||
#26 | 6404 | Robert Cowham |
- Convert to official P4Perl - Avoid appending to logfile.log - Tidy up label markers - Remove name clash on p4exec function |
||
#25 | 6326 | Robert Cowham | - Change label end of comment marker from 10 asterisks to avoid problems | ||
#24 | 6325 | Robert Cowham | Remove question marks (?) from labels | ||
#23 | 6244 | Robert Cowham |
- Merge in changes to cope with previously illegal chars in filenames %, *, #, @ (requires 2004.2 server and above) |
||
#22 | 6017 | Robert Cowham | - Fixed problem with multiple directories of the same name when usin vss_corrupt option. | ||
#21 | 5860 | Robert Cowham | - Improved logging slightly - new summary log file sumlogfile.log created | ||
#20 | 5682 | Robert Cowham | Remove use of DB_File module as seems unreliable. | ||
#19 | 5074 | Robert Cowham | Add \" to excluded chars. | ||
#18 | 4213 | Robert Cowham |
Make more robust when dealing with errors. Output progress when reading VSS history. |
||
#17 | 4212 | Robert Cowham | Cope with Chinese date format. | ||
#16 | 4026 | Robert Cowham |
Make previous change configurable rather than hard-coding and add explanation to config.ini about it. Point Update section on web page to Changelog.txt |
||
#15 | 4024 | Robert Cowham | Make slightly more robust - ignore VSS prompts for input by using the -I- flag to commands. | ||
#14 | 3924 | Robert Cowham | Don't allow ":" in label names. | ||
#13 | 3919 | Robert Cowham | Fix problem with labels when DB_File not used. | ||
#12 | 3917 | Robert Cowham | Fix problem with label parsing again - properly this time. | ||
#11 | 3815 | Robert Cowham | Fixed problem with line of asterisks in change comment. | ||
#10 | 3797 | Robert Cowham |
Remove warnings. Trim whitespace from ends of lines in config.ini entries |
||
#9 | 3795 | Robert Cowham | Avoid warnings (and work with Perl 5.8). | ||
#8 | 3639 | Robert Cowham |
Changed filetype and inserted ID string. Fixed typo. |
||
#7 | 3609 | Robert Cowham |
Integrate in changes for: - Use of DB_File for large hashes (if available) - Reworking of label algorithm - MUCH faster - Use of P4Perl (if available) - much faster - Use of VSS OLE Automation (if available) - sometimes faster See README.txt and Performance section for details. |
||
#6 | 3558 | Robert Cowham |
Fixed bug with French date formats. Autoupdate client root and view based on config.ini entries. |
||
#5 | 3556 | Robert Cowham |
Added extra check on time stamp formats. Tidied up readme - will only run on Windows now. Updated comments in config.ini |
||
#4 | 3188 | Robert Cowham | Fixed bug in sub decl. | ||
#3 | 2660 | Robert Cowham |
Allow labels to be ignored if not wanted using configurable regexp. Next change in this area is to make recreation of labels vastly quicker! |
||
#2 | 2165 | Robert Cowham | Merged in changes from Guest branch | ||
#1 | 2160 | Robert Cowham | Main version from .zip file from http://www.perforce.com/perforce/loadsupp.html#conv page |