#!/usr/bin/perl -w
#
# $Id: //guest/jeffery_g_smith/perforce/utils/sttop4/python/readhist.py#1 $
#
# StarTeam to Perforce Converter, phase I: extract StarTeam metadata
#
# Copyright 1998 Perforce Software.  All rights reserved.
# Based on VSStoP4:
#   Written by James Strickland, April 1998
#   Maintained by Robert Cowham, since 2000
# Updated to support StarTeam conversions:
#   Jeffery G. Smith, MedPlus, Inc. 2004-2005
#
# The StarTeam repository is stored in a non-documented, binary format.
# However, all the metadata (data about the revisions in the archive) is
# available via a COM interface supported by the StarTeam SDK.  This script
# extracts all useful metadata using the COM API, writing the result to files
# which contain no extraneous information and which are easy to parse.
#
# Of course, if Borland changes the COM API, this script may have to change
# to match it!
#
# Files written below $convert::metadata_dir:
#   ssproperties    raw per-file property records (intermediate)
#   files           one "<type> <path>" line per file to convert
#   changes.ns      one Change record per file revision ("ns" = not sorted)
#   labels_summary  label name, timestamp and comment, once per label
#   db.labels       "<version> <path>" -> space separated label names

require 5.0;
use strict;
use integer;
use Time::Local;
use lib '.';
use convert;
use Change;
use File::Path;
use Win32::OLE;
use stcom;

# ---------------------------------------------------------------------------
# State shared between the main program and the subs below.
# ---------------------------------------------------------------------------

# "<version> <path>" => space separated list of labels on that revision.
# Tied to a DB_File hash when that module is available.
my %label_files;

# Per-label bookkeeping, all keyed by the (transformed) label name.
my (%label_comments, %label_warned, %label_ignored, %label_processed);
my (%label_timestamp);

# Stack of pushed-back input lines used by myread()/pushback().
my @linebuffer;

# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

convert::openlog('open');
StarTeamSDKinit();

mkpath($convert::metadata_dir);             # ensure the metadata directory exists
mkpath("$convert::metadata_dir/labels");    # ensure the label dir exists

my $msg = "can't open";

my $files_path      = "$convert::metadata_dir/files";
my $properties_path = "$convert::metadata_dir/ssproperties";
my $labels_db_path  = "$convert::metadata_dir/db.labels";

open(FILES, '>', $files_path) or die "$msg $files_path: $!\n";
unlink($properties_path);    # otherwise we would append to a stale copy

# Modified to allow more than one root dir to be specified ('|' separated)
if ($convert::root =~ /\|/) {
    foreach my $rt (split(/\|/, $convert::root)) {
        extract_vss_metadata($rt);
    }
}
else {
    extract_vss_metadata($convert::root);
}

# Check for DB_File module installed - much faster for larger repositories if so
my $USEDB_FILE = 0;
eval { require DB_File; DB_File->import(); };
if (!$@) {
    $USEDB_FILE = 1;
    print "Using module DB_File\n";
    unlink $labels_db_path;
    no warnings qw(once);
    tie %label_files, "DB_File", $labels_db_path,
        O_RDWR() | O_CREAT(), 0666, $DB_File::DB_HASH
        or die "Cannot open file '$labels_db_path': $!\n";
}

# Turn the property records into the list of files to convert.  $file and
# $type are confined to this block so that no sub can pick them up by accident.
open(SS, '<', $properties_path) or die "$msg $properties_path: $!\n";
{
    my ($file, $type);
    FILE_OR_PROJECT: while (defined($_ = myread())) {
        chomp;
        s/\r$//;
        my ($field, $value) = split(/:\s*/, $_, 2);
        next unless defined($value);

        if ($field eq "File") {
            $file = $value;
            # \000-\037 is the full range of ASCII control characters
            # (octal 037 == decimal 31).
            if ($file =~ /[\000-\037@#%\*]/ || $file =~ /\.\.\./) {
                die "..., @, #, %, * or unprintable characters not allowed in filename '$file' - sorry, rename it and start over.\n";
            }
        }
        elsif ($field eq "Type") {
            $type = lc($value);
        }
        elsif ($field eq "Store only latest version") {
            # This field closes a record: File and Type must have been seen.
            die "parse error" if (!defined($file) || !defined($type));
            if ($value eq "Yes") {
                $type = "tempobj";
            }
            $file =~ s/^\s+//;
            if (exclude_file($file)) {
                my $msg = "Ignoring file: $file\n";
                convert::log($msg);
            }
            else {
                print FILES "$type $file\n";
            }
            undef $file;
            undef $type;
        }
    }
}
convert::log("\n\nDone with: ssproperties\n", 1);
close(FILES) or die "can't close $files_path: $!\n";
close(SS);

my $labels_summary_path = "$convert::metadata_dir/labels_summary";
my $changes_path        = "$convert::metadata_dir/changes.ns";

open(FILES, '<', $files_path) or die "$msg $files_path: $!\n";    # for input
open(LABELS_SUMMARY, '>', $labels_summary_path)
    or die "$msg $labels_summary_path: $!\n";
open(CHANGES, '>', $changes_path)
    or die "$msg $changes_path: $!\n";                            # ns for "not sorted"
unlink("$convert::metadata_dir/changes");    # get rid of any existing change file to avoid confusion

print "Starting processing of file versions\n";
my $file_count = 0;
while (<FILES>) {
    my ($type, $file) = split(/ /, $_, 2);
    chomp($file);
    read_history($file);
    print "Processing file: $file_count\r";
    $file_count++;
}
print "\n";

close(FILES);
close(LABELS_SUMMARY) or die "can't close $labels_summary_path: $!\n";
close(CHANGES)        or die "can't close $changes_path: $!\n";

if ($USEDB_FILE) {
    untie %label_files;
}
else {
    # No DB_File: dump the label map as "<version> <path>#<labels>" lines.
    open(LF, '>', $labels_db_path) or die "$msg $labels_db_path: $!\n";
    foreach my $k (keys %label_files) {
        print LF "$k#" . $label_files{$k} . "\n";
    }
    close(LF) or die "can't close $labels_db_path: $!\n";
}

StarTeamSDKdone();

# ---------------------------------------------------------------------------
# Subroutines
# ---------------------------------------------------------------------------

# read_history($file)
#
# Queues the history of $file (see read_file_history) and parses it: every
# real revision is written as a Change record to CHANGES, every label entry is
# remembered and attached to the next real revision that follows it in the
# listing.  Expects the file handles CHANGES and LABELS_SUMMARY to be open.
# Dies if a history item does not start with a banner line.
sub read_history
{
    my $file = shift;

    read_file_history($file);
    read_comment();    # ignore everything up to the first data block
    # (previously I verified that it started with "History of $file ..."
    # except that MS word wraps long filenames with spaces so just hang it)

    my @pending_labels;
    ITEM: while (defined($_ = myread())) {
        die "parsing messed up on '$_'\n"
            unless /^\*{17}( +Version (\d+) +\*{17})?/;

        my ($version, $user, $timestamp, $label, $comment);
        $version = $2;    # undef for a label banner (no "Version N")

        $_ = myread();
        if (/^Label: "(.+)"$/) {
            $label = $1;
            $_ = myread();
        }
        ($user, $timestamp) = parse_user_and_timestamp($_);
        $_ = myread();

        if (/Labeled/) {
            # this revision isn't really a revision - it's just a label
            my $before = $label;

            if ($convert::label_ignore_regexp) {    # ignore unwanted labels
                if ($label =~ /$convert::label_ignore_regexp/i) {
                    if (!$label_ignored{$label}) {
                        print "Ignoring label \"$label\"\n";
                        $label_ignored{$label} = 1;
                    }
                    $comment = read_comment();    # consume the label comment
                    next ITEM;
                }
            }

            # label transformations without a warning

            # labels must not contain umlauts
            $label =~ s/\204/ae/g;
            $label =~ s/\224/oe/g;
            $label =~ s/\201/ue/g;

            # delete contiguous exclamation marks and append "[local]" instead
            if ($label =~ s/ *!!+ *//g) {
                $label .= "_[local]";
            }

            # replace spaces within labels silently as they appear in almost every label
            $label =~ s/ /_/g;

            # replace more special characters silently
            $label =~ s@/@-@g;
            $label =~ tr/()/[]/;

            # label transformations with a warning

            # replace special characters (\000-\037 = all ASCII control chars)
            if ( $label =~ s%[\000-\037 @#&,<>()\"'!\%\*/'\\:]%_%g && !$label_warned{$label}) {
                print "label \"$before\" changed to \"$label\"\n";
                $label_warned{$label} = 1;
            }
            if ($label =~ s/\.\.\./_/g && !$label_warned{$label}) {
                print "Removed ellipsis (...) in \"$label\"\n";
                $label_warned{$label} = 1;
            }
            if ($label =~ /^\d+$/ && !$label_warned{$label}) {
                $label = "_$label";
                print "Underscore prepended to all numeric label \"$label\"\n";
                $label_warned{$label} = 1;
            }

            # Prefix label if specified
            $label = $convert::label_prefix . $label;

            push @pending_labels, $label;

            # assume all the comments for a particular label are the same...
            $comment = read_comment();
            $label_comments{$label}  = $comment   if !defined($label_comments{$label});
            $label_timestamp{$label} = $timestamp if !defined($label_timestamp{$label});
            next ITEM;
        }    # Labeled

        # this is a "real" revision - either "Checked in projectname" or "Branched"
        $comment = read_comment();
        my $change = Change->new( {
            'timestamp'          => $timestamp,
            'author'             => $user,
            'change_description' => $comment,
            'changelist'         => [ "$version $file" ]
        } );
        $change->put(\*CHANGES);

        # Attach every label seen since the previous revision to this one.
        my $revision = "$version $file";
        while (@pending_labels) {
            $label = pop(@pending_labels);
            if ($label_files{$revision}) {
                $label_files{$revision} = $label_files{$revision} . " " . $label;
            }
            else {
                $label_files{$revision} = $label;
            }
            if (!defined($label_processed{$label})) {
                $label_processed{$label} = 1;
                print LABELS_SUMMARY "$label $label_timestamp{$label}\n";
                # use 10 *'s as end of comments marker
                print LABELS_SUMMARY "$label_comments{$label}**********\n";
            }
        }
    }
    # NOTE(review): SS is already closed by the main program before this sub
    # is first called; the call is kept from the original - confirm whether
    # it is still needed.
    close(SS);
}

# parse_user_and_timestamp($line)
#
# Parses a "User: <name>  Date: <date>  Time: <time>" line.  The field order
# of the date is taken from $convert::format_date (m.d.y, d.m.y or y.m.d with
# any single separator character) and the time layout from
# $convert::format_time (H:M or H:Mp).  Returns ($user, $timestamp) where
# $timestamp is the timelocal() value.  Dies on anything it cannot parse.
sub parse_user_and_timestamp
{
    my $line = shift;

    my $date_re = $convert::format_date;
    my $time_re = $convert::format_time;

    my ($user, $date_exp, $time_exp) =
        defined($line)
        ? ($line =~ m{^User: (.*)\b\s+Date:\s+(\S+)\s+Time:\s+(\S+)})
        : ();
    if (!defined($time_exp)) {
        # 2002-03-04/ps
        # A problem here most likely indicates a row of asterisks in a
        # comment. Probably the regexps in this file with "\*{17}" should be
        # changed to fix this, but I have no more time to do _and_ test
        # it...
        # Changed by RC - seems OK.
        $line = '' unless defined($line);
        print "timestamp problem: $line\n";
        die "can't parse timestamp";
    }
    $user = lc($user) if ($convert::lowercase_usernames);

    my ($day, $mon, $year, $hour, $min);
    my ($f1, $f2, $f3);

    # Date component: three numbers with single-character separators.
    if (!(($f1, $f2, $f3) = ($date_exp =~ m{^(\d+).(\d+).(\d+)}))) {
        print "date problem: $date_exp\n";
        die "can't parse date";
    }
    if ($date_re =~ /^m.d.y$/) {
        ($mon, $day, $year) = ($f1, $f2, $f3);
    }
    elsif ($date_re =~ /^d.m.y$/) {
        ($day, $mon, $year) = ($f1, $f2, $f3);
    }
    elsif ($date_re =~ /^y.m.d$/) {
        ($year, $mon, $day) = ($f1, $f2, $f3);
    }
    else {
        print "date problem: $date_re\n";
        die "can't parse date format";
    }

    # Now time component
    if (!(($f1, $f2, $f3) = ($time_exp =~ m{^(\d+).(\d+)([ap]*)}))) {
        print "time problem: $time_exp\n";
        die "can't parse time";
    }
    if ($time_re =~ /^H:M$/) {
        $hour = $f1;
        $min  = $f2;
        if ($f3 =~ /[ap]/) {
            print "time problem: expected $time_re got unexpected AM/PM char\n";
            die "can't parse time";
        }
    }
    elsif ($time_re =~ /^H:Mp$/) {
        $hour = $f1 % 12;    # 12a -> 0, 12p -> 12 after the adjustment below
        if ($f3 eq 'p') {
            $hour += 12;
        }
        $min = $f2;
    }
    else {
        print "time problem: $time_re\n";
        die "can't parse time format";
    }

    # timelocal takes second, minute, hour, day, month, year
    # in the range 0..59, 0..59, 0..23, 1..31, 0..11, 0..99
    # The two digit year has assumptions made about it such that
    # any time before 2037 (when the 32-bit seconds-since-1970 time
    # will run out) is handled correctly. i.e. 97 -> 1997, 1 -> 2001
    my $timestamp = timelocal(0, $min, $hour, $day, $mon - 1, $year);

    # convert user names because they must not contain umlauts
    $user =~ s/\204/ae/g;
    $user =~ s/\224/oe/g;
    $user =~ s/\201/ue/g;

#    # nifty table for more complicated username conversion (if desired)
#    my %usertable = ( "user_j"      => "joe.user",
#                      "developer_d" => "donald.developer",
#                    );
#    if ( !exists $usertable{$user} ) {
#        print "unknown user $user\n";
#    } else {
#        $user = $usertable{$user};
#    }

    return ($user, $timestamp);
}

# read_comment()
#
# Collects input lines up to (not including) the next banner line, which is
# pushed back for the caller.  Returns the collected text with a leading
# "Comment: " / "Label comment: " tag removed and trailing blank lines
# collapsed.  The same clean-up is applied when the input simply runs out.
sub read_comment
{
    my $comment = "";
    while (defined(my $line = myread())) {
        # convert a few umlauts from DOS-Codepage to Latin-1:
        # a-, o-, u-umlaut, A-, O-, U-umlaut, sharp s
        $line =~ tr/\204\224\201\216\231\232\341/\344\366\374\304\326\334\337/;

        # SAB 980503 - Comment can be terminated either by a new version
        # banner or by a Label separator...
        ## *****************  Version 28  *****************
        ## User: User1     Date:  3/19/98  Time:  2:16p
        ## Checked in $/Projects/Common/AppUtils
        ## Comment: setup CoffFormat build configuration
        ##
        ## **********************
        ## Label: "demo #1"
        ## User: User2     Date:  3/16/98  Time:  4:23a
        ## Labeled
        ## Label comment: Proposed demo.
        ##
        ## **********************
        ## Label: "demo."
        ## User: User2     Date:  3/13/98  Time:  2:35a
        ## Labeled
        ## Label comment: Project ready for demo.
        ##
        ## *****************  Version 27  *****************
        ## User: User2     Date:  2/17/98  Time: 10:27p
        ## Checked in $/Projects/Common/AppUtils
        ## Comment: Fixed location of output .lib/.bsc files.
        ##
        if ($line =~ /^\*{22}$|^\*{17} +Version (\d+) +\*{17}$/) {
            pushback($line);
            last;
        }
        $comment .= $line;
    }
    $comment =~ s/^(Label comment|Comment): //;
    $comment =~ s/\n\s+$/\n/;    # strip trailing blank lines
    return $comment;
}

# functions to read from SS with pushback

# myread()
#
# Returns the most recently pushed-back line if there is one, otherwise the
# next line from SS (undef at end of input).  Lines read from SS are logged
# when $convert::debug_level > 1.
sub myread
{
    my $line = pop @linebuffer;
    if (!defined($line)) {
        $line = <SS>;
        convert::log("${line}", 1) if defined($line) && $convert::debug_level > 1;
    }
    return $line;
}

# pushback($line)
#
# Pushes $line onto the pushback stack; the last line pushed is the first one
# myread() returns.
sub pushback
{
    push @linebuffer, shift;
}

# read_file_history($filename)
#
# Queues a synthetic history listing for $filename on the pushback stack, in
# the layout read_history() parses: header, version banner, User/Date/Time
# line, operation line, comment.
sub read_file_history
{
    my $filename = shift;

    # Only emit the header when nothing is pending (decided before we push).
    my $need_header = !@linebuffer;

    my $fileref = STgetfileref($filename);
    my $history = STgethistory($fileref);
    print $history;

    my $version = STgetfilever($fileref);
    # NOTE(review): no accessor for the user name or the check-in time is
    # visible in this file, so the original STgetfilever placeholders are
    # kept - replace them once stcom provides the real accessors.
    my $user    = STgetfilever($fileref);
    my $date    = STgetfiledate($fileref);
    my $time    = STgetfilever($fileref);
    my $comment = STgetfilecomment($fileref);

    my @lines = (
        "***************** Version $version *****************\n",
        "User: $user\tDate: $date\tTime: $time\n",
        "Operation\n",
        "Comment: $comment\n\n",
    );
    unshift @lines, "History of $filename\n\n" if $need_header;

    # The pushback buffer is a stack, so push in reverse to have myread()
    # hand the lines back in reading order.
    pushback($_) foreach reverse @lines;
}

# exclude_file($file)
#
# True (1) if $file starts with $convert::exclude, or with any one of its
# '|' separated alternatives; the alternatives are matched literally.
# Returns 0 when no exclusion is configured or nothing matches.
sub exclude_file
{
    my $file = shift;
    if (!$convert::exclude) {
        return 0;
    }
    # note use of Quote Meta encapsulation and file matching at start of string
    if ($convert::exclude =~ /\|/) {
        foreach my $pat (split(/\|/, $convert::exclude)) {
            if ($file =~ /^\Q$pat\E/) {
                return 1;
            }
        }
    }
    elsif ($file =~ /^\Q$convert::exclude\E/) {
        return 1;
    }
    return 0;
}

# extract_vss_metadata($rt)
#
# Walks the StarTeam project named by $rt and appends the property record of
# every file found to the ssproperties file.  Dies if the properties of a
# file cannot be obtained.
sub extract_vss_metadata
{
    my $rt = shift;
    my $st_project = STgetproject($rt);

    # We have to be careful how we produce a tree listing of all files so as
    # to avoid any corrupt parts that will stop us in our tracks!
    my @files = vss_dir_tree($st_project);

    my $msg = "Number of StarTeam files found: " . @files . "\n";
    convert::log($msg);

    my $i = 0;
    foreach my $file (@files) {
        ++$i;
        print "Getting properties for file: $i\r";
        my $name = STgetname($file);
        my $prop = get_file_properties($file);
        if ($prop) {
            append_file("$convert::metadata_dir/ssproperties", $prop);
        }
        else {
            $msg = "\nFailed to get properties for:\n$name\n";
            convert::log($msg);
            die "Failed to get properties for: $name\n";
        }
    }
    print "\n";
}

# append_file($file, @lines)
#
# Appends @lines (already newline terminated) to $file.  Dies if the file
# cannot be opened or the write cannot be completed.
sub append_file
{
    my ($file, @lines) = @_;
    open(my $out, '>>', $file) or die "Failed to open $file\n";
    print {$out} join('', @lines);
    close($out) or die "Failed to write $file: $!\n";
}

# vss_dir_tree($parent [, $folder])
#
# Recurses down the folder tree and returns the list of all file objects
# found.  Called with only a project to start at the root folder of its view;
# recursive calls pass the parent directory path and a sub folder.  Folders
# matched by exclude_file() are skipped together with everything below them.
sub vss_dir_tree
{
    my $parent = shift;
    my $folder = shift;

    # If folder is null then this is the start of the recursion
    my $dir = '';
    if (!$folder) {
        my $view = STgetview($parent);
        $folder = STgetrootfolder($view);
    }
    else {
        $dir = "$parent/" . STgetname($folder);
    }

    my @files = ();
    if (exclude_file($dir)) {
        my $msg = "Ignoring directory: $dir\n";
        convert::log($msg);
        return @files;
    }

    my $files = STgetfilelist($folder);
    while (my $file = STgetnext($files)) {
        push(@files, $file);
    }

    my $folders = STgetsubfolders($folder);
    # NOTE(review): "in" is not imported by the plain "use Win32::OLE;" above;
    # presumably stcom exports it - confirm, or import it with
    # "use Win32::OLE qw(in);".
    foreach my $subfolder (in $folders) {
        push(@files, vss_dir_tree($dir, $subfolder));
    }
    return @files;
}

# get_file_properties($file)
#
# Returns the property record for one StarTeam file object as a single
# string, in the "Field: value" layout parsed by the main program.
sub get_file_properties
{
    my ($file) = @_;
    my $filepathname = STgetfilepathname($file);
    $filepathname =~ s/\\/\//g;    # use forward slashes throughout
    return "File: " . $filepathname . "\n"
         . "Type: " . STgetfiletype($file) . "\n"
         . "Size: " . STgetfilesize($file) . "\n"
         . "Store only latest version: No\n"
         . "Latest:\n"
         . " Version: " . STgetfilever($file) . "\n"
         . " Date: " . STgetfiledate($file) . "\n"
         . "Comment:\n"
         . STgetfilecomment($file) . "\n";
}
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 5264 | Jeffery G. Smith | Create branch for Python conversion. | ||
//guest/jeffery_g_smith/perforce/utils/sttop4/main/readhist.pl | |||||
#8 | 5262 | Jeffery G. Smith | Rebase with integration branch. | ||
#7 | 4895 | Jeffery G. Smith | Tried getting file histories. | ||
#6 | 4818 | Jeffery G. Smith |
Finished extracting metadata. Improved logging subsystem. |
||
#5 | 4795 | Jeffery G. Smith | Worked on first phase of metadata collection. | ||
#4 | 4793 | Jeffery G. Smith | Updated sources to more closely match VSStoP4 baseline. | ||
#3 | 4710 | Jeffery G. Smith | Initial changes for StarTeam support after combining PVCS and VSS converters. | ||
#2 | 4705 | Jeffery G. Smith | Integrated the VSS version. | ||
#1 | 4704 | Jeffery G. Smith | Rename //guest/jeffery_g_smith/perforce/utils/sttop4/main/readvlog.pl To //guest/jeffery_g_smith/perforce/utils/sttop4/main/readhist.pl | ||
//guest/jeffery_g_smith/perforce/utils/sttop4/main/readvlog.pl | |||||
#2 | 4701 | Jeffery G. Smith | Initial conversion to StarTeam. | ||
#1 | 4700 | Jeffery G. Smith | Initialize branch from PVCS converter. | ||
//guest/perforce_software/utils/pvcstop4/main/readvlog.pl | |||||
#1 | 4664 | Robert Cowham | Branch into permanent location. | ||
//guest/robert_cowham/perforce/utils/pvcstop4/main/readvlog.pl | |||||
#1 | 4647 | Robert Cowham |
Rename //guest/robert_cowham/perforce/utils/pvcstop4/... To //guest/robert_cowham/perforce/utils/pvcstop4/main/... |
||
//guest/robert_cowham/perforce/utils/pvcstop4/readvlog.pl | |||||
#7 | 3791 | Robert Cowham | Added exclude option (but not fully tested!) | ||
#6 | 3789 | Robert Cowham | Fixed problem with dir separators. | ||
#5 | 3728 | Robert Cowham | Merged in old changes. | ||
#4 | 3727 | Robert Cowham | Hayden's changes | ||
#3 | 3721 | Robert Cowham |
Various changes (tested at a client): - Use PCLI to read file location info (e.g. if moved) - Use P4Perl for speed - Removed DB_File as it has a bug - Do labels differently for vastly improved speed |
||
#2 | 3638 | Robert Cowham |
Renamed some files with windows extensions. Imported changes from Vsstop4 scripts: - Use P4Perl if installed - Use DB_File if installed - Rework labelling algorithm for speed Net result should be much improved performance. Next step is to use pcli. |
||
#1 | 2289 | Robert Cowham | Initial version from Perforce web page |