#! /usr/bin/perl -w use strict; =head1 Notices Originally developed for Perforce by VIZIM (www.vizim.com) Copyright (c) 2014 Perforce Software, Inc. and VIZIM Worldwide, Inc. All rights reserved. Please see LICENSE.txt in top-level folder of this distribution for license information =cut use v5.14.0; # Earliest version testing was performed against use File::Copy; use File::Path; use File::Basename; use File::Spec::Functions qw(rel2abs catfile); use IO::Handle; use Time::Local; use Getopt::Long; my $APPNAME = 'TFSHistory.pl'; my $versionMajor = 7; my $versionMinor = '07'; my $APPWHAT = "TFS repository history extract; Version $versionMajor.$versionMinor"; my $APPNOTICES = "Copyright (c) 2014 Perforce Software, Inc. and VIZIM Worldwide, Inc. All rights reserved. See LICENSE.txt for license information."; =head1 TFS extract tool This tool extracts history information from a TFS repository. =cut =head2 Operational Context The extraction runs from a TFS workspace that has the collection to be extracted as its operational context. =cut #################### # # Program constants and globals # #################### $| = 1; # force STDOUT to keep logs up to date in case of failure/ abort =head2 Stop file A clean, coordinated stop of an extraction is accomplished by creating a stop file in the directory of execution. Existence of the stop file is the important characteristic. Content of the stop file, if any, is ignored. Extraction will not be initiated if the stop file exists. The full path to the stop file is identified in the first few lines of output from this tool. =cut my $pathStopFile = rel2abs( catfile( '.', 'History.stop' ) ); =head2 Report Access mode Normally, access errors terminate extraction processing to allow restart of the extraction process. This is appropriate as TFS servers have been observed to fail when presented with a long series of historic requests. There is also a known 2012 TFS access error invovlving user GUIDs. The Report Access mode provides a mechanism for the extraction to identify corrupt user GUIDs that would impact the import. Extractions with access errors are not useable for import. To assure that extractions with access errors are not used by generation processing, an ACCESS tag is added to the extraction. As an invalid tag, ACCESS will cause generation processing to fail. =cut my $modeAccessReport = 0; my $reportAccessCounter = 0; =head1 Progress Tracking Extraction often requires hours of elapsed time to complete. Progress tracking provides both a heart-beat and a completion estimate. The primary factor influencing extraction time is the number of TFS requests required to extract information for a changeset. Each changeset is one request. The number of files involved is not a significant factor to extraction. =cut my $progressEvery = 50; my $historyFilename = 'history.raw'; my $hHistory = undef; #################### # # Messaging support # #################### my $pathLog = ''; my $hLog = undef; my $ifERRORReported = 0; my $errorContext = undef; sub Msg($) { print "$_[0]\n"; print $hLog "$_[0]\n" if defined $hLog; } sub MsgERROR($) { print STDERR "$errorContext\n" if defined $errorContext; print STDERR "***" . $_[0] . "***\n"; print $hLog "$errorContext\n" if defined $errorContext && defined $hLog; print $hLog "***" . $_[0] . "***\n" if defined $hLog; $errorContext = undef; $ifERRORReported = 1; return 1; } sub MsgERRORResponse($@) { my ($error, @results) = @_; MsgERROR( $error ); Msg( "Response details:" ); foreach my $line (@results) { chomp $line; Msg( ".. $line" ); } } sub CoordinateExit(;$) { my $exitCode = $_[0]; $exitCode = $ifERRORReported if ! defined $exitCode; $errorContext = undef; Msg( "\n" ); MsgERROR( "Processing terminated by errors" ) if $exitCode != 0; Msg( ">>> Processing completed without errors" ) if $exitCode == 0; close $hLog if defined $hLog; close $hHistory if defined $hHistory; exit( $exitCode ); } sub OptionUsage(;$) { my $errorMessage = $_[0]; MsgERROR( $errorMessage ) if defined $errorMessage; print "$APPWHAT $APPNOTICES Usage: $APPNAME -V $APPNAME [-h | -?] $APPNAME [options] FIRST [ LAST ] Options: -log LOG - Create a copy of all message output in the log file LOG. Default is output to STDOUT only. -progress COUNT - Issue progress message every COUNT changeset extractions. COUNT defaults to $progressEvery. 0 is no progress tracking. -rawhistory RAWHISTORY - Generate raw history information into the file RAWHISTORY. Error if RAWHISTORY exists (to avoid loss due to overwrite). If not specified, RAWHISTORY is $historyFilename. -accessreport - Access problems are reported but not as errors. Normally access problems are reported as errors. Arguments: FIRST - First changeset to start extraction from. Must be greater than 0. LAST - Last changeset to extract. If not specified, the last changeset known. Must be larger than or equal to FIRST. Adjusted to be the last known changeset if specified value is greater than the last known changeset. "; exit 0; } sub OptionVersion() { print "$APPWHAT\n"; exit 0; } #################### # # Progress tracking # #################### my $progressEventTotal = 0; # total progress events my $progressTimeStart = undef; # time() at start of progress tracking my $progressTimeLast = undef; # time() as of last progress report my @progressTimes = (); # seconds per unit during last reporting interval my $progressFullHistory = 0; # number of progress events to fill history buffer my $PROGRESS_IDX_FIRST = 0; my $PROGRESS_IDX_LAST = 9; my $PROGRESS_IDX_COUNT = ($PROGRESS_IDX_LAST - $PROGRESS_IDX_FIRST) + 1; my $progressIDX = $PROGRESS_IDX_FIRST; sub TimeHHMMSS($) { return sprintf "%2d:%02d:%02d", $_[0]/3600, ($_[0]/60) % 60, $_[0] % 60; } sub ProgressInit() { my $idx; for( $idx = $PROGRESS_IDX_FIRST; $idx <= $PROGRESS_IDX_LAST; ++$idx ) { $progressTimes[$idx] = 0; } $progressFullHistory = $PROGRESS_IDX_COUNT * $progressEvery; $progressEventTotal = 0; $progressTimeStart = time(); $progressTimeLast = $progressTimeStart; if( $progressEvery <= 0 ) { Msg( "No progress tracking" ); } else { Msg( "Progress tracking every $progressEvery changesets" ); } } sub Progress($$) { my ($progressThis, $progressLast) = @_; return if $progressEvery <= 0; ++$progressEventTotal; if( ($progressThis == $progressLast) || ($progressEventTotal % $progressEvery == 0) ) { my $now = time(); $progressTimes[$progressIDX] = $now - $progressTimeLast; ++$progressIDX; $progressIDX = $PROGRESS_IDX_FIRST if $progressIDX > $PROGRESS_IDX_LAST; my $historicTime = 0; for( my $idx = $PROGRESS_IDX_FIRST; $idx <= $PROGRESS_IDX_LAST; ++$idx ) { $historicTime += $progressTimes[$idx]; } my $events = $progressEventTotal >= $progressFullHistory ? $progressFullHistory : $progressEventTotal; my $remaining = $progressLast - $progressThis; $remaining = int( $remaining * ($historicTime / $events) ); =head2 Progress Output Progress output uses the format: NN of LL Last HHMMSS Elapsed HHMMSS Remaining HHMMSS NN is the most recent changeset processed and LL is the last changeset to process. HHMMSS is a time value. Last is the time required to complete the most recent processing, Elapsed is the time since start of processing, and Remaining is an estimate of the time required to complete extraction. =cut Msg( sprintf "%6d of %6d Last %s Elapsed %s Remaining %s", $progressThis, $progressLast, TimeHHMMSS( $now - $progressTimeLast ), TimeHHMMSS( $now - $progressTimeStart ), TimeHHMMSS( $remaining ) ); $progressTimeLast = $now; } } sub ProgressLast() { } #################### # # General utility functions. # #################### sub utilAssurePathFile($) { my $FilePath = $_[0]; my ($name, $Dir, $suffix) = fileparse( $FilePath, (qr(\.[^\.]+),qr(\.))); unless( -e $Dir ) { mkpath( $Dir, 0, 0777 ); } } sub XMLTextEncode($) { my $Text = $_[0]; return '' unless defined $Text; # safety $Text =~ s/\&/\&/g; # must be first encode $Text =~ s/\>/\>/g; $Text =~ s/\</\</g; $Text =~ s/\'/\'/g; $Text =~ s/\"/\"/g; return $Text; } #################### # # Extraction and extraction support # #################### sub RunTFCommand($;$) { my ($command, $exitGood) = @_; $exitGood = 0 unless defined $exitGood; return (0, ()) if $command eq ''; my @results = `tf $command 2\>\&1`; my $exitCode = $? >> 8; MsgERRORResponse( "tf exit code $exitCode was not expected $exitGood", @results ) if $exitCode != 0 && $exitCode != $exitGood; return ($exitCode, @results); } sub EstablishFirstLast() { my ($first, $last) = (undef, undef); my ($exitCode, @results) = RunTFCommand( "changeset /latest /noprompt" ); return (undef, undef) if $exitCode != 0; my $knownLast = 0; foreach my $line (@results) { if( $line =~ m!^Changeset\: (\d+)! ) { $knownLast = $1; last; } } if( $knownLast == 0 ) { MsgERRORResponse( "Can't establish last known changeset", @results ); return (undef, undef); } Msg( "Last known changeset: $knownLast" ); if( scalar @ARGV == 1 ) { $first = $ARGV[0]; $last = $knownLast; } else { $first = $ARGV[0]; $last = $ARGV[1]; if( $last > $knownLast ) { Msg( "LAST limited to $knownLast" ); $last = $knownLast; } } MsgERROR( "FIRST must be >= 1" ) if $first < 1; MsgERROR( "LAST must be >= 1" ) if $last < 1; MsgERROR( "FIRST ($first) can not be greater than LAST ($last)" ) if $first > $last; return $ifERRORReported ? (undef, undef) : ($first, $last); } my %actions = (); my %changesetKeywords = (); sub ExtractionSummary() { my $now = time(); print $hHistory "<ACCESS count\=\"$reportAccessCounter\"\>\n" if $modeAccessReport && $reportAccessCounter > 0; print $hHistory "<!--\n"; print $hHistory "Extraction time: " . TimeHHMMSS( $now - $progressTimeStart ) . "\n"; print $hHistory " Changesets: $progressEventTotal\n"; print $hHistory "\nAction distribution:\n"; foreach my $key (sort keys %actions) { print $hHistory sprintf " %6d %s\n", $actions{$key}, $key; } print $hHistory "\nChangeset other keyword distribution:\n"; foreach my $key (sort keys %changesetKeywords) { print $hHistory sprintf " %6d %s\n", $changesetKeywords{$key}, $key; } print $hHistory "-->\n"; } =head1 Extraction Information Extraction information is encoded using XML style tags and constructs. However, the extraction information is not intended to be a valid XML document. Extracted information is fronted by a TFSEXTRACT tag. The tag has a version attribute indicating the version of this script that created the extract. Unless otherwise stated, all values are XML encodings of values from the TFS output. Generation provides manipulation of these values if appropriate. =cut =head2 Date Format TFS 2010 appears to use the fixed date format: MMM dd, yyyy hh:mm:ss AM/PM As such, this is the lowest common denominator format. TFS 2012 and later respond to the "date, time or number format" setting of the local host. The default date format for Win7 and Win8 clients is to prepend the name of the day before the month (MMM) information. A search was made but nothing was found relative to which source (server or client) controls this format. =over 6 =item NOTE This is the one place where the TFS date/time format is significant. From this point forward the import package uses an internal format that is numeric. This is the only place where alternative date/time format processing is required during extract. Using Perl DateTime module packages or other techniques to make date/time recognition more generic is not desirable. Date/time is critical to imports. Flexibility in date/time recognition leads to highly undesirable false positives. Assume a fixed date/time format and reject anything else. =back =cut my %mon2num = qw( jan 1 feb 2 mar 3 apr 4 may 5 jun 6 jul 7 aug 8 sep 9 oct 10 nov 11 dec 12 ); sub TFSDateAsUTC($) { my $TFSDate = $_[0]; my ($month, $mday, $year, $hour, $min, $sec, $AMPM) = (undef, 0, 0, 0, 0, 0, ''); ($month, $mday, $year, $hour, $min, $sec, $AMPM) = ($1, $2, $3, $4, $5, $6, $7, $8) if $TFSDate =~ m!(\S+)\s+(\d+),\s+(\d+)\s+(\d+)\:(\d+)\:(\d+)\s+(\S+)!; if( ! defined $month ) { MsgERROR( "Don't recognized date: '$TFSDate'" ); return "? $TFSDate"; } $month = $mon2num{ lc substr($month, 0, 3) }; $hour = 0 if $AMPM eq 'AM' && $hour == 12; $hour += 12 if $AMPM eq 'PM' && $hour != 12; my $timeUTC = timelocal( $sec, $min, $hour, $mday, $month - 1 , $year - 1900 ); ($sec, $min, $hour, $mday, $month, $year) = gmtime($timeUTC); =head2 Action Time Action time is represented by the UTC of the extracted date-time followed by a YEAR.MON.DAY:HOUR:MIN:SEC of the UTC date-time. The YEAR.MON information is provided for convenience and to help users avoid context relative conversion errors. The UTC conversion assumes that the date-time from TFS is the local version of an internal UTC value. The import needs a UTC value. An operational timezone dependence is created between the extraction and other phases if the UTC conversion is done at other stages of the import process. The extracted TFS date-time is not included in the extracted information. =cut return sprintf "%d %04d.%02d.%02d:%02d:%02d:%02d", $timeUTC, $year+1900, $month+1, $mday, $hour, $min, $sec; } sub Extract($$) { my ($first, $last) = @_; print $hHistory "<TFSEXTRACT version=\"$versionMajor\.$versionMinor\"/>\n"; while( ! $ifERRORReported && $first <= $last ) { $errorContext = "Changeset $first description"; if( -e $pathStopFile ) { MsgERROR( "Stop file encountered" ); last; } my $context = "Changeset $first access failure"; my ($exitCode, @results) = RunTFCommand( "changeset $first /noprompt", 100 ); if( $modeAccessReport && $exitCode == 100 ) { ++$reportAccessCounter; print $hHistory "<!--- $context -->\n"; Msg( $context ); Progress( $first, $last ); ++$first; next; } elsif( $exitCode != 0 ) { MsgERROR( $context ); last; } my ($changeset, $user, $date) = ('', '', ''); my ($comment, $other, $blanks) = ('', '', ''); my @items = (); my $isComment = 0; my $isItem = 0; my $isOther = 0; foreach my $line (@results) { chomp $line; # Lines that start with a non-blank character are usually keywords. # A select set of keywords significant to the extract have # specific identification and processing. All other lines are # collectively part of what is called the other. # # NOTE: The changeset, user, and date keywords always occur at the # head of the information. Instances of these keywords in # comment and other text have been encountered. Once they # have been processed do not attempt to re-process them. if( $line =~ m!^Changeset\: (.+)! && $changeset eq '' ) { $changeset = $1; $isComment = 0; $isItem = 0; $isOther = 0; } elsif( $line =~ m!^User\: (.+)$! && $user eq '' ) { $user = $1; $isComment = 0; $isItem = 0; $isOther = 0; } elsif( $line =~ m!^Date\: (.+)$! && $date eq '' ) { $date = TFSDateAsUTC( $1 ); $isComment = 0; $isItem = 0; $isOther = 0; } elsif( $line =~ m!^Comment\:$! ) { $isComment = 1; $isItem = 0; $isOther = 0; $blanks = ''; } elsif( $line =~ m!^Items\:$! ) { $isComment = 0; $isItem = 1; $isOther = 0; $blanks = ''; } elsif( $line =~ m!^(\S.+)\:$! ) { my $otherKeyword = $1; ++$changesetKeywords{$otherKeyword}; $isComment = 0; $isItem = 0; $isOther = 1; $other .= "$blanks" if $other ne ''; $other .= "$otherKeyword:\n"; $blanks = ''; # Values are established by the context of a keyword. Anything outside # a keyword context is an error that needs to be reported. } elsif( $line =~ m!^\s*$! ) { $blanks .= "\n"; } elsif( $isComment && $line ne '' ) { $comment .= "\n" if $comment ne ''; $comment .= "$blanks$line"; $blanks = ''; } elsif( $isOther && $line ne '' ) { $other .= "\n" if $other ne ''; $other .= "$blanks$line"; $blanks = ''; } elsif( $isItem && $line =~ m!^\s+([^\$]+)(\$.+)$! ) { my ($action, $reference) = ($1, $2); $action =~ s!\s+$!!; ++$actions{$action}; push @items, { 'action' => $action, 'reference' => $reference }; } else { MsgERROR( "Don't understand value line '$line'" ); last; } } =head2 Changeset Start The start of information associated with a changeset is specified by a CSS (ChangeSet Start) tag. The CSS tag has attributes that describe the changeset 'number', 'user', and 'date'. COMMENT and NOTE tags are values of the CSS tag. The COMMENT values are associated with the 'Comment:' keyword. Other keywords and their value lines are collectively the value of the NOTE tag. =cut print $hHistory "<CSS number\=\"$changeset\" user\=\"$user\" date\=\"$date\"/>\n"; print $hHistory " <COMMENT>" . XMLTextEncode( $comment ) . "</COMMENT>\n"; print $hHistory " <NOTE>" . XMLTextEncode( $other ) . "</NOTE>\n" if $other ne ''; =head2 Changeset Items Each item associated with a changeset is represented by an ITEM tag within the extraction. The ITEM tag has attributes describing the 'action' and 'reference' of the TFS item. ITEMs may reference either a directory or a file. Item type does not impact generation so the extraction does not spend time establishing the type of the ITEM's reference. =cut foreach my $item (@items) { if( -e $pathStopFile ) { MsgERROR( "Stop file encountered" ); last; } print $hHistory " <ITEM action\=\"$item->{action}\" reference\=\"$item->{reference}\"/>\n"; } =head2 Changeset End The end of items associated with the most previous CSS tag is indicated by a CSE (ChangeSet End) tag. =cut print $hHistory "<CSE/>\n"; Progress( $first, $last ); ++$first; } } #################### # # Main processing point # #################### my $optHelp = 0; my $optVersion = 0; Getopt::Long::Configure( "auto_abbrev", "no_ignore_case" ); OptionUsage( "Invalid specification" ) unless( GetOptions( "help|?" => \$optHelp, "log=s" => \$pathLog, "accessreport" => \$modeAccessReport, "progress=i" => \$progressEvery, "rawhistory=s" => \$historyFilename, "Version" => \$optVersion ) ); # Help and version are one description and we're done OptionVersion() if $optVersion; OptionUsage() if $optHelp || scalar @ARGV == 0; # Must be one or two arguments. 0 has already been covered above. OptionUsage( "Don't understand arguments" ) if( scalar @ARGV > 2 ); # Establish a log if( $pathLog ne '' ) { $pathLog = rel2abs( $pathLog ); utilAssurePathFile( $pathLog ); Msg("Activity logged in: $pathLog"); open $hLog, ">", $pathLog; } # Identify the processing. Msg( $APPWHAT ); # Verify the TFS context and establish the changeset range. my ($first, $last) = EstablishFirstLast(); CoordinateExit() unless defined $first; Msg( "History for changeset $first" ) if $first == $last; Msg( "History for changesets $first to $last" ) if $first != $last; Msg( "Extraction stop file: $pathStopFile" ); my $historyPath = rel2abs( $historyFilename ); Msg( "Extracted history information to: $historyPath" ); if( -e $historyPath ) { MsgERROR( "History file exists '$historyFilename'" ); CoordinateExit( ); } else { utilAssurePathFile( $historyPath ); open $hHistory, '>', $historyPath; $hHistory->autoflush(1); print $hHistory "<!---\nGenerated " . localtime() . " by $APPNAME\n -->\n"; } ProgressInit(); Extract( $first, $last ); ExtractionSummary(); ProgressLast(); CoordinateExit();
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#2 | 10934 | Neal Firth |
Deal with a lower common denominator date/time format to support TFS 2010 history. Add a lot of perldoc text about dealing with date/time since this is the only place in the tool suite where TFS date/time representation is significant. Changeset, User, and Date recognition is now context relative. |
||
#1 | 10087 | Neal Firth | Versions verified against current migration document |