#!/usr/bin/perl -w
#
# VSS to Perforce converter, phase II: improve metadata
#
# Copyright 1998 Perforce Software. All rights reserved.
# Written by James Strickland, April 1998
#
# This script applies a heuristic to group multiple checkins as one change if
# a) they are within a user specified time interval of each other
# b) the change descriptions match
# c) the authors match
# d) no file appears twice
#
# Changes are searched for within the user specified time interval; they do
# not have to be adjacent. e.g. if the time interval is 10 minutes and the
# following operations occurred between 9:00 and 9:10:
# 1 edit foo.c#2 9:00 by papa 'don't allow gear retraction if weight on wheels'
# 2 add blah.c#1 9:00 by papa 'don't allow gear retraction if weight on wheels'
# 3 edit bar.c#4 9:00 by mama 'overspeed sensor wasn't read properly before'
# 4 edit bap.c#7 9:02 by baby 'wrong attendant call button lit up'
# 5 edit goo.c#2 9:05 by papa 'rudder controls were backward'
# 6 edit baz.c#8 9:08 by mama 'overspeed sensor wasn't read properly before'
# 7 add foop.c#1 9:09 by papa 'don't allow gear retraction if weight on wheels'
# 8 edit foo.c#3 9:09 by papa 'fix typo in comment'
#
# then changes 1,2, and 7 will be amalgamated as one change (changes 3,4,6
# are by a different author, change 5 has a different change description,
# change 8 involves the same file as change 1).
#
# The next step will involve examining all changes within 10 minutes
# of change 3; this is the same list. Now changes 3 and 6 will be
# amalgamated as one change.
#
# The next step will involve examining all changes within 10 minutes
# of change 4; that may involve examining, say,
# 9 add goop.c#1 9:11 by baby 'wrong attendant call button lit up'
# So now changes 4 and 9 will be amalgamated, and so on.
#
# This process of looking ahead is typically called a "sliding window".
# Only the revisions within the search time interval are in memory at any
# given time. Once objects are no longer referenced in Perl, they disappear.
# This should avoid problems with running out of memory.
#
# RHGC - fixed problem with more than one "unget" being called at a time.
require 5.0;
use strict;
use integer;
use lib '.';
use convert;
use Change;
# The changes file must already be sorted.  Initially the changes are written
# out as changes.ns ("not sorted") and changes is deleted, so if the sort step
# has not been run the first open below fails with a "can't open" message.
open(CHANGES, "<$convert::metadata_dir/changes")
    or die "can't open: $!";
open(NEWCHANGES, ">$convert::metadata_dir/changes.new")
    or die "can't open: $!";

# Working state for the sliding-window pass that follows.
my @nearby_changes;     # changes read ahead that fall inside the current window
my @ungotten_change;    # change(s) read ahead but put back for a later pass
my $current;            # the change other changes are being merged into
my $anotherchange;      # most recently read look-ahead change
my $num_changes = 0;    # number of (possibly amalgamated) changes written out
# Main pass: repeatedly pick the next unprocessed change, read ahead to collect
# every change within $convert::time_interval of it, fold the matching ones
# into it, and write the result to NEWCHANGES.
#
# The loop keeps going while there is anything left in any of the three
# sources: the input file, the look-ahead window (@nearby_changes), or the
# put-back buffer (@ungotten_change).
#
# Class methods are called with the arrow form (Change->get) rather than the
# indirect-object form (get Change), which is ambiguous to the parser.
while ( !Change->finished(\*CHANGES)
        || scalar(@nearby_changes)
        || scalar(@ungotten_change) ) {

    # The next change to consider is the oldest one still in the window; if
    # the window is empty, take the put-back change, else read a fresh one.
    if ( scalar(@nearby_changes) ) {
        $current = shift(@nearby_changes);
    }
    elsif ( scalar(@ungotten_change) ) {
        $current = shift(@ungotten_change);
    }
    else {
        $current = Change->get(\*CHANGES);
    }

    # Extend @nearby_changes to include all changes with timestamps within
    # $convert::time_interval of $current's timestamp.  Put-back changes are
    # consumed before reading from the file.  The loop stops when a read
    # yields a false value (presumably end of input - confirm in Change.pm).
    while ( $anotherchange = scalar(@ungotten_change)
                ? shift(@ungotten_change)
                : Change->get(\*CHANGES) ) {
        if ( $anotherchange->timestamp - $current->timestamp
                > $convert::time_interval ) {
            # Outside the window: put it back at the FRONT of the buffer, where
            # it was taken from, so the buffer always stays in input order.
            unshift(@ungotten_change, $anotherchange);
            last;
        }
        push(@nearby_changes, $anotherchange);
    }

    # Mark all files in the current change as seen
    # (note: typically there's only one file in the change).
    my %seen;
    foreach my $file ($current->filelist) {
        $seen{$file} = 1;
    }

    # Look for matches in the window.  Changes that are merged into $current
    # are dropped; everything else is kept, in order, for later passes.
    my @unmerged;
    foreach my $candidate (@nearby_changes) {
        # We can only amalgamate changes if no filenames are duplicated within
        # the time period we're considering.  Every file is recorded as seen
        # whether or not its change ends up merged, so a later revision of the
        # same file can never be folded into $current.
        my $duplicate_file = 0;
        foreach my $file ($candidate->filelist) {
            $duplicate_file = 1 if $seen{$file};
            $seen{$file} = 1;
        }

        if (   $candidate->change_description eq $current->change_description
            && $candidate->author eq $current->author
            && !$duplicate_file ) {
            # Append the candidate's changelist entries to $current's.
            $current->changelist(
                [ @{ $current->changelist }, @{ $candidate->changelist } ] );
        }
        else {
            push(@unmerged, $candidate);
        }
    }
    @nearby_changes = @unmerged;

    $current->put(\*NEWCHANGES);
    $num_changes++;
}
# Replace the old changes file with the newly written one.
#
# Buffered write errors (e.g. a full disk) are only reported when the output
# handle is closed, so that close must succeed BEFORE the original file is
# deleted; otherwise a truncated changes.new could silently replace it.
close(CHANGES);
close(NEWCHANGES) or die "******** can't close changes.new: $!";
# The old file is removed explicitly before the rename - presumably because
# rename onto an existing file fails on some platforms (TODO confirm).
unlink("$convert::metadata_dir/changes") or die "******** can't delete changes file: $!";
rename("$convert::metadata_dir/changes.new","$convert::metadata_dir/changes") or die "********* can't rename: $!";
# Tell the user how many changes the conversion will create and which
# changelist number it will start at: one past the number matched in the
# output of "changes -m 1", or 1 if that output contains no "Change <n>".
print "Conversion will result in $num_changes changes ";
my ($latest_changelist) = convert::p4run(" changes -m 1") =~ m@Change\s+(\d+)@;
if (defined $latest_changelist) {
    my $next_changelist = $latest_changelist + 1;
    print "starting with changelist $next_changelist.\n";
}
else {
    print "starting with changelist 1.\n";
}