#!/usr/local/bin/perl =head1 NAME pverify =head1 SYNOPSIS pverify -p [host:]port [-n] [-head] [-z] [-m M] [-nproc N] [-d //path1[,//path2,...]] [-l log] [-maxerr N] [-v] =head1 DESCRIPTION Run several verify processes in parallel to increase CPU activity and decrease the time we keep the database locked. Options: -d path[,...] piece(s) of the repository to verify Default: all directories from //* except //tmw, which will also be split up in pieces Example: -d //bat/...,//private/... -head Verify only the head revision. Convinient to verify that the latest revision is present in the archive. -l log If this option is used, the verify output is redirected to the specified log file. -m N verify only N revisions of every file -p port Perforce port (required) -maxerr N max number of errors displayed (default: 10) -nproc N max number of processes (default: 30) -n dont run verify - just tell what would happen -v verbose mode -z use the -z option for p4 verify =head1 EXAMPLES pverify -nproc 4 -p 1666 -z -d //sandbox/batscm/... =cut use strict; use warnings; use Cwd qw(abs_path); use File::Basename qw(dirname fileparse); use Getopt::Long qw(GetOptions); use IO::Handle; use Parallel::ForkManager; use Pod::Usage qw(pod2usage); my ($Myname, $Mydir); BEGIN { ($Myname, $Mydir) = fileparse($0); $Myname =~ s{/$}{}; $Mydir = abs_path($Mydir); unshift @INC, dirname($Mydir) . "/lib"; } use MW::Util::StackStdout (); use MW::Util::Util (); # duration() use MW::P4::FixEnv (); # fix %ENV # no buffering *STDOUT->autoflush(); *STDERR->autoflush(); my $start = time; my ($dont, $head_only, $log, $max_err, $MAX_PROCESSES, $max_verified_revs); my (@p4pieces, $port, $verbose, $zopt); GetOptions( 'd=s' => sub { push @p4pieces, split /,/, $_[1] }, 'head' => \$head_only, 'help' => sub { pod2usage(-verbose => 2, -exit => 0) }, 'l=s' => \$log, 'm=i' => \$max_verified_revs, 'maxerr=i' => \$max_err, 'n' => \$dont, 'nproc=i' => \$MAX_PROCESSES, 'p=s' => \$port, 'v' => \$verbose, 'z' => \$zopt, ) or die "Error parsing arguments\n"; push @p4pieces, @ARGV; print "\n * * * * *\n"; die "Specify -p port\n" unless $port; $max_err ||= 10; # max number of errors to display $MAX_PROCESSES ||= 30; # this provides reasonable but not overwhelming load open my $DATA, '<', "$Mydir/$Myname.data" or die "ERROR: Cannot open $Mydir/$Myname.data: $!\n"; my %expected_time; while ( <$DATA> ) { my ($path, $time) = m{^ (\S+) \s+ (\d+) \s* $ }x or next; # in case if we have dups in the data file, take the greater value $expected_time{$path} = $time if (! $expected_time{$path} || $expected_time{$path} < $time); } close $DATA; my $PIECE_TIME_LIMIT = 3000; my %time4sort; if ( ! @p4pieces ) { my @dirs = `p4 -p '$port' dirs '//*'`; chomp(@dirs); while ( @dirs ) { my $piece = shift @dirs; # unclear whether this is a good thing or a bad thing # if ( $piece =~ m{^//sandbox/(\w+)/} ) { # my $user = $1; # if ( ! getpwnam($user) ) { # msg("Skipping $piece because $user is not a valid user\n"); # next; # } # } # If the $piece is 0 we must split it; no comparison is needed. # If the $piece is defined with a positive number, compare it # with $PIECE_TIME_LIMIT. # If the $piece is not defined we won't split it. if ( $expected_time{$piece} && $expected_time{$piece} <= $PIECE_TIME_LIMIT ) { $time4sort{"$piece/..."} = $expected_time{$piece}; push @p4pieces, "$piece/..."; } elsif ( ! defined $expected_time{$piece} ) { $time4sort{"$piece/..."} = 5000; # arbitrary number push @p4pieces, "$piece/..."; } else { # split it push @p4pieces, "$piece/*"; $time4sort{"$piece/*"} = 1; push @dirs, map { chomp; /no such file\(s\)\./ ? () : $_ } `p4 -p '$port' dirs '$piece/\*' 2>&1`; } } } msg("Verifying " . @p4pieces . " parts of the $port repository\n"); if ( $log ) { MW::Util::StackStdout::push_stdout($log) or die "Failed to redirect output:\n $@\n"; } my @err; # Run a child process and analyze the output from that child process if ( my $pid = open my $CHILD, '-|' ) { # parent can read from the child while ( <$CHILD> ) { if ( /\b(MISSING|BAD)\b/ || /^(RCS no such revision \S+)/ || m{Done: //.* \(rc=[1-9]} ) { push @err, $_; s/\n/ ***ERROR***\n/; } print; } waitpid $pid, 0; if ( $? ) { my $msg = "Top child process failed with code $? ***ERROR***\n"; push @err, $msg; print $msg; } } elsif ( defined $pid ) { # child process will perform verify's do_verify(); exit; } else { die "Failed to fork the main child process: $!\n"; } MW::Util::StackStdout::pop_stdout() if $log; msg("Verification is done. Errors: ". @err . ". Duration: ", MW::Util::Util::duration(time - $start), " sec\n", (@err ? "Search for ***ERROR*** in the log. Here " . (@err > $max_err ? join('', "are the first 10 errors:\n", @err[0..9]) : join('', "they are:\n", @err)) : '') ); exit @err; # # Actual verify's # sub do_verify { open STDERR, '>>&STDOUT' or warn "$$: Cannot redirect STDERR to STDOUT: $!\n"; # Sort it the way so the longest appear first my @sorted = sort { $time4sort{$b} <=> $time4sort{$a} } @p4pieces; my $pm = new Parallel::ForkManager($MAX_PROCESSES); foreach ( @sorted ) { # Forks and returns the pid for the child: my $pid = $pm->start and next; my $start = time; my $piece = $_ . ($head_only ? '#head' : ''); $piece = "'$piece'" if $piece =~ /\s/; # verify $piece in the child process my $cmd = "p4 -p '$port' verify" . ($max_verified_revs ? " -m $max_verified_revs" : '') # Note: It's OK to generate missing digests for the meta depot . ( # we cannot use -u on replicas. either add logic # to see whether $port is a replica or not, or always # use -q (which is obviously easier) # $piece =~ m{ ^ //meta/ }smx ? ' -qu ' : $zopt ? ' -qz ' : ' -q ' ) . $piece; msg("Starting: $cmd\n"); my $rc = $dont ? 0 : system($cmd); $rc >>= 8 if $rc>255; msg("Done: $piece (rc=$rc; duration: ", time - $start, " sec)\n"); $pm->finish($rc); # Terminates the child process } $pm->wait_all_children; return; } # do_verify sub msg { my ($sec,$min,$hour,$mday,$mon,$year) = localtime; printf "%4d-%02d-%02d %02d:%02d:%02d ", $year+1900, $mon+1, $mday, $hour, $min, $sec; print @_; } # msg