#!/bin/perl
# ======================================================================
# oblit_check.pl
#
# This script parses the output of an obliterate command (done without -y), and does several things:
# it determines how many files have more than one lazy copy, and lists them with space statistics
# it provides information on how much space would be freed, or used if lazy copy files obliterated or not.
# it determines how much space would be freed by other revisions of files with some mult copy revisions
# it writes a client spec that can be used to do the actual obliterate, with exclusion lines for files with
# more than one lazy copy
#
# usage: oblit_check.pl file1
# file1 the output of p4 obliterate (no -y)
#
# Note: Handling of lazy copies in obliterates changes in 2005.1, this script is for releases before that
#
# $Id: //esi/perforce/scripts/tools/oblit_check.pl#7 $
# $Author: sstevens $
# $Date: 2005/04/12 $
# ======================================================================
# Get system modules.
use strict;
# this path is needed for the space checks
my $Root = "/perforce/1780/root";
# this is the name of the client spec and file that will be created
my $client_spec_name = "sstevens_oblit3";
# edit this to your name, it is the owner field of the client spec
my $owner = "sstevens";
# list oblit paths here, they are used to determine if lazy copies are undone to areas also to be obliterated
# be sure to leave off trailing /'s
# for example: my @oblit_paths = ("//releases/test1", "//releases/test2");
my @oblit_paths = ();
if($#ARGV < 0)
{
print "usage: oblit_check.pl file1
file - the output of p4 obliterate (no -y) \n
This script will list the files with mulitple references, so pointless to obliterate, in
a format for exclusions in a file named after $client_spec_name, and report on space statistics\n";
exit 1;
}
my $File1 = $ARGV[0];
my %branched; # hash of files mentioned in oblit output file as having lazy copies or being obliterated
my %cleared; # files that have no branches, will save space by being obliterated
my %spec_line; # files that should go in client spec as exclusions
if(!open (FILE1, "< $File1"))
{
print "$File1: can't open, exiting.\n";
exit 1;
}
print "Searching $File1 for multiple lazy copy undos...\n";
printf ("%s", `date`);
print "Sizes are in bytes\n";
print "Paths to be obliterated are:\n";
foreach my $opath (@oblit_paths)
{
print "\t$opath\n";
}
print "\n";
# put each source file (to be obliterated) into a hash, count times it is hit
my $all_count = 0;
my $dup_count_total = 0;
my @lines = <FILE1>;
close(FILE1);
LINE: foreach my $line (@lines)
{
if($line =~ /^(.*) - copy from \/(.*)$/)
{
my $dest = $1;
my $tsrc = $2;
# put src name in #xx format (like obliterate output should do)
$tsrc =~ /^(.*) 1.(.*)$/;
my $src = $1 . "#" . $2;
my $name = $1;
# if it is copying to a path to also be purged, don't count as a hit
foreach my $path (@oblit_paths)
{
if($dest =~ /$path/)
{
next LINE;
}
}
# push dest into array of values for this file's hash key
if(!exists($branched{$src}))
{
$all_count++;
}
push ( @{$branched{$src}}, $dest);
}
elsif($line =~ /^\/(.*) - purged/)
{
my $file = $1;
# if it's to be purged but had no lazy copies, put it in the list of files safe to oblit
if(!exists($branched{$file}))
{
$cleared{$file} = 0;
$all_count++;
}
# more than one lazy copy means it will cost space to oblit it
# one lazy copy means no space savings, but still oblit it
elsif ($#{$branched{$file}} > 0)
{
$file =~ /(.*)#.*$/;
$spec_line{$1} = 1;
$dup_count_total++;
}
}
}
close(FILE1);
my $tot_size = 0; # bytes reclaimed if no files with more than one copy obliterated
my $extra_size = 0; # bytes lost if all lazy copies undone
my $extra_reclaim_size = 0; # bytes reclaimed by obliterating other revs of files that have a rev which > 1 copy
# open up a file to make a client spec from for obliteration, add exclusion lines for multiple link files
if(!open (CLIENT_FILE, "> $client_spec_name"))
{
print "$client_spec_name: can't open, exiting.\n";
exit 1;
}
print CLIENT_FILE "
Client: $client_spec_name
Owner: $owner
Description:
Created by $owner for offline obliterate
Root: /user/$owner/workspaces/oblit
Options: noallwrite noclobber nocompress unlocked nomodtime normdir
LineEnd: local
View:\n";
foreach my $Path (@oblit_paths)
{
$Path =~ /\/(.*)/;
printf CLIENT_FILE ("\t\"%s...\" \"/\/%s%s...\"\n", $Path, $client_spec_name, $1);
}
foreach my $File (keys %spec_line)
{
printf CLIENT_FILE ("\t\"-\/%s\" \"/\/%s%s\"\n", $File, $client_spec_name, $File);
}
close(CLIENT_FILE);
# sort on greatest number of branches and print
foreach my $File (sort { ($#{$branched{$b}}+1) <=> ($#{$branched{$a}}+1) } keys %branched)
{
# count up the bytes of revs that can safely be obliterated with no space loss;
# put together a real filename
$File =~ /(.*)#(.*)$/;
my $FileSize = 0;
my $fpath = $1;
my $fver = $2;
my $pname = $Root . $fpath . ",d/1." . $fver . ".gz";
if(-e $pname)
{
$FileSize = (stat($pname))[7];
}
elsif ((-e ($pname = $Root . $fpath . ",v")) || (-e ($pname = $Root . $fpath . ",t")))
{
$FileSize = (stat($pname))[7];
}
elsif (-e ($pname = $Root . $fpath . ",d/1." . $fver))
{
$FileSize = (stat($pname))[7];
}
# there are other types, but amount of space used by them is insignificant (I think)
# no reason to mention files with only one copy, they can be obliterated
# but will save no space
if ($#{$branched{$File}} > 0)
{
my $tot_gain = $FileSize * ($#{$branched{$File}});
my $copies = $#{$branched{$File}} + 1;
print "$copies copies, $FileSize * $#{$branched{$File}} = $tot_gain. $File\n";
$extra_size += $tot_gain;
# print out the where each copy would be made to
foreach my $match (@{$branched{$File}})
{
print "\t$match\n";
}
}
}
foreach my $File (keys %cleared)
{
# count up the actual amount of stuff that can safely be obliterated;
# put together a real filename
$File =~ /(.*)#(.*)$/;
my $FileSize = 0;
my $fpath = $1;
my $fver = $2;
my $pname = $Root . $fpath . ",d/1." . $fver . ".gz";
if(-e $pname)
{
$FileSize = (stat($pname))[7];
}
elsif ((-e ($pname = $Root . $fpath . ",v")) || (-e ($pname = $Root . $fpath . ",t")))
{
$FileSize = (stat($pname))[7];
}
elsif (-e ($pname = $Root . $fpath . ",d/1." . $fver))
{
$FileSize = (stat($pname))[7];
}
# there are other types, but amount of space used by them is insignificant (I think)
# no lazy copies, count up space to be reclaimed
# all revs of files with more than one multiple link version will be excluded from total
if(!exists($spec_line{$fpath}))
{
$tot_size += $FileSize;
}
else
{
$extra_reclaim_size += $FileSize;
}
}
print "$dup_count_total files have more than one lazy copy, out of $all_count files\n";
printf "$tot_size bytes (%.2f GB) can be cleaned out with no lazy copy space gain, using client $client_spec_name\n", $tot_size/1000000000.0;
printf "$extra_size bytes (%.2f GB) would be added as duplicate files, if files with duplicates obliterated.\n", $extra_size/1000000000.0;
printf "$extra_reclaim_size bytes (%.2f GB) of other file revs would be deleted, if duplicates obliterated.\n", $extra_reclaim_size/1000000000.0;
printf " so, if dups obliterated, net change in storage would be %s (%.2f GB)\n", $tot_size - $extra_size + $extra_reclaim_size, ($tot_size - $extra_size + $extra_reclaim_size)/1000000000.0;
print "client spec file with exclusion lines for dup files is $client_spec_name\n";
exit (0);