cache_clean.pl #1

  • //
  • guest/
  • stanton_stevens/
  • cache_clean.pl
  • View
  • Commits
  • Open Download .zip Download (9 KB)
#!/bin/perl
#!c:\bin\perl\bin\perl
# ======================================================================
# cache_clean.pl
# Script to monitor and control space usage by a P4Proxy cache
#
# usage: cache_clean.pl -c cache_dir -l limit_size -o output_file [-m max_empty] [-n min_empty] [-i]
#   see usage section in script below
#
# $Id: //esi/perforce/scripts/tools/cache_clean.pl#15 $
# $Author: sstevens $
# $Date: 2005/04/12 $
# ======================================================================

# Get system modules.
use strict;
use Getopt::Long;
use Time::Local;

my $LimitSize = 0;
my $CacheDir = "";
my $OutputFile = "";
my $MaxEmpty = 0;
my $MinEmpty = 0;
my $DoNothing = 0;
my $Help = 0;

my $Total = 0; 
my $Used = 0;
my $Avail = 0;
my $CurSize = 0;

# If access date is before most recent file deleted last run, it is due to Perforce bug. Some files written
#  to the cache do not have the correct access time set, it can be anywhere in the Unix epoch.
#  But if the access date is earlier than the last file cleaned out last time cache_clean.pl was run, 
#  it's bogus, reset it to the current date. Each time the script is run, the last date is saved to file. 
#  The date below is used if no date is saved so far.
my $start_date = "1/1/2005";
# edit this path as appropriate, it is where to store the files saving dates
my $date_file_folder = "/perforce/scripts";

#
# main routine starts here
#
GetOptions ("help|?" => \$Help,
	    "c=s" => \$CacheDir,
	    "l=i" => \$LimitSize,
	    "o:s" => \$OutputFile,
	    "m:i" => \$MaxEmpty,
	    "n:i" => \$MinEmpty,
	    "i"   => \$DoNothing,
           );

if($Help || ($CacheDir eq "") || ($LimitSize == 0))
{

   print "# usage: cache_clean.pl -c cache_dir -l limit_size [-m max_empty] [-n min_empty]
   -c cache_dir - where cache files are stored, such as /perforce/1710/cache
       it can also use a wildcard, like /perforce/*/cache, and will draw from all folders that match
       But - all these folders must be on the same partition. And you may have to use \* rather than *,
       in Unix shells.
   -l limit_size - maximum size in MB of the area monitored, delete files by access time to get here
   -m max_empty  If this number of MB is already free in the partition, skip most of the script
       This is a speed optimization.
       For example: if set to 50000, and 50 GB are free, no need to trim cache, don't bother with du command
   -n min-empty  The number of MB that must be left free, no matter how much cache space needs to be deleted
       For example, if min-empty is set to 5000, no matter what the limit_size is, the cache will be cleaned 
            at least enough to make sure that there is 5 GB free on the partition     
       The entire cache could be deleted to meet this minimum.
   -i do nothing, just list what would be deleted 
   -o output_file   - print output to this file, rather than stdout

   NOTE: cygwin is required for Windows machines, this script may need to be modified to use the correct path to cygwin commands.\n";
   exit(1);
}   

# take care of output
if($OutputFile)
{
    if (!open(OUT, ">>$OutputFile"))
    {
        print "Unable to open $OutputFile for appending, using STDOUT\n";
    }
    else
    {
        select(OUT);
    }
}

# look for a saved date for the oldest files deleted, use to fix Perforce access date errors
my $cachepath = $CacheDir;
$cachepath =~ s/\//\./g;
$cachepath =~ s/\*/\./g;
my $DateFile = "$date_file_folder/$cachepath.cache_clean_date.keep";
if (-e $DateFile)
{
    if (open(DATEFILE, "<$DateFile"))
    {
	while (<DATEFILE>)
	{
	    if(/Cutoff Date: (\S+)/)
	    {
		$start_date = $1;	
	    }
	}
	close (DATEFILE);
    }
    else
    {
        print "cache_clean.pl: Unable to open $DateFile to get cutoff date, though it exists. Aborting.\n";
	exit  (1);
    }
}
else
{
    print "Using default cutoff date of $start_date, no saved cutoff date.\n";
}

# set to the most recent date deleted, to be picked up next time the script is run
my $last_date = ""; 

my ($m, $d, $y) = split (/\//, $start_date);
my $cutoff_time = timelocal(0, 0, 0, $d, $m-1, $y);
my $current_time = time;
    
if($MaxEmpty)
{
    print "Will do nothing if more than $MaxEmpty MB is free\n";
}
if($DoNothing)
{
    print "Reporting on what would be cleaned from cache folder(s) $CacheDir\n";
    `date`;
    print "Space usage would be reduced to $LimitSize MB.\n";
    if($MinEmpty)
    {
	print "Cache space usage would also be reduced further if necessary to make sure $MinEmpty MB available\n";
    } 
}
else
{
    print  "Cleaning cache folder(s) $CacheDir\n";
    `date`;
    print "Space usage will be reduced to $LimitSize MB.\n";
    if($MinEmpty)
    {
	print "Cache space usage will also be reduced further if necessary to make sure $MinEmpty MB available\n";
    } 
}

# convert megabytes to kilobytes.
$MaxEmpty = $MaxEmpty * 1000;
$MinEmpty = $MinEmpty * 1000;  
$LimitSize = $LimitSize * 1000;

my @Results = "";
my $Command = "";

# Deal with platform specific stuff. I'm sure there's a better way, but this works for many cases
# Cygwin must be installed: http://www.cygwin.com/
my $ucmd_path = "";
my $uname_results = `uname -a`;
if($uname_results =~ /CYGWIN/)
{
   $ucmd_path = "c:\\cygwin\\bin\\";
}

# do a df, if more space available than MaxEmpty, we're done, exit
$Command = "df -k $CacheDir";
@Results = `$Command`;
chomp(@Results);
if ($#Results == -1)
{
    print  "Unable to run df to check partition size, check cache dir path\n";
    die;
}

shift @Results;  # first line is just a header
foreach my $Line (@Results)
{
    $Line =~ /\S+\s+(\w+)\s+(\w+)\s+(\w+).*/;
    $Total = $1; $Used = $2; $Avail = $3;
    last;
}

if($MaxEmpty && ($Avail > $MaxEmpty))
{
    # plenty of room, get out
    my $AvailMB = $Avail / 1000;
    my $MaxMB = $MaxEmpty / 1000;
    print  "Done: $AvailMB MB available, more than $MaxMB MB, plenty of room, skipping rest of check\n";
    exit(0);
}

# may be a limited space situation, use du to get current cache size
$Command = "du -Lks $CacheDir";
my @Results = `$Command`;
if ($#Results == -1)
{
    print "Unable to run du to check cache size, check cache dir path\n";
    die;
}

foreach my $Line (@Results)
{
    print  $Line;
    $Line =~ /(\w+).*/;
    $CurSize = $CurSize + $1;
}
print  "Total cache size is currently $CurSize KB\n";

my $DelSize = 0;

# determine if cache size must be reduced
if($LimitSize < $CurSize)
{
    # yes, must reduce space. 
    # if a minimum free space is specified, check against that, too, to see if more deletion needed 
    if($MinEmpty)
    {
        if(($Avail + ($CurSize - $LimitSize)) < $MinEmpty)
	{
	    $LimitSize = $CurSize - ($Used - ($Total - $MinEmpty)); 
	    if($LimitSize < 0)
	    {
                print  "Warning: deleting entire cache to comply with minimum $MinEmpty KB free.\n";
		$LimitSize = 0;
	    }
	}
    }
    $DelSize = $CurSize - $LimitSize;
}

# haven't exceeded limit, but still may need to delete because of MinEmpty limit on space use 
if($MinEmpty)
{
    if($Avail < $MinEmpty)
    {
	$LimitSize = $CurSize - ($MinEmpty - $Avail);
	if($LimitSize < 0)
	{
            print  "Warning: deleting entire cache to comply with minimum $MinEmpty KB free.\n";
	    $LimitSize = 0;
	}
        $DelSize = $CurSize - $LimitSize;
    }
}

# after all that, do we have DelSize indicating that we should do something?
if($DelSize == 0)
{
    print  "Done: No need to trim cache for its size or to ensure minimum free space\n";
    exit(0);
}

# if we're here, both Limit and DelSize refer to how to delete from the cache
# gather a list of all files, in a hash by access time
$Command = $ucmd_path . "find $CacheDir -type f -follow";
@Results = `$Command`;
if ($#Results == -1)
{
    print  "Unable to run find to check each file, check cache dir path\n";
    die;
}
chomp(@Results);

my %Fdata;
foreach my $Line (@Results)
{
   $Fdata{$Line} = (stat($Line))[8];
   if(!$DoNothing && ($Fdata{$Line} < $cutoff_time))
   {
       # reset the access time to now, it was bogus
       my ($s, $mi, $h, $d, $m, $y) = localtime($Fdata{$Line});
       printf  ("Fixing bad date: %02d/%02d/%d %02d:%02d,  $Line\n", $m + 1, $d, $y + 1900, $h, $mi);
       `touch \"$Line\"`;
       $Fdata{$Line} = $current_time;
   }
}

# for the following, work with bytes
$DelSize = $DelSize * 1000;
my $i = 0;	
my $FileSize;

foreach my $File (sort {$Fdata{$a} <=> $Fdata{$b} } keys %Fdata)
{
   my ($s, $mi, $h, $d, $m, $y) = localtime($Fdata{$File});
   $FileSize = (stat($File))[7];
   printf  ("%02d/%02d/%d %02d:%02d, %d K - $File\n", $m + 1, $d, $y + 1900, $h, $mi, int($FileSize)/1000);

   if(!$DoNothing)
   {
       my $Results = unlink($File);
       if($Results != 1)
       {
           print  "unable to rm file $File\n";
       }
   }
   $DelSize = $DelSize - $FileSize;
   $i++;
   if($DelSize <= 0)
   {
      # enough has been deleted
      $last_date = sprintf("%s/%s/%s", $m + 1, $d, $y + 1900);
      last;
   }
}

if($DelSize > 0)
{
    print  "Error, unable to free up space\n";
    exit (1);
}
my $LimitMB = $LimitSize / 1000;
if($DoNothing)
{
    print  "Files accessed before $last_date would be deleted.\n";
    print  "\nWould reduce cache size to $LimitMB MB, $i files would be deleted.\n\n\n\n";
}
else
{
    if (open(DATEFILE, ">$DateFile"))
    {
	print DATEFILE "Cutoff Date: $last_date"; 
	close (DATEFILE);
    }
    else
    {
        print "cache_clean.pl: Unable to open $DateFile to set cutoff date.\n";
    }
    print  "Files accessed before $last_date were deleted.\n";
    print  "Reduced cache size to $LimitMB MB, $i files deleted, done.\n\n";
}
exit (0);
# Change User Description Committed
#2 5948 Stanton Stevens clean up description
#1 4891 Stanton Stevens scripts and doc associated with 2005 Perforce Users Conference paper