#!/usr/local/bin/perl5 # -*-Fundamental-*- # $Id: //guest/richard_geiger/utils/snap_checkpoint/snap_checkpoint#2 $ # # Original Author: Richard Geiger, Network Appliance, Inc. # use Carp; use strict; $| = 1; # Configuration Settings # # REVIEW THESE SETTINGS, AND ADJUST THEM AS NECESSARY FOR USE IN YOUR # ENVIRONMENT: # # $P4PORT for the server you wish to checkpoint; # my $P4PORT = "p4netapp:1678"; # $P4ROOT for the server you wish to checkpoint # my $P4ROOT = "/u/p4/root.$P4PORT"; # The path to the "p4" client to be used # my $P4 = "/u/p4/VERS/bin.osf/p4"; # The path to the "p4d" server to be used # my $P4D = "/u/p4/VERS/bin.osf/p4d"; # The path to the directory where the checkpoint should be written # my $P4CHECKPOINT = "/u/p4/checkpoint.$P4PORT"; # The path to the journal file # my $P4JOURNAL = "/u/p4/checkpoint.$P4PORT/journal"; # The name of the NetApp filer that holds the volume where # $P4ROOT is stored # # my $FILER = "maglite"; # The volume name of the volume where $P4ROOT is stored # my $VOLUME = "perforce"; # Path to the host's "rsh" command # my $RSH = "/bin/rsh"; # Path to the host's "gzip" command # my $GZIP = "/usr/local/bin/gzip"; # The locking order of the db.* files, as of r99.1, per information # supplied by Perforce Software. # # ***** You should confirm the correct order for any other version # ***** of the Perforce server; if the locking order is not correct, # ***** it is possible to get into a deadlock situation!) # my $dbfiles = < $cmd\n"); if (! open(CMD, "$cmd 2>&1 |")) { die "can't open \"$cmd 2>&1 |\": $!"; } while () { print(": $_"); $output .= $_; } close CMD; if ($sts = $?) { my $sig = $sts & 0x0f; $sts = $sts >> 8; die "\"$cmd\" exited with signal $sig status $sts"; } return ($sts, $output); } # OK, here's the drill... # First, look up the journal sequence number counter... 
# Look up the journal sequence number counter.  Logically, we'd prefer
# to do this with the database locked, but we can't, because we have to
# run "p4 counters", and the danger of a rogue checkpoint -jc happening
# seems tolerable.  (After all, this process should be the only one
# intending to do a checkpoint!)
#
# NOTE: the command runner has to be called as &s(...); a bare s(...)
# would be parsed as the substitution operator.  &s dies when the
# command exits with a non-zero status, so the "$sts" tests below are
# only a second line of defence.
#
my ($sts, $output) = &s("$P4 -p $P4PORT counters");

my $journal_counter = "";
foreach my $line (split(/\n/, $output)) {
    if ($line =~ /^journal = (\d+)/) {
        $journal_counter = $1;
    }
}
if ($journal_counter eq "") {
    die "can't get journal counter, nothing done.";
}

# Now increment the counter
#
$journal_counter++;
($sts, $output) = &s("$P4 -p $P4PORT counter journal $journal_counter");
# chomp (not chop): only strip a trailing newline, never a real character.
chomp $output;
if ($output !~ /^Counter journal set\.$/) {
    die "couldn't increment journal counter:\n$output";
}

# Next, we lock down the entire database
#
# NOTE(review): if anything between here and &p4d_unlock() dies, the
# script exits without unlocking explicitly -- presumably the lock goes
# away with the process; confirm against p4d_lock.
#
&p4d_lock();

# Then: Copy & truncate the journal (since the server is locked, we
# know that nobody will be trying to write it while we do this.)
#
my $journaln = "$P4JOURNAL.$journal_counter";
($sts, $output) = &s("/bin/cp -p $P4JOURNAL $journaln");
if ($sts) {
    die "couldn't copy the journal file.";
}

# Opening for write and closing immediately truncates the live journal.
if (! open(J, ">$P4JOURNAL")) {
    die "couldn't truncate \"$P4JOURNAL\": $!";
}
close J;

# Now, we snapshot the database filesystem...
#
# First, delete any leftover snapshot named "checkpoint":
#
($sts, $output) = &s("$RSH $FILER snap delete $VOLUME checkpoint 2>&1");
chomp $output;
if ($sts || ($output !~ /^(deleting snapshot\.+|No such snapshot.)$/)) {
    die "couldn't delete snapshot.";
}

# Now take the snapshot:
#
($sts, $output) = &s("$RSH $FILER snap create $VOLUME checkpoint 2>&1");
chomp $output;
if ($sts || $output !~ /^creating snapshot\.+$/) {
    die "couldn't create snapshot.";
}

# Now we can release the lock...
#
&p4d_unlock();

# At this point, the "live" Perforce server is up and available to
# users. So, now we do the time consuming step, checkpointing from the
# snapshot...
#
# Compress the saved journal segment...
# (Ignore errors - they can be dealt with later.)  &s dies on a failed
# command, so the call is wrapped in eval to really make this step
# non-fatal; otherwise a gzip problem would abort before the checkpoint.
#
eval { &s("$GZIP $journaln"); };
if ($@) {
    print STDERR "WARNING: couldn't compress \"$journaln\": $@";
}

# Diddle $P4ROOT so that this checkpoint is done from the snapshot
# we just took...
#
$P4ROOT .= "/.snapshot/checkpoint";
$ENV{"P4ROOT"} = $P4ROOT;

# Timestamp (YYYYMMDDhhmmss, local time) used to name the checkpoint file.
my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
my $tstamp = sprintf("%04d%02d%02d%02d%02d%02d",
                     1900 + $year, $mon + 1, $mday, $hour, $min, $sec);
my $checkpoint_file = "$P4CHECKPOINT/checkpoint.$tstamp.gz";

($sts, $output) = &s("$P4D -r $P4ROOT -p $P4PORT -z -jd $checkpoint_file");
chomp $output;
# \Q...\E keeps the "." characters in the configured path from acting
# as regex wildcards.
if ($sts || $output !~ /^Dumping to \Q$checkpoint_file\E\.\.\.$/) {
    die "checkpoint failed.";
}

# OK, the checkpoint finished; now we can delete the snapshot
# (Or, you might want to keep it around; your call!)
#
# A failure here must only warn, so trap the die that &s raises on a
# non-zero exit status and turn it into a non-zero $sts instead.
#
eval {
    ($sts, $output) = &s("$RSH $FILER snap delete $VOLUME checkpoint 2>&1");
};
if ($@) {
    $sts = 1;
    $output = "";
}
chomp $output;
if ($sts || $output !~ /^(deleting snapshot\.+|No such snapshot.)$/) {
    print STDERR "WARNING: couldn't delete snapshot \"checkpoint\".\n";
    # no exit here - next checkpoint will attempt to delete it again.
}

exit $sts;