#!/usr/local/bin/perl5
# -*-Fundamental-*-
# $Id: //guest/richard_geiger/utils/snap_checkpoint/snap_checkpoint#5 $
#
# Original Author: Richard Geiger, Network Appliance, Inc.
#
# snap_checkpoint - checkpoint a Perforce server from a filer snapshot.
#
# In outline, the script:
#
#   1. deletes the previous "checkpoint" snapshot on the filer;
#   2. reads and increments the journal sequence counter;
#   3. locks the database, creates a new "checkpoint" snapshot,
#      truncates the live journal, and unlocks the database;
#   4. copies (and compresses) the journal segment out of the snapshot;
#   5. runs "p4d -jd" against the snapshot copy of $P4ROOT.
#
# The exit status is 0 on success; failures either exit 1 after a
# message on stderr, or die.
#

use Carp;
use strict;
use warnings;
use File::Basename qw(basename dirname);

$| = 1;

my $Myname;
($Myname = $0) =~ s%^.*/%%;


# Configuration Settings
#
# REVIEW THESE SETTINGS, AND ADJUST THEM AS NECESSARY FOR USE IN YOUR
# ENVIRONMENT:
#

# $P4PORT for the server you wish to checkpoint;
#
my $P4PORT = "p4netapp:1672";

# $P4ROOT for the server you wish to checkpoint
#
my $P4ROOT = "/u/p4/root.$P4PORT";

# The path to the "p4" client to be used
#
my $P4 = "/u/p4/VERS/bin.osf/p4";

# The path to the "p4d" server to be used
#
my $P4D = "/u/p4/VERS/bin.osf/p4d";

# The path to the directory where the checkpoint should be written
#
my $P4CHECKPOINT = "/u/p4/checkpoint.$P4PORT";

# The path to the journal file
#
my $P4JOURNAL = "/u/p4/checkpoint.$P4PORT/journal";

# The name of the journal counter to use
# (p4d r00.1 won't let us use "journal" :-()
#
my $P4COUNTER = "snap_journal";

# The name of the NetApp filer that holds the volume where
# $P4ROOT is stored
#
my $FILER = "powermatic";

# The volume name of the volume where $P4ROOT is stored
#
my $VOLUME = "perforce";

# Path to the host's "rsh" command
#
my $RSH = "/bin/rsh";

# Path to the host's "gzip" command
#
my $GZIP = "/usr/local/bin/gzip";

# The locking order of the db.* files, good for
#
#   r99.1
#   r99.2
#   r00.1
#
# per information supplied by Perforce Software.
#
# ***** You should confirm the correct order for any other version
# ***** of the Perforce server; if the locking order is not correct,
# ***** it is possible to get into a deadlock situation!)
#
# NOTE(review): THIS COPY OF THE SCRIPT IS DAMAGED.  The contents of
# the here-document below (the db.* file list), and the definitions of
# p4d_lock() and p4d_unlock() that followed it, are missing.  They are
# deliberately NOT reconstructed here, because a wrong locking order
# can deadlock the server.  Restore the list and both functions from
# the upstream snap_checkpoint at this point in the file; until then
# the preflight check at the bottom of the script refuses to run.
#
my $dbfiles = <<'EOF';
EOF


# Run a command through the shell, echoing the command and each line
# of its output, and return what it produced.
#
#   $cmd - the command line to run; its stderr is folded into stdout.
#
# Returns the list ($sts, $output).  Any non-zero wait status is
# fatal, so $sts is always 0 when this function returns normally.
#
# Dies if the pipe cannot be opened, or if the command is killed by a
# signal or exits with a non-zero status.
#
# NOTE(review): the first lines of this function were damaged in this
# copy of the file; the argument handling and the "> " prefix of the
# echoed command are reconstructed from the surviving text and from
# the call sites - confirm against upstream.
#
sub s
{
    my ($cmd) = @_;
    my $output = "";

    print("> $cmd\n");

    open(my $pipe, '-|', "$cmd 2>&1")
        or die "can't open \"$cmd 2>&1 |\": $!";
    while (my $line = <$pipe>) {
        print(": $line");
        $output .= $line;
    }
    close $pipe;    # sets $? to the command's wait status

    my $sts = $?;
    if ($sts) {
        # The low 7 bits of the wait status hold the signal number
        # (bit 0x80 is the core-dump flag); the exit code is in the
        # high byte.
        my $sig = $sts & 0x7f;
        $sts = $sts >> 8;
        die "\"$cmd\" exited with signal $sig status $sts";
    }
    return ($sts, $output);
}


# Return the current local time as a 14-digit "YYYYMMDDhhmmss" string.
#
sub ts
{
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    return sprintf("%04d%02d%02d%02d%02d%02d",
                   1900 + $year, $mon + 1, $mday, $hour, $min, $sec);
}


# The following function is taken from the NetApp "p4d_admin" script.
# The intent was to keep it the same as it is there, so things might
# be done a bit differently than they otherwise would.  In particular,
# the "&msg" function is stubbed out with a dummy version that ignores
# the later args, which are used in the context of p4d_admin, but not
# here.
#
# Local deviations from that original: the $VOLUME and $GZIP settings
# are used instead of hard-coded values, output is chomp'ed rather
# than chop'ped, and a failure to compress the journal segment no
# longer aborts the run.
#

# Print a message on stderr.  Only the first argument is used.
#
sub msg
{
    my ($msg) = @_;
    print STDERR $msg;
}


# Take a checkpoint of the server by way of a filer snapshot.
#
# Takes no arguments.  Returns 0 on success.  On a detected failure it
# prints a message via msg() and exits the process with status 1; a
# failing external command makes s() die.
#
sub p4d_snap_checkpoint
{
    my ($sts, $output);

    # First, delete the previous snapshot (if it exists).  (We keep the
    # old one around, just in case, until we need to make a new one.)
    #
    ($sts, $output) =
        &s("$RSH $FILER snap delete $VOLUME checkpoint 2>&1");
    chomp $output;
    my $bad = 0;
    foreach my $line (split(/\n/, $output)) {
        if ($line !~ /^(deleting snapshot\.+|No such snapshot.)$/) {
            $bad = 1;
            last;
        }
    }
    if ($sts || $bad) {
        msg("$Myname: couldn't delete snapshot.\n", undef, undef, "p4");
        exit 1;
    }

    # Next, look up the journal sequence number counter...  Logically,
    # we'd prefer to do this with the database locked, but the danger
    # of a rogue checkpoint -jc happening seems tolerable...
    #
    ($sts, $output) = &s("$P4 -p $P4PORT counters");
    my $journal_counter = "";
    foreach my $line (split(/\n/, $output)) {
        if ($line =~ /^\Q$P4COUNTER\E = (\d+)/) {
            $journal_counter = $1;
        }
    }
    if ($journal_counter eq "") {
        msg("$Myname: can't get $P4COUNTER counter, nothing done.\n",
            undef, undef, "p4");
        exit 1;
    }

    # Now increment the counter
    #
    my $new_journal_counter = $journal_counter + 1;
    ($sts, $output) =
        &s("$P4 -p $P4PORT counter $P4COUNTER $new_journal_counter");
    chomp $output;
    if ($output !~ /^Counter \Q$P4COUNTER\E set\.$/) {
        msg("$Myname: couldn't increment $P4COUNTER counter:\n$output\n",
            undef, undef, "p4");
        exit 1;
    }

    # Next, we lock down the entire database
    #
    p4d_lock(0);

    my $tstamp = ts();

    # Now, we snapshot the database filesystem...
    #
    ($sts, $output) =
        &s("$RSH $FILER snap create $VOLUME checkpoint 2>&1");
    chomp $output;
    if ($sts || $output !~ /^creating snapshot\.+$/) {
        msg("$Myname: couldn't create snapshot.\n", undef, undef, "p4");
        exit 1;
    }

    # OK, now we are confident that we have a good snapshot.  We can
    # proceed on the assumption that the "off line" operations of
    # copying the journal and "p4d -jd" will work.  All we need to do
    # here is to truncate the journal, and put the server back on-line
    # by unlocking it...

    # Truncate the journal:
    #
    my $journal_fh;
    if (! open($journal_fh, '>', $P4JOURNAL)) {
        msg("$Myname: couldn't truncate \"$P4JOURNAL\": $!\n",
            undef, undef, "p4");
        exit 1;
    }
    close $journal_fh;
    msg("$Myname: \"$P4JOURNAL\" truncated.\n");

    # Now we can release the lock...
    #
    p4d_unlock();

    # At this point, the system is online for users.

    # Copy the journal (from the snapshot).
    #
    my $journaln = "$P4CHECKPOINT/$tstamp.jnl.$journal_counter";
    my $snap_journal = dirname($P4JOURNAL)
                     . "/.snapshot/checkpoint/"
                     . basename($P4JOURNAL);
    ($sts, $output) = &s("/bin/cp -p $snap_journal $journaln");
    if ($sts) {
        msg("$Myname: couldn't copy \"$snap_journal\".\n",
            undef, undef, "p4");
        exit 1;
    }

    # Compress the saved journal segment...
    # (Ignore errors - they can be dealt with later.)  s() dies when
    # the command fails, so the failure is trapped here to keep this
    # step best-effort; otherwise the checkpoint below would never run.
    #
    eval { &s("$GZIP $journaln"); 1 }
        or msg("$Myname: warning: couldn't compress \"$journaln\": $@");

    # Diddle $P4ROOT so that this checkpoint is done from the snapshot
    # we just took...
    #
    my $P4ROOT_sav = $P4ROOT;
    $P4ROOT .= "/.snapshot/checkpoint";
    $ENV{"P4ROOT"} = $P4ROOT;

    # And then run the checkpoint from the snapshot...
    #
    my $ckp = "$P4CHECKPOINT/$tstamp.ckp.$new_journal_counter.gz";
    ($sts, $output) = &s("$P4D -r $P4ROOT -p $P4PORT -z -jd $ckp");
    if ($sts || $output !~ /^Dumping to \Q$ckp\E\.\.\.$/) {
        msg("$Myname: checkpoint failed.\n", undef, undef, "p4");
        exit 1;
    }

    # Mom taught me to put things back where I'd got 'em.
    #
    $P4ROOT = $P4ROOT_sav;
    $ENV{"P4ROOT"} = $P4ROOT;

    return $sts;
}


# Preflight: refuse to start while the pieces missing from this copy
# of the script (see the NOTE above $dbfiles) have not been restored.
# Failing here is much better than deleting the old snapshot and then
# dying half way through.
#
if ($dbfiles !~ /\S/ || !defined &p4d_lock || !defined &p4d_unlock) {
    die "$Myname: the db.* locking order list and/or p4d_lock()/"
      . "p4d_unlock() are missing from this copy of the script;"
      . " restore them before use.\n";
}

exit(p4d_snap_checkpoint());