#!/usr/local/bin/perl
# -*-Fundamental-*-
#
# This script supports Veritas snapshotting. It was derived from
# Stephen Vance's version of Richard Geiger's script.
# References:
#   //guest/stephen_vance/utils/snap_checkpoint/snap_checkpoint#5
#   http://public.perforce.com/guest/richard_geiger/utils/snap_checkpoint/snap_checkpoint.html
# --Michael Mirman, The MathWorks, Inc., March 2008.

# This script is intended both as an illustration, and, potentially,
# as an actual tool.
#
# NEITHER THE AUTHOR, THE MATHWORKS, INC., VERITAS, NOR PERFORCE SOFTWARE
# MAKE ANY WARRANTY, EXPLICIT OR IMPLIED, AS TO THE CORRECTNESS,
# FITNESS FOR ANY APPLICATION, NOR THE SAFETY OF THE snap_checkpoint SOFTWARE.
#
# The directory structure that is assumed by this script is highly likely
# NOT the structure you have. It is your responsibility to verify what
# works and what does not work in your environment.
#
# First, you ought to run "snap_checkpoint -port YOURPORT -n" to see
# "reasonable" output.
# Then, you should verify that the p4d_lock() function works
# correctly at your site. You can do this by executing
# "snap_checkpoint -port YOURPORT -lockcheck".
# If things are working right, the script will report that the database
# has been locked.
# You can then (from a second login shell) attempt to execute a p4 command
# ("p4 user -o" is fine). This should block until the lock is released.
# Here's what you should see:
#
# [batscm@bat240sol64 ws1]$ scripts/snap_checkpoint -lockcheck -port 1681
# 2008-03-17 14:30:30 -0400 snap_checkpoint>> requesting lock on all db.* tables...
# 2008-03-17 14:30:30 -0400 snap_checkpoint>> 38 files in /export/db/perforce/1681 were locked.
# 2008-03-17 14:30:30 -0400 snap_checkpoint>> press return to unlock
#
# (At this point all commands to your Perforce server should block,
# until you press return)
#
# 2008-03-17 14:30:38 -0400 snap_checkpoint>> /export/db/perforce/1681 were unlocked.
use strict;
use warnings;

use Carp;
use Fcntl ':flock';    # import LOCK_* constants
use File::Basename qw(basename);
use File::Copy qw();
use FileHandle qw();
use Getopt::Long qw(GetOptions);
use IO::Handle;
use Time::Local;

# no buffering
*STDOUT->autoflush();
*STDERR->autoflush();

my $Myname = basename($0);

# Configuration Settings
#
# REVIEW THESE SETTINGS, AND ADJUST THEM AS NECESSARY FOR USE IN YOUR
# ENVIRONMENT:
#
# The presumed directory structure is as follows:
#     /export/checkpoint/perforce/PORT
#     /export/data/perforce/PORT
#     /export/db/perforce/PORT
#     /export/journal/perforce/PORT
# The p4 root is /export/db/perforce/PORT where we have symlinks for depots.
# For example:
#     /export/db/perforce/1680/depot -> /export/data/perforce/1680/depot/
#
# Checkpoint files are collected in /export/checkpoint/perforce/PORT.
# All four
#     /export/checkpoint
#     /export/data
#     /export/db
#     /export/journal
# are on different disks, and their snapshots are taken separately.
#
# Due to the assumptions above, when Veritas snapshots are taken, they
# contain all Perforce instances (i.e., for all ports).
# THOSE SNAPSHOTS GET REMOVED AT THE END OF THE SCRIPT.
# Therefore, there must be only one instance of this script running at any
# point in time. However, it can handle creating a backup of more than
# one Perforce instance per run.
#

my @orig_args = @ARGV;

# %Handle caches one FileHandle per db file (see file2handle);
# $HoursFromGMT caches the UTC offset string used in log prefixes.
my (%Handle, $HoursFromGMT);
my ($preview_only, $lockcheck, @ports);
my $verbose     = 0;
my $max_to_keep = 0;    # keep all snapshots

print "\n * * * * *\n";

# A mistyped option must never fall through to a real run (think of a
# misspelled dry-run flag), so a failed parse prints the usage and stops.
GetOptions(
    'help'      => sub { Usage(); exit },
    'lockcheck' => \$lockcheck,
    'max=i'     => \$max_to_keep,
    'n'         => \$preview_only,
    'port=s'    => \@ports,
    'v'         => \$verbose,
) or do {
    Usage();
    exit 1;
};

if ( ! @ports ) {
    Usage();
    exit 1;
}

# Port values end up in file paths and in shell command lines, so accept
# nothing but digits.
if ( my @bad_ports = grep { !/\A\d+\z/ } @ports ) {
    print "Invalid -port value(s): @bad_ports\n";
    Usage();
    exit 1;
}

msg(0, "$0 @orig_args\n");

# Top directory of the mount points
#
my $TOP = '/export';

# Top where Veritas snapshots will be mounted
#
my $SNAPMOUNT = '/backup';

# Several functions returning values, which depend on the port (first arg)
sub P4           { return P4ROOT($_[0]) . '/bin/p4' }
sub P4CHECKPOINT { return "$TOP/checkpoint/perforce/$_[0]" }
sub P4D          { return P4ROOT($_[0]) . '/bin/p4d' }
sub P4JOURNAL    { return "$TOP/journal/perforce/$_[0]/journal" }
sub P4ROOT       { return "$TOP/db/perforce/$_[0]" }
sub SNAPJOURNAL  { return "$SNAPMOUNT/journal/perforce/$_[0]/journal" }

# The name of the journal counter to use
my $P4COUNTER = 'snap_journal';

# Path to the host's "gzip" command
#
my $GZIP = "/usr/local/bin/gzip";

# Command to create Veritas snapshots
my $CREATE_SNAPSHOT = '/usr/local/bin/mk_vxfs_snapshot';

# Command to remove Veritas snapshots
my $REMOVE_SNAPSHOT = '/usr/local/bin/rm_vxfs_snapshot';

if ( $lockcheck ) {
    msg(0, "requesting lock on all db.* tables...\n");
    p4d_lock($_) for ( @ports );
    msg(0, "press return to unlock ");
    # Read the terminal explicitly: a bare <> would try to open any
    # leftover command-line arguments as files.
    my $ans = <STDIN>;
    p4d_unlock($_) for ( @ports );
    exit 0;
}

p4d_snap_checkpoint();

exit 0;

#
# Copy (and compress) Perforce journal from a snapshot
#
# Input: port, time stamp used in the file name, old journal counter
# The copy is written to <checkpoint dir>/<tstamp>.jnl.<old counter> and
# then compressed with gzip.
#
# With -n nothing is copied. A failed copy terminates the script (exit 8):
# the live journal has already been truncated by then, so the snapshot holds
# the only copy and must not be removed by the rest of the run.
#
sub copy_journal {
    my ($port, $tstamp, $old_counter) = @_;

    # The path to the directory where the checkpoint should be written
    #
    my $P4CHECKPOINT = P4CHECKPOINT($port);

    # Copy the journal (from the snapshot).
    #
    my $journaln    = "$P4CHECKPOINT/$tstamp.jnl.$old_counter";
    my $SNAPJOURNAL = SNAPJOURNAL($port);

    msg(0, ($preview_only ? 'NOT ' : ''),
        "Copying journal ($SNAPJOURNAL to $journaln)\n");

    if ( ! $preview_only ) {
        if ( ! File::Copy::syscopy($SNAPJOURNAL, $journaln) ) {
            msg(0, "couldn't copy $SNAPJOURNAL to $journaln: $!\n");
            exit 8;
        }

        # On Unix syscopy is a plain copy and does not keep the time stamps,
        # so carry them over by hand (like cp -p). Best effort only.
        if ( my @st = stat $SNAPJOURNAL ) {
            utime $st[8], $st[9], $journaln
                or msg(0, "couldn't preserve the time stamp of $journaln: $!\n");
        }
    }

    # Compress the saved journal segment...
    # (Ignore errors - they can be dealt with later; run_cmd dies on a
    # failing command, so the failure is caught and only reported.)
    #
    if ( ! eval { run_cmd("$GZIP $journaln"); 1 } ) {
        msg(0, "warning: could not compress $journaln: $@");
    }

    return;
} # copy_journal

#
# Create a handle name from a file name
#
# Returns the FileHandle object associated with the given file name,
# creating it on first use. The same object is handed out on every call,
# which lets p4d_unlock close what p4d_lock opened.
#
sub file2handle {
    my ($file) = @_;

    if ( ! defined $Handle{$file} ) {
        $Handle{$file} = FileHandle->new();
    }
    return $Handle{$file};
} # file2handle

#
# Increment counter for a given Perforce instance.
# Input:  list of ports; each server is addressed as 127.0.0.1:$port
# Return: reference to a hash mapping each port to the old (pre-increment)
#         journal counter of that server
#
# Exits with status 3 when a server has no $P4COUNTER counter and with
# status 4 when a counter cannot be set.
#
sub increment_counter {
    my (@ports) = @_;

    my (%journal_counter, %p4);

    for my $port ( @ports ) {
        my $P4PORT = "127.0.0.1:$port";

        # The path to the "p4" client to be used
        #
        my $P4 = P4($port);
        die "Error: $P4 does not exist\n" if ! -x $P4;

        $p4{$port} = "$P4 -p $P4PORT";
    }

    for my $port ( @ports ) {
        # Next, look up the journal sequence number counter... Logically,
        # we'd prefer to do this with the database locked, but the danger of
        # a rogue checkpoint -jc happening seems tolerable...
        #
        # Note. We use `` here rather than run_cmd, so the command would run
        # even in case of the -n option.
        my $output = `$p4{$port} counters`;
        $output = '' if ! defined $output;    # command could not be started

        #
        # The difference between running 'counters' and "counters $P4COUNTER"
        # is that in the latter case we rely on the integer coming back as
        # the valid number.
        # It's more reliable, however, to verify that we get back the
        # output with the counter name we expect.
        #
        ($journal_counter{$port}) =
            map { /^\Q$P4COUNTER\E = (\d+)/ ? $1 : () } split /\n/, $output;

        if ( ! defined $journal_counter{$port} ) {
            msg(0, "can't find set $P4COUNTER counter, nothing done.\n",
                "If this is indeed your first run against this server, you need to run\n",
                " $p4{$port} counter $P4COUNTER 0\n");
            exit 3;
        }
    } # for each port

    # In case when we deal with an array of ports, we *decreased* - but not
    # eliminated - our chance of increasing one counter and then dying
    # by not being able to increase another one.
    for my $port ( @ports ) {
        # Now increment the counter
        #
        my $new_journal_counter = $journal_counter{$port} + 1;
        my $output =
            run_cmd("$p4{$port} counter -f $P4COUNTER $new_journal_counter");
        if ( ! $preview_only && $output !~ /^Counter $P4COUNTER set\.$/) {
            msg(0, "couldn't increment $P4COUNTER counter:\n$output\n");
            exit 4;
        }
    }

    return \%journal_counter;
} # increment_counter

#
# Lock database files
#
# Takes an exclusive flock on every existing db.* file of the given port, in
# the order returned by dbfiles(). The handles stay open (and therefore
# locked) until p4d_unlock closes them or the process exits.
# Exits with status 5 (open failure) or 6 (lock failure); dies when no file
# was found to lock.
#
sub p4d_lock {
    my ($port) = @_;

    # $P4ROOT for the server you wish to checkpoint
    #
    my $P4ROOT = P4ROOT($port);

    my $nlocked = 0;

    # With r01.1 (at least), empty tables don't have files yet. Real
    # servers almost certainly will have all the db.* files, but
    # just in case, we'll grep for existing files only
    foreach my $file ( grep { -f } map { "$P4ROOT/$_" } dbfiles($port) ) {
        my $handle = file2handle($file);
        msg(1, "Locking $file...\n");
        if ( ! $preview_only ) {
            # Note: Solaris seems to need the "+<" open mode in order to
            # allow LOCK_EX locks to be placed.
            #
            if (! open $handle, '+<', $file) {
                msg(0, "can't open $file: $!\n");
                exit 5;
            }
            if (! flock($handle, LOCK_EX)) {
                msg(0, "can't lock $file: $!\n");
                exit 6;
            }
        }
        $nlocked++;
    }

    if ($nlocked <= 0) {
        die "no tables were locked!";
    }

    msg(0, "$nlocked files in $P4ROOT were ",
        ($preview_only ? 'NOT ' : ''), "locked.\n");
    return;
} # p4d_lock

#
# Unlock database files
#
# Closes (and thereby unlocks) the db.* handles of the given port in the
# reverse of the locking order. Only handles that are actually open are
# closed, so a file that appeared after p4d_lock ran is skipped instead of
# producing a bogus close error.
#
sub p4d_unlock {
    my ($port) = @_;

    # $P4ROOT for the server you wish to checkpoint
    #
    my $P4ROOT = P4ROOT($port);

    foreach my $file ( grep { -f } map { "$P4ROOT/$_" } reverse(dbfiles($port)) ) {
        my $handle = file2handle($file);
        msg(1, "Unlocking $file...\n");
        next if $preview_only;
        next if ! $handle->opened;
        close $handle
            or msg(0, "Error closing $file: $!\n");
    }

    msg(0, "$P4ROOT ",
        ($preview_only ? 'did NOT have to be' : 'was'), " unlocked.\n");
    return;
} # p4d_unlock

#
# Print a message in a canonical format assuming the first arg (requested
# verbose level) is less or equal $verbose
#
sub msg {
    my $verbose_level = shift;

    if ( $verbose >= $verbose_level ) {
        print msg_prefix(), " $Myname>> ", @_;
    }
    return;
} # msg

#
# Return the "YYYY-MM-DD hh:mm:ss +hhmm" prefix used by msg.
# Input: optional epoch time (defaults to now).
# The UTC offset is computed once per run and cached in $HoursFromGMT;
# zones that are not a whole number of hours away from UTC keep their
# minutes (e.g. +0530).
#
sub msg_prefix {
    my $time = shift || time;

    $HoursFromGMT ||= do {
        my $t     = time;
        my $d     = $t - Time::Local::timelocal(gmtime($t));
        my $abs_d = abs $d;
        sprintf '%s%02d%02d',
            ($d < 0 ? '-' : '+'),
            int($abs_d / 3600),
            int(($abs_d % 3600) / 60);
    };

    my ($sec,$min,$hour,$mday,$mon,$year) = localtime($time);
    return sprintf "%4d-%02d-%02d %02d:%02d:%02d $HoursFromGMT",
        $year+1900, $mon+1, $mday, $hour, $min, $sec;
} # msg_prefix

# Run a command, returning its combined stdout/stderr output; terminate
# (croak) on any error.
# With -n the command is only reported, not run, and a placeholder string
# is returned.
#
sub run_cmd {
    my ($cmd) = @_;

    msg(0, ($preview_only ? 'NOT ' : ''), "Running: $cmd\n");
    return "Command was not run\n" if $preview_only;

    open my $CMD, '-|', "$cmd 2>&1"
        or die "can't open \"$cmd 2>&1 |\": $!";
    my $output = '';
    while ( my $line = <$CMD> ) {
        print ": $line";
        $output .= $line;
    }
    close $CMD;    # sets $? to the wait status of the command

    if ( my $sts = $? ) {
        # The low seven bits of the wait status carry the signal number.
        my $sig = $sts & 0x7f;
        $sts = $sts >> 8;
        Carp::croak "'$cmd' exited with " .
            ($sig ? "signal $sig " : '') . "status $sts";
    }

    return $output;
} # run_cmd

#
# return a time stamp (local time, YYYYMMDDhhmmss)
#
sub ts {
    my ($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
    return sprintf("%04d%02d%02d%02d%02d%02d",
        1900+$year, $mon+1, $mday, $hour, $min, $sec);
} # ts

#
# Main function that does everything
#
# Sequence: remove stale snapshots, bump the journal counters, lock all
# servers, snapshot, truncate the journals and unlock all servers, and only
# then do the slow off-line work (journal copy, checkpoint dump) from the
# snapshot. Finally remove the snapshots and prune old backups.
#
sub p4d_snap_checkpoint {
    my @vols = qw(db journal);

    # First, delete the previous snapshots (if they exist.)
    for my $vol ( grep { -d "$SNAPMOUNT/$_/perforce" } @vols ) {
        run_cmd("$REMOVE_SNAPSHOT $vol");
    }

    # increment_counter works with the whole array of ports, so
    # we would not increase one counter if another causes a failure.
    my $old_counter = increment_counter( @ports );

    # Next, we lock down the entire database(s)
    for my $port ( @ports ) {
        p4d_lock($port);
    }

    my $tstamp = ts();

    # Now, we snapshot the database filesystem...
    #
    for my $vol ( @vols ) {
        run_cmd("$CREATE_SNAPSHOT $vol");
    }

    # OK, now we are confident that we have a good snapshot. We can
    # proceed on the assumption that the "off line" operations of
    # copying the journal and "p4d -jd" will work. All we need to do here
    # is to truncate the journal, and put the server back on-line by
    # unlocking it...
    #
    # This is done for ALL ports before any off-line work starts, so that
    # no server stays locked while another server's checkpoint is dumped.
    for my $port ( @ports ) {
        truncate_journal($port);

        # Now we can release the lock...
        #
        p4d_unlock($port);
    }

    # At this point, the system is online for users.

    # Save every journal first: after the truncation above the snapshot
    # holds the only copy of it.
    for my $port ( @ports ) {
        copy_journal($port, $tstamp, $old_counter->{$port});
    }

    for my $port ( @ports ) {
        # The path to the "p4d" server to be used
        #
        my $P4D = P4D($port);

        # Do this checkpoint from the snapshot we just took...
        #
        my $SNAPROOT    = "$SNAPMOUNT/db/perforce/$port";
        my $new_counter = $old_counter->{$port} + 1;

        # The path to the directory where the checkpoint should be written
        #
        my $P4CHECKPOINT = P4CHECKPOINT($port);

        run_cmd("$P4D -r $SNAPROOT -p 127.0.0.1:$port -z -jd " .
                "$P4CHECKPOINT/$tstamp.ckp.$new_counter.gz");
    } # for each port (Perforce instance)

    # don't leave snapshots around
    for my $vol ( @vols ) {
        run_cmd("$REMOVE_SNAPSHOT $vol");
    }

    # Delete extra checkpoint and journal files
    for my $port ( @ports ) {
        delete_extras(P4CHECKPOINT($port), $max_to_keep);
    }

    return;
} # p4d_snap_checkpoint

#
# Truncate Perforce journal
#
# Opens the live journal of the given port for writing, which empties it.
# Exits with status 7 on failure. Nothing is touched with -n.
#
sub truncate_journal {
    my ($port) = @_;

    # The path to the journal file
    #
    my $P4JOURNAL = P4JOURNAL($port);

    if ( ! $preview_only ) {
        # Truncate the journal
        open my $J, '>', $P4JOURNAL or do {
            msg(0, "couldn't truncate \"$P4JOURNAL\": $!\n");
            exit 7;
        };
        close $J
            or msg(0, "Error closing $P4JOURNAL: $!\n");
    }

    msg(0, "$P4JOURNAL was ", ($preview_only ? 'NOT ' : ''), "truncated.\n");
    return;
} # truncate_journal

#
# Print the command-line synopsis
#
sub Usage {
    print "Usage:\n",
        " $Myname -port NNNN [-port NNNN] [-lockcheck] [-max N] [-n] [-v]\n\n",
        "You MUST use at least one -port option (except when you use -help).\n",
        "Options:\n",
        " -n preview: shows what would be done\n",
        " -lockcheck is to testing whether the server is indeed locked\n",
        " -max N keep maximum N backups and delete the oldest (default: keep all)\n",
        " -v increases the verbosity level.\n\n";
    return;
} # Usage

#
# Return the list of db files in the order to be locked
#
# The order depends on the server release, which is taken from the
# "Rev." line of "p4d -V". Dies when that line cannot be parsed and exits
# with status 2 when the release is not one of the known ones.
#
sub dbfiles {
    my ($port) = @_;

    my $p4d = P4D($port);

    # Perforce server version:
    my @p4dV = `$p4d -V`;
    my ($p4dVers) = map { m{^Rev\. \S+/(\d\d\d\d\.\d)/\d+} ? $1 : () } @p4dV;
    if ( ! $p4dVers ) {
        die "Cannot determine the p4d version from p4d -V output:\n",
            @p4dV,
            "<<<-- You may need to adjust the regex in parsing the output above";
    }

    # The locking order of the db.* files, per information supplied by
    # Perforce Software. Releases are compared as exact strings, so that
    # no regex metacharacter can make an unrelated release match.
    my @known_orders = (
        {
            # http://www.perforce.com/perforce/doc.001/schema/index.html
            # http://www.perforce.com/perforce/doc.002/schema/index.html
            versions => [ qw( 2000.1 2000.2 ) ],
            order    => [ qw(
                db.counters db.user db.group db.depot db.domain db.view
                db.review db.have db.integ db.locks db.rev db.revcx
                db.working db.change db.desc db.job db.jobpend db.jobdesc
                db.fix db.fixrev db.boddate db.bodtext db.ixdate db.ixtext
                db.protect db.trigger
            ) ],
        },
        {
            # http://www.perforce.com/perforce/doc.011/schema/index.html
            versions => [ qw( 2001.1 ) ],
            order    => [ qw(
                db.counters db.logger db.user db.group db.depot db.domain
                db.view db.review db.have db.label db.integ db.integed
                db.resolve db.locks db.rev db.revcx db.working db.change
                db.desc db.job db.jobpend db.jobdesc db.fix db.fixrev
                db.boddate db.bodtext db.ixdate db.ixtext db.protect
                db.trigger
            ) ],
        },
        {
            # http://www.perforce.com/perforce/doc.021/schema/index.html
            versions => [ qw( 2002.1 ) ],
            order    => [ qw(
                db.counters db.logger db.user db.group db.depot db.domain
                db.view db.review db.have db.label db.integ db.integed
                db.resolve db.locks db.rev db.revcx db.working db.change
                db.changex db.desc db.job db.jobpend db.jobdesc db.fix
                db.fixrev db.boddate db.bodtext db.ixdate db.ixtext
                db.protect db.trigger
            ) ],
        },
        {
            # http://www.perforce.com/perforce/doc.022/schema/index.html
            # http://www.perforce.com/perforce/doc.031/schema/index.html
            versions => [ qw( 2002.2 2003.1 ) ],
            order    => [ qw(
                db.counters db.logger db.user db.group db.depot db.domain
                db.view db.review db.have db.label db.integ db.integed
                db.resolve db.locks db.rev db.revcx db.working db.change
                db.changex db.desc db.job db.jobpend db.jobdesc db.fix
                db.fixrev db.boddate db.bodtext db.ixdate db.ixtext
                db.protect db.trigger db.message
            ) ],
        },
        {
            # http://www.perforce.com/perforce/doc.032/schema/index.html
            # http://www.perforce.com/perforce/doc.041/schema/index.html
            versions => [ qw( 2003.2 2004.1 ) ],
            order    => [ qw(
                db.counters db.logger db.user db.group db.depot db.domain
                db.view db.review db.have db.label db.integ db.integed
                db.resolve db.locks db.rev db.revcx db.working db.change
                db.changex db.desc db.job db.jobpend db.jobdesc db.fix
                db.fixrev db.boddate db.bodtext db.ixdate db.ixtext
                db.protect db.trigger db.message db.monitor
            ) ],
        },
        {
            # http://www.perforce.com/perforce/doc.042/schema/index.html
            versions => [ qw( 2004.2 ) ],
            order    => [ qw(
                db.counters db.logger db.user db.group db.depot db.domain
                db.view db.review db.have db.label db.integ db.integed
                db.resolve db.locks db.rev db.revcx db.revpx db.working
                db.traits db.trigger db.change db.changex db.desc db.job
                db.jobpend db.jobdesc db.fix db.fixrev db.boddate
                db.bodtext db.ixdate db.ixtext db.protect db.message
                db.monitor
            ) ],
        },
        {
            # http://www.perforce.com/perforce/doc.072/schema/index.html
            # http://www.perforce.com/perforce/doc.073/schema/index.html
            versions => [ qw( 2007.2 2007.3 ) ],
            order    => [ qw(
                db.counters db.logger db.user db.group db.depot db.domain
                db.view db.review db.integ db.integed db.resolve db.have
                db.label db.locks db.archive db.archmap db.rev db.revcx
                db.revdx db.revhx db.revpx db.revsx db.working db.traits
                db.trigger db.change db.changex db.desc db.job db.jobpend
                db.jobdesc db.fix db.fixrev db.boddate db.bodtext
                db.ixdate db.ixtext db.protect db.message db.monitor
            ) ],
        },
    );

    my @order;
    for my $known ( @known_orders ) {
        if ( grep { $_ eq $p4dVers } @{ $known->{versions} } ) {
            @order = @{ $known->{order} };
            last;
        }
    }

    if ( ! @order ) {
        print <<EOM;
Unrecognized Perforce server version "$p4dVers".
You will need to confirm the db.* locking order for this version
of the Perforce server, and modify this script to recognize it
before proceeding.
See, e.g.,
http://www.perforce.com/perforce/doc.072/schema/index.html
http://www.perforce.com/perforce/doc.073/schema/index.html
For older p4d versions see
//guest/stephen_vance/utils/snap_checkpoint/snap_checkpoint#5
from http://public.perforce.com/guest/stephen_vance
EOM
        exit 2;
    }

    return @order;
} # dbfiles

#
# Delete old checkpoint and journal files according to the -max option.
#
# Input: directory holding the backups, maximum number of files to keep
# per kind ('ckp' and 'jnl'). A maximum of 0 (or less) keeps everything.
# File names start with a 14-character time stamp, so a plain string sort
# puts the oldest files first.
#
sub delete_extras {
    my ($dir, $max_to_keep) = @_;

    return if $max_to_keep <= 0;

    msg(0, "Keeping up to $max_to_keep checkpoints and journals in $dir\n");

    # Time stamp part of the file name: "2" followed by 13 more characters.
    my $stamp_glob = '2' . ('?' x 13);

    for my $suffix ( 'ckp', 'jnl' ) {
        my @existing = sort glob "$dir/$stamp_glob.$suffix.*";
        my $excess   = scalar(@existing) - $max_to_keep;
        next if $excess <= 0;

        my @remove = @existing[0 .. $excess-1];
        if ( $preview_only ) {
            msg(0, "NOT Removing: @remove\n");
        }
        else {
            msg(0, "Removing: @remove\n");
            # unlink returns the number of files actually removed, so a
            # partial failure is reported as well.
            my $removed = unlink @remove;
            if ( $removed != scalar @remove ) {
                warn "Cannot remove old files: $!\n";
            }
        }
    }

    return;
} # delete_extras
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#3 | 7246 | Michael Mirman |
multiple improvements, including (but not limited to) - added option -log to log all output in a file; - added option -mail to send the summary of the run to the given address(es); - support multiple standbys; - sync versioned files |
||
#2 | 6341 | Michael Mirman |
This version creates snapshot using Veritas file system, then unlocks the database and proceeds with checkpointing. This is a copy of the TMW production version //sandbox/batscm/triggers/admin/snap_checkpoint#6, which keeps the database locked for about 8 seconds and produces a zipped up checkpoint file ~1G. |
||
#1 | 6340 | Michael Mirman |
copy of //guest/stephen_vance/utils/snap_checkpoint/snap_checkpoint#5 - starting point working on Veritas file system snapshots |
||
//guest/stephen_vance/utils/snap_checkpoint/snap_checkpoint | |||||
#5 | 4852 | Stephen Vance | Parameterize things relative to SNAPMOUNT and simplify script accordingly. | ||
#4 | 4851 | Stephen Vance | Finish parameterizing snapshot mount point. | ||
#3 | 4850 | Stephen Vance |
Fix hardcoded reference to volume. Parameterize snapshot name. Start to parameterize snapshot mount point. |
||
#2 | 4849 | Stephen Vance |
Updated locking order for newer versions. Fixed some typos. Added -f to the counter command so that "journal" can be used again, but this introduces a backward compatibility issue. |
||
#1 | 4848 | Stephen Vance | Branch snap_checkpoint to update and customize. | ||
//guest/richard_geiger/utils/snap_checkpoint/snap_checkpoint | |||||
#9 | 1544 | Richard Geiger |
Update to reflect changes in p4d 2002.1: a) The change in the locking order, due to db.changex b) The fix for job006497 |
||
#8 | 942 | Richard Geiger | Use $VOLUME, too! | ||
#7 | 941 | Richard Geiger | Use the $GZIP variable instead of the literal path. | ||
#6 | 920 | Richard Geiger |
add 2001.1 locking order; correct open mode ("+<") for Solaris; some notes; and the "lockcheck" option. |
||
#5 | 437 | Richard Geiger |
Hack to handle r2000.1's newfound reluctance to do "p4 counter journal NNNN". |
||
#4 | 248 | Richard Geiger |
The main change here is to move the copying of the journal file to be done from the checkpoint, outside of the region where the server is locked. This can make the whole thing go much faster when the journal is sizable enough that the copy takes a significant amount of time to happen. |
||
#3 | 246 | Richard Geiger |
Update the script such that we use, verbatim, the p4d_snap_checkpoint function from "p4d_admin", which is the version we're finally really deploying. This should make it much easier to maintain in the future. Also update the html doc to match. |
||
#2 | 239 | Richard Geiger |
- Use LOCK_SH when locking the database - Use ALL CAPS when shunning all responsibility for the thing (Warranty disclaimer) |
||
#1 | 238 | Richard Geiger |
Sample script illustrating how to use Data ONTAP snapshots for a "fast checkpoint", plus accompanying notes |